/* ** This file contains all sources (including headers) to the LEMON ** LALR(1) parser generator. The sources have been combined into a ** single file to make it easy to include LEMON in the source tree ** and Makefile of another program. ** ** The author of this program disclaims copyright. */ #include #include #include #include #include #include #define ISSPACE(X) isspace((unsigned char)(X)) #define ISDIGIT(X) isdigit((unsigned char)(X)) #define ISALNUM(X) isalnum((unsigned char)(X)) #define ISALPHA(X) isalpha((unsigned char)(X)) #define ISUPPER(X) isupper((unsigned char)(X)) #define ISLOWER(X) islower((unsigned char)(X)) #ifndef __WIN32__ # if defined(_WIN32) || defined(WIN32) # define __WIN32__ # endif #endif #ifdef __WIN32__ #ifdef __cplusplus extern "C" { #endif extern int access(const char *path, int mode); #ifdef __cplusplus } #endif #else #include #endif /* #define PRIVATE static */ #define PRIVATE #ifdef TEST #define MAXRHS 5 /* Set low to exercise exception code */ #else #define MAXRHS 1000 #endif extern void memory_error(); static int showPrecedenceConflict = 0; static char *msort(char*,char**,int(*)(const char*,const char*)); /* ** Compilers are getting increasingly pedantic about type conversions ** as C evolves ever closer to Ada.... To work around the latest problems ** we have to define the following variant of strlen(). */ #define lemonStrlen(X) ((int)strlen(X)) /* ** Header on the linked list of memory allocations. */ typedef struct MemChunk MemChunk; struct MemChunk { MemChunk *pNext; size_t sz; /* Actually memory follows */ }; /* ** Global linked list of all memory allocations. */ static MemChunk *memChunkList = 0; /* ** Wrappers around malloc(), calloc(), realloc() and free(). ** ** All memory allocations are kept on a doubly-linked list. The ** lemon_free_all() function can be called prior to exit to clean ** up any memory leaks. ** ** This is not necessary. But compilers and getting increasingly ** fussy about memory leaks, even in command-line programs like Lemon ** where they do not matter. So this code is provided to hush the ** warnings. */ static void *lemon_malloc(size_t nByte){ MemChunk *p; if( nByte<0 ) return 0; p = malloc( nByte + sizeof(MemChunk) ); if( p==0 ){ fprintf(stderr, "Out of memory. Failed to allocate %lld bytes.\n", (long long int)nByte); exit(1); } p->pNext = memChunkList; p->sz = nByte; memChunkList = p; return (void*)&p[1]; } static void *lemon_calloc(size_t nElem, size_t sz){ void *p = lemon_malloc(nElem*sz); memset(p, 0, nElem*sz); return p; } static void lemon_free(void *pOld){ if( pOld ){ MemChunk *p = (MemChunk*)pOld; p--; memset(pOld, 0, p->sz); } } static void *lemon_realloc(void *pOld, size_t nNew){ void *pNew; MemChunk *p; if( pOld==0 ) return lemon_malloc(nNew); p = (MemChunk*)pOld; p--; if( p->sz>=nNew ) return pOld; pNew = lemon_malloc( nNew ); memcpy(pNew, pOld, p->sz); return pNew; } /* Free all outstanding memory allocations. ** Do this right before exiting. */ static void lemon_free_all(void){ while( memChunkList ){ MemChunk *pNext = memChunkList->pNext; free( memChunkList ); memChunkList = pNext; } } /* ** Compilers are starting to complain about the use of sprintf() and strcpy(), ** saying they are unsafe. So we define our own versions of those routines too. ** ** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and ** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). ** The third is a helper routine for vsnprintf() that adds texts to the end of a ** buffer, making sure the buffer is always zero-terminated. ** ** The string formatter is a minimal subset of stdlib sprintf() supporting only ** a few simply conversions: ** ** %d ** %s ** %.*s ** */ static void lemon_addtext( char *zBuf, /* The buffer to which text is added */ int *pnUsed, /* Slots of the buffer used so far */ const char *zIn, /* Text to add */ int nIn, /* Bytes of text to add. -1 to use strlen() */ int iWidth /* Field width. Negative to left justify */ ){ if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){} while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; } if( nIn==0 ) return; memcpy(&zBuf[*pnUsed], zIn, nIn); *pnUsed += nIn; while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; } zBuf[*pnUsed] = 0; } static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){ int i, j, k, c; int nUsed = 0; const char *z; char zTemp[50]; str[0] = 0; for(i=j=0; (c = zFormat[i])!=0; i++){ if( c=='%' ){ int iWidth = 0; lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); c = zFormat[++i]; if( ISDIGIT(c) || (c=='-' && ISDIGIT(zFormat[i+1])) ){ if( c=='-' ) i++; while( ISDIGIT(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0'; if( c=='-' ) iWidth = -iWidth; c = zFormat[i]; } if( c=='d' ){ int v = va_arg(ap, int); if( v<0 ){ lemon_addtext(str, &nUsed, "-", 1, iWidth); v = -v; }else if( v==0 ){ lemon_addtext(str, &nUsed, "0", 1, iWidth); } k = 0; while( v>0 ){ k++; zTemp[sizeof(zTemp)-k] = (v%10) + '0'; v /= 10; } lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); }else if( c=='s' ){ z = va_arg(ap, const char*); lemon_addtext(str, &nUsed, z, -1, iWidth); }else if( c=='.' && memcmp(&zFormat[i], ".*s", 3)==0 ){ i += 2; k = va_arg(ap, int); z = va_arg(ap, const char*); lemon_addtext(str, &nUsed, z, k, iWidth); }else if( c=='%' ){ lemon_addtext(str, &nUsed, "%", 1, 0); }else{ fprintf(stderr, "illegal format\n"); exit(1); } j = i+1; } } lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); return nUsed; } static int lemon_sprintf(char *str, const char *format, ...){ va_list ap; int rc; va_start(ap, format); rc = lemon_vsprintf(str, format, ap); va_end(ap); return rc; } static void lemon_strcpy(char *dest, const char *src){ while( (*(dest++) = *(src++))!=0 ){} } static void lemon_strcat(char *dest, const char *src){ while( *dest ) dest++; lemon_strcpy(dest, src); } /* a few forward declarations... */ struct rule; struct lemon; struct action; static struct action *Action_new(void); static struct action *Action_sort(struct action *); /********** From the file "build.h" ************************************/ void FindRulePrecedences(struct lemon*); void FindFirstSets(struct lemon*); void FindStates(struct lemon*); void FindLinks(struct lemon*); void FindFollowSets(struct lemon*); void FindActions(struct lemon*); /********* From the file "configlist.h" *********************************/ void Configlist_init(void); struct config *Configlist_add(struct rule *, int); struct config *Configlist_addbasis(struct rule *, int); void Configlist_closure(struct lemon *); void Configlist_sort(void); void Configlist_sortbasis(void); struct config *Configlist_return(void); struct config *Configlist_basis(void); void Configlist_eat(struct config *); void Configlist_reset(void); /********* From the file "error.h" ***************************************/ void ErrorMsg(const char *, int,const char *, ...); /****** From the file "option.h" ******************************************/ enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; struct s_options { enum option_type type; const char *label; char *arg; const char *message; }; int OptInit(char**,struct s_options*,FILE*); int OptNArgs(void); char *OptArg(int); void OptPrint(void); /******** From the file "parse.h" *****************************************/ void Parse(struct lemon *lemp); /********* From the file "plink.h" ***************************************/ struct plink *Plink_new(void); void Plink_add(struct plink **, struct config *); void Plink_copy(struct plink **, struct plink *); void Plink_delete(struct plink *); /********** From the file "report.h" *************************************/ void Reprint(struct lemon *); void ReportOutput(struct lemon *); void ReportTable(struct lemon *, int, int); void ReportHeader(struct lemon *); void CompressTables(struct lemon *); void ResortStates(struct lemon *); /********** From the file "set.h" ****************************************/ void SetSize(int); /* All sets will be of size N */ char *SetNew(void); /* A new set for element 0..N */ void SetFree(char*); /* Deallocate a set */ int SetAdd(char*,int); /* Add element to a set */ int SetUnion(char *,char *); /* A <- A U B, thru element N */ #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ /********** From the file "struct.h" *************************************/ /* ** Principal data structures for the LEMON parser generator. */ typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; /* Symbols (terminals and nonterminals) of the grammar are stored ** in the following: */ enum symbol_type { TERMINAL, NONTERMINAL, MULTITERMINAL }; enum e_assoc { LEFT, RIGHT, NONE, UNK }; struct symbol { const char *name; /* Name of the symbol */ int index; /* Index number for this symbol */ enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ struct rule *rule; /* Linked list of rules of this (if an NT) */ struct symbol *fallback; /* fallback token in case this token doesn't parse */ int prec; /* Precedence if defined (-1 otherwise) */ enum e_assoc assoc; /* Associativity if precedence is defined */ char *firstset; /* First-set for all rules of this symbol */ Boolean lambda; /* True if NT and can generate an empty string */ int useCnt; /* Number of times used */ char *datatype; /* The data type of information held by this ** object. Only used if type==NONTERMINAL */ int dtnum; /* The data type number. In the parser, the value ** stack is a union. The .yy%d element of this ** union is the correct data type for this object */ int bContent; /* True if this symbol ever carries content - if ** it is ever more than just syntax */ /* The following fields are used by MULTITERMINALs only */ int nsubsym; /* Number of constituent symbols in the MULTI */ struct symbol **subsym; /* Array of constituent symbols */ }; /* Each production rule in the grammar is stored in the following ** structure. */ struct rule { struct symbol *lhs; /* Left-hand side of the rule */ const char *lhsalias; /* Alias for the LHS (NULL if none) */ int lhsStart; /* True if left-hand side is the start symbol */ int ruleline; /* Line number for the rule */ int nrhs; /* Number of RHS symbols */ struct symbol **rhs; /* The RHS symbols */ const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ int line; /* Line number at which code begins */ const char *code; /* The code executed when this rule is reduced */ const char *codePrefix; /* Setup code before code[] above */ const char *codeSuffix; /* Breakdown code after code[] above */ struct symbol *precsym; /* Precedence symbol for this rule */ int index; /* An index number for this rule */ int iRule; /* Rule number as used in the generated tables */ Boolean noCode; /* True if this rule has no associated C code */ Boolean codeEmitted; /* True if the code has been emitted already */ Boolean canReduce; /* True if this rule is ever reduced */ Boolean doesReduce; /* Reduce actions occur after optimization */ Boolean neverReduce; /* Reduce is theoretically possible, but prevented ** by actions or other outside implementation */ struct rule *nextlhs; /* Next rule with the same LHS */ struct rule *next; /* Next rule in the global list */ }; /* A configuration is a production rule of the grammar together with ** a mark (dot) showing how much of that rule has been processed so far. ** Configurations also contain a follow-set which is a list of terminal ** symbols which are allowed to immediately follow the end of the rule. ** Every configuration is recorded as an instance of the following: */ enum cfgstatus { COMPLETE, INCOMPLETE }; struct config { struct rule *rp; /* The rule upon which the configuration is based */ int dot; /* The parse point */ char *fws; /* Follow-set for this configuration only */ struct plink *fplp; /* Follow-set forward propagation links */ struct plink *bplp; /* Follow-set backwards propagation links */ struct state *stp; /* Pointer to state which contains this */ enum cfgstatus status; /* used during followset and shift computations */ struct config *next; /* Next configuration in the state */ struct config *bp; /* The next basis configuration */ }; enum e_action { SHIFT, ACCEPT, REDUCE, ERROR, SSCONFLICT, /* A shift/shift conflict */ SRCONFLICT, /* Was a reduce, but part of a conflict */ RRCONFLICT, /* Was a reduce, but part of a conflict */ SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ NOT_USED, /* Deleted by compression */ SHIFTREDUCE /* Shift first, then reduce */ }; /* Every shift or reduce operation is stored as one of the following */ struct action { struct symbol *sp; /* The look-ahead symbol */ enum e_action type; union { struct state *stp; /* The new state, if a shift */ struct rule *rp; /* The rule, if a reduce */ } x; struct symbol *spOpt; /* SHIFTREDUCE optimization to this symbol */ struct action *next; /* Next action for this state */ }; /* Each state of the generated parser's finite state machine ** is encoded as an instance of the following structure. */ struct state { struct config *bp; /* The basis configurations for this state */ struct config *cfp; /* All configurations in this set */ int statenum; /* Sequential number for this state */ struct action *ap; /* List of actions for this state */ int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ int iDfltReduce; /* Default action is to REDUCE by this rule */ struct rule *pDfltReduce;/* The default REDUCE rule. */ int autoReduce; /* True if this is an auto-reduce state */ }; #define NO_OFFSET (-2147483647) /* A followset propagation link indicates that the contents of one ** configuration followset should be propagated to another whenever ** the first changes. */ struct plink { struct config *cfp; /* The configuration to which linked */ struct plink *next; /* The next propagate link */ }; /* The state vector for the entire parser generator is recorded as ** follows. (LEMON uses no global variables and makes little use of ** static variables. Fields in the following structure can be thought ** of as begin global variables in the program.) */ struct lemon { struct state **sorted; /* Table of states sorted by state number */ struct rule *rule; /* List of all rules */ struct rule *startRule; /* First rule */ int nstate; /* Number of states */ int nxstate; /* nstate with tail degenerate states removed */ int nrule; /* Number of rules */ int nruleWithAction; /* Number of rules with actions */ int nsymbol; /* Number of terminal and nonterminal symbols */ int nterminal; /* Number of terminal symbols */ int minShiftReduce; /* Minimum shift-reduce action value */ int errAction; /* Error action value */ int accAction; /* Accept action value */ int noAction; /* No-op action value */ int minReduce; /* Minimum reduce action */ int maxAction; /* Maximum action value of any kind */ struct symbol **symbols; /* Sorted array of pointers to symbols */ int errorcnt; /* Number of errors */ struct symbol *errsym; /* The error symbol */ struct symbol *wildcard; /* Token that matches anything */ char *name; /* Name of the generated parser */ char *arg; /* Declaration of the 3rd argument to parser */ char *ctx; /* Declaration of 2nd argument to constructor */ char *tokentype; /* Type of terminal symbols in the parser stack */ char *vartype; /* The default type of non-terminal symbols */ char *start; /* Name of the start symbol for the grammar */ char *stacksize; /* Size of the parser stack */ char *include; /* Code to put at the start of the C file */ char *error; /* Code to execute when an error is seen */ char *overflow; /* Code to execute on a stack overflow */ char *failure; /* Code to execute on parser failure */ char *accept; /* Code to execute when the parser excepts */ char *extracode; /* Code appended to the generated file */ char *filename; /* Name of the input file */ char *outname; /* Name of the current output file */ char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ int nactiontab; /* Number of entries in the yy_action[] table */ int nlookaheadtab; /* Number of entries in yy_lookahead[] */ int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ int printPreprocessed; /* Show preprocessor output on stdout */ int has_fallback; /* True if any %fallback is seen in the grammar */ int nolinenosflag; /* True if #line statements should not be printed */ int argc; /* Number of command-line arguments */ char **argv; /* Command-line arguments */ }; #define MemoryCheck(X) if((X)==0){ \ extern void memory_error(); \ memory_error(); \ } /**************** From the file "table.h" *********************************/ /* ** All code in this file has been automatically generated ** from a specification in the file ** "table.q" ** by the associative array code building program "aagen". ** Do not edit this file! Instead, edit the specification ** file, then rerun aagen. */ /* ** Code for processing tables in the LEMON parser generator. */ /* Routines for handling a strings */ const char *Strsafe(const char *); void Strsafe_init(void); int Strsafe_insert(const char *); const char *Strsafe_find(const char *); /* Routines for handling symbols of the grammar */ struct symbol *Symbol_new(const char *); int Symbolcmpp(const void *, const void *); void Symbol_init(void); int Symbol_insert(struct symbol *, const char *); struct symbol *Symbol_find(const char *); int Symbol_count(void); struct symbol **Symbol_arrayof(void); /* Routines to manage the state table */ int Configcmp(const char *, const char *); struct state *State_new(void); void State_init(void); int State_insert(struct state *, struct config *); struct state *State_find(struct config *); struct state **State_arrayof(void); /* Routines used for efficiency in Configlist_add */ void Configtable_init(void); int Configtable_insert(struct config *); struct config *Configtable_find(struct config *); void Configtable_clear(int(*)(struct config *)); /****************** From the file "action.c" *******************************/ /* ** Routines processing parser actions in the LEMON parser generator. */ /* Allocate a new parser action */ static struct action *Action_new(void){ static struct action *actionfreelist = 0; struct action *newaction; if( actionfreelist==0 ){ int i; int amt = 100; actionfreelist = (struct action *)lemon_calloc(amt, sizeof(struct action)); if( actionfreelist==0 ){ fprintf(stderr,"Unable to allocate memory for a new parser action."); exit(1); } for(i=0; inext; return newaction; } /* Compare two actions for sorting purposes. Return negative, zero, or ** positive if the first action is less than, equal to, or greater than ** the first */ static int actioncmp( struct action *ap1, struct action *ap2 ){ int rc; rc = ap1->sp->index - ap2->sp->index; if( rc==0 ){ rc = (int)ap1->type - (int)ap2->type; } if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){ rc = ap1->x.rp->index - ap2->x.rp->index; } if( rc==0 ){ rc = (int) (ap2 - ap1); } return rc; } /* Sort parser actions */ static struct action *Action_sort( struct action *ap ){ ap = (struct action *)msort((char *)ap,(char **)&ap->next, (int(*)(const char*,const char*))actioncmp); return ap; } void Action_add( struct action **app, enum e_action type, struct symbol *sp, char *arg ){ struct action *newaction; newaction = Action_new(); newaction->next = *app; *app = newaction; newaction->type = type; newaction->sp = sp; newaction->spOpt = 0; if( type==SHIFT ){ newaction->x.stp = (struct state *)arg; }else{ newaction->x.rp = (struct rule *)arg; } } /********************** New code to implement the "acttab" module ***********/ /* ** This module implements routines use to construct the yy_action[] table. */ /* ** The state of the yy_action table under construction is an instance of ** the following structure. ** ** The yy_action table maps the pair (state_number, lookahead) into an ** action_number. The table is an array of integers pairs. The state_number ** determines an initial offset into the yy_action array. The lookahead ** value is then added to this initial offset to get an index X into the ** yy_action array. If the aAction[X].lookahead equals the value of the ** of the lookahead input, then the value of the action_number output is ** aAction[X].action. If the lookaheads do not match then the ** default action for the state_number is returned. ** ** All actions associated with a single state_number are first entered ** into aLookahead[] using multiple calls to acttab_action(). Then the ** actions for that single state_number are placed into the aAction[] ** array with a single call to acttab_insert(). The acttab_insert() call ** also resets the aLookahead[] array in preparation for the next ** state number. */ struct lookahead_action { int lookahead; /* Value of the lookahead token */ int action; /* Action to take on the given lookahead */ }; typedef struct acttab acttab; struct acttab { int nAction; /* Number of used slots in aAction[] */ int nActionAlloc; /* Slots allocated for aAction[] */ struct lookahead_action *aAction, /* The yy_action[] table under construction */ *aLookahead; /* A single new transaction set */ int mnLookahead; /* Minimum aLookahead[].lookahead */ int mnAction; /* Action associated with mnLookahead */ int mxLookahead; /* Maximum aLookahead[].lookahead */ int nLookahead; /* Used slots in aLookahead[] */ int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ int nterminal; /* Number of terminal symbols */ int nsymbol; /* total number of symbols */ }; /* Return the number of entries in the yy_action table */ #define acttab_lookahead_size(X) ((X)->nAction) /* The value for the N-th entry in yy_action */ #define acttab_yyaction(X,N) ((X)->aAction[N].action) /* The value for the N-th entry in yy_lookahead */ #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) /* Free all memory associated with the given acttab */ void acttab_free(acttab *p){ lemon_free( p->aAction ); lemon_free( p->aLookahead ); lemon_free( p ); } /* Allocate a new acttab structure */ acttab *acttab_alloc(int nsymbol, int nterminal){ acttab *p = (acttab *) lemon_calloc( 1, sizeof(*p) ); if( p==0 ){ fprintf(stderr,"Unable to allocate memory for a new acttab."); exit(1); } memset(p, 0, sizeof(*p)); p->nsymbol = nsymbol; p->nterminal = nterminal; return p; } /* Add a new action to the current transaction set. ** ** This routine is called once for each lookahead for a particular ** state. */ void acttab_action(acttab *p, int lookahead, int action){ if( p->nLookahead>=p->nLookaheadAlloc ){ p->nLookaheadAlloc += 25; p->aLookahead = (struct lookahead_action *) lemon_realloc( p->aLookahead, sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); if( p->aLookahead==0 ){ fprintf(stderr,"malloc failed\n"); exit(1); } } if( p->nLookahead==0 ){ p->mxLookahead = lookahead; p->mnLookahead = lookahead; p->mnAction = action; }else{ if( p->mxLookaheadmxLookahead = lookahead; if( p->mnLookahead>lookahead ){ p->mnLookahead = lookahead; p->mnAction = action; } } p->aLookahead[p->nLookahead].lookahead = lookahead; p->aLookahead[p->nLookahead].action = action; p->nLookahead++; } /* ** Add the transaction set built up with prior calls to acttab_action() ** into the current action table. Then reset the transaction set back ** to an empty set in preparation for a new round of acttab_action() calls. ** ** Return the offset into the action table of the new transaction. ** ** If the makeItSafe parameter is true, then the offset is chosen so that ** it is impossible to overread the yy_lookaside[] table regardless of ** the lookaside token. This is done for the terminal symbols, as they ** come from external inputs and can contain syntax errors. When makeItSafe ** is false, there is more flexibility in selecting offsets, resulting in ** a smaller table. For non-terminal symbols, which are never syntax errors, ** makeItSafe can be false. */ int acttab_insert(acttab *p, int makeItSafe){ int i, j, k, n, end; assert( p->nLookahead>0 ); /* Make sure we have enough space to hold the expanded action table ** in the worst case. The worst case occurs if the transaction set ** must be appended to the current action table */ n = p->nsymbol + 1; if( p->nAction + n >= p->nActionAlloc ){ int oldAlloc = p->nActionAlloc; p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; p->aAction = (struct lookahead_action *) lemon_realloc( p->aAction, sizeof(p->aAction[0])*p->nActionAlloc); if( p->aAction==0 ){ fprintf(stderr,"malloc failed\n"); exit(1); } for(i=oldAlloc; inActionAlloc; i++){ p->aAction[i].lookahead = -1; p->aAction[i].action = -1; } } /* Scan the existing action table looking for an offset that is a ** duplicate of the current transaction set. Fall out of the loop ** if and when the duplicate is found. ** ** i is the index in p->aAction[] where p->mnLookahead is inserted. */ end = makeItSafe ? p->mnLookahead : 0; for(i=p->nAction-1; i>=end; i--){ if( p->aAction[i].lookahead==p->mnLookahead ){ /* All lookaheads and actions in the aLookahead[] transaction ** must match against the candidate aAction[i] entry. */ if( p->aAction[i].action!=p->mnAction ) continue; for(j=0; jnLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; if( k<0 || k>=p->nAction ) break; if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; if( p->aLookahead[j].action!=p->aAction[k].action ) break; } if( jnLookahead ) continue; /* No possible lookahead value that is not in the aLookahead[] ** transaction is allowed to match aAction[i] */ n = 0; for(j=0; jnAction; j++){ if( p->aAction[j].lookahead<0 ) continue; if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; } if( n==p->nLookahead ){ break; /* An exact match is found at offset i */ } } } /* If no existing offsets exactly match the current transaction, find an ** an empty offset in the aAction[] table in which we can add the ** aLookahead[] transaction. */ if( inAction, which means the ** transaction will be appended. */ i = makeItSafe ? p->mnLookahead : 0; for(; inActionAlloc - p->mxLookahead; i++){ if( p->aAction[i].lookahead<0 ){ for(j=0; jnLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; if( k<0 ) break; if( p->aAction[k].lookahead>=0 ) break; } if( jnLookahead ) continue; for(j=0; jnAction; j++){ if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; } if( j==p->nAction ){ break; /* Fits in empty slots */ } } } } /* Insert transaction set at index i. */ #if 0 printf("Acttab:"); for(j=0; jnLookahead; j++){ printf(" %d", p->aLookahead[j].lookahead); } printf(" inserted at %d\n", i); #endif for(j=0; jnLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; p->aAction[k] = p->aLookahead[j]; if( k>=p->nAction ) p->nAction = k+1; } if( makeItSafe && i+p->nterminal>=p->nAction ) p->nAction = i+p->nterminal+1; p->nLookahead = 0; /* Return the offset that is added to the lookahead in order to get the ** index into yy_action of the action */ return i - p->mnLookahead; } /* ** Return the size of the action table without the trailing syntax error ** entries. */ int acttab_action_size(acttab *p){ int n = p->nAction; while( n>0 && p->aAction[n-1].lookahead<0 ){ n--; } return n; } /********************** From the file "build.c" *****************************/ /* ** Routines to construction the finite state machine for the LEMON ** parser generator. */ /* Find a precedence symbol of every rule in the grammar. ** ** Those rules which have a precedence symbol coded in the input ** grammar using the "[symbol]" construct will already have the ** rp->precsym field filled. Other rules take as their precedence ** symbol the first RHS symbol with a defined precedence. If there ** are not RHS symbols with a defined precedence, the precedence ** symbol field is left blank. */ void FindRulePrecedences(struct lemon *xp) { struct rule *rp; for(rp=xp->rule; rp; rp=rp->next){ if( rp->precsym==0 ){ int i, j; for(i=0; inrhs && rp->precsym==0; i++){ struct symbol *sp = rp->rhs[i]; if( sp->type==MULTITERMINAL ){ for(j=0; jnsubsym; j++){ if( sp->subsym[j]->prec>=0 ){ rp->precsym = sp->subsym[j]; break; } } }else if( sp->prec>=0 ){ rp->precsym = rp->rhs[i]; } } } } return; } /* Find all nonterminals which will generate the empty string. ** Then go back and compute the first sets of every nonterminal. ** The first set is the set of all terminal symbols which can begin ** a string generated by that nonterminal. */ void FindFirstSets(struct lemon *lemp) { int i, j; struct rule *rp; int progress; for(i=0; insymbol; i++){ lemp->symbols[i]->lambda = LEMON_FALSE; } for(i=lemp->nterminal; insymbol; i++){ lemp->symbols[i]->firstset = SetNew(); } /* First compute all lambdas */ do{ progress = 0; for(rp=lemp->rule; rp; rp=rp->next){ if( rp->lhs->lambda ) continue; for(i=0; inrhs; i++){ struct symbol *sp = rp->rhs[i]; assert( sp->type==NONTERMINAL || sp->lambda==LEMON_FALSE ); if( sp->lambda==LEMON_FALSE ) break; } if( i==rp->nrhs ){ rp->lhs->lambda = LEMON_TRUE; progress = 1; } } }while( progress ); /* Now compute all first sets */ do{ struct symbol *s1, *s2; progress = 0; for(rp=lemp->rule; rp; rp=rp->next){ s1 = rp->lhs; for(i=0; inrhs; i++){ s2 = rp->rhs[i]; if( s2->type==TERMINAL ){ progress += SetAdd(s1->firstset,s2->index); break; }else if( s2->type==MULTITERMINAL ){ for(j=0; jnsubsym; j++){ progress += SetAdd(s1->firstset,s2->subsym[j]->index); } break; }else if( s1==s2 ){ if( s1->lambda==LEMON_FALSE ) break; }else{ progress += SetUnion(s1->firstset,s2->firstset); if( s2->lambda==LEMON_FALSE ) break; } } } }while( progress ); return; } /* Compute all LR(0) states for the grammar. Links ** are added to between some states so that the LR(1) follow sets ** can be computed later. */ PRIVATE struct state *getstate(struct lemon *); /* forward reference */ void FindStates(struct lemon *lemp) { struct symbol *sp; struct rule *rp; Configlist_init(); /* Find the start symbol */ if( lemp->start ){ sp = Symbol_find(lemp->start); if( sp==0 ){ ErrorMsg(lemp->filename,0, "The specified start symbol \"%s\" is not " "in a nonterminal of the grammar. \"%s\" will be used as the start " "symbol instead.",lemp->start,lemp->startRule->lhs->name); lemp->errorcnt++; sp = lemp->startRule->lhs; } }else if( lemp->startRule ){ sp = lemp->startRule->lhs; }else{ ErrorMsg(lemp->filename,0,"Internal error - no start rule\n"); exit(1); } /* Make sure the start symbol doesn't occur on the right-hand side of ** any rule. Report an error if it does. (YACC would generate a new ** start symbol in this case.) */ for(rp=lemp->rule; rp; rp=rp->next){ int i; for(i=0; inrhs; i++){ if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ ErrorMsg(lemp->filename,0, "The start symbol \"%s\" occurs on the " "right-hand side of a rule. This will result in a parser which " "does not work properly.",sp->name); lemp->errorcnt++; } } } /* The basis configuration set for the first state ** is all rules which have the start symbol as their ** left-hand side */ for(rp=sp->rule; rp; rp=rp->nextlhs){ struct config *newcfp; rp->lhsStart = 1; newcfp = Configlist_addbasis(rp,0); SetAdd(newcfp->fws,0); } /* Compute the first state. All other states will be ** computed automatically during the computation of the first one. ** The returned pointer to the first state is not used. */ (void)getstate(lemp); return; } /* Return a pointer to a state which is described by the configuration ** list which has been built from calls to Configlist_add. */ PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ PRIVATE struct state *getstate(struct lemon *lemp) { struct config *cfp, *bp; struct state *stp; /* Extract the sorted basis of the new state. The basis was constructed ** by prior calls to "Configlist_addbasis()". */ Configlist_sortbasis(); bp = Configlist_basis(); /* Get a state with the same basis */ stp = State_find(bp); if( stp ){ /* A state with the same basis already exists! Copy all the follow-set ** propagation links from the state under construction into the ** preexisting state, then return a pointer to the preexisting state */ struct config *x, *y; for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ Plink_copy(&y->bplp,x->bplp); Plink_delete(x->fplp); x->fplp = x->bplp = 0; } cfp = Configlist_return(); Configlist_eat(cfp); }else{ /* This really is a new state. Construct all the details */ Configlist_closure(lemp); /* Compute the configuration closure */ Configlist_sort(); /* Sort the configuration closure */ cfp = Configlist_return(); /* Get a pointer to the config list */ stp = State_new(); /* A new state structure */ MemoryCheck(stp); stp->bp = bp; /* Remember the configuration basis */ stp->cfp = cfp; /* Remember the configuration closure */ stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ stp->ap = 0; /* No actions, yet. */ State_insert(stp,stp->bp); /* Add to the state table */ buildshifts(lemp,stp); /* Recursively compute successor states */ } return stp; } /* ** Return true if two symbols are the same. */ int same_symbol(struct symbol *a, struct symbol *b) { int i; if( a==b ) return 1; if( a->type!=MULTITERMINAL ) return 0; if( b->type!=MULTITERMINAL ) return 0; if( a->nsubsym!=b->nsubsym ) return 0; for(i=0; insubsym; i++){ if( a->subsym[i]!=b->subsym[i] ) return 0; } return 1; } /* Construct all successor states to the given state. A "successor" ** state is any state which can be reached by a shift action. */ PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) { struct config *cfp; /* For looping thru the config closure of "stp" */ struct config *bcfp; /* For the inner loop on config closure of "stp" */ struct config *newcfg; /* */ struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ struct state *newstp; /* A pointer to a successor state */ /* Each configuration becomes complete after it contributes to a successor ** state. Initially, all configurations are incomplete */ for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; /* Loop through all configurations of the state "stp" */ for(cfp=stp->cfp; cfp; cfp=cfp->next){ if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ Configlist_reset(); /* Reset the new config set */ sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ /* For every configuration in the state "stp" which has the symbol "sp" ** following its dot, add the same configuration to the basis set under ** construction but with the dot shifted one symbol to the right. */ for(bcfp=cfp; bcfp; bcfp=bcfp->next){ if( bcfp->status==COMPLETE ) continue; /* Already used */ if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ bcfp->status = COMPLETE; /* Mark this config as used */ newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); Plink_add(&newcfg->bplp,bcfp); } /* Get a pointer to the state described by the basis configuration set ** constructed in the preceding loop */ newstp = getstate(lemp); /* The state "newstp" is reached from the state "stp" by a shift action ** on the symbol "sp" */ if( sp->type==MULTITERMINAL ){ int i; for(i=0; insubsym; i++){ Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); } }else{ Action_add(&stp->ap,SHIFT,sp,(char *)newstp); } } } /* ** Construct the propagation links */ void FindLinks(struct lemon *lemp) { int i; struct config *cfp, *other; struct state *stp; struct plink *plp; /* Housekeeping detail: ** Add to every propagate link a pointer back to the state to ** which the link is attached. */ for(i=0; instate; i++){ stp = lemp->sorted[i]; for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ cfp->stp = stp; } } /* Convert all backlinks into forward links. Only the forward ** links are used in the follow-set computation. */ for(i=0; instate; i++){ stp = lemp->sorted[i]; for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ for(plp=cfp->bplp; plp; plp=plp->next){ other = plp->cfp; Plink_add(&other->fplp,cfp); } } } } /* Compute all followsets. ** ** A followset is the set of all symbols which can come immediately ** after a configuration. */ void FindFollowSets(struct lemon *lemp) { int i; struct config *cfp; struct plink *plp; int progress; int change; for(i=0; instate; i++){ assert( lemp->sorted[i]!=0 ); for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ cfp->status = INCOMPLETE; } } do{ progress = 0; for(i=0; instate; i++){ assert( lemp->sorted[i]!=0 ); for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ if( cfp->status==COMPLETE ) continue; for(plp=cfp->fplp; plp; plp=plp->next){ change = SetUnion(plp->cfp->fws,cfp->fws); if( change ){ plp->cfp->status = INCOMPLETE; progress = 1; } } cfp->status = COMPLETE; } } }while( progress ); } static int resolve_conflict(struct action *,struct action *); /* Compute the reduce actions, and resolve conflicts. */ void FindActions(struct lemon *lemp) { int i,j; struct config *cfp; struct state *stp; struct symbol *sp; struct rule *rp; /* Add all of the reduce actions ** A reduce action is added for each element of the followset of ** a configuration which has its dot at the extreme right. */ for(i=0; instate; i++){ /* Loop over all states */ stp = lemp->sorted[i]; for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ for(j=0; jnterminal; j++){ if( SetFind(cfp->fws,j) ){ /* Add a reduce action to the state "stp" which will reduce by the ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); } } } } } /* Add the accepting token */ if( lemp->start ){ sp = Symbol_find(lemp->start); if( sp==0 ){ if( lemp->startRule==0 ){ fprintf(stderr, "internal error on source line %d: no start rule\n", __LINE__); exit(1); } sp = lemp->startRule->lhs; } }else{ sp = lemp->startRule->lhs; } /* Add to the first state (which is always the starting state of the ** finite state machine) an action to ACCEPT if the lookahead is the ** start nonterminal. */ Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); /* Resolve conflicts */ for(i=0; instate; i++){ struct action *ap, *nap; stp = lemp->sorted[i]; /* assert( stp->ap ); */ stp->ap = Action_sort(stp->ap); for(ap=stp->ap; ap && ap->next; ap=ap->next){ for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ /* The two actions "ap" and "nap" have the same lookahead. ** Figure out which one should be used */ lemp->nconflict += resolve_conflict(ap,nap); } } } /* Report an error for each rule that can never be reduced. */ for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; for(i=0; instate; i++){ struct action *ap; for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; } } for(rp=lemp->rule; rp; rp=rp->next){ if( rp->canReduce ) continue; ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); lemp->errorcnt++; } } /* Resolve a conflict between the two given actions. If the ** conflict can't be resolved, return non-zero. ** ** NO LONGER TRUE: ** To resolve a conflict, first look to see if either action ** is on an error rule. In that case, take the action which ** is not associated with the error rule. If neither or both ** actions are associated with an error rule, then try to ** use precedence to resolve the conflict. ** ** If either action is a SHIFT, then it must be apx. This ** function won't work if apx->type==REDUCE and apy->type==SHIFT. */ static int resolve_conflict( struct action *apx, struct action *apy ){ struct symbol *spx, *spy; int errcnt = 0; assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ if( apx->type==SHIFT && apy->type==SHIFT ){ apy->type = SSCONFLICT; errcnt++; } if( apx->type==SHIFT && apy->type==REDUCE ){ spx = apx->sp; spy = apy->x.rp->precsym; if( spy==0 || spx->prec<0 || spy->prec<0 ){ /* Not enough precedence information. */ apy->type = SRCONFLICT; errcnt++; }else if( spx->prec>spy->prec ){ /* higher precedence wins */ apy->type = RD_RESOLVED; }else if( spx->precprec ){ apx->type = SH_RESOLVED; }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ apy->type = RD_RESOLVED; /* associativity */ }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ apx->type = SH_RESOLVED; }else{ assert( spx->prec==spy->prec && spx->assoc==NONE ); apx->type = ERROR; } }else if( apx->type==REDUCE && apy->type==REDUCE ){ spx = apx->x.rp->precsym; spy = apy->x.rp->precsym; if( spx==0 || spy==0 || spx->prec<0 || spy->prec<0 || spx->prec==spy->prec ){ apy->type = RRCONFLICT; errcnt++; }else if( spx->prec>spy->prec ){ apy->type = RD_RESOLVED; }else if( spx->precprec ){ apx->type = RD_RESOLVED; } }else{ assert( apx->type==SH_RESOLVED || apx->type==RD_RESOLVED || apx->type==SSCONFLICT || apx->type==SRCONFLICT || apx->type==RRCONFLICT || apy->type==SH_RESOLVED || apy->type==RD_RESOLVED || apy->type==SSCONFLICT || apy->type==SRCONFLICT || apy->type==RRCONFLICT ); /* The REDUCE/SHIFT case cannot happen because SHIFTs come before ** REDUCEs on the list. If we reach this point it must be because ** the parser conflict had already been resolved. */ } return errcnt; } /********************* From the file "configlist.c" *************************/ /* ** Routines to processing a configuration list and building a state ** in the LEMON parser generator. */ static struct config *freelist = 0; /* List of free configurations */ static struct config *current = 0; /* Top of list of configurations */ static struct config **currentend = 0; /* Last on list of configs */ static struct config *basis = 0; /* Top of list of basis configs */ static struct config **basisend = 0; /* End of list of basis configs */ /* Return a pointer to a new configuration */ PRIVATE struct config *newconfig(void){ return (struct config*)lemon_calloc(1, sizeof(struct config)); } /* The configuration "old" is no longer used */ PRIVATE void deleteconfig(struct config *old) { old->next = freelist; freelist = old; } /* Initialized the configuration list builder */ void Configlist_init(void){ current = 0; currentend = ¤t; basis = 0; basisend = &basis; Configtable_init(); return; } /* Initialized the configuration list builder */ void Configlist_reset(void){ current = 0; currentend = ¤t; basis = 0; basisend = &basis; Configtable_clear(0); return; } /* Add another configuration to the configuration list */ struct config *Configlist_add( struct rule *rp, /* The rule */ int dot /* Index into the RHS of the rule where the dot goes */ ){ struct config *cfp, model; assert( currentend!=0 ); model.rp = rp; model.dot = dot; cfp = Configtable_find(&model); if( cfp==0 ){ cfp = newconfig(); cfp->rp = rp; cfp->dot = dot; cfp->fws = SetNew(); cfp->stp = 0; cfp->fplp = cfp->bplp = 0; cfp->next = 0; cfp->bp = 0; *currentend = cfp; currentend = &cfp->next; Configtable_insert(cfp); } return cfp; } /* Add a basis configuration to the configuration list */ struct config *Configlist_addbasis(struct rule *rp, int dot) { struct config *cfp, model; assert( basisend!=0 ); assert( currentend!=0 ); model.rp = rp; model.dot = dot; cfp = Configtable_find(&model); if( cfp==0 ){ cfp = newconfig(); cfp->rp = rp; cfp->dot = dot; cfp->fws = SetNew(); cfp->stp = 0; cfp->fplp = cfp->bplp = 0; cfp->next = 0; cfp->bp = 0; *currentend = cfp; currentend = &cfp->next; *basisend = cfp; basisend = &cfp->bp; Configtable_insert(cfp); } return cfp; } /* Compute the closure of the configuration list */ void Configlist_closure(struct lemon *lemp) { struct config *cfp, *newcfp; struct rule *rp, *newrp; struct symbol *sp, *xsp; int i, dot; assert( currentend!=0 ); for(cfp=current; cfp; cfp=cfp->next){ rp = cfp->rp; dot = cfp->dot; if( dot>=rp->nrhs ) continue; sp = rp->rhs[dot]; if( sp->type==NONTERMINAL ){ if( sp->rule==0 && sp!=lemp->errsym ){ ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", sp->name); lemp->errorcnt++; } for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ newcfp = Configlist_add(newrp,0); for(i=dot+1; inrhs; i++){ xsp = rp->rhs[i]; if( xsp->type==TERMINAL ){ SetAdd(newcfp->fws,xsp->index); break; }else if( xsp->type==MULTITERMINAL ){ int k; for(k=0; knsubsym; k++){ SetAdd(newcfp->fws, xsp->subsym[k]->index); } break; }else{ SetUnion(newcfp->fws,xsp->firstset); if( xsp->lambda==LEMON_FALSE ) break; } } if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); } } } return; } /* Sort the configuration list */ void Configlist_sort(void){ current = (struct config*)msort((char*)current,(char**)&(current->next), Configcmp); currentend = 0; return; } /* Sort the basis configuration list */ void Configlist_sortbasis(void){ basis = (struct config*)msort((char*)current,(char**)&(current->bp), Configcmp); basisend = 0; return; } /* Return a pointer to the head of the configuration list and ** reset the list */ struct config *Configlist_return(void){ struct config *old; old = current; current = 0; currentend = 0; return old; } /* Return a pointer to the head of the configuration list and ** reset the list */ struct config *Configlist_basis(void){ struct config *old; old = basis; basis = 0; basisend = 0; return old; } /* Free all elements of the given configuration list */ void Configlist_eat(struct config *cfp) { struct config *nextcfp; for(; cfp; cfp=nextcfp){ nextcfp = cfp->next; assert( cfp->fplp==0 ); assert( cfp->bplp==0 ); if( cfp->fws ) SetFree(cfp->fws); deleteconfig(cfp); } return; } /***************** From the file "error.c" *********************************/ /* ** Code for printing error message. */ void ErrorMsg(const char *filename, int lineno, const char *format, ...){ va_list ap; fprintf(stderr, "%s:%d: ", filename, lineno); va_start(ap, format); vfprintf(stderr,format,ap); va_end(ap); fprintf(stderr, "\n"); } /**************** From the file "main.c" ************************************/ /* ** Main program file for the LEMON parser generator. */ /* Report an out-of-memory condition and abort. This function ** is used mostly by the "MemoryCheck" macro in struct.h */ void memory_error(void){ fprintf(stderr,"Out of memory. Aborting...\n"); exit(1); } static int nDefine = 0; /* Number of -D options on the command line */ static int nDefineUsed = 0; /* Number of -D options actually used */ static char **azDefine = 0; /* Name of the -D macros */ static char *bDefineUsed = 0; /* True for every -D macro actually used */ /* This routine is called with the argument to each -D command-line option. ** Add the macro defined to the azDefine array. */ static void handle_D_option(char *z){ char **paz; nDefine++; azDefine = (char **) lemon_realloc(azDefine, sizeof(azDefine[0])*nDefine); if( azDefine==0 ){ fprintf(stderr,"out of memory\n"); exit(1); } bDefineUsed = (char*)lemon_realloc(bDefineUsed, nDefine); if( bDefineUsed==0 ){ fprintf(stderr,"out of memory\n"); exit(1); } bDefineUsed[nDefine-1] = 0; paz = &azDefine[nDefine-1]; *paz = (char *) lemon_malloc( lemonStrlen(z)+1 ); if( *paz==0 ){ fprintf(stderr,"out of memory\n"); exit(1); } lemon_strcpy(*paz, z); for(z=*paz; *z && *z!='='; z++){} *z = 0; } /* Rember the name of the output directory */ static char *outputDir = NULL; static void handle_d_option(char *z){ outputDir = (char *) lemon_malloc( lemonStrlen(z)+1 ); if( outputDir==0 ){ fprintf(stderr,"out of memory\n"); exit(1); } lemon_strcpy(outputDir, z); } static char *user_templatename = NULL; static void handle_T_option(char *z){ user_templatename = (char *) lemon_malloc( lemonStrlen(z)+1 ); if( user_templatename==0 ){ memory_error(); } lemon_strcpy(user_templatename, z); } /* Merge together to lists of rules ordered by rule.iRule */ static struct rule *Rule_merge(struct rule *pA, struct rule *pB){ struct rule *pFirst = 0; struct rule **ppPrev = &pFirst; while( pA && pB ){ if( pA->iRuleiRule ){ *ppPrev = pA; ppPrev = &pA->next; pA = pA->next; }else{ *ppPrev = pB; ppPrev = &pB->next; pB = pB->next; } } if( pA ){ *ppPrev = pA; }else{ *ppPrev = pB; } return pFirst; } /* ** Sort a list of rules in order of increasing iRule value */ static struct rule *Rule_sort(struct rule *rp){ unsigned int i; struct rule *pNext; struct rule *x[32]; memset(x, 0, sizeof(x)); while( rp ){ pNext = rp->next; rp->next = 0; for(i=0; iindex = i; qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); for(i=0; iindex = i; while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); lem.nsymbol = i - 1; for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); lem.nterminal = i; /* Assign sequential rule numbers. Start with 0. Put rules that have no ** reduce action C-code associated with them last, so that the switch() ** statement that selects reduction actions will have a smaller jump table. */ for(i=0, rp=lem.rule; rp; rp=rp->next){ rp->iRule = rp->code ? i++ : -1; } lem.nruleWithAction = i; for(rp=lem.rule; rp; rp=rp->next){ if( rp->iRule<0 ) rp->iRule = i++; } lem.startRule = lem.rule; lem.rule = Rule_sort(lem.rule); /* Generate a reprint of the grammar, if requested on the command line */ if( rpflag ){ Reprint(&lem); }else{ /* Initialize the size for all follow and first sets */ SetSize(lem.nterminal+1); /* Find the precedence for every production rule (that has one) */ FindRulePrecedences(&lem); /* Compute the lambda-nonterminals and the first-sets for every ** nonterminal */ FindFirstSets(&lem); /* Compute all LR(0) states. Also record follow-set propagation ** links so that the follow-set can be computed later */ lem.nstate = 0; FindStates(&lem); lem.sorted = State_arrayof(); /* Tie up loose ends on the propagation links */ FindLinks(&lem); /* Compute the follow set of every reducible configuration */ FindFollowSets(&lem); /* Compute the action tables */ FindActions(&lem); /* Compress the action tables */ if( compress==0 ) CompressTables(&lem); /* Reorder and renumber the states so that states with fewer choices ** occur at the end. This is an optimization that helps make the ** generated parser tables smaller. */ if( noResort==0 ) ResortStates(&lem); /* Generate a report of the parser generated. (the "y.output" file) */ if( !quiet ) ReportOutput(&lem); /* Generate the source code for the parser */ ReportTable(&lem, mhflag, sqlFlag); /* Produce a header file for use by the scanner. (This step is ** omitted if the "-m" option is used because makeheaders will ** generate the file for us.) */ if( !mhflag ) ReportHeader(&lem); } if( statistics ){ printf("Parser statistics:\n"); stats_line("terminal symbols", lem.nterminal); stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); stats_line("total symbols", lem.nsymbol); stats_line("rules", lem.nrule); stats_line("states", lem.nxstate); stats_line("conflicts", lem.nconflict); stats_line("action table entries", lem.nactiontab); stats_line("lookahead table entries", lem.nlookaheadtab); stats_line("total table size (bytes)", lem.tablesize); } if( lem.nconflict > 0 ){ fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); } /* return 0 on success, 1 on failure. */ exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; lemon_free_all(); exit(exitcode); return (exitcode); } /******************** From the file "msort.c" *******************************/ /* ** A generic merge-sort program. ** ** USAGE: ** Let "ptr" be a pointer to some structure which is at the head of ** a null-terminated list. Then to sort the list call: ** ** ptr = msort(ptr,&(ptr->next),cmpfnc); ** ** In the above, "cmpfnc" is a pointer to a function which compares ** two instances of the structure and returns an integer, as in ** strcmp. The second argument is a pointer to the pointer to the ** second element of the linked list. This address is used to compute ** the offset to the "next" field within the structure. The offset to ** the "next" field must be constant for all structures in the list. ** ** The function returns a new pointer which is the head of the list ** after sorting. ** ** ALGORITHM: ** Merge-sort. */ /* ** Return a pointer to the next structure in the linked list. */ #define NEXT(A) (*(char**)(((char*)A)+offset)) /* ** Inputs: ** a: A sorted, null-terminated linked list. (May be null). ** b: A sorted, null-terminated linked list. (May be null). ** cmp: A pointer to the comparison function. ** offset: Offset in the structure to the "next" field. ** ** Return Value: ** A pointer to the head of a sorted list containing the elements ** of both a and b. ** ** Side effects: ** The "next" pointers for elements in the lists a and b are ** changed. */ static char *merge( char *a, char *b, int (*cmp)(const char*,const char*), int offset ){ char *ptr, *head; if( a==0 ){ head = b; }else if( b==0 ){ head = a; }else{ if( (*cmp)(a,b)<=0 ){ ptr = a; a = NEXT(a); }else{ ptr = b; b = NEXT(b); } head = ptr; while( a && b ){ if( (*cmp)(a,b)<=0 ){ NEXT(ptr) = a; ptr = a; a = NEXT(a); }else{ NEXT(ptr) = b; ptr = b; b = NEXT(b); } } if( a ) NEXT(ptr) = a; else NEXT(ptr) = b; } return head; } /* ** Inputs: ** list: Pointer to a singly-linked list of structures. ** next: Pointer to pointer to the second element of the list. ** cmp: A comparison function. ** ** Return Value: ** A pointer to the head of a sorted list containing the elements ** originally in list. ** ** Side effects: ** The "next" pointers for elements in list are changed. */ #define LISTSIZE 30 static char *msort( char *list, char **next, int (*cmp)(const char*,const char*) ){ unsigned long offset; char *ep; char *set[LISTSIZE]; int i; offset = (unsigned long)((char*)next - (char*)list); for(i=0; istate = WAITING_FOR_DECL_KEYWORD; }else if( ISLOWER(x[0]) ){ psp->lhs = Symbol_new(x); psp->nrhs = 0; psp->lhsalias = 0; psp->state = WAITING_FOR_ARROW; }else if( x[0]=='{' ){ if( psp->prevrule==0 ){ ErrorMsg(psp->filename,psp->tokenlineno, "There is no prior rule upon which to attach the code " "fragment which begins on this line."); psp->errorcnt++; }else if( psp->prevrule->code!=0 ){ ErrorMsg(psp->filename,psp->tokenlineno, "Code fragment beginning on this line is not the first " "to follow the previous rule."); psp->errorcnt++; }else if( strcmp(x, "{NEVER-REDUCE")==0 ){ psp->prevrule->neverReduce = 1; }else{ psp->prevrule->line = psp->tokenlineno; psp->prevrule->code = &x[1]; psp->prevrule->noCode = 0; } }else if( x[0]=='[' ){ psp->state = PRECEDENCE_MARK_1; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Token \"%s\" should be either \"%%\" or a nonterminal name.", x); psp->errorcnt++; } break; case PRECEDENCE_MARK_1: if( !ISUPPER(x[0]) ){ ErrorMsg(psp->filename,psp->tokenlineno, "The precedence symbol must be a terminal."); psp->errorcnt++; }else if( psp->prevrule==0 ){ ErrorMsg(psp->filename,psp->tokenlineno, "There is no prior rule to assign precedence \"[%s]\".",x); psp->errorcnt++; }else if( psp->prevrule->precsym!=0 ){ ErrorMsg(psp->filename,psp->tokenlineno, "Precedence mark on this line is not the first " "to follow the previous rule."); psp->errorcnt++; }else{ psp->prevrule->precsym = Symbol_new(x); } psp->state = PRECEDENCE_MARK_2; break; case PRECEDENCE_MARK_2: if( x[0]!=']' ){ ErrorMsg(psp->filename,psp->tokenlineno, "Missing \"]\" on precedence mark."); psp->errorcnt++; } psp->state = WAITING_FOR_DECL_OR_RULE; break; case WAITING_FOR_ARROW: if( x[0]==':' && x[1]==':' && x[2]=='=' ){ psp->state = IN_RHS; }else if( x[0]=='(' ){ psp->state = LHS_ALIAS_1; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Expected to see a \":\" following the LHS symbol \"%s\".", psp->lhs->name); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case LHS_ALIAS_1: if( ISALPHA(x[0]) ){ psp->lhsalias = x; psp->state = LHS_ALIAS_2; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "\"%s\" is not a valid alias for the LHS \"%s\"\n", x,psp->lhs->name); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case LHS_ALIAS_2: if( x[0]==')' ){ psp->state = LHS_ALIAS_3; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case LHS_ALIAS_3: if( x[0]==':' && x[1]==':' && x[2]=='=' ){ psp->state = IN_RHS; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Missing \"->\" following: \"%s(%s)\".", psp->lhs->name,psp->lhsalias); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case IN_RHS: if( x[0]=='.' ){ struct rule *rp; rp = (struct rule *)lemon_calloc( sizeof(struct rule) + sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); if( rp==0 ){ ErrorMsg(psp->filename,psp->tokenlineno, "Can't allocate enough memory for this rule."); psp->errorcnt++; psp->prevrule = 0; }else{ int i; rp->ruleline = psp->tokenlineno; rp->rhs = (struct symbol**)&rp[1]; rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); for(i=0; inrhs; i++){ rp->rhs[i] = psp->rhs[i]; rp->rhsalias[i] = psp->alias[i]; if( rp->rhsalias[i]!=0 ){ rp->rhs[i]->bContent = 1; } } rp->lhs = psp->lhs; rp->lhsalias = psp->lhsalias; rp->nrhs = psp->nrhs; rp->code = 0; rp->noCode = 1; rp->precsym = 0; rp->index = psp->gp->nrule++; rp->nextlhs = rp->lhs->rule; rp->lhs->rule = rp; rp->next = 0; if( psp->firstrule==0 ){ psp->firstrule = psp->lastrule = rp; }else{ psp->lastrule->next = rp; psp->lastrule = rp; } psp->prevrule = rp; } psp->state = WAITING_FOR_DECL_OR_RULE; }else if( ISALPHA(x[0]) ){ if( psp->nrhs>=MAXRHS ){ ErrorMsg(psp->filename,psp->tokenlineno, "Too many symbols on RHS of rule beginning at \"%s\".", x); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; }else{ psp->rhs[psp->nrhs] = Symbol_new(x); psp->alias[psp->nrhs] = 0; psp->nrhs++; } }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 && ISUPPER(x[1]) ){ struct symbol *msp = psp->rhs[psp->nrhs-1]; if( msp->type!=MULTITERMINAL ){ struct symbol *origsp = msp; msp = (struct symbol *) lemon_calloc(1,sizeof(*msp)); memset(msp, 0, sizeof(*msp)); msp->type = MULTITERMINAL; msp->nsubsym = 1; msp->subsym = (struct symbol**)lemon_calloc(1,sizeof(struct symbol*)); msp->subsym[0] = origsp; msp->name = origsp->name; psp->rhs[psp->nrhs-1] = msp; } msp->nsubsym++; msp->subsym = (struct symbol **) lemon_realloc(msp->subsym, sizeof(struct symbol*)*msp->nsubsym); msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); if( ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0]) ){ ErrorMsg(psp->filename,psp->tokenlineno, "Cannot form a compound containing a non-terminal"); psp->errorcnt++; } }else if( x[0]=='(' && psp->nrhs>0 ){ psp->state = RHS_ALIAS_1; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Illegal character on RHS of rule: \"%s\".",x); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case RHS_ALIAS_1: if( ISALPHA(x[0]) ){ psp->alias[psp->nrhs-1] = x; psp->state = RHS_ALIAS_2; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", x,psp->rhs[psp->nrhs-1]->name); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case RHS_ALIAS_2: if( x[0]==')' ){ psp->state = IN_RHS; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; } break; case WAITING_FOR_DECL_KEYWORD: if( ISALPHA(x[0]) ){ psp->declkeyword = x; psp->declargslot = 0; psp->decllinenoslot = 0; psp->insertLineMacro = 1; psp->state = WAITING_FOR_DECL_ARG; if( strcmp(x,"name")==0 ){ psp->declargslot = &(psp->gp->name); psp->insertLineMacro = 0; }else if( strcmp(x,"include")==0 ){ psp->declargslot = &(psp->gp->include); }else if( strcmp(x,"code")==0 ){ psp->declargslot = &(psp->gp->extracode); }else if( strcmp(x,"token_prefix")==0 ){ psp->declargslot = &psp->gp->tokenprefix; psp->insertLineMacro = 0; }else if( strcmp(x,"syntax_error")==0 ){ psp->declargslot = &(psp->gp->error); }else if( strcmp(x,"parse_accept")==0 ){ psp->declargslot = &(psp->gp->accept); }else if( strcmp(x,"parse_failure")==0 ){ psp->declargslot = &(psp->gp->failure); }else if( strcmp(x,"stack_overflow")==0 ){ psp->declargslot = &(psp->gp->overflow); }else if( strcmp(x,"extra_argument")==0 ){ psp->declargslot = &(psp->gp->arg); psp->insertLineMacro = 0; }else if( strcmp(x,"extra_context")==0 ){ psp->declargslot = &(psp->gp->ctx); psp->insertLineMacro = 0; }else if( strcmp(x,"token_type")==0 ){ psp->declargslot = &(psp->gp->tokentype); psp->insertLineMacro = 0; }else if( strcmp(x,"default_type")==0 ){ psp->declargslot = &(psp->gp->vartype); psp->insertLineMacro = 0; }else if( strcmp(x,"stack_size")==0 ){ psp->declargslot = &(psp->gp->stacksize); psp->insertLineMacro = 0; }else if( strcmp(x,"start_symbol")==0 ){ psp->declargslot = &(psp->gp->start); psp->insertLineMacro = 0; }else if( strcmp(x,"left")==0 ){ psp->preccounter++; psp->declassoc = LEFT; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; }else if( strcmp(x,"right")==0 ){ psp->preccounter++; psp->declassoc = RIGHT; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; }else if( strcmp(x,"nonassoc")==0 ){ psp->preccounter++; psp->declassoc = NONE; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; }else if( strcmp(x,"type")==0 ){ psp->state = WAITING_FOR_DATATYPE_SYMBOL; }else if( strcmp(x,"fallback")==0 ){ psp->fallback = 0; psp->state = WAITING_FOR_FALLBACK_ID; }else if( strcmp(x,"token")==0 ){ psp->state = WAITING_FOR_TOKEN_NAME; }else if( strcmp(x,"wildcard")==0 ){ psp->state = WAITING_FOR_WILDCARD_ID; }else if( strcmp(x,"token_class")==0 ){ psp->state = WAITING_FOR_CLASS_ID; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Unknown declaration keyword: \"%%%s\".",x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Illegal declaration keyword: \"%s\".",x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case WAITING_FOR_DATATYPE_SYMBOL: if( !ISALPHA(x[0]) ){ ErrorMsg(psp->filename,psp->tokenlineno, "Symbol name missing after %%type keyword"); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; }else{ struct symbol *sp = Symbol_find(x); if((sp) && (sp->datatype)){ ErrorMsg(psp->filename,psp->tokenlineno, "Symbol %%type \"%s\" already defined", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; }else{ if (!sp){ sp = Symbol_new(x); } psp->declargslot = &sp->datatype; psp->insertLineMacro = 0; psp->state = WAITING_FOR_DECL_ARG; } } break; case WAITING_FOR_PRECEDENCE_SYMBOL: if( x[0]=='.' ){ psp->state = WAITING_FOR_DECL_OR_RULE; }else if( ISUPPER(x[0]) ){ struct symbol *sp; sp = Symbol_new(x); if( sp->prec>=0 ){ ErrorMsg(psp->filename,psp->tokenlineno, "Symbol \"%s\" has already be given a precedence.",x); psp->errorcnt++; }else{ sp->prec = psp->preccounter; sp->assoc = psp->declassoc; } }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Can't assign a precedence to \"%s\".",x); psp->errorcnt++; } break; case WAITING_FOR_DECL_ARG: if( x[0]=='{' || x[0]=='\"' || ISALNUM(x[0]) ){ const char *zOld, *zNew; char *zBuf, *z; int nOld, n, nLine = 0, nNew, nBack; int addLineMacro; char zLine[50]; zNew = x; if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; nNew = lemonStrlen(zNew); if( *psp->declargslot ){ zOld = *psp->declargslot; }else{ zOld = ""; } nOld = lemonStrlen(zOld); n = nOld + nNew + 20; addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && psp->tokenlineno>1 && (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); if( addLineMacro ){ for(z=psp->filename, nBack=0; *z; z++){ if( *z=='\\' ) nBack++; } lemon_sprintf(zLine, "//line %d ", psp->tokenlineno); nLine = lemonStrlen(zLine); n += nLine + lemonStrlen(psp->filename) + nBack; } *psp->declargslot = (char *) lemon_realloc(*psp->declargslot, n); zBuf = *psp->declargslot + nOld; if( addLineMacro ){ if( nOld && zBuf[-1]!='\n' ){ *(zBuf++) = '\n'; } memcpy(zBuf, zLine, nLine); zBuf += nLine; *(zBuf++) = '"'; for(z=psp->filename; *z; z++){ if( *z=='\\' ){ *(zBuf++) = '\\'; } *(zBuf++) = *z; } *(zBuf++) = '"'; *(zBuf++) = '\n'; } if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ psp->decllinenoslot[0] = psp->tokenlineno; } memcpy(zBuf, zNew, nNew); zBuf += nNew; *zBuf = 0; psp->state = WAITING_FOR_DECL_OR_RULE; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Illegal argument to %%%s: %s",psp->declkeyword,x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case WAITING_FOR_FALLBACK_ID: if( x[0]=='.' ){ psp->state = WAITING_FOR_DECL_OR_RULE; }else if( !ISUPPER(x[0]) ){ ErrorMsg(psp->filename, psp->tokenlineno, "%%fallback argument \"%s\" should be a token", x); psp->errorcnt++; }else{ struct symbol *sp = Symbol_new(x); if( psp->fallback==0 ){ psp->fallback = sp; }else if( sp->fallback ){ ErrorMsg(psp->filename, psp->tokenlineno, "More than one fallback assigned to token %s", x); psp->errorcnt++; }else{ sp->fallback = psp->fallback; psp->gp->has_fallback = 1; } } break; case WAITING_FOR_TOKEN_NAME: /* Tokens do not have to be declared before use. But they can be ** in order to control their assigned integer number. The number for ** each token is assigned when it is first seen. So by including ** ** %token ONE TWO THREE. ** ** early in the grammar file, that assigns small consecutive values ** to each of the tokens ONE TWO and THREE. */ if( x[0]=='.' ){ psp->state = WAITING_FOR_DECL_OR_RULE; }else if( !ISUPPER(x[0]) ){ ErrorMsg(psp->filename, psp->tokenlineno, "%%token argument \"%s\" should be a token", x); psp->errorcnt++; }else{ (void)Symbol_new(x); } break; case WAITING_FOR_WILDCARD_ID: if( x[0]=='.' ){ psp->state = WAITING_FOR_DECL_OR_RULE; }else if( !ISUPPER(x[0]) ){ ErrorMsg(psp->filename, psp->tokenlineno, "%%wildcard argument \"%s\" should be a token", x); psp->errorcnt++; }else{ struct symbol *sp = Symbol_new(x); if( psp->gp->wildcard==0 ){ psp->gp->wildcard = sp; }else{ ErrorMsg(psp->filename, psp->tokenlineno, "Extra wildcard to token: %s", x); psp->errorcnt++; } } break; case WAITING_FOR_CLASS_ID: if( !ISLOWER(x[0]) ){ ErrorMsg(psp->filename, psp->tokenlineno, "%%token_class must be followed by an identifier: %s", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; }else if( Symbol_find(x) ){ ErrorMsg(psp->filename, psp->tokenlineno, "Symbol \"%s\" already used", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; }else{ psp->tkclass = Symbol_new(x); psp->tkclass->type = MULTITERMINAL; psp->state = WAITING_FOR_CLASS_TOKEN; } break; case WAITING_FOR_CLASS_TOKEN: if( x[0]=='.' ){ psp->state = WAITING_FOR_DECL_OR_RULE; }else if( ISUPPER(x[0]) || ((x[0]=='|' || x[0]=='/') && ISUPPER(x[1])) ){ struct symbol *msp = psp->tkclass; msp->nsubsym++; msp->subsym = (struct symbol **) lemon_realloc(msp->subsym, sizeof(struct symbol*)*msp->nsubsym); if( !ISUPPER(x[0]) ) x++; msp->subsym[msp->nsubsym-1] = Symbol_new(x); }else{ ErrorMsg(psp->filename, psp->tokenlineno, "%%token_class argument \"%s\" should be a token", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case RESYNC_AFTER_RULE_ERROR: /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; ** break; */ case RESYNC_AFTER_DECL_ERROR: if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; break; } } /* The text in the input is part of the argument to an %ifdef or %ifndef. ** Evaluate the text as a boolean expression. Return true or false. */ static int eval_preprocessor_boolean(char *z, int lineno){ int neg = 0; int res = 0; int okTerm = 1; int i; for(i=0; z[i]!=0; i++){ if( ISSPACE(z[i]) ) continue; if( z[i]=='!' ){ if( !okTerm ) goto pp_syntax_error; neg = !neg; continue; } if( z[i]=='|' && z[i+1]=='|' ){ if( okTerm ) goto pp_syntax_error; if( res ) return 1; i++; okTerm = 1; continue; } if( z[i]=='&' && z[i+1]=='&' ){ if( okTerm ) goto pp_syntax_error; if( !res ) return 0; i++; okTerm = 1; continue; } if( z[i]=='(' ){ int k; int n = 1; if( !okTerm ) goto pp_syntax_error; for(k=i+1; z[k]; k++){ if( z[k]==')' ){ n--; if( n==0 ){ z[k] = 0; res = eval_preprocessor_boolean(&z[i+1], -1); z[k] = ')'; if( res<0 ){ i = i-res; goto pp_syntax_error; } i = k; break; } }else if( z[k]=='(' ){ n++; }else if( z[k]==0 ){ i = k; goto pp_syntax_error; } } if( neg ){ res = !res; neg = 0; } okTerm = 0; continue; } if( ISALPHA(z[i]) ){ int j, k, n; if( !okTerm ) goto pp_syntax_error; for(k=i+1; ISALNUM(z[k]) || z[k]=='_'; k++){} n = k - i; res = 0; for(j=0; j0 ){ fprintf(stderr, "%%if syntax error on line %d.\n", lineno); fprintf(stderr, " %.*s <-- syntax error here\n", i+1, z); exit(1); }else{ return -(i+1); } } /* Run the preprocessor over the input file text. The global variables ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined ** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and ** comments them out. Text in between is also commented out as appropriate. */ static void preprocess_input(char *z){ int i, j, k; int exclude = 0; int start = 0; int lineno = 1; int start_lineno = 1; for(i=0; z[i]; i++){ if( z[i]=='\n' ) lineno++; if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; if( strncmp(&z[i],"%endif",6)==0 && ISSPACE(z[i+6]) ){ if( exclude ){ exclude--; if( exclude==0 ){ for(j=start; jfilename; ps.errorcnt = 0; ps.state = INITIALIZE; /* Begin by reading the input file */ fp = fopen(ps.filename,"rb"); if( fp==0 ){ ErrorMsg(ps.filename,0,"Can't open this file for reading."); gp->errorcnt++; return; } fseek(fp,0,2); filesize = ftell(fp); rewind(fp); filebuf = (char *)lemon_malloc( filesize+1 ); if( filesize>100000000 || filebuf==0 ){ ErrorMsg(ps.filename,0,"Input file too large."); lemon_free(filebuf); gp->errorcnt++; fclose(fp); return; } if( fread(filebuf,1,filesize,fp)!=filesize ){ ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", filesize); lemon_free(filebuf); gp->errorcnt++; fclose(fp); return; } fclose(fp); filebuf[filesize] = 0; /* Make an initial pass through the file to handle %ifdef and %ifndef */ preprocess_input(filebuf); if( gp->printPreprocessed ){ printf("%s\n", filebuf); return; } /* Now scan the text of the input file */ lineno = 1; for(cp=filebuf; (c= *cp)!=0; ){ if( c=='\n' ) lineno++; /* Keep track of the line number */ if( ISSPACE(c) ){ cp++; continue; } /* Skip all white space */ if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ cp+=2; while( (c= *cp)!=0 && c!='\n' ) cp++; continue; } if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ cp+=2; if( (*cp)=='/' ) cp++; while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ if( c=='\n' ) lineno++; cp++; } if( c ) cp++; continue; } ps.tokenstart = cp; /* Mark the beginning of the token */ ps.tokenlineno = lineno; /* Linenumber on which token begins */ if( c=='\"' ){ /* String literals */ cp++; while( (c= *cp)!=0 && c!='\"' ){ if( c=='\n' ) lineno++; cp++; } if( c==0 ){ ErrorMsg(ps.filename,startline, "String starting on this line is not terminated before " "the end of the file."); ps.errorcnt++; nextcp = cp; }else{ nextcp = cp+1; } }else if( c=='{' ){ /* A block of C code */ int level; cp++; for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ if( c=='\n' ) lineno++; else if( c=='{' ) level++; else if( c=='}' ) level--; else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ int prevc; cp = &cp[2]; prevc = 0; while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ if( c=='\n' ) lineno++; prevc = c; cp++; } }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ cp = &cp[2]; while( (c= *cp)!=0 && c!='\n' ) cp++; if( c ) lineno++; }else if( c=='\'' || c=='\"' ){ /* String a character literals */ int startchar, prevc; startchar = c; prevc = 0; for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ if( c=='\n' ) lineno++; if( prevc=='\\' ) prevc = 0; else prevc = c; } } } if( c==0 ){ ErrorMsg(ps.filename,ps.tokenlineno, "C code starting on this line is not terminated before " "the end of the file."); ps.errorcnt++; nextcp = cp; }else{ nextcp = cp+1; } }else if( ISALNUM(c) ){ /* Identifiers */ while( (c= *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; nextcp = cp; }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ cp += 3; nextcp = cp; }else if( (c=='/' || c=='|') && ISALPHA(cp[1]) ){ cp += 2; while( (c = *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; nextcp = cp; }else{ /* All other (one character) operators */ cp++; nextcp = cp; } c = *cp; *cp = 0; /* Null terminate the token */ parseonetoken(&ps); /* Parse the token */ *cp = (char)c; /* Restore the buffer */ cp = nextcp; } lemon_free(filebuf); /* Release the buffer after parsing */ gp->rule = ps.firstrule; gp->errorcnt = ps.errorcnt; } /*************************** From the file "plink.c" *********************/ /* ** Routines processing configuration follow-set propagation links ** in the LEMON parser generator. */ static struct plink *plink_freelist = 0; /* Allocate a new plink */ struct plink *Plink_new(void){ struct plink *newlink; if( plink_freelist==0 ){ int i; int amt = 100; plink_freelist = (struct plink *)lemon_calloc( amt, sizeof(struct plink) ); if( plink_freelist==0 ){ fprintf(stderr, "Unable to allocate memory for a new follow-set propagation link.\n"); exit(1); } for(i=0; inext; return newlink; } /* Add a plink to a plink list */ void Plink_add(struct plink **plpp, struct config *cfp) { struct plink *newlink; newlink = Plink_new(); newlink->next = *plpp; *plpp = newlink; newlink->cfp = cfp; } /* Transfer every plink on the list "from" to the list "to" */ void Plink_copy(struct plink **to, struct plink *from) { struct plink *nextpl; while( from ){ nextpl = from->next; from->next = *to; *to = from; from = nextpl; } } /* Delete every plink on the list */ void Plink_delete(struct plink *plp) { struct plink *nextpl; while( plp ){ nextpl = plp->next; plp->next = plink_freelist; plink_freelist = plp; plp = nextpl; } } /*********************** From the file "report.c" **************************/ /* ** Procedures for generating reports and tables in the LEMON parser generator. */ /* Generate a filename with the given suffix. */ PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) { char *name; char *cp; char *filename = lemp->filename; int sz; if( outputDir ){ cp = strrchr(filename, '/'); if( cp ) filename = cp + 1; } sz = lemonStrlen(filename); sz += lemonStrlen(suffix); if( outputDir ) sz += lemonStrlen(outputDir) + 1; sz += 5; name = (char*)lemon_malloc( sz ); if( name==0 ){ fprintf(stderr,"Can't allocate space for a filename.\n"); exit(1); } name[0] = 0; if( outputDir ){ lemon_strcpy(name, outputDir); lemon_strcat(name, "/"); } lemon_strcat(name,filename); cp = strrchr(name,'.'); if( cp ) *cp = 0; lemon_strcat(name,suffix); return name; } /* Open a file with a name based on the name of the input file, ** but with a different (specified) suffix, and return a pointer ** to the stream */ PRIVATE FILE *file_open( struct lemon *lemp, const char *suffix, const char *mode ){ FILE *fp; if( lemp->outname ) lemon_free(lemp->outname); lemp->outname = file_makename(lemp, suffix); fp = fopen(lemp->outname,mode); if( fp==0 && *mode=='w' ){ fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); lemp->errorcnt++; return 0; } return fp; } /* Print the text of a rule */ void rule_print(FILE *out, struct rule *rp){ int i, j; fprintf(out, "%s",rp->lhs->name); /* if( rp->lhsalias ) fprintf(out,"(%s)",rp->lhsalias); */ fprintf(out," ::="); for(i=0; inrhs; i++){ struct symbol *sp = rp->rhs[i]; if( sp->type==MULTITERMINAL ){ fprintf(out," %s", sp->subsym[0]->name); for(j=1; jnsubsym; j++){ fprintf(out,"|%s", sp->subsym[j]->name); } }else{ fprintf(out," %s", sp->name); } /* if( rp->rhsalias[i] ) fprintf(out,"(%s)",rp->rhsalias[i]); */ } } /* Duplicate the input file without comments and without actions ** on rules */ void Reprint(struct lemon *lemp) { struct rule *rp; struct symbol *sp; int i, j, maxlen, len, ncolumns, skip; printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); maxlen = 10; for(i=0; insymbol; i++){ sp = lemp->symbols[i]; len = lemonStrlen(sp->name); if( len>maxlen ) maxlen = len; } ncolumns = 76/(maxlen+5); if( ncolumns<1 ) ncolumns = 1; skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; for(i=0; insymbol; j+=skip){ sp = lemp->symbols[j]; assert( sp->index==j ); printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); } printf("\n"); } for(rp=lemp->rule; rp; rp=rp->next){ rule_print(stdout, rp); printf("."); if( rp->precsym ) printf(" [%s]",rp->precsym->name); /* if( rp->code ) printf("\n %s",rp->code); */ printf("\n"); } } /* Print a single rule. */ void RulePrint(FILE *fp, struct rule *rp, int iCursor){ struct symbol *sp; int i, j; fprintf(fp,"%s ::=",rp->lhs->name); for(i=0; i<=rp->nrhs; i++){ if( i==iCursor ) fprintf(fp," *"); if( i==rp->nrhs ) break; sp = rp->rhs[i]; if( sp->type==MULTITERMINAL ){ fprintf(fp," %s", sp->subsym[0]->name); for(j=1; jnsubsym; j++){ fprintf(fp,"|%s",sp->subsym[j]->name); } }else{ fprintf(fp," %s", sp->name); } } } /* Print the rule for a configuration. */ void ConfigPrint(FILE *fp, struct config *cfp){ RulePrint(fp, cfp->rp, cfp->dot); } /* #define TEST */ #if 0 /* Print a set */ PRIVATE void SetPrint(out,set,lemp) FILE *out; char *set; struct lemon *lemp; { int i; char *spacer; spacer = ""; fprintf(out,"%12s[",""); for(i=0; interminal; i++){ if( SetFind(set,i) ){ fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); spacer = " "; } } fprintf(out,"]\n"); } /* Print a plink chain */ PRIVATE void PlinkPrint(out,plp,tag) FILE *out; struct plink *plp; char *tag; { while( plp ){ fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); ConfigPrint(out,plp->cfp); fprintf(out,"\n"); plp = plp->next; } } #endif /* Print an action to the given file descriptor. Return FALSE if ** nothing was actually printed. */ int PrintAction( struct action *ap, /* The action to print */ FILE *fp, /* Print the action here */ int indent /* Indent by this amount */ ){ int result = 1; switch( ap->type ){ case SHIFT: { struct state *stp = ap->x.stp; fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); break; } case REDUCE: { struct rule *rp = ap->x.rp; fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->iRule); RulePrint(fp, rp, -1); break; } case SHIFTREDUCE: { struct rule *rp = ap->x.rp; fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->iRule); RulePrint(fp, rp, -1); break; } case ACCEPT: fprintf(fp,"%*s accept",indent,ap->sp->name); break; case ERROR: fprintf(fp,"%*s error",indent,ap->sp->name); break; case SRCONFLICT: case RRCONFLICT: fprintf(fp,"%*s reduce %-7d ** Parsing conflict **", indent,ap->sp->name,ap->x.rp->iRule); break; case SSCONFLICT: fprintf(fp,"%*s shift %-7d ** Parsing conflict **", indent,ap->sp->name,ap->x.stp->statenum); break; case SH_RESOLVED: if( showPrecedenceConflict ){ fprintf(fp,"%*s shift %-7d -- dropped by precedence", indent,ap->sp->name,ap->x.stp->statenum); }else{ result = 0; } break; case RD_RESOLVED: if( showPrecedenceConflict ){ fprintf(fp,"%*s reduce %-7d -- dropped by precedence", indent,ap->sp->name,ap->x.rp->iRule); }else{ result = 0; } break; case NOT_USED: result = 0; break; } if( result && ap->spOpt ){ fprintf(fp," /* because %s==%s */", ap->sp->name, ap->spOpt->name); } return result; } /* Generate the "*.out" log file */ void ReportOutput(struct lemon *lemp) { int i, n; struct state *stp; struct config *cfp; struct action *ap; struct rule *rp; FILE *fp; fp = file_open(lemp,".out","wb"); if( fp==0 ) return; for(i=0; inxstate; i++){ stp = lemp->sorted[i]; fprintf(fp,"State %d:\n",stp->statenum); if( lemp->basisflag ) cfp=stp->bp; else cfp=stp->cfp; while( cfp ){ char buf[20]; if( cfp->dot==cfp->rp->nrhs ){ lemon_sprintf(buf,"(%d)",cfp->rp->iRule); fprintf(fp," %5s ",buf); }else{ fprintf(fp," "); } ConfigPrint(fp,cfp); fprintf(fp,"\n"); #if 0 SetPrint(fp,cfp->fws,lemp); PlinkPrint(fp,cfp->fplp,"To "); PlinkPrint(fp,cfp->bplp,"From"); #endif if( lemp->basisflag ) cfp=cfp->bp; else cfp=cfp->next; } fprintf(fp,"\n"); for(ap=stp->ap; ap; ap=ap->next){ if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); } fprintf(fp,"\n"); } fprintf(fp, "----------------------------------------------------\n"); fprintf(fp, "Symbols:\n"); fprintf(fp, "The first-set of non-terminals is shown after the name.\n\n"); for(i=0; insymbol; i++){ int j; struct symbol *sp; sp = lemp->symbols[i]; fprintf(fp, " %3d: %s", i, sp->name); if( sp->type==NONTERMINAL ){ fprintf(fp, ":"); if( sp->lambda ){ fprintf(fp, " "); } for(j=0; jnterminal; j++){ if( sp->firstset && SetFind(sp->firstset, j) ){ fprintf(fp, " %s", lemp->symbols[j]->name); } } } if( sp->prec>=0 ) fprintf(fp," (precedence=%d)", sp->prec); fprintf(fp, "\n"); } fprintf(fp, "----------------------------------------------------\n"); fprintf(fp, "Syntax-only Symbols:\n"); fprintf(fp, "The following symbols never carry semantic content.\n\n"); for(i=n=0; insymbol; i++){ int w; struct symbol *sp = lemp->symbols[i]; if( sp->bContent ) continue; w = (int)strlen(sp->name); if( n>0 && n+w>75 ){ fprintf(fp,"\n"); n = 0; } if( n>0 ){ fprintf(fp, " "); n++; } fprintf(fp, "%s", sp->name); n += w; } if( n>0 ) fprintf(fp, "\n"); fprintf(fp, "----------------------------------------------------\n"); fprintf(fp, "Rules:\n"); for(rp=lemp->rule; rp; rp=rp->next){ fprintf(fp, "%4d: ", rp->iRule); rule_print(fp, rp); fprintf(fp,"."); if( rp->precsym ){ fprintf(fp," [%s precedence=%d]", rp->precsym->name, rp->precsym->prec); } fprintf(fp,"\n"); } fclose(fp); return; } /* Search for the file "name" which is in the same directory as ** the executable */ PRIVATE char *pathsearch(char *argv0, char *name, int modemask) { const char *pathlist; char *pathbufptr = 0; char *pathbuf = 0; char *path,*cp; char c; #ifdef __WIN32__ cp = strrchr(argv0,'\\'); #else cp = strrchr(argv0,'/'); #endif if( cp ){ c = *cp; *cp = 0; path = (char *)lemon_malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); if( path ) lemon_sprintf(path,"%s/%s",argv0,name); *cp = c; }else{ pathlist = getenv("PATH"); if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; pathbuf = (char *) lemon_malloc( lemonStrlen(pathlist) + 1 ); path = (char *)lemon_malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); if( (pathbuf != 0) && (path!=0) ){ pathbufptr = pathbuf; lemon_strcpy(pathbuf, pathlist); while( *pathbuf ){ cp = strchr(pathbuf,':'); if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; c = *cp; *cp = 0; lemon_sprintf(path,"%s/%s",pathbuf,name); *cp = c; if( c==0 ) pathbuf[0] = 0; else pathbuf = &cp[1]; if( access(path,modemask)==0 ) break; } } lemon_free(pathbufptr); } return path; } /* Given an action, compute the integer value for that action ** which is to be put in the action table of the generated machine. ** Return negative if no action should be generated. */ PRIVATE int compute_action(struct lemon *lemp, struct action *ap) { int act; switch( ap->type ){ case SHIFT: act = ap->x.stp->statenum; break; case SHIFTREDUCE: { /* Since a SHIFT is inherient after a prior REDUCE, convert any ** SHIFTREDUCE action with a nonterminal on the LHS into a simple ** REDUCE action: */ if( ap->sp->index>=lemp->nterminal && (lemp->errsym==0 || ap->sp->index!=lemp->errsym->index) ){ act = lemp->minReduce + ap->x.rp->iRule; }else{ act = lemp->minShiftReduce + ap->x.rp->iRule; } break; } case REDUCE: act = lemp->minReduce + ap->x.rp->iRule; break; case ERROR: act = lemp->errAction; break; case ACCEPT: act = lemp->accAction; break; default: act = -1; break; } return act; } #define LINESIZE 1000 /* The next cluster of routines are for reading the template file ** and writing the results to the generated parser */ /* The first function transfers data from "in" to "out" until ** a line is seen which begins with "%%". The line number is ** tracked. ** ** if name!=0, then any word that begin with "Parse" is changed to ** begin with *name instead. */ PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) { int i, iStart; char line[LINESIZE]; while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ (*lineno)++; iStart = 0; if( name ){ for(i=0; line[i]; i++){ if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 && (i==0 || !ISALPHA(line[i-1])) ){ if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); fprintf(out,"%s",name); i += 4; iStart = i+1; } } } fprintf(out,"%s",&line[iStart]); } } /* Skip forward past the header of the template file to the first "%%" */ PRIVATE void tplt_skip_header(FILE *in, int *lineno) { char line[LINESIZE]; while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ (*lineno)++; } } /* The next function finds the template file and opens it, returning ** a pointer to the opened file. */ PRIVATE FILE *tplt_open(struct lemon *lemp) { static char templatename[] = "lempar.rs"; char buf[1000]; FILE *in; char *tpltname; char *toFree = 0; char *cp; /* first, see if user specified a template filename on the command line. */ if (user_templatename != 0) { if( access(user_templatename,004)==-1 ){ fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", user_templatename); lemp->errorcnt++; return 0; } in = fopen(user_templatename,"rb"); if( in==0 ){ fprintf(stderr,"Can't open the template file \"%s\".\n", user_templatename); lemp->errorcnt++; return 0; } return in; } cp = strrchr(lemp->filename,'.'); if( cp ){ lemon_sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); }else{ lemon_sprintf(buf,"%s.lt",lemp->filename); } if( access(buf,004)==0 ){ tpltname = buf; }else if( access(templatename,004)==0 ){ tpltname = templatename; }else{ toFree = tpltname = pathsearch(lemp->argv[0],templatename,0); } if( tpltname==0 ){ fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", templatename); lemp->errorcnt++; return 0; } in = fopen(tpltname,"rb"); if( in==0 ){ fprintf(stderr,"Can't open the template file \"%s\".\n",tpltname); lemp->errorcnt++; } lemon_free(toFree); return in; } /* Print a #line directive line to the output file. */ PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) { fprintf(out,"//line %d \"",lineno); while( *filename ){ if( *filename == '\\' ) putc('\\',out); putc(*filename,out); filename++; } fprintf(out,"\"\n"); } /* Print a string to the file and keep the linenumber up to date */ PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) { if( str==0 ) return; while( *str ){ putc(*str,out); if( *str=='\n' ) (*lineno)++; str++; } if( str[-1]!='\n' ){ putc('\n',out); (*lineno)++; } if (!lemp->nolinenosflag) { //(*lineno)++; tplt_linedir(out,*lineno,lemp->outname); } return; } /* ** Append text to a dynamically allocated string. If zText is 0 then ** reset the string to be empty again. Always return the complete text ** of the string (which is overwritten with each call). ** ** n bytes of zText are stored. If n==0 then all of zText up to the first ** \000 terminator is stored. zText can contain up to two instances of ** %d. The values of p1 and p2 are written into the first and second ** %d. ** ** If n==-1, then the previous character is overwritten. */ PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ static char empty[1] = { 0 }; static char *z = 0; static int alloced = 0; static int used = 0; int c; char zInt[40]; if( zText==0 ){ if( used==0 && z!=0 ) z[0] = 0; used = 0; return z; } if( n<=0 ){ if( n<0 ){ used += n; assert( used>=0 ); } n = lemonStrlen(zText); } if( (int) (n+sizeof(zInt)*2+used) >= alloced ){ alloced = n + sizeof(zInt)*2 + used + 200; z = (char *) lemon_realloc(z, alloced); } if( z==0 ) return empty; while( n-- > 0 ){ c = *(zText++); if( c=='%' && n>0 && zText[0]=='d' ){ lemon_sprintf(zInt, "%d", p1); p1 = p2; lemon_strcpy(&z[used], zInt); used += lemonStrlen(&z[used]); zText++; n--; }else{ z[used++] = (char)c; } } z[used] = 0; return z; } /* ** Write and transform the rp->code string so that symbols are expanded. ** Populate the rp->codePrefix and rp->codeSuffix strings, as appropriate. ** ** Return 1 if the expanded code requires that "yylhsminor" local variable ** to be defined. */ PRIVATE int translate_code(struct lemon *lemp, struct rule *rp){ char *cp, *xp; int i; int rc = 0; /* True if yylhsminor is used */ int dontUseRhs0 = 0; /* If true, use of left-most RHS label is illegal */ const char *zSkip = 0; /* The zOvwrt comment within rp->code, or NULL */ char lhsused = 0; /* True if the LHS element has been used */ char lhsdirect; /* True if LHS writes directly into stack */ char used[MAXRHS]; /* True for each RHS element which is used */ char zLhs[50]; /* Convert the LHS symbol into this string */ char zOvwrt[900]; /* Comment that to allow LHS to overwrite RHS */ char lhsaccess = 0; char lhswrite = 0; for(i=0; inrhs; i++) used[i] = 0; lhsused = 0; if( rp->code==0 ){ static char newlinestr[2] = { '\n', '\0' }; rp->code = newlinestr; rp->line = rp->ruleline; rp->noCode = 1; }else{ rp->noCode = 0; } if( rp->nrhs==0 ){ /* If there are no RHS symbols, then writing directly to the LHS is ok */ lhsdirect = 1; }else if( rp->rhsalias[0]==0 ){ /* The left-most RHS symbol has no value. LHS direct is ok. But ** we have to call the destructor on the RHS symbol first. */ lhsdirect = 1; }else if( rp->lhsalias==0 ){ /* There is no LHS value symbol. */ lhsdirect = 1; }else if( strcmp(rp->lhsalias,rp->rhsalias[0])==0 ){ /* The LHS symbol and the left-most RHS symbol are the same, so ** direct writing is allowed */ lhsdirect = 1; lhsused = 1; used[0] = 1; if( rp->lhs->dtnum!=rp->rhs[0]->dtnum ){ ErrorMsg(lemp->filename,rp->ruleline, "%s(%s) and %s(%s) share the same label but have " "different datatypes.", rp->lhs->name, rp->lhsalias, rp->rhs[0]->name, rp->rhsalias[0]); lemp->errorcnt++; } }else{ lemon_sprintf(zOvwrt, "/*%s-overwrites-%s*/", rp->lhsalias, rp->rhsalias[0]); zSkip = strstr(rp->code, zOvwrt); if( zSkip!=0 ){ /* The code contains a special comment that indicates that it is safe ** for the LHS label to overwrite left-most RHS label. */ lhsdirect = 1; }else{ lhsdirect = 0; } } if( lhsdirect ){ sprintf(zLhs, "self[%d]",1-rp->nrhs); }else{ rc = 1; sprintf(zLhs, "yylhsminor"); } append_str(0,0,0,0); /* This const cast is wrong but harmless, if we're careful. */ for(cp=(char *)rp->code; *cp; cp++){ if( cp==zSkip ){ append_str(zOvwrt,0,0,0); cp += lemonStrlen(zOvwrt)-1; dontUseRhs0 = 1; continue; } if( ISALPHA(*cp) && (cp==rp->code || (!ISALNUM(cp[-1]) && cp[-1]!='_')) ){ char saved; for(xp= &cp[1]; ISALNUM(*xp) || *xp=='_'; xp++); saved = *xp; *xp = 0; if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ append_str(zLhs,0,0,0); cp = xp; lhsused = 1; lhsaccess = 1; }else{ for(i=0; inrhs; i++){ if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ if( i==0 && dontUseRhs0 ){ ErrorMsg(lemp->filename,rp->ruleline, "Label %s used after '%s'.", rp->rhsalias[0], zOvwrt); lemp->errorcnt++; }else if( cp!=rp->code && cp[-1]=='@' ){ /* If the argument is of the form @X then substituted ** the token number of X, not the value of X */ append_str("self[%d].major",-1,i-rp->nrhs+1,0); }else{ struct symbol *sp = rp->rhs[i]; int dtnum; if( sp->type==MULTITERMINAL ){ dtnum = sp->subsym[0]->dtnum; }else{ dtnum = sp->dtnum; } append_str("self.yy_move(%d).yy%d()",0,i-rp->nrhs+1, dtnum); } cp = xp; used[i] = 1; break; } } } *xp = saved; } if( lhsaccess ){ if( cp[0] == '=' ){ }else if ( !ISSPACE(*cp) ){ append_str(".yy%d_ref()",0,rp->lhs->dtnum,0); lhsaccess = 0; } } if( lhswrite ){ if( cp[0] == ';' ){ append_str(")",0,0,0); lhswrite = 0; } } if( lhsaccess ){ if( cp[0] == '=' ){ lhswrite = cp[1] != '='; if( lhswrite && lhsdirect ) { append_str(".minor",0,0,0); } } } append_str(cp, 1, 0, 0); if( lhsaccess ){ if( cp[0] == '=' ){ lhsaccess = 0; if( lhswrite ) { append_str(" YYMINORTYPE::yy%d(",0,rp->lhs->dtnum,0); } } } } /* End loop */ /* Main code generation completed */ cp = append_str(0,0,0,0); if( cp && cp[0] ) rp->code = Strsafe(cp); append_str(0,0,0,0); /* Check to make sure the LHS has been used */ if( rp->lhsalias && !lhsused ){ ErrorMsg(lemp->filename,rp->ruleline, "Label \"%s\" for \"%s(%s)\" is never used.", rp->lhsalias,rp->lhs->name,rp->lhsalias); lemp->errorcnt++; } /* ** Generate error messages for unused labels and duplicate labels. */ for(i=0; inrhs; i++){ if( rp->rhsalias[i] ){ if( i>0 ){ int j; if( rp->lhsalias && strcmp(rp->lhsalias,rp->rhsalias[i])==0 ){ ErrorMsg(lemp->filename,rp->ruleline, "%s(%s) has the same label as the LHS but is not the left-most " "symbol on the RHS.", rp->rhs[i]->name, rp->rhsalias[i]); lemp->errorcnt++; } for(j=0; jrhsalias[j] && strcmp(rp->rhsalias[j],rp->rhsalias[i])==0 ){ ErrorMsg(lemp->filename,rp->ruleline, "Label %s used for multiple symbols on the RHS of a rule.", rp->rhsalias[i]); lemp->errorcnt++; break; } } } if( !used[i] ){ ErrorMsg(lemp->filename,rp->ruleline, "Label %s for \"%s(%s)\" is never used.", rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); lemp->errorcnt++; } } } /* If unable to write LHS values directly into the stack, write the ** saved LHS value now. */ if( lhsdirect==0 ){ append_str(" self[%d].minor = ", 0, 1-rp->nrhs, 0); append_str(zLhs, 0, 0, 0); append_str(";\n", 0, 0, 0); } /* Suffix code generation complete */ cp = append_str(0,0,0,0); if( cp && cp[0] ){ rp->codeSuffix = Strsafe(cp); rp->noCode = 0; } return rc; } /* ** Generate code which executes when the rule "rp" is reduced. Write ** the code to "out". Make sure lineno stays up-to-date. */ PRIVATE void emit_code( FILE *out, struct rule *rp, struct lemon *lemp, int *lineno ){ const char *cp; /* Setup code prior to the #line directive */ if( rp->codePrefix && rp->codePrefix[0] ){ fprintf(out, "{%s", rp->codePrefix); for(cp=rp->codePrefix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } } /* Generate code to do the reduce action */ if( rp->code ){ if( !lemp->nolinenosflag ){ (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); } fprintf(out,"{%s",rp->code); for(cp=rp->code; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } fprintf(out,"}\n"); (*lineno)++; /*if( !lemp->nolinenosflag ){ (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); }*/ } /* Generate breakdown code that occurs after the #line directive */ if( rp->codeSuffix && rp->codeSuffix[0] ){ fprintf(out, "%s", rp->codeSuffix); for(cp=rp->codeSuffix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } } if( rp->codePrefix ){ fprintf(out, "}\n"); (*lineno)++; } return; } /* ** Print the definition of the union used for the parser's data stack. ** This union contains fields for every possible data type for tokens ** and nonterminals. In the process of computing and printing this ** union, also set the ".dtnum" field of every terminal and nonterminal ** symbol. */ void print_stack_union( FILE *out, /* The output stream */ struct lemon *lemp, /* The main info structure for this parser */ int *plineno /* Pointer to the line number */ ){ int lineno; /* The line number of the output */ char **types; /* A hash table of datatypes */ int arraysize; /* Size of the "types" array */ int maxdtlength; /* Maximum length of any ".datatype" field. */ char *stddt; /* Standardized name for a datatype */ int i,j; /* Loop counters */ unsigned hash; /* For hashing the name of a type */ const char *name; /* Name of the parser */ /* Allocate and initialize types[] and allocate stddt[] */ arraysize = lemp->nsymbol * 2; types = (char**)lemon_calloc( arraysize, sizeof(char*) ); if( types==0 ){ fprintf(stderr,"Out of memory.\n"); exit(1); } for(i=0; ivartype ){ maxdtlength = lemonStrlen(lemp->vartype); } for(i=0; insymbol; i++){ int len; struct symbol *sp = lemp->symbols[i]; if( sp->datatype==0 ) continue; len = lemonStrlen(sp->datatype); if( len>maxdtlength ) maxdtlength = len; } stddt = (char*)lemon_malloc( maxdtlength*2 + 1 ); if( stddt==0 ){ fprintf(stderr,"Out of memory.\n"); exit(1); } /* Build a hash table of datatypes. The ".dtnum" field of each symbol ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is ** used for terminal symbols. If there is no %default_type defined then ** 0 is also used as the .dtnum value for nonterminals which do not specify ** a datatype using the %type directive. */ for(i=0; insymbol; i++){ struct symbol *sp = lemp->symbols[i]; char *cp; if( sp==lemp->errsym ){ sp->dtnum = arraysize+1; continue; } if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ sp->dtnum = 0; continue; } cp = sp->datatype; if( cp==0 ) cp = lemp->vartype; j = 0; while( ISSPACE(*cp) ) cp++; while( *cp ) stddt[j++] = *cp++; while( j>0 && ISSPACE(stddt[j-1]) ) j--; stddt[j] = 0; if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ sp->dtnum = 0; continue; } hash = 0; for(j=0; stddt[j]; j++){ hash = hash*53 + stddt[j]; } hash = (hash & 0x7fffffff)%arraysize; while( types[hash] ){ if( strcmp(types[hash],stddt)==0 ){ sp->dtnum = hash + 1; break; } hash++; if( hash>=(unsigned)arraysize ) hash = 0; } if( types[hash]==0 ){ sp->dtnum = hash + 1; types[hash] = (char*)lemon_malloc( lemonStrlen(stddt)+1 ); if( types[hash]==0 ){ fprintf(stderr,"Out of memory.\n"); exit(1); } lemon_strcpy(types[hash],stddt); } } /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ name = lemp->name ? lemp->name : "Parse"; lineno = *plineno; fprintf(out,"#[allow(non_camel_case_types)]\n"); lineno++; fprintf(out,"type %sTOKENTYPE = %s;\n",name, lemp->tokentype?lemp->tokentype:"()"); lineno++; fprintf(out,"#[allow(non_camel_case_types)]\n"); lineno++; fprintf(out,"enum YYMINORTYPE {\n"); lineno++; fprintf(out," yyinit(),\n"); lineno++; fprintf(out," yy0(%sTOKENTYPE),\n",name); lineno++; for(i=0; ierrsym && lemp->errsym->useCnt ){ fprintf(out," yy%d(i32),\n",lemp->errsym->dtnum); lineno++; } lemon_free(stddt); fprintf(out,"}\n"); lineno++; fprintf(out,"impl Default for YYMINORTYPE {\n"); lineno++; fprintf(out," fn default() -> YYMINORTYPE {\n"); lineno++; fprintf(out," YYMINORTYPE::yyinit()\n"); lineno++; fprintf(out," }\n"); lineno++; fprintf(out,"}\n"); lineno++; fprintf(out,"impl yyStackEntry {\n"); lineno++; fprintf(out," fn yy0(self) -> %sTOKENTYPE {\n",name); lineno++; fprintf(out," if let YYMINORTYPE::yy0(v) = self.minor {\n"); lineno++; fprintf(out," v\n"); lineno++; fprintf(out," } else {\n"); lineno++; fprintf(out," unreachable!()\n"); lineno++; fprintf(out," }\n"); lineno++; fprintf(out," }\n"); lineno++; for(i=0; i %s {\n",i+1,types[i]); lineno++; fprintf(out," if let YYMINORTYPE::yy%d(v) = self.minor {\n",i+1); lineno++; fprintf(out," v\n"); lineno++; fprintf(out," } else {\n"); lineno++; fprintf(out," unreachable!()\n"); lineno++; // TODO } else if YYMINORTYPE::yyinit() { return None; fprintf(out," }\n"); lineno++; fprintf(out," }\n"); lineno++; fprintf(out," #[allow(dead_code)]\n"); lineno++; // TODO generates only when needed fprintf(out," fn yy%d_ref(&mut self) -> &mut %s {\n",i+1,types[i]); lineno++; fprintf(out," if let YYMINORTYPE::yy%d(ref mut v) = self.minor {\n",i+1); lineno++; fprintf(out," v\n"); lineno++; fprintf(out," } else {\n"); lineno++; fprintf(out," unreachable!()\n"); lineno++; fprintf(out," }\n"); lineno++; fprintf(out," }\n"); lineno++; lemon_free(types[i]); } lemon_free(types); if( lemp->errsym && lemp->errsym->useCnt ){ fprintf(out," fn yy%d(self) -> i32 {\n",lemp->errsym->dtnum); lineno++; fprintf(out," if let YYMINORTYPE::yy%d(v) = self.minor {\n",lemp->errsym->dtnum); lineno++; fprintf(out," v\n"); lineno++; fprintf(out," } else {\n"); lineno++; fprintf(out," unreachable!()\n"); lineno++; fprintf(out," }\n"); lineno++; fprintf(out," }\n"); lineno++; } fprintf(out,"}\n"); lineno++; *plineno = lineno; } /* ** Return the name of a C datatype able to represent values between ** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof ** for that type (1, 2, or 4) into *pnByte. */ static const char *minimum_size_type(int lwr, int upr, int *pnByte){ const char *zType = "i32"; int nByte = 4; if( lwr>=0 ){ if( upr<=255 ){ zType = "u8"; nByte = 1; }else if( upr<65535 ){ zType = "u16"; nByte = 2; }else{ zType = "u32"; nByte = 4; } }else if( lwr>=-127 && upr<=127 ){ zType = "i8"; nByte = 1; }else if( lwr>=-32767 && upr<32767 ){ zType = "i16"; nByte = 2; } if( pnByte ) *pnByte = nByte; return zType; } /* ** Each state contains a set of token transaction and a set of ** nonterminal transactions. Each of these sets makes an instance ** of the following structure. An array of these structures is used ** to order the creation of entries in the yy_action[] table. */ struct axset { struct state *stp; /* A pointer to a state */ int isTkn; /* True to use tokens. False for non-terminals */ int nAction; /* Number of actions */ int iOrder; /* Original order of action sets */ }; /* ** Compare to axset structures for sorting purposes */ static int axset_compare(const void *a, const void *b){ struct axset *p1 = (struct axset*)a; struct axset *p2 = (struct axset*)b; int c; c = p2->nAction - p1->nAction; if( c==0 ){ c = p1->iOrder - p2->iOrder; } assert( c!=0 || p1==p2 ); return c; } /* ** Write text on "out" that describes the rule "rp". */ static void writeRuleText(FILE *out, struct rule *rp){ int j; fprintf(out,"%s ::=", rp->lhs->name); for(j=0; jnrhs; j++){ struct symbol *sp = rp->rhs[j]; if( sp->type!=MULTITERMINAL ){ fprintf(out," %s", sp->name); }else{ int k; fprintf(out," %s", sp->subsym[0]->name); for(k=1; knsubsym; k++){ fprintf(out,"|%s",sp->subsym[k]->name); } } } } /* Generate Rust source code for the parser */ void ReportTable( struct lemon *lemp, int mhflag, /* Output in makeheaders format if true */ int sqlFlag /* Generate the *.sql file too */ ){ FILE *out, *in, *sql; int lineno; struct state *stp; struct action *ap; struct rule *rp; struct acttab *pActtab; int i, j, n, sz; int nLookAhead; int szActionType; /* sizeof(YYACTIONTYPE) */ int szCodeType; /* sizeof(YYCODETYPE) */ int mnTknOfst, mxTknOfst; int mnNtOfst, mxNtOfst; struct axset *ax; char *prefix; lemp->minShiftReduce = lemp->nstate; lemp->errAction = lemp->minShiftReduce + lemp->nrule; lemp->accAction = lemp->errAction + 1; lemp->noAction = lemp->accAction + 1; lemp->minReduce = lemp->noAction + 1; lemp->maxAction = lemp->minReduce + lemp->nrule; in = tplt_open(lemp); if( in==0 ) return; out = file_open(lemp,".rs","wb"); if( out==0 ){ fclose(in); return; } if( sqlFlag==0 ){ sql = 0; }else{ sql = file_open(lemp, ".sql", "wb"); if( sql==0 ){ fclose(in); fclose(out); return; } fprintf(sql, "BEGIN;\n" "CREATE TABLE symbol(\n" " id INTEGER PRIMARY KEY,\n" " name TEXT NOT NULL,\n" " isTerminal BOOLEAN NOT NULL,\n" " fallback INTEGER REFERENCES symbol" " DEFERRABLE INITIALLY DEFERRED\n" ");\n" ); for(i=0; insymbol; i++){ fprintf(sql, "INSERT INTO symbol(id,name,isTerminal,fallback)" "VALUES(%d,'%s',%s", i, lemp->symbols[i]->name, interminal ? "TRUE" : "FALSE" ); if( lemp->symbols[i]->fallback ){ fprintf(sql, ",%d);\n", lemp->symbols[i]->fallback->index); }else{ fprintf(sql, ",NULL);\n"); } } fprintf(sql, "CREATE TABLE rule(\n" " ruleid INTEGER PRIMARY KEY,\n" " lhs INTEGER REFERENCES symbol(id),\n" " txt TEXT\n" ");\n" "CREATE TABLE rulerhs(\n" " ruleid INTEGER REFERENCES rule(ruleid),\n" " pos INTEGER,\n" " sym INTEGER REFERENCES symbol(id)\n" ");\n" ); for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ assert( i==rp->iRule ); fprintf(sql, "INSERT INTO rule(ruleid,lhs,txt)VALUES(%d,%d,'", rp->iRule, rp->lhs->index ); writeRuleText(sql, rp); fprintf(sql,"');\n"); for(j=0; jnrhs; j++){ struct symbol *sp = rp->rhs[j]; if( sp->type!=MULTITERMINAL ){ fprintf(sql, "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n", i,j,sp->index ); }else{ int k; for(k=0; knsubsym; k++){ fprintf(sql, "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n", i,j,sp->subsym[k]->index ); } } } } fprintf(sql, "COMMIT;\n"); } lineno = 1; fprintf(out, "/* This file is automatically generated by Lemon from input grammar\n" "** source file \"%s\"", lemp->filename); lineno++; if( nDefineUsed==0 ){ fprintf(out, ".\n*/\n"); lineno += 2; }else{ fprintf(out, " with these options:\n**\n"); lineno += 2; for(i=0; iinclude==0 ) lemp->include = ""; for(i=0; ISSPACE(lemp->include[i]); i++){ if( lemp->include[i]=='\n' ){ lemp->include += i+1; i = -1; } } if( lemp->include[0]=='/' ){ tplt_skip_header(in,&lineno); }else{ tplt_xfer(lemp->name,in,out,&lineno); } /* Generate the include code, if any */ tplt_print(out,lemp,lemp->include,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Generate #defines for all tokens */ if( lemp->tokenprefix ) prefix = lemp->tokenprefix; else prefix = ""; if( mhflag ){ fprintf(out,"#[allow(non_camel_case_types)]\n"); lineno++; fprintf(out,"#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]\n"); lineno++; fprintf(out,"#[repr(%s)]\n", minimum_size_type(0, lemp->nsymbol+1, 0)); lineno++; fprintf(out,"pub enum TokenType {\n"); lineno++; fprintf(out," %sEOF = 0,\n",prefix); for(i=1; interminal; i++){ fprintf(out," %s%s = %d,\n",prefix,lemp->symbols[i]->name,i); lineno++; } fprintf(out,"}\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate the defines */ fprintf(out,"#[allow(non_camel_case_types)]\n"); lineno++; fprintf(out,"pub type YYCODETYPE = %s; // unsigned\n", minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++; fprintf(out,"const YYNOCODE: YYCODETYPE = %d;\n",lemp->nsymbol); lineno++; fprintf(out,"#[allow(non_camel_case_types)]\n"); lineno++; fprintf(out,"type YYACTIONTYPE = %s; // unsigned\n", minimum_size_type(0,lemp->maxAction,&szActionType)); lineno++; if( lemp->wildcard ){ fprintf(out,"const YYWILDCARD: YYCODETYPE = %d;\n", lemp->wildcard->index); lineno++; } else { fprintf(out,"const YYWILDCARD: YYCODETYPE = 0; // No wildcard\n"); lineno++; } print_stack_union(out,lemp,&lineno); if( lemp->stacksize ){ fprintf(out,"const YYSTACKDEPTH: usize = %s;\n",lemp->stacksize); lineno++; } if( lemp->errsym && lemp->errsym->useCnt ){ fprintf(out,"const YYERRORSYMBOL: YYCODETYPE = %d;\n",lemp->errsym->index); lineno++; //fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; } else { fprintf(out,"const YYERRORSYMBOL: YYCODETYPE = 0; // No error symbol\n"); lineno++; } if( lemp->has_fallback ){ fprintf(out,"const YYFALLBACK: bool = true;\n"); lineno++; } else { fprintf(out,"const YYFALLBACK: bool = false;\n"); lineno++; } /* Compute the action table, but do not output it yet. The action ** table must be computed before generating the YYNSTATE macro because ** we need to know how many states can be eliminated. */ ax = (struct axset *) lemon_calloc(lemp->nxstate*2, sizeof(ax[0])); if( ax==0 ){ fprintf(stderr,"malloc failed\n"); exit(1); } for(i=0; inxstate; i++){ stp = lemp->sorted[i]; ax[i*2].stp = stp; ax[i*2].isTkn = 1; ax[i*2].nAction = stp->nTknAct; ax[i*2+1].stp = stp; ax[i*2+1].isTkn = 0; ax[i*2+1].nAction = stp->nNtAct; } mxTknOfst = mnTknOfst = 0; mxNtOfst = mnNtOfst = 0; /* In an effort to minimize the action table size, use the heuristic ** of placing the largest action sets first */ for(i=0; inxstate*2; i++) ax[i].iOrder = i; qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal); for(i=0; inxstate*2 && ax[i].nAction>0; i++){ stp = ax[i].stp; if( ax[i].isTkn ){ for(ap=stp->ap; ap; ap=ap->next){ int action; if( ap->sp->index>=lemp->nterminal ) continue; action = compute_action(lemp, ap); if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } stp->iTknOfst = acttab_insert(pActtab, 1); if( stp->iTknOfstiTknOfst; if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; }else{ for(ap=stp->ap; ap; ap=ap->next){ int action; if( ap->sp->indexnterminal ) continue; if( ap->sp->index==lemp->nsymbol ) continue; action = compute_action(lemp, ap); if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } stp->iNtOfst = acttab_insert(pActtab, 0); if( stp->iNtOfstiNtOfst; if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; } #if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ { int jj, nn; for(jj=nn=0; jjnAction; jj++){ if( pActtab->aAction[jj].action<0 ) nn++; } printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n", i, stp->statenum, ax[i].isTkn ? "Token" : "Var ", ax[i].nAction, pActtab->nAction, nn); } #endif } lemon_free(ax); /* Mark rules that are actually used for reduce actions after all ** optimizations have been applied */ for(rp=lemp->rule; rp; rp=rp->next) rp->doesReduce = LEMON_FALSE; for(i=0; inxstate; i++){ for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ if( ap->type==REDUCE || ap->type==SHIFTREDUCE ){ ap->x.rp->doesReduce = 1; } } } /* Finish rendering the constants now that the action table has ** been computed */ fprintf(out,"const YYNSTATE: YYACTIONTYPE = %d;\n",lemp->nxstate); lineno++; fprintf(out,"const YYNRULE: usize = %d;\n",lemp->nrule); lineno++; fprintf(out,"const YYNRULE_WITH_ACTION: YYACTIONTYPE = %d;\n",lemp->nruleWithAction); lineno++; fprintf(out,"const YYNTOKEN: YYACTIONTYPE = %d;\n",lemp->nterminal); lineno++; fprintf(out,"const YY_MAX_SHIFT: YYACTIONTYPE = %d;\n",lemp->nxstate-1); lineno++; i = lemp->minShiftReduce; fprintf(out,"const YY_MIN_SHIFTREDUCE: YYACTIONTYPE = %d;\n",i); lineno++; i += lemp->nrule; fprintf(out,"const YY_MAX_SHIFTREDUCE: YYACTIONTYPE = %d;\n", i-1); lineno++; fprintf(out,"const YY_ERROR_ACTION: YYACTIONTYPE = %d;\n", lemp->errAction); lineno++; fprintf(out,"const YY_ACCEPT_ACTION: YYACTIONTYPE = %d;\n", lemp->accAction); lineno++; fprintf(out,"//const YY_NO_ACTION: YYACTIONTYPE = %d;\n", lemp->noAction); lineno++; fprintf(out,"const YY_MIN_REDUCE: YYACTIONTYPE = %d;\n", lemp->minReduce); lineno++; i = lemp->minReduce + lemp->nrule; fprintf(out,"//const YY_MAX_REDUCE: YYACTIONTYPE = %d;\n", i-1); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Now output the action table and its associates: ** ** yy_action[] A single table containing all actions. ** yy_lookahead[] A table containing the lookahead for each entry in ** yy_action. Used to detect hash collisions. ** yy_shift_ofst[] For each state, the offset into yy_action for ** shifting terminals. ** yy_reduce_ofst[] For each state, the offset into yy_action for ** shifting non-terminals after a reduce. ** yy_default[] Default action for each state. */ /* Output the yy_action table */ lemp->nactiontab = n = acttab_action_size(pActtab); lemp->tablesize += n*szActionType; fprintf(out,"macro_rules! YY_ACTTAB_COUNT {() => {%d}}\n", n); lineno++; fprintf(out,"#[rustfmt::skip]\n"); lineno++; fprintf(out,"#[allow(non_upper_case_globals)]\n"); lineno++; fprintf(out,"static yy_action: [YYACTIONTYPE; %d] = [\n", n); lineno++; for(i=j=0; inoAction; if( j==0 ) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", action); if( j==9 || i==n-1 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; } } fprintf(out, "];\n"); lineno++; /* Output the yy_lookahead table */ lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab); lemp->tablesize += n*szCodeType; fprintf(out,"#[rustfmt::skip]\n"); lineno++; fprintf(out,"#[allow(non_upper_case_globals)]\n"); lineno++; nLookAhead = lemp->nterminal + lemp->nactiontab; fprintf(out,"static yy_lookahead: [YYCODETYPE; %d] = [\n", nLookAhead); lineno++; for(i=j=0; insymbol; if( j==0 ) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", la); if( j==9 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; } } /* Add extra entries to the end of the yy_lookahead[] table so that ** yy_shift_ofst[]+iToken will always be a valid index into the array, ** even for the largest possible value of yy_shift_ofst[] and iToken. */ while( interminal); if( j==9 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; } i++; } if( j>0 ){ fprintf(out, "\n"); lineno++; } fprintf(out, "];\n"); lineno++; /* Output the yy_shift_ofst[] table */ n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; fprintf(out, "#[allow(non_camel_case_types)]\n"); lineno++; fprintf(out, "type YY_SHIFT_TYPE = %s;\n", minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz)); lineno++; fprintf(out, "const YY_SHIFT_COUNT: YYACTIONTYPE = %d;\n", n-1); lineno++; fprintf(out, "//const YY_SHIFT_MIN: YY_SHIFT_TYPE = %d;\n", mnTknOfst); lineno++; fprintf(out, "//const YY_SHIFT_MAX: YY_SHIFT_TYPE = %d;\n", mxTknOfst); lineno++; fprintf(out, "#[rustfmt::skip]\n"); lineno++; fprintf(out, "#[allow(non_upper_case_globals)]\n"); lineno++; fprintf(out, "static yy_shift_ofst: [YY_SHIFT_TYPE; %d] = [\n", n); lineno++; lemp->tablesize += n*sz; for(i=j=0; isorted[i]; ofst = stp->iTknOfst; if( ofst==NO_OFFSET ) ofst = lemp->nactiontab; if( j==0 ) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", ofst); if( j==9 || i==n-1 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; } } fprintf(out, "];\n"); lineno++; /* Output the yy_reduce_ofst[] table */ fprintf(out, "#[allow(non_camel_case_types)]\n"); lineno++; fprintf(out, "type YY_REDUCE_TYPE = %s;\n", minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++; n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; fprintf(out, "const YY_REDUCE_COUNT: YYACTIONTYPE = %d;\n", n-1); lineno++; fprintf(out, "//const YY_REDUCE_MIN: YY_REDUCE_TYPE = %d;\n", mnNtOfst); lineno++; fprintf(out, "//const YY_REDUCE_MAX: YY_REDUCE_TYPE = %d;\n", mxNtOfst); lineno++; fprintf(out, "#[rustfmt::skip]\n"); lineno++; fprintf(out, "#[allow(non_upper_case_globals)]\n"); lineno++; fprintf(out, "static yy_reduce_ofst: [YY_REDUCE_TYPE; %d] = [\n", n); lineno++; lemp->tablesize += n*sz; for(i=j=0; isorted[i]; ofst = stp->iNtOfst; if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; if( j==0 ) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", ofst); if( j==9 || i==n-1 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; } } fprintf(out, "];\n"); lineno++; /* Output the default action table */ n = lemp->nxstate; fprintf(out, "#[rustfmt::skip]\n"); lineno++; fprintf(out, "#[allow(non_upper_case_globals)]\n"); lineno++; fprintf(out, "static yy_default: [YYACTIONTYPE; %d] = [\n", n); lineno++; lemp->tablesize += n*szActionType; for(i=j=0; isorted[i]; if( j==0 ) fprintf(out," /* %5d */ ", i); if( stp->iDfltReduce<0 ){ fprintf(out, " %4d,", lemp->errAction); }else{ fprintf(out, " %4d,", stp->iDfltReduce + lemp->minReduce); } if( j==9 || i==n-1 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; } } fprintf(out, "];\n"); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Generate the table of fallback tokens. */ fprintf(out, "#[allow(non_upper_case_globals)]\n"); lineno++; if( lemp->has_fallback ){ int mx = lemp->nterminal - 1; /* 2019-08-28: Generate fallback entries for every token to avoid ** having to do a range check on the index */ /* while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } */ fprintf(out, "static yyFallback: [YYCODETYPE; %d] = [\n", mx+1); lineno++; lemp->tablesize += (mx+1)*szCodeType; for(i=0; i<=mx; i++){ struct symbol *p = lemp->symbols[i]; if( p->fallback==0 ){ fprintf(out, " 0, /* %10s => nothing */\n", p->name); }else{ fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, p->name, p->fallback->name); } lineno++; } fprintf(out, "];\n"); lineno++; } else { fprintf(out, "static yyFallback: [YYCODETYPE; 0] = [];\n"); lineno++; } tplt_xfer(lemp->name, in, out, &lineno); /* Generate %extra_context field declaration */ if( lemp->ctx && lemp->ctx[0] ){ fprintf(out," pub %s<'input>,\n",lemp->ctx); lineno++; } tplt_xfer(lemp->name, in, out, &lineno); /* Generate a table containing the symbolic name of every symbol */ fprintf(out, "#[rustfmt::skip]\n"); lineno++; fprintf(out, "#[allow(non_upper_case_globals)]\n"); lineno++; fprintf(out, "static yyTokenName: [&str; %d] = [\n", lemp->nsymbol); lineno++; for(i=0; insymbol; i++){ fprintf(out," /* %4d */ \"%s\",\n",i, lemp->symbols[i]->name); lineno++; } if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } fprintf(out, "];\n"); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Generate a table containing a text string that describes every ** rule in the rule set of the grammar. This information is used ** when tracing REDUCE actions. */ for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ assert( rp->iRule==i ); fprintf(out," /* %3d */ \"", i); writeRuleText(out, rp); fprintf(out,"\",\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate %extra_context parameter declaration */ if( lemp->ctx && lemp->ctx[0] ){ fprintf(out," %s,\n",lemp->ctx); lineno++; } tplt_xfer(lemp->name, in, out, &lineno); /* Generate %extra_context field initialization */ if( lemp->ctx && lemp->ctx[0] ){ char *name = strtok(lemp->ctx,":"); if (name) { fprintf(out," %s,\n",name); lineno++; } } tplt_xfer(lemp->name, in, out, &lineno); /* Generate code which executes whenever the parser stack overflows */ tplt_print(out,lemp,lemp->overflow,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Generate the tables of rule information. yyRuleInfoLhs[] and ** yyRuleInfoNRhs[]. ** ** Note: This code depends on the fact that rules are number ** sequentially beginning with 0. */ for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ fprintf(out," %4d, /* (%d)", rp->lhs->index, i); rule_print(out, rp); fprintf(out," */\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ fprintf(out," %3d, /* (%d)", -rp->nrhs, i); rule_print(out, rp); fprintf(out," */\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which execution during each REDUCE action */ i = 0; for(rp=lemp->rule; rp; rp=rp->next){ i += translate_code(lemp, rp); } if( i ){ //fprintf(out," YYMINORTYPE yylhsminor;\n"); lineno++; } /* First output rules other than the default: rule */ for(rp=lemp->rule; rp; rp=rp->next){ struct rule *rp2; /* Other rules with the same action */ if( rp->codeEmitted ) continue; if( rp->noCode ){ /* No C code actions, so this will be part of the "default:" rule */ continue; } fprintf(out," %d /* ", rp->iRule); writeRuleText(out, rp); fprintf(out, " */\n"); lineno++; for(rp2=rp->next; rp2; rp2=rp2->next){ if( rp2->code==rp->code && rp2->codePrefix==rp->codePrefix && rp2->codeSuffix==rp->codeSuffix ){ fprintf(out," | %d /* ", rp2->iRule); writeRuleText(out, rp2); fprintf(out," */\n"); lineno++; rp2->codeEmitted = 1; } } fprintf(out," => {\n"); lineno++; emit_code(out,rp,lemp,&lineno); fprintf(out," }\n"); lineno++; rp->codeEmitted = 1; } /* Finally, output the default: rule. We choose as the default: all ** empty actions. */ fprintf(out," _ => {\n"); lineno++; for(rp=lemp->rule; rp; rp=rp->next){ if( rp->codeEmitted ) continue; assert( rp->noCode ); fprintf(out," /* (%d) ", rp->iRule); writeRuleText(out, rp); if( rp->neverReduce ){ fprintf(out, " (NEVER REDUCES) * debug_assert_ne!(yyruleno, %d); FIXME */\n", rp->iRule); lineno++; }else if( rp->doesReduce ){ fprintf(out, " * debug_assert_eq!(yyruleno, %d); */\n", rp->iRule); lineno++; }else{ fprintf(out, " (OPTIMIZED OUT) */ debug_assert_ne!(yyruleno, %d);\n", rp->iRule); lineno++; } } fprintf(out," }\n"); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes if a parse fails */ tplt_print(out,lemp,lemp->failure,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes when a syntax error occurs */ tplt_print(out,lemp,lemp->error,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes when the parser accepts its input */ tplt_print(out,lemp,lemp->accept,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Append any addition code the user desires */ tplt_print(out,lemp,lemp->extracode,&lineno); acttab_free(pActtab); fclose(in); fclose(out); if( sql ) fclose(sql); return; } /* Generate a header file for the parser */ void ReportHeader(struct lemon *lemp) { FILE *out; const char *prefix; int i; if( lemp->tokenprefix ) prefix = lemp->tokenprefix; else prefix = ""; out = file_open(lemp,".h","wb"); if( out ){ fprintf(out,"#[non_exhaustive]\n"); fprintf(out,"#[allow(non_camel_case_types)]\n"); fprintf(out,"#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]\n"); fprintf(out,"#[repr(%s)]\n", minimum_size_type(0, lemp->nsymbol+1, 0)); fprintf(out,"pub enum TokenType {\n"); fprintf(out," %sEOF = 0,\n",prefix); for(i=1; interminal; i++){ fprintf(out," %s%s = %d,\n",prefix,lemp->symbols[i]->name,i); } fprintf(out,"}\n"); fclose(out); } return; } /* Reduce the size of the action tables, if possible, by making use ** of defaults. ** ** In this version, we take the most frequent REDUCE action and make ** it the default. Except, there is no default if the wildcard token ** is a possible look-ahead. */ void CompressTables(struct lemon *lemp) { struct state *stp; struct action *ap, *ap2, *nextap; struct rule *rp, *rp2, *rbest; int nbest, n; int i; int usesWildcard; for(i=0; instate; i++){ stp = lemp->sorted[i]; nbest = 0; rbest = 0; usesWildcard = 0; for(ap=stp->ap; ap; ap=ap->next){ if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ usesWildcard = 1; } if( ap->type!=REDUCE ) continue; rp = ap->x.rp; if( rp->lhsStart ) continue; if( rp==rbest ) continue; n = 1; for(ap2=ap->next; ap2; ap2=ap2->next){ if( ap2->type!=REDUCE ) continue; rp2 = ap2->x.rp; if( rp2==rbest ) continue; if( rp2==rp ) n++; } if( n>nbest ){ nbest = n; rbest = rp; } } /* Do not make a default if the number of rules to default ** is not at least 1 or if the wildcard token is a possible ** lookahead. */ if( nbest<1 || usesWildcard ) continue; /* Combine matching REDUCE actions into a single default */ for(ap=stp->ap; ap; ap=ap->next){ if( ap->type==REDUCE && ap->x.rp==rbest ) break; } assert( ap ); ap->sp = Symbol_new("{default}"); for(ap=ap->next; ap; ap=ap->next){ if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; } stp->ap = Action_sort(stp->ap); for(ap=stp->ap; ap; ap=ap->next){ if( ap->type==SHIFT ) break; if( ap->type==REDUCE && ap->x.rp!=rbest ) break; } if( ap==0 ){ stp->autoReduce = 1; stp->pDfltReduce = rbest; } } /* Make a second pass over all states and actions. Convert ** every action that is a SHIFT to an autoReduce state into ** a SHIFTREDUCE action. */ for(i=0; instate; i++){ stp = lemp->sorted[i]; for(ap=stp->ap; ap; ap=ap->next){ struct state *pNextState; if( ap->type!=SHIFT ) continue; pNextState = ap->x.stp; if( pNextState->autoReduce && pNextState->pDfltReduce!=0 ){ ap->type = SHIFTREDUCE; ap->x.rp = pNextState->pDfltReduce; } } } /* If a SHIFTREDUCE action specifies a rule that has a single RHS term ** (meaning that the SHIFTREDUCE will land back in the state where it ** started) and if there is no C-code associated with the reduce action, ** then we can go ahead and convert the action to be the same as the ** action for the RHS of the rule. */ for(i=0; instate; i++){ stp = lemp->sorted[i]; for(ap=stp->ap; ap; ap=nextap){ nextap = ap->next; if( ap->type!=SHIFTREDUCE ) continue; rp = ap->x.rp; if( rp->noCode==0 ) continue; if( rp->nrhs!=1 ) continue; #if 1 /* Only apply this optimization to non-terminals. It would be OK to ** apply it to terminal symbols too, but that makes the parser tables ** larger. */ if( ap->sp->indexnterminal ) continue; #endif /* If we reach this point, it means the optimization can be applied */ nextap = ap; for(ap2=stp->ap; ap2 && (ap2==ap || ap2->sp!=rp->lhs); ap2=ap2->next){} assert( ap2!=0 ); ap->spOpt = ap2->sp; ap->type = ap2->type; ap->x = ap2->x; } } } /* ** Compare two states for sorting purposes. The smaller state is the ** one with the most non-terminal actions. If they have the same number ** of non-terminal actions, then the smaller is the one with the most ** token actions. */ static int stateResortCompare(const void *a, const void *b){ const struct state *pA = *(const struct state**)a; const struct state *pB = *(const struct state**)b; int n; n = pB->nNtAct - pA->nNtAct; if( n==0 ){ n = pB->nTknAct - pA->nTknAct; if( n==0 ){ n = pB->statenum - pA->statenum; } } assert( n!=0 ); return n; } /* ** Renumber and resort states so that states with fewer choices ** occur at the end. Except, keep state 0 as the first state. */ void ResortStates(struct lemon *lemp) { int i; struct state *stp; struct action *ap; for(i=0; instate; i++){ stp = lemp->sorted[i]; stp->nTknAct = stp->nNtAct = 0; stp->iDfltReduce = -1; /* Init dflt action to "syntax error" */ stp->iTknOfst = NO_OFFSET; stp->iNtOfst = NO_OFFSET; for(ap=stp->ap; ap; ap=ap->next){ int iAction = compute_action(lemp,ap); if( iAction>=0 ){ if( ap->sp->indexnterminal ){ stp->nTknAct++; }else if( ap->sp->indexnsymbol ){ stp->nNtAct++; }else{ assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp ); stp->iDfltReduce = iAction; } } } } qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), stateResortCompare); for(i=0; instate; i++){ lemp->sorted[i]->statenum = i; } lemp->nxstate = lemp->nstate; while( lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce ){ lemp->nxstate--; } } /***************** From the file "set.c" ************************************/ /* ** Set manipulation routines for the LEMON parser generator. */ static int size = 0; /* Set the set size */ void SetSize(int n) { size = n+1; } /* Allocate a new set */ char *SetNew(void){ char *s; s = (char*)lemon_calloc( size, 1); if( s==0 ){ memory_error(); } return s; } /* Deallocate a set */ void SetFree(char *s) { lemon_free(s); } /* Add a new element to the set. Return TRUE if the element was added ** and FALSE if it was already there. */ int SetAdd(char *s, int e) { int rv; assert( e>=0 && esize = 1024; x1a->count = 0; x1a->tbl = (x1node*)lemon_calloc(1024, sizeof(x1node) + sizeof(x1node*)); if( x1a->tbl==0 ){ lemon_free(x1a); x1a = 0; }else{ int i; x1a->ht = (x1node**)&(x1a->tbl[1024]); for(i=0; i<1024; i++) x1a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int Strsafe_insert(const char *data) { x1node *np; unsigned h; unsigned ph; if( x1a==0 ) return 0; ph = strhash(data); h = ph & (x1a->size-1); np = x1a->ht[h]; while( np ){ if( strcmp(np->data,data)==0 ){ /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if( x1a->count>=x1a->size ){ /* Need to make the hash table bigger */ int i,arrSize; struct s_x1 array; array.size = arrSize = x1a->size*2; array.count = x1a->count; array.tbl = (x1node*)lemon_calloc(arrSize, sizeof(x1node)+sizeof(x1node*)); if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ array.ht = (x1node**)&(array.tbl[arrSize]); for(i=0; icount; i++){ x1node *oldnp, *newnp; oldnp = &(x1a->tbl[i]); h = strhash(oldnp->data) & (arrSize-1); newnp = &(array.tbl[i]); if( array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } /* lemon_free(x1a->tbl); // This program was originally for 16-bit machines. ** Don't worry about freeing memory on modern platforms. */ *x1a = array; } /* Insert the new data */ h = ph & (x1a->size-1); np = &(x1a->tbl[x1a->count++]); np->data = data; if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); np->next = x1a->ht[h]; x1a->ht[h] = np; np->from = &(x1a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ const char *Strsafe_find(const char *key) { unsigned h; x1node *np; if( x1a==0 ) return 0; h = strhash(key) & (x1a->size-1); np = x1a->ht[h]; while( np ){ if( strcmp(np->data,key)==0 ) break; np = np->next; } return np ? np->data : 0; } /* Return a pointer to the (terminal or nonterminal) symbol "x". ** Create a new symbol if this is the first time "x" has been seen. */ struct symbol *Symbol_new(const char *x) { struct symbol *sp; sp = Symbol_find(x); if( sp==0 ){ sp = (struct symbol *)lemon_calloc(1, sizeof(struct symbol) ); MemoryCheck(sp); sp->name = Strsafe(x); sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL; sp->rule = 0; sp->fallback = 0; sp->prec = -1; sp->assoc = UNK; sp->firstset = 0; sp->lambda = LEMON_FALSE; sp->datatype = 0; sp->useCnt = 0; Symbol_insert(sp,sp->name); } sp->useCnt++; return sp; } /* Compare two symbols for sorting purposes. Return negative, ** zero, or positive if a is less then, equal to, or greater ** than b. ** ** Symbols that begin with upper case letters (terminals or tokens) ** must sort before symbols that begin with lower case letters ** (non-terminals). And MULTITERMINAL symbols (created using the ** %token_class directive) must sort at the very end. Other than ** that, the order does not matter. ** ** We find experimentally that leaving the symbols in their original ** order (the order they appeared in the grammar file) gives the ** smallest parser tables in SQLite. */ int Symbolcmpp(const void *_a, const void *_b) { const struct symbol *a = *(const struct symbol **) _a; const struct symbol *b = *(const struct symbol **) _b; int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; return i1==i2 ? a->index - b->index : i1 - i2; } /* There is one instance of the following structure for each ** associative array of type "x2". */ struct s_x2 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x2node *tbl; /* The data stored here */ struct s_x2node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x2". */ typedef struct s_x2node { struct symbol *data; /* The data */ const char *key; /* The key */ struct s_x2node *next; /* Next entry with the same hash */ struct s_x2node **from; /* Previous link */ } x2node; /* There is only one instance of the array, which is the following */ static struct s_x2 *x2a; /* Allocate a new associative array */ void Symbol_init(void){ if( x2a ) return; x2a = (struct s_x2*)lemon_malloc( sizeof(struct s_x2) ); if( x2a ){ x2a->size = 128; x2a->count = 0; x2a->tbl = (x2node*)lemon_calloc(128, sizeof(x2node) + sizeof(x2node*)); if( x2a->tbl==0 ){ lemon_free(x2a); x2a = 0; }else{ int i; x2a->ht = (x2node**)&(x2a->tbl[128]); for(i=0; i<128; i++) x2a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int Symbol_insert(struct symbol *data, const char *key) { x2node *np; unsigned h; unsigned ph; if( x2a==0 ) return 0; ph = strhash(key); h = ph & (x2a->size-1); np = x2a->ht[h]; while( np ){ if( strcmp(np->key,key)==0 ){ /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if( x2a->count>=x2a->size ){ /* Need to make the hash table bigger */ int i,arrSize; struct s_x2 array; array.size = arrSize = x2a->size*2; array.count = x2a->count; array.tbl = (x2node*)lemon_calloc(arrSize, sizeof(x2node)+sizeof(x2node*)); if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ array.ht = (x2node**)&(array.tbl[arrSize]); for(i=0; icount; i++){ x2node *oldnp, *newnp; oldnp = &(x2a->tbl[i]); h = strhash(oldnp->key) & (arrSize-1); newnp = &(array.tbl[i]); if( array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->key = oldnp->key; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } /* lemon_free(x2a->tbl); // This program was originally written for 16-bit ** machines. Don't worry about freeing this trivial amount of memory ** on modern platforms. Just leak it. */ *x2a = array; } /* Insert the new data */ h = ph & (x2a->size-1); np = &(x2a->tbl[x2a->count++]); np->key = key; np->data = data; if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); np->next = x2a->ht[h]; x2a->ht[h] = np; np->from = &(x2a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ struct symbol *Symbol_find(const char *key) { unsigned h; x2node *np; if( x2a==0 ) return 0; h = strhash(key) & (x2a->size-1); np = x2a->ht[h]; while( np ){ if( strcmp(np->key,key)==0 ) break; np = np->next; } return np ? np->data : 0; } /* Return the size of the array */ int Symbol_count() { return x2a ? x2a->count : 0; } /* Return an array of pointers to all data in the table. ** The array is obtained from malloc. Return NULL if memory allocation ** problems, or if the array is empty. */ struct symbol **Symbol_arrayof() { struct symbol **array; int i,arrSize; if( x2a==0 ) return 0; arrSize = x2a->count; array = (struct symbol **)lemon_calloc(arrSize, sizeof(struct symbol *)); if( array ){ for(i=0; itbl[i].data; } return array; } /* Compare two configurations */ int Configcmp(const char *_a,const char *_b) { const struct config *a = (struct config *) _a; const struct config *b = (struct config *) _b; int x; x = a->rp->index - b->rp->index; if( x==0 ) x = a->dot - b->dot; return x; } /* Compare two states */ PRIVATE int statecmp(struct config *a, struct config *b) { int rc; for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ rc = a->rp->index - b->rp->index; if( rc==0 ) rc = a->dot - b->dot; } if( rc==0 ){ if( a ) rc = 1; if( b ) rc = -1; } return rc; } /* Hash a state */ PRIVATE unsigned statehash(struct config *a) { unsigned h=0; while( a ){ h = h*571 + a->rp->index*37 + a->dot; a = a->bp; } return h; } /* Allocate a new state structure */ struct state *State_new() { struct state *newstate; newstate = (struct state *)lemon_calloc(1, sizeof(struct state) ); MemoryCheck(newstate); return newstate; } /* There is one instance of the following structure for each ** associative array of type "x3". */ struct s_x3 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x3node *tbl; /* The data stored here */ struct s_x3node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x3". */ typedef struct s_x3node { struct state *data; /* The data */ struct config *key; /* The key */ struct s_x3node *next; /* Next entry with the same hash */ struct s_x3node **from; /* Previous link */ } x3node; /* There is only one instance of the array, which is the following */ static struct s_x3 *x3a; /* Allocate a new associative array */ void State_init(void){ if( x3a ) return; x3a = (struct s_x3*)lemon_malloc( sizeof(struct s_x3) ); if( x3a ){ x3a->size = 128; x3a->count = 0; x3a->tbl = (x3node*)lemon_calloc(128, sizeof(x3node) + sizeof(x3node*)); if( x3a->tbl==0 ){ lemon_free(x3a); x3a = 0; }else{ int i; x3a->ht = (x3node**)&(x3a->tbl[128]); for(i=0; i<128; i++) x3a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int State_insert(struct state *data, struct config *key) { x3node *np; unsigned h; unsigned ph; if( x3a==0 ) return 0; ph = statehash(key); h = ph & (x3a->size-1); np = x3a->ht[h]; while( np ){ if( statecmp(np->key,key)==0 ){ /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if( x3a->count>=x3a->size ){ /* Need to make the hash table bigger */ int i,arrSize; struct s_x3 array; array.size = arrSize = x3a->size*2; array.count = x3a->count; array.tbl = (x3node*)lemon_calloc(arrSize, sizeof(x3node)+sizeof(x3node*)); if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ array.ht = (x3node**)&(array.tbl[arrSize]); for(i=0; icount; i++){ x3node *oldnp, *newnp; oldnp = &(x3a->tbl[i]); h = statehash(oldnp->key) & (arrSize-1); newnp = &(array.tbl[i]); if( array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->key = oldnp->key; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } lemon_free(x3a->tbl); *x3a = array; } /* Insert the new data */ h = ph & (x3a->size-1); np = &(x3a->tbl[x3a->count++]); np->key = key; np->data = data; if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); np->next = x3a->ht[h]; x3a->ht[h] = np; np->from = &(x3a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ struct state *State_find(struct config *key) { unsigned h; x3node *np; if( x3a==0 ) return 0; h = statehash(key) & (x3a->size-1); np = x3a->ht[h]; while( np ){ if( statecmp(np->key,key)==0 ) break; np = np->next; } return np ? np->data : 0; } /* Return an array of pointers to all data in the table. ** The array is obtained from malloc. Return NULL if memory allocation ** problems, or if the array is empty. */ struct state **State_arrayof(void) { struct state **array; int i,arrSize; if( x3a==0 ) return 0; arrSize = x3a->count; array = (struct state **)lemon_calloc(arrSize, sizeof(struct state *)); if( array ){ for(i=0; itbl[i].data; } return array; } /* Hash a configuration */ PRIVATE unsigned confighash(struct config *a) { unsigned h=0; h = h*571 + a->rp->index*37 + a->dot; return h; } /* There is one instance of the following structure for each ** associative array of type "x4". */ struct s_x4 { int size; /* The number of available slots. */ /* Must be a power of 2 greater than or */ /* equal to 1 */ int count; /* Number of currently slots filled */ struct s_x4node *tbl; /* The data stored here */ struct s_x4node **ht; /* Hash table for lookups */ }; /* There is one instance of this structure for every data element ** in an associative array of type "x4". */ typedef struct s_x4node { struct config *data; /* The data */ struct s_x4node *next; /* Next entry with the same hash */ struct s_x4node **from; /* Previous link */ } x4node; /* There is only one instance of the array, which is the following */ static struct s_x4 *x4a; /* Allocate a new associative array */ void Configtable_init(void){ if( x4a ) return; x4a = (struct s_x4*)lemon_malloc( sizeof(struct s_x4) ); if( x4a ){ x4a->size = 64; x4a->count = 0; x4a->tbl = (x4node*)lemon_calloc(64, sizeof(x4node) + sizeof(x4node*)); if( x4a->tbl==0 ){ lemon_free(x4a); x4a = 0; }else{ int i; x4a->ht = (x4node**)&(x4a->tbl[64]); for(i=0; i<64; i++) x4a->ht[i] = 0; } } } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ int Configtable_insert(struct config *data) { x4node *np; unsigned h; unsigned ph; if( x4a==0 ) return 0; ph = confighash(data); h = ph & (x4a->size-1); np = x4a->ht[h]; while( np ){ if( Configcmp((const char *) np->data,(const char *) data)==0 ){ /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; } np = np->next; } if( x4a->count>=x4a->size ){ /* Need to make the hash table bigger */ int i,arrSize; struct s_x4 array; array.size = arrSize = x4a->size*2; array.count = x4a->count; array.tbl = (x4node*)lemon_calloc(arrSize, sizeof(x4node) + sizeof(x4node*)); if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ array.ht = (x4node**)&(array.tbl[arrSize]); for(i=0; icount; i++){ x4node *oldnp, *newnp; oldnp = &(x4a->tbl[i]); h = confighash(oldnp->data) & (arrSize-1); newnp = &(array.tbl[i]); if( array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; newnp->data = oldnp->data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } *x4a = array; } /* Insert the new data */ h = ph & (x4a->size-1); np = &(x4a->tbl[x4a->count++]); np->data = data; if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); np->next = x4a->ht[h]; x4a->ht[h] = np; np->from = &(x4a->ht[h]); return 1; } /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ struct config *Configtable_find(struct config *key) { int h; x4node *np; if( x4a==0 ) return 0; h = confighash(key) & (x4a->size-1); np = x4a->ht[h]; while( np ){ if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; np = np->next; } return np ? np->data : 0; } /* Remove all data from the table. Pass each data to the function "f" ** as it is removed. ("f" may be null to avoid this step.) */ void Configtable_clear(int(*f)(struct config *)) { int i; if( x4a==0 || x4a->count==0 ) return; if( f ) for(i=0; icount; i++) (*f)(x4a->tbl[i].data); for(i=0; isize; i++) x4a->ht[i] = 0; x4a->count = 0; return; }