/* ###
 * IP: GHIDRA
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// NOTE(review): this text appears to have passed through a filter that removed
// every span that looks like a markup tag ('<' followed by a name, up to the
// next '>').  Template arguments, the value tags on %token/%type, and in a few
// places whole runs of statements are missing.  The code below is kept exactly
// as found; the places where statements (not just a template argument) are
// missing carry their own NOTE(review).  Restore from version control.
%{
#include "grammar.hh"

extern int yylex(void);			// Token source: forwards to the active CParse (defined at end of file)
extern int yyerror(const char *str);	// Error hook required by the generated parser
static CParse *parse;			// Parser object the grammar actions operate on; set in CParse::runParse
extern int yydebug;
%}

// Semantic value carried by tokens and non-terminals
// NOTE(review): the three bare 'vector' members have lost their element types
%union {
  uint4 flags;
  TypeDeclarator *dec;
  vector *declist;
  TypeSpecifiers *spec;
  vector *ptrspec;
  Datatype *type;
  Enumerator *enumer;
  vector *vecenum;
  string *str;
  uintb *i;
}

// Grammar taken from ISO/IEC 9899
// NOTE(review): the %token/%type lines below have lost their value tags
%token DOTDOTDOT BADTOKEN STRUCT UNION ENUM DECLARATION_RESULT PARAM_RESULT
%token NUMBER
%token IDENTIFIER
%token STORAGE_CLASS_SPECIFIER TYPE_QUALIFIER FUNCTION_SPECIFIER
%token TYPE_NAME

%type declaration init_declarator_list parameter_list parameter_type_list
%type struct_declaration_list struct_declaration struct_declarator_list
%type declarator init_declarator direct_declarator parameter_declaration
%type abstract_declarator direct_abstract_declarator struct_declarator
%type declaration_specifiers specifier_qualifier_list
%type type_qualifier_list
%type pointer
%type assignment_expression
%type type_specifier struct_or_union_specifier enum_specifier
%type enumerator
%type enumerator_list
%%

// Start symbol.  The first token is synthetic (CParse::runParse queues either
// DECLARATION_RESULT or PARAM_RESULT) and selects which kind of document is parsed.
document:
    DECLARATION_RESULT declaration { parse->setResultDeclarations($2); }
  | PARAM_RESULT parameter_declaration { vector *res = parse->newVecDeclarator(); res->push_back($2); parse->setResultDeclarations(res); }
;

// A full declaration terminated by ';', with or without declarators
declaration:
    declaration_specifiers ';' { $$ = parse->mergeSpecDecVec($1); }
  | declaration_specifiers init_declarator_list ';' { $$ = parse->mergeSpecDecVec($1,$2); }
;

// Specifiers are right-recursive: the innermost (last) specifier creates the
// TypeSpecifiers object and each earlier one is added to it.
declaration_specifiers:
    STORAGE_CLASS_SPECIFIER { $$ = parse->newSpecifier(); parse->addSpecifier($$,$1); }
  | type_specifier { $$ = parse->newSpecifier(); parse->addTypeSpecifier($$,$1); }
  | TYPE_QUALIFIER { $$ = parse->newSpecifier(); parse->addSpecifier($$,$1); }
  | FUNCTION_SPECIFIER { $$ = parse->newSpecifier(); parse->addFuncSpecifier($$,$1); }
  | STORAGE_CLASS_SPECIFIER declaration_specifiers { $$ = parse->addSpecifier($2,$1); }
  | type_specifier declaration_specifiers { $$ = parse->addTypeSpecifier($2,$1); }
  | TYPE_QUALIFIER declaration_specifiers { $$ = parse->addSpecifier($2,$1); }
  | FUNCTION_SPECIFIER declaration_specifiers { $$ = parse->addFuncSpecifier($2,$1); }
;

init_declarator_list:
    init_declarator { $$ = parse->newVecDeclarator(); $$->push_back($1); }
  | init_declarator_list ',' init_declarator { $$ = $1; $$->push_back($3); }
;

init_declarator:
    declarator { $$ = $1; }
// Production left out of this grammar:  declarator = initializer
;

type_specifier:
    TYPE_NAME { $$ = $1; }
  | struct_or_union_specifier { $$ = $1; }
  | enum_specifier { $$ = $1; }
;

// A body in braces defines a new type; a bare tag refers to an existing one
struct_or_union_specifier:
    STRUCT '{' struct_declaration_list '}' { $$ = parse->newStruct("",$3); }
  | STRUCT IDENTIFIER '{' struct_declaration_list '}' { $$ = parse->newStruct(*$2,$4); }
  | STRUCT IDENTIFIER { $$ = parse->oldStruct(*$2); }
  | UNION '{' struct_declaration_list '}' { $$ = parse->newUnion("",$3); }
  | UNION IDENTIFIER '{' struct_declaration_list '}' { $$ = parse->newUnion(*$2,$4); }
  | UNION IDENTIFIER { $$ = parse->oldUnion(*$2); }
;

// Field declarations are flattened into a single list of declarators
struct_declaration_list:
    struct_declaration { $$ = $1; }
  | struct_declaration_list struct_declaration { $$ = $1; $$->insert($$->end(),$2->begin(),$2->end()); }
;

struct_declaration:
    specifier_qualifier_list struct_declarator_list ';' { $$ = parse->mergeSpecDecVec($1,$2); }
;

specifier_qualifier_list:
    type_specifier { $$ = parse->newSpecifier(); parse->addTypeSpecifier($$,$1); }
  | type_specifier specifier_qualifier_list { $$ = parse->addTypeSpecifier($2,$1); }
  | TYPE_QUALIFIER { $$ = parse->newSpecifier(); parse->addSpecifier($$,$1); }
  | TYPE_QUALIFIER specifier_qualifier_list { $$ = parse->addSpecifier($2,$1); }
;

struct_declarator_list:
    struct_declarator { $$ = parse->newVecDeclarator(); $$->push_back($1); }
  | struct_declarator_list ',' struct_declarator { $$ = $1; $$->push_back($3); }
;

struct_declarator:
    declarator { $$ = $1; }
// Production left out of this grammar (bit-field):  declarator ':' NUMBER
;

// The alternatives ending in ',' '}' accept a trailing comma after the last enumerator
enum_specifier:
    ENUM IDENTIFIER '{' enumerator_list '}' { $$ = parse->newEnum(*$2,$4); }
  | ENUM '{' enumerator_list '}' { $$ = parse->newEnum("",$3); }
  | ENUM IDENTIFIER '{' enumerator_list ',' '}' { $$ = parse->newEnum(*$2,$4); }
  | ENUM '{' enumerator_list ',' '}' { $$ = parse->newEnum("",$3); }
  | ENUM IDENTIFIER { $$ = parse->oldEnum(*$2); }
;

enumerator_list:
    enumerator { $$ = parse->newVecEnumerator(); $$->push_back($1); }
  | enumerator_list ',' enumerator { $$ = $1; $$->push_back($3); }
;

enumerator:
    IDENTIFIER { $$ = parse->newEnumerator(*$1); }
  | IDENTIFIER '=' NUMBER { $$ = parse->newEnumerator(*$1,*$3); }
;

declarator:
    direct_declarator { $$ = $1; }
  | pointer direct_declarator { $$ = parse->mergePointer($1,$2); }
;

direct_declarator:
    IDENTIFIER { $$ = parse->newDeclarator($1); }
  | '(' declarator ')' { $$ = $2; }
  | direct_declarator '[' type_qualifier_list assignment_expression ']' { $$ = parse->newArray($1,$3,$4); }
  | direct_declarator '[' assignment_expression ']' { $$ = parse->newArray($1,0,$3); }
// Production left out of this grammar:  direct_declarator '[' ']'
  | direct_declarator '(' parameter_type_list ')' { $$ = parse->newFunc($1,$3); }
// Production left out of this grammar:  direct_declarator ( identifier_list )
;

// One flags entry is pushed per '*'; an entry of 0 means no qualifiers on that level
pointer:
    '*' { $$ = parse->newPointer(); $$->push_back(0); }
  | '*' type_qualifier_list { $$ = parse->newPointer(); $$->push_back($2); }
  | '*' pointer { $$ = $2; $$->push_back(0); }
  | '*' type_qualifier_list pointer { $$ = $3; $$->push_back($2); }
;

// Qualifier keywords are converted to flag bits and OR'ed together
type_qualifier_list:
    TYPE_QUALIFIER { $$ = parse->convertFlag($1); }
  | type_qualifier_list TYPE_QUALIFIER { $$ = $1; $$ |= parse->convertFlag($2); }
;

// A trailing "..." is recorded as a null entry at the end of the parameter list
parameter_type_list:
    parameter_list { $$ = $1; }
  | parameter_list ',' DOTDOTDOT { $$ = $1; $$->push_back((TypeDeclarator *)0); }
;

parameter_list:
    parameter_declaration { $$ = parse->newVecDeclarator(); $$->push_back($1); }
  | parameter_list ',' parameter_declaration { $$ = $1; $$->push_back($3); }
;

parameter_declaration:
    declaration_specifiers declarator { $$ = parse->mergeSpecDec($1,$2); }
  | declaration_specifiers { $$ = parse->mergeSpecDec($1); }
  | declaration_specifiers abstract_declarator { $$ = parse->mergeSpecDec($1,$2); }
;

// Declarators without an identifier
abstract_declarator:
    pointer { $$ = parse->newDeclarator(); parse->mergePointer($1,$$); }
  | direct_abstract_declarator { $$ = $1; }
  | pointer direct_abstract_declarator { $$ = parse->mergePointer($1,$2); }
;

direct_abstract_declarator:
    '(' abstract_declarator ')' { $$ = $2; }
// Production left out of this grammar:  '[' assignment_expression ']'
  | direct_abstract_declarator '[' assignment_expression ']' { $$ = parse->newArray($1,0,$3); }
// Production left out of this grammar:  '(' parameter_type_list ')'
  | direct_abstract_declarator '(' parameter_type_list ')' { $$ = parse->newFunc($1,$3); }
;

// Only a plain number is accepted where the ISO grammar has an expression
assignment_expression:
    NUMBER { $$ = $1; }
;

%%

// Set the token to a type that carries no value
void GrammarToken::set(uint4 tp)

{
  type = tp;
}

// Set the token type and decode its value from the \b len characters at \b ptr.
// Throws LowlevelError for any type other than integer, identifier, stringval
// or charconstant.
void GrammarToken::set(uint4 tp,char *ptr,int4 len)

{
  type = tp;
  switch(tp) {
  case integer:
    {
      string charstring(ptr,len);
      istringstream s(charstring);
      s.unsetf(ios::dec | ios::hex | ios::oct);	// Clear the base flags so the stream infers the base from the text
      intb val;
      s >> val;
      value.integer = (uintb)val;
    }
    break;
  case identifier:
  case stringval:
    value.stringval = new string(ptr,len);	// Allocated here; deleted by whoever consumes the token (see CParse::lex)
    break;
  case charconstant:
    if (len==1)
      value.integer = (uintb)*ptr;
    else {			// Backslash
      switch(ptr[1]) {		// Character following the backslash selects the escape
      case 'n':
	value.integer = 10;
	break;
      case '0':
	value.integer = 0;
	break;
      case 'a':
	value.integer = 7;
	break;
      case 'b':
	value.integer = 8;
	break;
      case 't':
	value.integer = 9;
	break;
      case 'v':
	value.integer = 11;
	break;
      case 'f':
	value.integer = 12;
	break;
      case 'r':
	value.integer = 13;
	break;
      default:
	value.integer = (uintb)ptr[1];	// Any other escaped character stands for itself
	break;
      }
    }
    break;
  default:
    throw LowlevelError("Bad internal grammar token set");
  }
}

// Construct a token of type 0 with a zero value
GrammarToken::GrammarToken(void)

{
  type = 0;
  value.integer = 0;
}

// Construct a lexer with a character buffer of \b maxbuffer bytes and no input stream
GrammarLexer::GrammarLexer(int4 maxbuffer)

{
  buffersize = maxbuffer;
  buffer = new char[ maxbuffer ];
  bufstart = 0;
  bufend = 0;
  curlineno = 0;
  state = start;
  in = (istream *)0;
  endoffile = true;
}

// Release the character buffer
GrammarLexer::~GrammarLexer(void)

{
  delete [] buffer;
}

void GrammarLexer::bumpLine(void)

{ // Keep track of a newline
  curlineno += 1;
  bufstart = 0;			// The buffer is restarted for the new line
  bufend = 0;
}

// Advance the state machine by one character.
// Returns 0 while a token is still being accumulated, GrammarToken::badtoken
// on an error (after calling setError), or the type of the token just completed.
// Most tokens complete only when the character AFTER them is seen.
uint4 GrammarLexer::moveState(char lookahead)

{ // Change finite state machine based on lookahead
  uint4 res;
  bool newline = false;

  if (lookahead<32) {
    if ((lookahead == 9)||(lookahead==11)||(lookahead==12)||
	(lookahead==13))
      lookahead = ' ';		// Tab, vertical tab, form feed, carriage return are treated as a space
    else if (lookahead == '\n') {
      newline = true;		// Also treated as a space, but remembered so the line count is bumped on exit
      lookahead = ' ';
    }
    else {
      setError("Illegal character");
      return GrammarToken::badtoken;
    }
  }
  else if (lookahead >= 127) {
    setError("Illegal character");
    return GrammarToken::badtoken;
  }

  res = 0;
  bool syntaxerror = false;
  switch(state) {
  case start:
    switch(lookahead) {
    case '/':
      state = slash;
      break;
    case '.':
      state = dot1;
      break;
    case '*':
    case ',':
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case ';':
    case '=':
      state = punctuation;
      bufstart = bufend-1;	// Token starts at the character just placed in the buffer
      break;
    case '-':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      state = number;
      bufstart = bufend-1;
      break;
    case ' ':
      break;			// Ignore since we are already open
    case '\"':
      state = doublequote;
      bufstart = bufend-1;
      break;
    case '\'':
      // NOTE(review): unlike the other token-starting cases, bufstart is not updated here -- confirm intended
      state = singlequote;
      break;
    case 'a':
    case 'b':
    case 'c':
    case 'd':
    case 'e':
    case 'f':
    case 'g':
    case 'h':
    case 'i':
    case 'j':
    case 'k':
    case 'l':
    case 'm':
    case 'n':
    case 'o':
    case 'p':
    case 'q':
    case 'r':
    case 's':
    case 't':
    case 'u':
    case 'v':
    case 'w':
    case 'x':
    case 'y':
    case 'z':
    case 'A':
    case 'B':
    case 'C':
    case 'D':
    case 'E':
    case 'F':
    case 'G':
    case 'H':
    case 'I':
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case 'Q':
    case 'R':
    case 'S':
    case 'T':
    case 'U':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
    case '_':
      state = identifier;
      bufstart = bufend-1;
      break;
    default:
      setError("Illegal character");
      return GrammarToken::badtoken;
    }
    break;
  case slash:			// A '/' must begin a comment
    if (lookahead=='*')
      state = c_comment;
    else if (lookahead == '/')
      state = endofline_comment;
    else
      syntaxerror = true;
    break;
  case dot1:			// A '.' is only legal as part of "..."
    if (lookahead=='.')
      state = dot2;
    else
      syntaxerror = true;
    break;
  case dot2:
    if (lookahead=='.')
      state = dot3;
    else
      syntaxerror = true;
    break;
  case dot3:
    state = start;
    res = GrammarToken::dotdotdot;
    break;
  case punctuation:
    state = start;
    res = (uint4)buffer[bufstart];	// The token type of punctuation is the character itself
    break;
  case endofline_comment:
    if (newline)
      state = start;
    break;			// Anything else is part of comment
  case c_comment:
    if (lookahead == '/') {
      if ((bufend >1)&&(buffer[bufend-2]=='*'))	// Was the previous buffered character a '*'
	state = start;
    }
    break;			// Anything else is part of comment
  case doublequote:
    if (lookahead == '\"')
      state = doublequoteend;
    break;			// Anything else is part of string
  case doublequoteend:
    state = start;
    res = GrammarToken::stringval;
    break;
  case singlequote:
    if (lookahead == '\\')
      state = singlebackslash;
    else if (lookahead == '\'')
      state = singlequoteend;
    break;			// Anything else is part of string
  case singlequoteend:
    state = start;
    res = GrammarToken::charconstant;
    break;
  case singlebackslash:		// Seen backslash in a single quoted string
    state = singlequote;	// The escaped character is accepted, whatever it is
    break;
  case number:
    if (lookahead=='x') {
      if (((bufend-bufstart)!=2)||(buffer[bufstart]!='0'))
	syntaxerror = true;	// x only allowed as 0x hex indicator
    }
    else if ((lookahead>='0')&&(lookahead<='9')) {
    }
    else if ((lookahead>='A')&&(lookahead<='Z')) {
    }
    else if ((lookahead>='a')&&(lookahead<='z')) {
    }
    else if (lookahead == '_') {
    }
    else {			// Any other character ends the number
      state = start;
      res = GrammarToken::integer;
    }
    break;
  case identifier:
    if ((lookahead>='0')&&(lookahead<='9')) {
    }
    else if ((lookahead>='A')&&(lookahead<='Z')) {
    }
    else if ((lookahead>='a')&&(lookahead<='z')) {
    }
    else if (lookahead == '_' || lookahead == ':') {	// ':' is accepted inside an identifier
    }
    else {			// Any other character ends the identifier
      state = start;
      res = GrammarToken::identifier;
    }
    break;
  }
  if (syntaxerror) {
    setError("Syntax error");
    return GrammarToken::badtoken;
  }
  if (newline) bumpLine();
  return res;
}

// Fill in \b token for the token type \b val returned by moveState.
// Types below GrammarToken::integer carry no text; for the others the text is
// buffer[bufstart .. bufend-2], i.e. everything except the lookahead character.
void GrammarLexer::establishToken(GrammarToken &token,uint4 val)

{
  if (val < GrammarToken::integer)
    token.set(val);
  else {
    token.set(val,buffer+bufstart,(bufend-bufstart)-1);
  }
  token.setPosition(filestack.back(),curlineno,bufstart);
}

void GrammarLexer::clear(void)

{ // Clear lexer for a brand new parse
  filenamemap.clear();
  streammap.clear();
  filestack.clear();
  bufstart = 0;
  bufend = 0;
  curlineno = 0;
  state = start;
  in = (istream *)0;
  endoffile = true;
  error.clear();
}

// Append " at line <line> in <filename>" to \b s, the name being looked up by \b filenum
void GrammarLexer::writeLocation(ostream &s,int4 line,int4 filenum)

{
  s << " at line " << dec << line;
  s << " in " << filenamemap[filenum];
}

// NOTE(review): text is missing between "i" and "= buffersize" below.  What
// survives is the start of writeTokenLocation and the tail of a different
// routine (apparently the one that reads the next token: it uses 'token',
// 'tok', 'nextchar' and 'firsttimethru', none of which are declared in the
// surviving text).  Any definitions that sat between the two are gone as
// well; CParse calls lexer.pushFile() and lexer.getNextToken(), neither of
// which has a surviving definition in this file.
void GrammarLexer::writeTokenLocation(ostream &s,int4 line,int4 colno)

{
  if (line!=curlineno) return;	// Does line match current line in buffer
  for(int4 i=0;i= buffersize) {
	setError("Line too long");
	tok = GrammarToken::badtoken;
	break;
      }
      in->get(nextchar);
      if (!(*in)) {		// Stream exhausted (or failed)
	endoffile = true;
	break;
      }
      buffer[bufend++] = nextchar;
    }
    else
      nextchar = buffer[bufend-1]; // Get old lookahead token
    tok = moveState(nextchar);
    firsttimethru = false;
  } while(tok == 0);		// Keep feeding characters until a token (or error) is produced
  if (endoffile) {
    buffer[bufend++] = ' ';	// Simulate a space
    tok = moveState(' ');	// to let the final token resolve
    if ((tok==0)&&(state != start)&&(state != endofline_comment)) {
      setError("Incomplete token");
      tok = GrammarToken::badtoken;
    }
  }
  establishToken(token,tok);
}

// Wrap \b base in a pointer type sized by the default data space
Datatype *PointerModifier::modType(Datatype *base,const TypeDeclarator *decl,Architecture *glb) const

{
  int4 addrsize = glb->getDefaultDataSpace()->getAddrSize();
  Datatype *restype;
  restype = glb->types->getTypePointer(addrsize,base,glb->getDefaultDataSpace()->getWordSize());
  return restype;
}

// Wrap \b base in an array type of \b arraysize elements
Datatype *ArrayModifier::modType(Datatype *base,const TypeDeclarator *decl,Architecture *glb) const

{
  Datatype *restype = glb->types->getTypeArray(arraysize,base);
  return restype;
}

// Copy the parameter declarators from \b p.  A lone parameter with no
// modifiers and a TYPE_VOID base type, i.e. "(void)", is treated as an empty
// parameter list.
FunctionModifier::FunctionModifier(const vector *p,bool dtdtdt)

{
  paramlist = *p;
  if (paramlist.size()==1) {
    TypeDeclarator *decl = paramlist[0];
    if (decl->numModifiers()==0) { // Check for void as an inputtype
      Datatype *ct = decl->getBaseType();
      if ((ct != (Datatype *)0)&&(ct->getMetatype()==TYPE_VOID))
	paramlist.clear();
    }
  }
  dotdotdot = dtdtdt;
}

// Append the data-type of each parameter to \b intypes
// NOTE(review): the loop bound and the start of the loop body are missing
void FunctionModifier::getInTypes(vector &intypes,Architecture *glb) const

{
  for(uint4 i=0;ibuildType(glb);
    intypes.push_back( ct );
  }
}

// Append the identifier of each parameter to \b innames
// NOTE(review): the loop bound and the start of the push_back call are missing
void FunctionModifier::getInNames(vector &innames) const

{
  for(uint4 i=0;igetIdentifier());
}

// Return false if a parameter is invalid or is a bare void
// (a single "(void)" has already been removed by the constructor)
// NOTE(review): the loop bound and the declaration of 'decl' are missing
bool FunctionModifier::isValid(void) const

{
  for(uint4 i=0;iisValid())
      return false;
    if (decl->numModifiers()==0) {
      Datatype *ct = decl->getBaseType();
      if ((ct != (Datatype *)0)&&(ct->getMetatype()==TYPE_VOID))
	return false;		// Extra void type
    }
  }
  return true;
}

// Build the function (code) type returning \b base, using the prototype model of \b decl
Datatype *FunctionModifier::modType(Datatype *base,const TypeDeclarator *decl,Architecture *glb) const

{
  vector intypes;

  // Varargs is encoded as extra null pointer in paramlist
  // NOTE(review): this local hides the member 'dotdotdot' set by the
  // constructor, and CParse::newFunc pops the null marker before constructing
  // this object -- confirm the test below can ever succeed
  bool dotdotdot = false;
  if ((!paramlist.empty())&&(paramlist.back() == (TypeDeclarator *)0)) {
    dotdotdot = true;
  }

  getInTypes(intypes,glb);

  ProtoModel *protomodel = decl->getModel(glb);
  return glb->types->getTypeCode(protomodel,base,intypes,dotdotdot);
}

// NOTE(review): text is missing between "i" and "::const_iterator" below: the
// rest of the destructor and the head of the following routine.  The surviving
// tail walks 'mods' from last to first, feeding each result into the next
// modType call; callers elsewhere in this file invoke it as buildType(glb).
TypeDeclarator::~TypeDeclarator(void)

{
  for(uint4 i=0;i::const_iterator iter;
  iter = mods.end();
  while(iter != mods.begin()) {
    --iter;
    restype = (*iter)->modType(restype,this,glb);
  }
  return restype;
}

// Look up the prototype model named by \b model, falling back to the
// architecture default when no name was given or the lookup returns null
ProtoModel *TypeDeclarator::getModel(Architecture *glb) const

{
  // Get prototype model
  ProtoModel *protomodel = (ProtoModel *)0;
  if (model.size()!=0)
    protomodel = glb->getModel(model);
  if (protomodel == (ProtoModel *)0)
    protomodel = glb->defaultfp;
  return protomodel;
}

// Fill \b pieces from this declarator if its first modifier is a function
// modifier.  Returns false (leaving \b pieces untouched) otherwise.
bool TypeDeclarator::getPrototype(PrototypePieces &pieces,Architecture *glb) const

{
  TypeModifier *mod = (TypeModifier *)0;
  if (mods.size() > 0)
    mod = mods[0];
  if ((mod == (TypeModifier *)0)||(mod->getType()!=TypeModifier::function_mod))
    return false;
  FunctionModifier *fmod = (FunctionModifier *)mod;

  pieces.model = getModel(glb);
  pieces.name = ident;
  pieces.intypes.clear();
  fmod->getInTypes(pieces.intypes,glb);
  pieces.innames.clear();
  fmod->getInNames(pieces.innames);
  pieces.dotdotdot = fmod->isDotdotdot();

  // Construct the output type
  pieces.outtype = basetype;
  vector::const_iterator iter;
  iter = mods.end();
  --iter;			// At least one modification
  while(iter != mods.begin()) { // Do not apply function modifier
    pieces.outtype = (*iter)->modType(pieces.outtype,this,glb);
    --iter;
  }
  return true;
}

// Check the declarator for consistency.
// Returns false if there is no base type (or, in the damaged loop below, when
// some isValid() call fails).  Throws ParseError if more than one storage
// class flag, or more than one of const/restrict/volatile, is set.
bool TypeDeclarator::isValid(void) const

{
  if (basetype == (Datatype *)0)
    return false;		// No basetype

  int4 count=0;
  if ((flags & CParse::f_typedef)!=0)
    count += 1;
  if ((flags & CParse::f_extern)!=0)
    count += 1;
  if ((flags & CParse::f_static)!=0)
    count += 1;
  if ((flags & CParse::f_auto)!=0)
    count += 1;
  if ((flags & CParse::f_register)!=0)
    count += 1;
  if (count > 1)
    throw ParseError("Multiple storage specifiers");

  count = 0;
  if ((flags & CParse::f_const)!=0)
    count += 1;
  if ((flags & CParse::f_restrict)!=0)
    count += 1;
  if ((flags & CParse::f_volatile)!=0)
    count += 1;
  if (count > 1)
    throw ParseError("Multiple type qualifiers");

  // NOTE(review): the loop bound and the start of the loop body are missing
  for(uint4 i=0;iisValid())
      return false;
  }
  return true;
}

// Construct a parser for architecture \b g whose lexer buffers up to \b maxbuf
// characters, and register the reserved words with their flag values
CParse::CParse(Architecture *g,int4 maxbuf)
  : lexer(maxbuf)
{
  glb = g;
  firsttoken = -1;		// -1 means no synthetic first token is queued (see lex)
  lastdecls = (vector *)0;
  keywords["typedef"] = f_typedef;
  keywords["extern"] = f_extern;
  keywords["static"] = f_static;
  keywords["auto"] = f_auto;
  keywords["register"] = f_register;
  keywords["const"] = f_const;
  keywords["restrict"] = f_restrict;
  keywords["volatile"] = f_volatile;
  keywords["inline"] = f_inline;
  keywords["struct"] = f_struct;
  keywords["union"] = f_union;
  keywords["enum"] = f_enum;
}

// Free everything allocated during parsing
CParse::~CParse(void)

{
  clearAllocation();
}

// Reset the parser (allocations, error text, result, lexer) for a new parse
void CParse::clear(void)

{
  clearAllocation();
  lasterror.clear();
  lastdecls = (vector *)0;
  lexer.clear();
  firsttoken = -1;
}

// Copy the base type and prototype-model name from \b spec into \b dec and
// OR in the specifier flags.  Returns \b dec.
TypeDeclarator *CParse::mergeSpecDec(TypeSpecifiers *spec,TypeDeclarator *dec)

{
  dec->basetype = spec->type_specifier;
  dec->model = spec->function_specifier;
  dec->flags |= spec->flags;
  return dec;
}

TypeDeclarator *CParse::mergeSpecDec(TypeSpecifiers *spec) { TypeDeclarator *dec = new TypeDeclarator(); typedec_alloc.push_back(dec); return mergeSpecDec(spec,dec); } vector *CParse::mergeSpecDecVec(TypeSpecifiers *spec,vector *declist) { for(uint4 i=0;isize();++i) mergeSpecDec(spec,(*declist)[i]); return declist; } vector *CParse::mergeSpecDecVec(TypeSpecifiers *spec) { vector *declist; declist = new vector(); vecdec_alloc.push_back(declist); TypeDeclarator *dec = new TypeDeclarator(); typedec_alloc.push_back(dec); declist->push_back( dec ); return mergeSpecDecVec(spec,declist); } uint4 CParse::convertFlag(string *str) { map::const_iterator iter; iter = keywords.find(*str); if (iter != keywords.end()) return (*iter).second; setError("Unknown qualifier"); return 0; } TypeSpecifiers *CParse::addSpecifier(TypeSpecifiers *spec,string *str) { uint4 flag = convertFlag(str); spec->flags |= flag; return spec; } TypeSpecifiers *CParse::addTypeSpecifier(TypeSpecifiers *spec,Datatype *tp) { if (spec->type_specifier!=(Datatype *)0) setError("Multiple type specifiers"); spec->type_specifier = tp; return spec; } TypeSpecifiers *CParse::addFuncSpecifier(TypeSpecifiers *spec,string *str) { map::const_iterator iter; iter = keywords.find(*str); if (iter != keywords.end()) spec->flags |= (*iter).second; // A reserved specifier else { if (spec->function_specifier.size()!=0) setError("Multiple parameter models"); spec->function_specifier = *str; } return spec; } TypeDeclarator *CParse::mergePointer(vector *ptr,TypeDeclarator *dec) { for(uint4 i=0;isize();++i) { PointerModifier *newmod = new PointerModifier((*ptr)[i]); dec->mods.push_back(newmod); } return dec; } TypeDeclarator *CParse::newDeclarator(string *str) { TypeDeclarator *res = new TypeDeclarator(*str); typedec_alloc.push_back(res); return res; } TypeDeclarator *CParse::newDeclarator(void) { TypeDeclarator *res = new TypeDeclarator(); typedec_alloc.push_back(res); return res; } TypeSpecifiers *CParse::newSpecifier(void) { 
TypeSpecifiers *spec = new TypeSpecifiers(); typespec_alloc.push_back(spec); return spec; } vector *CParse::newVecDeclarator(void) { vector *res = new vector(); vecdec_alloc.push_back(res); return res; } vector *CParse::newPointer(void) { vector *res = new vector(); vecuint4_alloc.push_back(res); return res; } TypeDeclarator *CParse::newArray(TypeDeclarator *dec,uint4 flags,uintb *num) { ArrayModifier *newmod = new ArrayModifier(flags,(int4)*num); dec->mods.push_back(newmod); return dec; } TypeDeclarator *CParse::newFunc(TypeDeclarator *dec,vector *declist) { bool dotdotdot = false; if (!declist->empty()) { if (declist->back() == (TypeDeclarator *)0) { dotdotdot = true; declist->pop_back(); } } FunctionModifier *newmod = new FunctionModifier(declist,dotdotdot); dec->mods.push_back(newmod); return dec; } Datatype *CParse::newStruct(const string &ident,vector *declist) { // Build a new structure TypeStruct *res = glb->types->getTypeStruct(ident); // Create stub (for recursion) vector sublist; for(uint4 i=0;isize();++i) { TypeDeclarator *decl = (*declist)[i]; if (!decl->isValid()) { setError("Invalid structure declarator"); glb->types->destroyType(res); return (Datatype *)0; } sublist.push_back(TypeField()); sublist.back().type = decl->buildType(glb); sublist.back().name = decl->getIdentifier(); sublist.back().offset = -1; // Let typegrp figure out offset } if (!glb->types->setFields(sublist,res,-1,0)) { setError("Bad structure definition"); glb->types->destroyType(res); return (Datatype *)0; } return res; } Datatype *CParse::oldStruct(const string &ident) { Datatype *res = glb->types->findByName(ident); if ((res==(Datatype *)0)||(res->getMetatype() != TYPE_STRUCT)) setError("Identifier does not represent a struct as required"); return res; } Datatype *CParse::newUnion(const string &ident,vector *declist) { setError("Unions are currently unsupported"); return (Datatype *)0; } Datatype *CParse::oldUnion(const string &ident) { setError("Unions are currently 
unsupported"); return (Datatype *)0; } Enumerator *CParse::newEnumerator(const string &ident) { Enumerator *res = new Enumerator(ident); enum_alloc.push_back(res); return res; } Enumerator *CParse::newEnumerator(const string &ident,uintb val) { Enumerator *res = new Enumerator(ident,val); enum_alloc.push_back(res); return res; } vector *CParse::newVecEnumerator(void) { vector *res = new vector(); vecenum_alloc.push_back(res); return res; } Datatype *CParse::newEnum(const string &ident,vector *vecenum) { TypeEnum *res = glb->types->getTypeEnum(ident); vector namelist; vector vallist; vector assignlist; for(uint4 i=0;isize();++i) { Enumerator *enumer = (*vecenum)[i]; namelist.push_back(enumer->enumconstant); vallist.push_back(enumer->value); assignlist.push_back(enumer->constantassigned); } if (!glb->types->setEnumValues(namelist,vallist,assignlist,res)) { setError("Bad enumeration values"); glb->types->destroyType(res); return (Datatype *)0; } return res; } Datatype *CParse::oldEnum(const string &ident) { Datatype *res = glb->types->findByName(ident); if ((res==(Datatype *)0)||(!res->isEnumType())) setError("Identifier does not represent an enum as required"); return res; } void CParse::clearAllocation(void) { list::iterator iter1; for(iter1=typedec_alloc.begin();iter1!=typedec_alloc.end();++iter1) delete *iter1; typedec_alloc.clear(); list::iterator iter2; for(iter2=typespec_alloc.begin();iter2!=typespec_alloc.end();++iter2) delete *iter2; typespec_alloc.clear(); list *>::iterator iter3; for(iter3=vecuint4_alloc.begin();iter3!=vecuint4_alloc.end();++iter3) delete *iter3; vecuint4_alloc.clear(); list *>::iterator iter4; for(iter4=vecdec_alloc.begin();iter4!=vecdec_alloc.end();++iter4) delete *iter4; vecdec_alloc.clear(); list::iterator iter5; for(iter5=string_alloc.begin();iter5!=string_alloc.end();++iter5) delete *iter5; string_alloc.clear(); list::iterator iter6; for(iter6=num_alloc.begin();iter6!=num_alloc.end();++iter6) delete *iter6; num_alloc.clear(); 
list::iterator iter7; for(iter7=enum_alloc.begin();iter7!=enum_alloc.end();++iter7) delete *iter7; enum_alloc.clear(); list *>::iterator iter8; for(iter8=vecenum_alloc.begin();iter8!=vecenum_alloc.end();++iter8) delete *iter8; vecenum_alloc.clear(); } int4 CParse::lookupIdentifier(const string &nm) { map::const_iterator iter = keywords.find(nm); if (iter != keywords.end()) { switch( (*iter).second ) { case f_typedef: case f_extern: case f_static: case f_auto: case f_register: return STORAGE_CLASS_SPECIFIER; case f_const: case f_restrict: case f_volatile: return TYPE_QUALIFIER; case f_inline: return FUNCTION_SPECIFIER; case f_struct: return STRUCT; case f_union: return UNION; case f_enum: return ENUM; default: break; } } Datatype *tp = glb->types->findByName(nm); if (tp != (Datatype *)0) { yylval.type = tp; return TYPE_NAME; } if (glb->hasModel(nm)) return FUNCTION_SPECIFIER; return IDENTIFIER; // Unknown identifier } int4 CParse::lex(void) { GrammarToken tok; if (firsttoken != -1) { int4 retval = firsttoken; firsttoken = -1; return retval; } if (lasterror.size()!=0) return BADTOKEN; lexer.getNextToken(tok); lineno = tok.getLineNo(); colno = tok.getColNo(); filenum = tok.getFileNum(); switch(tok.getType()) { case GrammarToken::integer: case GrammarToken::charconstant: yylval.i = new uintb(tok.getInteger()); num_alloc.push_back(yylval.i); return NUMBER; case GrammarToken::identifier: yylval.str = tok.getString(); string_alloc.push_back(yylval.str); return lookupIdentifier(*yylval.str); case GrammarToken::stringval: delete tok.getString(); setError("Illegal string constant"); return BADTOKEN; case GrammarToken::dotdotdot: return DOTDOTDOT; case GrammarToken::badtoken: setError(lexer.getError()); // Error from lexer return BADTOKEN; case GrammarToken::endoffile: return -1; // No more tokens default: return (int4)tok.getType(); } } void CParse::setError(const string &msg) { ostringstream s; s << msg; lexer.writeLocation(s,lineno,filenum); s << '\n'; 
lexer.writeTokenLocation(s,lineno,colno); lasterror = s.str(); } bool CParse::runParse(uint4 doctype) { // Assuming the stream has been setup, parse it switch(doctype) { case doc_declaration: firsttoken = DECLARATION_RESULT; break; case doc_parameter_declaration: firsttoken = PARAM_RESULT; break; default: throw LowlevelError("Bad document type"); } parse = this; // Setup global object for yyparse int4 res = yyparse(); if (res != 0) { if (lasterror.size()==0) setError("Syntax error"); return false; } return true; } bool CParse::parseFile(const string &nm,uint4 doctype) { // Run the parser on a file, return true if no parse errors clear(); // Clear out any old parsing ifstream s(nm.c_str()); // open file if (!s) throw LowlevelError("Unable to open file for parsing: "+nm); lexer.pushFile(nm,&s); // Inform lexer of filename and stream bool res = runParse(doctype); s.close(); return res; } bool CParse::parseStream(istream &s,uint4 doctype) { clear(); lexer.pushFile("stream",&s); return runParse(doctype); } int yylex(void) { return parse->lex(); } int yyerror(const char *str) { return 0; } Datatype *parse_type(istream &s,string &name,Architecture *glb) { CParse parser(glb,1000); if (!parser.parseStream(s,CParse::doc_parameter_declaration)) throw ParseError(parser.getError()); vector *decls = parser.getResultDeclarations(); if ((decls == (vector *)0)||(decls->size()==0)) throw ParseError("Did not parse a datatype"); if (decls->size() > 1) throw ParseError("Parsed multiple declarations"); TypeDeclarator *decl = (*decls)[0]; if (!decl->isValid()) throw ParseError("Parsed type is invalid"); name = decl->getIdentifier(); return decl->buildType(glb); } void parse_protopieces(PrototypePieces &pieces, istream &s,Architecture *glb) { CParse parser(glb,1000); if (!parser.parseStream(s,CParse::doc_declaration)) throw ParseError(parser.getError()); vector *decls = parser.getResultDeclarations(); if ((decls == (vector *)0)||(decls->size()==0)) throw ParseError("Did not parse a 
datatype"); if (decls->size() > 1) throw ParseError("Parsed multiple declarations"); TypeDeclarator *decl = (*decls)[0]; if (!decl->isValid()) throw ParseError("Parsed type is invalid"); if (!decl->getPrototype(pieces,glb)) throw ParseError("Did not parse a prototype"); } void parse_C(Architecture *glb,istream &s) { // Load type data straight into datastructures CParse parser(glb,1000); if (!parser.parseStream(s,CParse::doc_declaration)) throw ParseError(parser.getError()); vector *decls = parser.getResultDeclarations(); if ((decls == (vector *)0)||(decls->size()==0)) throw ParseError("Did not parse a datatype"); if (decls->size() > 1) throw ParseError("Parsed multiple declarations"); TypeDeclarator *decl = (*decls)[0]; if (!decl->isValid()) throw ParseError("Parsed type is invalid"); if (decl->hasProperty(CParse::f_extern)) { PrototypePieces pieces; if (!decl->getPrototype(pieces,glb)) throw ParseError("Did not parse prototype as expected"); glb->setPrototype(pieces); } else if (decl->hasProperty(CParse::f_typedef)) { Datatype *ct = decl->buildType(glb); if (decl->getIdentifier().size() == 0) throw ParseError("Missing identifier for typedef"); glb->types->setName(ct,decl->getIdentifier()); } else if (decl->getBaseType()->getMetatype()==TYPE_STRUCT) { // We parsed a struct, treat as a typedef } else if (decl->getBaseType()->isEnumType()) { // We parsed an enum, treat as a typedef } else throw LowlevelError("Not sure what to do with this type"); } void parse_toseparator(istream &s,string &name) { // parse to next (C) separator char tok; name.erase(); s >> ws; tok = s.peek(); while((isalnum(tok))||(tok=='_')) { s >> tok; name += tok; tok = s.peek(); } } Address parse_varnode(istream &s,int4 &size,Address &pc,uintm &uq,const TypeFactory &typegrp) { // Scan for a specific varnode char tok; int4 discard; Address loc(parse_machaddr(s,size,typegrp)); s >> ws >> tok; if (tok != '(') throw ParseError("Missing '('"); s >> ws; tok = s.peek(); pc = Address(); // pc starts out 
as invalid if (tok == 'i') s >> tok; else if (s.peek() != ':') { s.unsetf(ios::dec | ios::hex | ios::oct); // Let user specify base pc = parse_machaddr(s,discard,typegrp,true); } s >> ws; if (s.peek() == ':') { // Scan uniq s >> tok >> ws >> hex >> uq; // Assume uniq is in hex } else uq = ~((uintm)0); s >> ws >> tok; if (tok != ')') throw ParseError("Missing ')'"); return loc; } Address parse_op(istream &s,uintm &uq,const TypeFactory &typegrp) { int4 size; char tok; Address loc(parse_machaddr(s,size,typegrp,true)); s >> ws >> tok; if (tok != ':') throw ParseError("Missing ':'"); s >> ws >> hex >> uq; // Assume uniq is in hex return loc; } Address parse_machaddr(istream &s,int4 &defaultsize,const TypeFactory &typegrp,bool ignorecolon) { // Read Address from ASCII stream string token; AddrSpace *b; int4 size = -1; int4 oversize; char tok; const AddrSpaceManager *manage = typegrp.getArch(); s >> ws; tok = s.peek(); if (tok == '[') { s >> tok; parse_toseparator(s,token); // scan base address token b = manage->getSpaceByName(token); if (b == (AddrSpace *)0) throw ParseError("Bad address base"); s >> ws >> tok; if (tok != ',') throw ParseError("Missing ',' in address"); parse_toseparator(s,token); // Get the offset portion of the address s >> ws >> tok; if (tok == ',') { // Optional size specifier s.unsetf(ios::dec | ios::hex | ios::oct); s >> size; s >> ws >> tok; } if (tok != ']') throw ParseError("Missing ']' in address"); } else if (tok == '{') { b = manage->getJoinSpace(); s >> tok; s >> tok; while(tok != '}') // Scan to the matching curly brace token += tok; } else { if (tok == '0') { b = manage->getDefaultCodeSpace(); } else { b = manage->getSpaceByShortcut(tok); s >> tok; } if (b==(AddrSpace *)0) { s >> token; string errmsg = "Bad address: "; errmsg += tok; errmsg += token; throw ParseError(errmsg); } token.erase(); s >> ws; tok = s.peek(); if (ignorecolon) { while((isalnum(tok))||(tok=='_')||(tok=='+')) { token += tok; s >> tok; tok = s.peek(); } } else { 
while((isalnum(tok))||(tok=='_')||(tok=='+')||(tok==':')) { token += tok; s >> tok; tok = s.peek(); } } } Address res(b,0); oversize = res.read(token); // Read the address of this particular type // oversize is "standard size" if (oversize == -1) throw ParseError("Bad machine address"); defaultsize = (size==-1) ? oversize : size; // If not overriden use standard return res; }