/*
 * Flex scanner specification for the SMARTS lexer that pairs with the
 * bison-generated header "smarts.tab.hpp".
 *
 * What the visible text establishes:
 *   - Options: reentrant, bison-bridge, noyywrap, stack.
 *   - Start conditions declared with %s: IN_ATOM_STATE, IN_BRANCH_STATE,
 *     IN_RECURSION_STATE.  They are entered with yy_push_state() on "[",
 *     "(" and "$(" respectively and left with yy_pop_state().
 *   - YY_FATAL_ERROR is redirected to smarts_lexer_error(), which writes the
 *     message to rdErrorLog.
 *   - The code at the top of the rules section returns start_token once when
 *     it is non-zero and then resets it to 0.  The declaration of start_token
 *     is not in this file.
 *   - Rule actions either return a bare token or fill yylval first:
 *     yylval->ival holds an atomic number or a digit, yylval->atom holds a
 *     QueryAtom created with new, yylval->bond holds a QueryBond created
 *     with new.  Presumably the parser takes ownership of these pointers;
 *     confirm against the grammar.
 *   - A newline and the end-of-input rule both return EOS_TOKEN; any other
 *     unmatched character is returned as its own character code.
 *   - The buffer-setup code drops leading and trailing bytes whose value is
 *     not greater than 32, copies the remainder into buf, appends two
 *     YY_END_OF_BUFFER_CHAR bytes, hands buf to yysmarts__scan_buffer() and
 *     returns start.
 *
 * NOTE(review): this copy of the file looks damaged; confirm against the
 * upstream source before building or editing it.
 *   - All text is collapsed onto a few physical lines.  Flex expects each
 *     %option, the prologue delimiters, the section separators and every
 *     rule on its own line, and the // comments in the prologue now run to
 *     the end of the physical line.
 *   - Several #include directives have no header name.
 *   - The body of smarts_lexer_error() runs from "<< msg<" directly into
 *     " 32) break; }".  The rest of that function and the beginning of the
 *     function that declares buf, yybytes, _yybytes_len, n, start, end and b
 *     are not present here.
 *   - The patterns "\)" and "\]" each appear twice with different actions,
 *     and no rule carries a start-condition prefix even though three start
 *     conditions are declared.  Presumably the prefixes were lost; as
 *     written, only the first rule of each pair could ever match.
 *   - The pattern "<>" in front of the last EOS_TOKEN rule is presumably the
 *     flex end-of-file pattern with its inner text lost.
 *   - The trailing-trim loop starts at index _yybytes_len, one past the last
 *     counted byte, so it reads yybytes[_yybytes_len].  Presumably that is a
 *     terminating NUL; confirm.  If yybytes is a plain char pointer on a
 *     platform where char is signed, bytes of 0x80 and above would also be
 *     treated as trimmable; confirm whether that is intended.
 *   - yysmarts_wrap() is defined at the end of the file although noyywrap is
 *     requested; confirm that the definition is still needed.
 */
%option reentrant %option bison-bridge %option noyywrap %{ // // Copyright (C) 2003-2018 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // #if defined(__CYGWIN__) && !defined(fileno) // -std=c++11 turns off recent posix features extern "C" int fileno(FILE*); #endif #include #ifdef WIN32 #include #endif #include #include #include #include #include #include "smarts.tab.hpp" using namespace RDKit; //static PeriodicTable * gl_ptab = PeriodicTable::getTable(); #define YY_FATAL_ERROR(msg) smarts_lexer_error(msg) void smarts_lexer_error(const char *msg) { BOOST_LOG(rdErrorLog) << msg< 32) break; } for(end = _yybytes_len ; end > start; --end) { if (yybytes[end] > 32) break; } _yybytes_len = end-start+1; n = _yybytes_len + 2; memcpy(buf, yybytes+start, _yybytes_len); buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; b = yysmarts__scan_buffer(buf,n ,yyscanner); if ( ! b ) smarts_lexer_error( "bad buffer in yysmarts__scan_bytes()" ); /* It's okay to grow etc. this buffer, and we should throw it * away when we're done. 
*/ b->yy_is_our_buffer = 1; POSTCONDITION(b,"invalid buffer"); return start; } %} %option stack %s IN_ATOM_STATE %s IN_BRANCH_STATE %s IN_RECURSION_STATE %% %{ if (start_token) { int t = start_token; start_token = 0; return t; } %} @[' ']*TH | @[' ']*AL | @[' ']*SQ | @[' ']*BP | @[' ']*OH { return CHI_CLASS_TOKEN; } @ { return AT_TOKEN; } He | Li | Be | Ne | Na | Mg | Al | Si | Ar | K | Ca | Sc | Ti | V | Cr | Mn | Co | Fe | Ni | Cu | Zn | Ga | Ge | As | Se | Kr | Rb | Sr | Y | Zr | Nb | Mo | Tc | Ru | Rh | Pd | Ag | Cd | In | Sn | Sb | Te | Xe | Cs | Ba | La | Ce | Pr | Nd | Pm | Sm | Eu | Gd | Tb | Dy | Ho | Er | Tm | Yb | Lu | Hf | Ta | W | Re | Os | Ir | Pt | Au | Hg | Tl | Pb | Bi | Po | At | Rn | Fr | Ra | Ac | Th | Pa | U | Np | Pu | Am | Cm | Bk | Cf | Es | Fm | Md | No | Lr | Rf | Db | Sg | Bh | Hs | Mt | Ds | Rg | Cn | Uut | Fl | Uup | Lv { yylval->atom = new QueryAtom( PeriodicTable::getTable()->getAtomicNumber( yytext ) ); return ATOM_TOKEN; } D { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomExplicitDegreeQuery(1)); return COMPLEX_ATOM_QUERY_TOKEN; } d { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomNonHydrogenDegreeQuery(1)); return COMPLEX_ATOM_QUERY_TOKEN; } X { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomTotalDegreeQuery(1)); return COMPLEX_ATOM_QUERY_TOKEN; } x { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHasRingBondQuery()); return RINGBOND_ATOM_QUERY_TOKEN; } v { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomTotalValenceQuery(1)); return COMPLEX_ATOM_QUERY_TOKEN; } z { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHasHeteroatomNbrsQuery()); return HETERONEIGHBOR_ATOM_QUERY_TOKEN; } Z { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHasAliphaticHeteroatomNbrsQuery()); return ALIPHATICHETERONEIGHBOR_ATOM_QUERY_TOKEN; } h { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHasImplicitHQuery()); return 
IMPLICIT_H_ATOM_QUERY_TOKEN; } R { yylval->atom = new QueryAtom(); yylval->atom->setQuery(new AtomRingQuery(-1)); return COMPLEX_ATOM_QUERY_TOKEN; } r { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomInRingQuery()); return RINGSIZE_ATOM_QUERY_TOKEN; } H { return H_TOKEN; } B { yylval->ival = 5; return ORGANIC_ATOM_TOKEN; } C { yylval->ival = 6; return ORGANIC_ATOM_TOKEN; } N { yylval->ival = 7; return ORGANIC_ATOM_TOKEN; } O { yylval->ival = 8; return ORGANIC_ATOM_TOKEN; } F { yylval->ival = 9; return ORGANIC_ATOM_TOKEN; } P { yylval->ival = 15; return ORGANIC_ATOM_TOKEN; } S { yylval->ival = 16; return ORGANIC_ATOM_TOKEN; } Cl { yylval->ival = 17; return ORGANIC_ATOM_TOKEN; } Br { yylval->ival = 35; return ORGANIC_ATOM_TOKEN; } I { yylval->ival = 53; return ORGANIC_ATOM_TOKEN; } b { yylval->ival = 5; return AROMATIC_ATOM_TOKEN; } c { yylval->ival = 6; return AROMATIC_ATOM_TOKEN; } n { yylval->ival = 7; return AROMATIC_ATOM_TOKEN; } o { yylval->ival = 8; return AROMATIC_ATOM_TOKEN; } p { yylval->ival = 15; return AROMATIC_ATOM_TOKEN; } s { yylval->ival = 16; return AROMATIC_ATOM_TOKEN; } si { yylval->ival = 14; return AROMATIC_ATOM_TOKEN; } as { yylval->ival = 33; return AROMATIC_ATOM_TOKEN; } se { yylval->ival = 34; return AROMATIC_ATOM_TOKEN; } te { yylval->ival = 52; return AROMATIC_ATOM_TOKEN; } \* { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomNullQuery()); return SIMPLE_ATOM_QUERY_TOKEN; } a { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomAromaticQuery()); yylval->atom->setIsAromatic(true); return SIMPLE_ATOM_QUERY_TOKEN; } A { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomAliphaticQuery()); return SIMPLE_ATOM_QUERY_TOKEN; } \: { return COLON_TOKEN; } \_ { return UNDERSCORE_TOKEN; } \# { return HASH_TOKEN; } \= { yylval->bond = new QueryBond(Bond::DOUBLE); yylval->bond->setQuery(makeBondOrderEqualsQuery(Bond::DOUBLE)); return BOND_TOKEN; } \~ { yylval->bond = new QueryBond(); 
yylval->bond->setQuery(makeBondNullQuery()); return BOND_TOKEN; } \$ { yylval->bond = new QueryBond(Bond::QUADRUPLE); yylval->bond->setQuery(makeBondOrderEqualsQuery(Bond::QUADRUPLE)); return BOND_TOKEN; } [\\]{1,2} { yylval->bond = new QueryBond(Bond::SINGLE); yylval->bond->setBondDir(Bond::ENDDOWNRIGHT); return BOND_TOKEN; } [\/] { yylval->bond = new QueryBond(Bond::SINGLE); yylval->bond->setBondDir(Bond::ENDUPRIGHT); return BOND_TOKEN; } \-\> { yylval->bond = new QueryBond(Bond::DATIVER); return BOND_TOKEN; } \<\- { yylval->bond = new QueryBond(Bond::DATIVEL); return BOND_TOKEN; } \- { return MINUS_TOKEN; } \+ { return PLUS_TOKEN; } \$\( { yy_push_state(IN_RECURSION_STATE,yyscanner); return BEGIN_RECURSE; } \( { yy_push_state(IN_BRANCH_STATE,yyscanner); return GROUP_OPEN_TOKEN; } \) { yy_pop_state(yyscanner); return GROUP_CLOSE_TOKEN; } \) { yy_pop_state(yyscanner); return END_RECURSE; } \{ { return RANGE_OPEN_TOKEN; } \} { return RANGE_CLOSE_TOKEN; } \[ { yy_push_state(IN_ATOM_STATE,yyscanner); return ATOM_OPEN_TOKEN; } \] { yy_pop_state(yyscanner); return ATOM_CLOSE_TOKEN; } \] { /* FIX: ??? This rule is here because otherwise recursive SMARTS queries like: [$(C(=O)[O,N])] lex improperly (no ATOM_CLOSE token is returned). I am not 100% sure that the approach we're using here will work all the time, but I'm hoping that any problems caused here in the lexer will get caught in the parser. */ return ATOM_CLOSE_TOKEN; } \. { return SEPARATOR_TOKEN; } \% { return PERCENT_TOKEN; } [0] { yylval->ival = 0; return ZERO_TOKEN; } [1-9] { yylval->ival = yytext[0]-'0'; return NONZERO_DIGIT_TOKEN; } \! 
{ return NOT_TOKEN; } \; { return SEMI_TOKEN; } \& { return AND_TOKEN; } \, { return OR_TOKEN; } \^0 { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHybridizationQuery(Atom::S)); return HYB_TOKEN; } \^1 { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHybridizationQuery(Atom::SP)); return HYB_TOKEN; } \^2 { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHybridizationQuery(Atom::SP2)); return HYB_TOKEN; } \^3 { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHybridizationQuery(Atom::SP3)); return HYB_TOKEN; } \^4 { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHybridizationQuery(Atom::SP3D)); return HYB_TOKEN; } \^5 { yylval->atom = new QueryAtom(); yylval->atom->setQuery(makeAtomHybridizationQuery(Atom::SP3D2)); return HYB_TOKEN; } \n return EOS_TOKEN; <> { return EOS_TOKEN; } . return yytext[0]; %% #undef yysmarts_wrap int yysmarts_wrap( void ) { return 1; }