%option reentrant %option bison-bridge %option noyywrap %{ // $Id$ // // Copyright (C) 2001-2010 Randal Henne, Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // #if defined(__CYGWIN__) && !defined(fileno) // -std=c++11 turns off recent posix features extern "C" int fileno(FILE*); #endif #include #ifdef WIN32 #include #endif #include #include #include #include #include #include #include #include #include #include "smiles.tab.hpp" using namespace RDKit; #define YY_FATAL_ERROR(msg) smiles_lexer_error(msg) void smiles_lexer_error(const char *msg) { BOOST_LOG(rdErrorLog) << msg< 32) break; } for(end = _yybytes_len ; end > start; --end) { if (yybytes[end] > 32) break; } _yybytes_len = end-start+1; n = _yybytes_len + 2; memcpy(buf, yybytes+start, _yybytes_len); buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; b = yysmiles__scan_buffer(buf,n ,yyscanner); if ( ! b ) smiles_lexer_error( "bad buffer in yysmiles__scan_bytes()" ); /* It's okay to grow etc. this buffer, and we should throw it * away when we're done. 
*/ b->yy_is_our_buffer = 1; POSTCONDITION(b,"invalid buffer"); return start; } %} %s IN_ATOM_STATE %% %{ if (start_token) { int t = start_token; start_token = 0; return t; } %} @[' ']*TH | @[' ']*AL | @[' ']*SQ | @[' ']*BP | @[' ']*OH { return CHI_CLASS_TOKEN; } @ { return AT_TOKEN; } He { yylval->atom = new Atom(2); return ATOM_TOKEN; } Li { yylval->atom = new Atom(3); return ATOM_TOKEN; } Be { yylval->atom = new Atom(4); return ATOM_TOKEN; } Ne { yylval->atom = new Atom(10); return ATOM_TOKEN; } Na { yylval->atom = new Atom(11); return ATOM_TOKEN; } Mg { yylval->atom = new Atom(12); return ATOM_TOKEN; } Al { yylval->atom = new Atom(13); return ATOM_TOKEN; } Si { yylval->atom = new Atom(14); return ATOM_TOKEN; } Ar { yylval->atom = new Atom(18); return ATOM_TOKEN; } K { yylval->atom = new Atom(19); return ATOM_TOKEN; } Ca { yylval->atom = new Atom(20); return ATOM_TOKEN; } Sc { yylval->atom = new Atom(21); return ATOM_TOKEN; } Ti { yylval->atom = new Atom(22); return ATOM_TOKEN; } V { yylval->atom = new Atom(23); return ATOM_TOKEN; } Cr { yylval->atom = new Atom(24); return ATOM_TOKEN; } Mn { yylval->atom = new Atom(25); return ATOM_TOKEN; } Fe { yylval->atom = new Atom(26); return ATOM_TOKEN; } Co { yylval->atom = new Atom(27); return ATOM_TOKEN; } Ni { yylval->atom = new Atom(28); return ATOM_TOKEN; } Cu { yylval->atom = new Atom(29); return ATOM_TOKEN; } Zn { yylval->atom = new Atom(30); return ATOM_TOKEN; } Ga { yylval->atom = new Atom(31); return ATOM_TOKEN; } Ge { yylval->atom = new Atom(32); return ATOM_TOKEN; } As { yylval->atom = new Atom(33); return ATOM_TOKEN; } Se { yylval->atom = new Atom(34); return ATOM_TOKEN; } Kr { yylval->atom = new Atom(36); return ATOM_TOKEN; } Rb { yylval->atom = new Atom(37); return ATOM_TOKEN; } Sr { yylval->atom = new Atom(38); return ATOM_TOKEN; } Y { yylval->atom = new Atom(39); return ATOM_TOKEN; } Zr { yylval->atom = new Atom(40); return ATOM_TOKEN; } Nb { yylval->atom = new Atom(41); return ATOM_TOKEN; } Mo { 
yylval->atom = new Atom(42); return ATOM_TOKEN; } Tc { yylval->atom = new Atom(43); return ATOM_TOKEN; } Ru { yylval->atom = new Atom(44); return ATOM_TOKEN; } Rh { yylval->atom = new Atom(45); return ATOM_TOKEN; } Pd { yylval->atom = new Atom(46); return ATOM_TOKEN; } Ag { yylval->atom = new Atom(47); return ATOM_TOKEN; } Cd { yylval->atom = new Atom(48); return ATOM_TOKEN; } In { yylval->atom = new Atom(49); return ATOM_TOKEN; } Sn { yylval->atom = new Atom(50); return ATOM_TOKEN; } Sb { yylval->atom = new Atom(51); return ATOM_TOKEN; } Te { yylval->atom = new Atom(52); return ATOM_TOKEN; } Xe { yylval->atom = new Atom(54); return ATOM_TOKEN; } Cs { yylval->atom = new Atom(55); return ATOM_TOKEN; } Ba { yylval->atom = new Atom(56); return ATOM_TOKEN; } La { yylval->atom = new Atom(57); return ATOM_TOKEN; } Ce { yylval->atom = new Atom(58); return ATOM_TOKEN; } Pr { yylval->atom = new Atom(59); return ATOM_TOKEN; } Nd { yylval->atom = new Atom(60); return ATOM_TOKEN; } Pm { yylval->atom = new Atom(61); return ATOM_TOKEN; } Sm { yylval->atom = new Atom(62); return ATOM_TOKEN; } Eu { yylval->atom = new Atom(63); return ATOM_TOKEN; } Gd { yylval->atom = new Atom(64); return ATOM_TOKEN; } Tb { yylval->atom = new Atom(65); return ATOM_TOKEN; } Dy { yylval->atom = new Atom(66); return ATOM_TOKEN; } Ho { yylval->atom = new Atom(67); return ATOM_TOKEN; } Er { yylval->atom = new Atom(68); return ATOM_TOKEN; } Tm { yylval->atom = new Atom(69); return ATOM_TOKEN; } Yb { yylval->atom = new Atom(70); return ATOM_TOKEN; } Lu { yylval->atom = new Atom(71); return ATOM_TOKEN; } Hf { yylval->atom = new Atom(72); return ATOM_TOKEN; } Ta { yylval->atom = new Atom(73); return ATOM_TOKEN; } W { yylval->atom = new Atom(74); return ATOM_TOKEN; } Re { yylval->atom = new Atom(75); return ATOM_TOKEN; } Os { yylval->atom = new Atom(76); return ATOM_TOKEN; } Ir { yylval->atom = new Atom(77); return ATOM_TOKEN; } Pt { yylval->atom = new Atom(78); return ATOM_TOKEN; } Au { yylval->atom = new 
Atom(79); return ATOM_TOKEN; } Hg { yylval->atom = new Atom(80); return ATOM_TOKEN; } Tl { yylval->atom = new Atom(81); return ATOM_TOKEN; } Pb { yylval->atom = new Atom(82); return ATOM_TOKEN; } Bi { yylval->atom = new Atom(83); return ATOM_TOKEN; } Po { yylval->atom = new Atom(84); return ATOM_TOKEN; } At { yylval->atom = new Atom(85); return ATOM_TOKEN; } Rn { yylval->atom = new Atom(86); return ATOM_TOKEN; } Fr { yylval->atom = new Atom(87); return ATOM_TOKEN; } Ra { yylval->atom = new Atom(88); return ATOM_TOKEN; } Ac { yylval->atom = new Atom(89); return ATOM_TOKEN; } Th { yylval->atom = new Atom(90); return ATOM_TOKEN; } Pa { yylval->atom = new Atom(91); return ATOM_TOKEN; } U { yylval->atom = new Atom(92); return ATOM_TOKEN; } Np { yylval->atom = new Atom(93); return ATOM_TOKEN; } Pu { yylval->atom = new Atom(94); return ATOM_TOKEN; } Am { yylval->atom = new Atom(95); return ATOM_TOKEN; } Cm { yylval->atom = new Atom(96); return ATOM_TOKEN; } Bk { yylval->atom = new Atom(97); return ATOM_TOKEN; } Cf { yylval->atom = new Atom(98); return ATOM_TOKEN; } Es { yylval->atom = new Atom(99); return ATOM_TOKEN; } Fm { yylval->atom = new Atom(100); return ATOM_TOKEN; } Md { yylval->atom = new Atom(101); return ATOM_TOKEN; } No { yylval->atom = new Atom(102); return ATOM_TOKEN; } Lr { yylval->atom = new Atom(103); return ATOM_TOKEN; } Rf { yylval->atom = new Atom(104); return ATOM_TOKEN; } Db { yylval->atom = new Atom(105); return ATOM_TOKEN; } Sg { yylval->atom = new Atom(106); return ATOM_TOKEN; } Bh { yylval->atom = new Atom(107); return ATOM_TOKEN; } Hs { yylval->atom = new Atom(108); return ATOM_TOKEN; } Mt { yylval->atom = new Atom(109); return ATOM_TOKEN; } Ds { yylval->atom = new Atom(110); return ATOM_TOKEN; } Rg { yylval->atom = new Atom(111); return ATOM_TOKEN; } Cn { yylval->atom = new Atom(112); return ATOM_TOKEN; } Nh { yylval->atom = new Atom(113); return ATOM_TOKEN; } Fl { yylval->atom = new Atom(114); return ATOM_TOKEN; } Mc { yylval->atom = new 
Atom(115); return ATOM_TOKEN; } Lv { yylval->atom = new Atom(116); return ATOM_TOKEN; } Ts { yylval->atom = new Atom(117); return ATOM_TOKEN; } Og { yylval->atom = new Atom(118); return ATOM_TOKEN; } Uun { yylval->atom = new Atom(110); return ATOM_TOKEN; } Uuu { yylval->atom = new Atom(111); return ATOM_TOKEN; } Uub { yylval->atom = new Atom(112); return ATOM_TOKEN; } Uut { yylval->atom = new Atom(113); return ATOM_TOKEN; } Uuq { yylval->atom = new Atom(114); return ATOM_TOKEN; } Uup { yylval->atom = new Atom(115); return ATOM_TOKEN; } Uuh { yylval->atom = new Atom(116); return ATOM_TOKEN; } Uus { yylval->atom = new Atom(117); return ATOM_TOKEN; } Uuo { yylval->atom = new Atom(118); return ATOM_TOKEN; } B { yylval->atom = new Atom(5);return ORGANIC_ATOM_TOKEN; } C { yylval->atom = new Atom(6);return ORGANIC_ATOM_TOKEN; } N { yylval->atom = new Atom(7);return ORGANIC_ATOM_TOKEN; } O { yylval->atom = new Atom(8);return ORGANIC_ATOM_TOKEN; } P { yylval->atom = new Atom(15);return ORGANIC_ATOM_TOKEN; } S { yylval->atom = new Atom(16);return ORGANIC_ATOM_TOKEN; } F { yylval->atom = new Atom(9);return ORGANIC_ATOM_TOKEN; } Cl { yylval->atom = new Atom(17);return ORGANIC_ATOM_TOKEN; } Br { yylval->atom = new Atom(35);return ORGANIC_ATOM_TOKEN; } I { yylval->atom = new Atom(53);return ORGANIC_ATOM_TOKEN; } H { return H_TOKEN; } b { yylval->atom = new Atom ( 5 ); yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } c { yylval->atom = new Atom ( 6 ); yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } n { yylval->atom = new Atom( 7 ); yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } o { yylval->atom = new Atom( 8 ); yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } p { yylval->atom = new Atom( 15 ); yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } s { yylval->atom = new Atom( 16 ); yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } si { yylval->atom = new Atom( 14 ); 
yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } as { yylval->atom = new Atom( 33 ); yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } se { yylval->atom = new Atom( 34 ); yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } te { yylval->atom = new Atom( 52 ); yylval->atom->setIsAromatic(true); return AROMATIC_ATOM_TOKEN; } \* { yylval->atom = new Atom( 0 ); yylval->atom->setProp(common_properties::dummyLabel, std::string("*")); // must be ORGANIC_ATOM_TOKEN because // we aren't in square brackets: return ORGANIC_ATOM_TOKEN; } \: { return COLON_TOKEN; } \# { return HASH_TOKEN; } %{ // The next block is a workaround for a pathlogy in the SMILES produced // by some Biovia tools %} \'Rf\' { yylval->atom = new Atom(104); return ATOM_TOKEN; } \'Db\' { yylval->atom = new Atom(105); return ATOM_TOKEN; } \'Sg\' { yylval->atom = new Atom(106); return ATOM_TOKEN; } \'Bh\' { yylval->atom = new Atom(107); return ATOM_TOKEN; } \'Hs\' { yylval->atom = new Atom(108); return ATOM_TOKEN; } \'Mt\' { yylval->atom = new Atom(109); return ATOM_TOKEN; } \'Ds\' { yylval->atom = new Atom(110); return ATOM_TOKEN; } \'Rg\' { yylval->atom = new Atom(111); return ATOM_TOKEN; } \'Cn\' { yylval->atom = new Atom(112); return ATOM_TOKEN; } \'Nh\' { yylval->atom = new Atom(113); return ATOM_TOKEN; } \'Fl\' { yylval->atom = new Atom(114); return ATOM_TOKEN; } \'Mc\' { yylval->atom = new Atom(115); return ATOM_TOKEN; } \'Lv\' { yylval->atom = new Atom(116); return ATOM_TOKEN; } \'Ts\' { yylval->atom = new Atom(117); return ATOM_TOKEN; } \'Og\' { yylval->atom = new Atom(118); return ATOM_TOKEN; } \= { yylval->bond = new Bond(Bond::DOUBLE); return BOND_TOKEN; } \# { yylval->bond = new Bond(Bond::TRIPLE); return BOND_TOKEN; } \: { yylval->bond = new Bond(Bond::AROMATIC); yylval->bond->setIsAromatic(true); return BOND_TOKEN; } \$ { yylval->bond = new Bond(Bond::QUADRUPLE); return BOND_TOKEN; } \-\> { yylval->bond = new Bond(Bond::DATIVER); return BOND_TOKEN; } 
\<\-        {
              /* Dative bond written with the arrow pointing left. */
              yylval->bond = new Bond(Bond::DATIVEL);
              return BOND_TOKEN;
            }
\~          {
              /* Tilde: a query bond whose query is the null bond query. */
              yylval->bond = new QueryBond();
              yylval->bond->setQuery(makeBondNullQuery());
              return BOND_TOKEN;
            }
[\\]{1,2}   {
              /* One or two backslashes give a bond of unspecified order,
               * flagged through the _unspecifiedOrder property, with
               * direction ENDDOWNRIGHT.
               * NOTE(review): the doubled form presumably tolerates input
               * whose backslashes were escaped upstream - confirm. */
              yylval->bond = new Bond(Bond::UNSPECIFIED);
              yylval->bond->setProp(RDKit::common_properties::_unspecifiedOrder,1);
              yylval->bond->setBondDir(Bond::ENDDOWNRIGHT);
              return BOND_TOKEN;
            }
[\/]        {
              /* Forward slash: same as above but with direction ENDUPRIGHT. */
              yylval->bond = new Bond(Bond::UNSPECIFIED);
              yylval->bond->setProp(RDKit::common_properties::_unspecifiedOrder,1);
              yylval->bond->setBondDir(Bond::ENDUPRIGHT);
              return BOND_TOKEN;
            }
\-          { return MINUS_TOKEN; }
\+          { return PLUS_TOKEN; }
\(          { return GROUP_OPEN_TOKEN; }
\)          { return GROUP_CLOSE_TOKEN; }
\[          {
              /* Entering a bracket atom switches the scanner into
               * IN_ATOM_STATE. */
              BEGIN IN_ATOM_STATE;
              return ATOM_OPEN_TOKEN;
            }
\]          {
              /* Leaving a bracket atom returns the scanner to INITIAL. */
              BEGIN INITIAL;
              return ATOM_CLOSE_TOKEN;
            }
\.          { return SEPARATOR_TOKEN; }
\%          { return PERCENT_TOKEN; }
[0]         { yylval->ival = 0; return ZERO_TOKEN; }
[1-9]       {
              /* A single non-zero digit; its numeric value is passed along. */
              yylval->ival = atoi( yytext );
              return NONZERO_DIGIT_TOKEN;
            }
\n          { /* A newline ends scanning: 0 is returned to the caller. */ return 0; }
<<EOF>>     {
              /* End of input is reported with an explicit token.  The
               * pattern must be the special end-of-input pattern; an empty
               * start-condition list is rejected by flex. */
              return EOS_TOKEN;
            }
.           { /* Any other character is returned as its own code. */ return yytext[0]; }

%%

/* Drop any macro of this name before defining the function below.
 * NOTE(review): flex may itself define such a macro when noyywrap is set -
 * confirm against the generated scanner. */
#undef yysmiles_wrap

/* Wrap hook for the scanner: always returns 1, which under flex's yywrap
 * convention means there is no further input to switch to. */
int yysmiles_wrap( void ) { return 1; }