// // Copyright (C) 2014 Novartis Institutes for BioMedical Research // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include "../QueryAtom.h" #include "../QueryBond.h" #include "../SmilesParse/SmilesWrite.h" #include "../SmilesParse/SmartsWrite.h" #include "../SmilesParse/SmilesParse.h" #include "../Substruct/SubstructMatch.h" #include "SubstructMatchCustom.h" #include "MaximumCommonSubgraph.h" #include #include #include namespace RDKit { namespace FMCS { struct LabelDefinition { unsigned ItemIndex; // item with this label value unsigned Value; LabelDefinition() : ItemIndex(NotSet), Value(NotSet) {} LabelDefinition(unsigned i, unsigned value) : ItemIndex(i), Value(value) {} }; MaximumCommonSubgraph::MaximumCommonSubgraph(const MCSParameters* params) { Parameters = (nullptr != params ? *params : MCSParameters()); if (!Parameters.ProgressCallback) { Parameters.ProgressCallback = MCSProgressCallbackTimeout; Parameters.ProgressCallbackUserData = &To; } if ((Parameters.AtomCompareParameters.MatchChiralTag || Parameters.BondCompareParameters.MatchFusedRings || Parameters.BondCompareParameters.MatchFusedRingsStrict) && nullptr == Parameters.FinalMatchChecker) { Parameters.FinalMatchChecker = FinalMatchCheckFunction; if (Parameters.AtomCompareParameters.MatchChiralTag) { Parameters.BondCompareParameters.MatchStereo = true; } } To = nanoClock(); } static bool molPtr_NumBondLess( const ROMol* l, const ROMol* r) { // need for sorting the source molecules by size return l->getNumBonds() < r->getNumBonds(); } void MaximumCommonSubgraph::init() { QueryMolecule = Molecules.front(); Targets.clear(); #ifdef FAST_SUBSTRUCT_CACHE QueryAtomLabels.clear(); QueryBondLabels.clear(); QueryAtomMatchTable.clear(); QueryBondMatchTable.clear(); RingMatchTables.clear(); #endif #ifdef DUP_SUBSTRUCT_CACHE DuplicateCache.clear(); #endif void* userData = Parameters.CompareFunctionsUserData; size_t nq = 0; #ifdef FAST_SUBSTRUCT_CACHE // fill out RingMatchTables to check cache Hash collision by checking match a // part of Query to Query if (!userData // predefined functor - compute RingMatchTable for all targets && (Parameters.BondCompareParameters.CompleteRingsOnly || Parameters.BondCompareParameters.RingMatchesRingOnly || Parameters.AtomCompareParameters.RingMatchesRingOnly)) { RingMatchTables.init(QueryMolecule); Parameters.CompareFunctionsUserData = &RingMatchTables; RingMatchTables.computeRingMatchTable(QueryMolecule, QueryMolecule, Parameters); } // fill out match tables nq = QueryMolecule->getNumAtoms(); QueryAtomMatchTable.resize(nq, nq); for (size_t aj = 0; aj < nq; aj++) { for (size_t ai = 0; ai < nq; ai++) { QueryAtomMatchTable.set( ai, aj, Parameters.AtomTyper(Parameters.AtomCompareParameters, *QueryMolecule, ai, *QueryMolecule, aj, Parameters.CompareFunctionsUserData)); } } nq = QueryMolecule->getNumBonds(); QueryBondMatchTable.resize(nq, nq); for (size_t aj = 0; aj < nq; aj++) { for (size_t ai = 0; ai < nq; ai++) { QueryBondMatchTable.set( ai, aj, Parameters.BondTyper(Parameters.BondCompareParameters, *QueryMolecule, ai, *QueryMolecule, aj, Parameters.CompareFunctionsUserData)); } } // Compute label values based on current functor and parameters for code // Morgan correct computation. unsigned currentLabelValue = 1; std::vector labels; nq = QueryMolecule->getNumAtoms(); QueryAtomLabels.resize(nq, NotSet); for (size_t ai = 0; ai < nq; ++ai) { if (MCSAtomCompareAny == Parameters.AtomTyper) { // predefined functor without atom compare // parameters QueryAtomLabels[ai] = 1; } else { const Atom* atom = QueryMolecule->getAtomWithIdx(ai); if (MCSAtomCompareElements == Parameters.AtomTyper) { // predefined functor without atom compare // parameters QueryAtomLabels[ai] = atom->getAtomicNum() | (Parameters.AtomCompareParameters.MatchValences ? (atom->getTotalValence() >> 8) : 0); } else if (MCSAtomCompareIsotopes == Parameters.AtomTyper) { // predefined functor without atom // compare parameters QueryAtomLabels[ai] = atom->getAtomicNum() | (atom->getIsotope() >> 8) | (Parameters.AtomCompareParameters.MatchValences ? (atom->getTotalValence() >> 16) : 0); } else { // custom user defined functor QueryAtomLabels[ai] = NotSet; for (auto& label : labels) { if (Parameters.AtomTyper(Parameters.AtomCompareParameters, *QueryMolecule, label.ItemIndex, *QueryMolecule, ai, userData)) { // equal items QueryAtomLabels[ai] = label.Value; break; } } if (NotSet == QueryAtomLabels[ai]) { // not found -> create new label QueryAtomLabels[ai] = ++currentLabelValue; labels.emplace_back(ai, currentLabelValue); } } } } labels.clear(); currentLabelValue = 1; nq = QueryMolecule->getNumBonds(); QueryBondLabels.resize(nq, NotSet); for (size_t aj = 0; aj < nq; ++aj) { const Bond* bond = QueryMolecule->getBondWithIdx(aj); unsigned ring = 0; if (!userData && (Parameters.BondCompareParameters.CompleteRingsOnly || Parameters.BondCompareParameters.RingMatchesRingOnly)) { ring = RingMatchTables.isQueryBondInRing(aj) ? 0 : 1; // is bond in ring } if (MCSBondCompareAny == Parameters.BondTyper) { // predefined functor without atom compare // parameters QueryBondLabels[aj] = 1 | (ring >> 8); } else if (MCSBondCompareOrderExact == Parameters.BondTyper) { // predefined functor without compare // parameters QueryBondLabels[aj] = (bond->getBondType() + 1) | (ring >> 8); } else if (MCSBondCompareOrder == Parameters .BondTyper) { // predefined functor, ignore Aromatization unsigned order = bond->getBondType(); if (Bond::AROMATIC == order || Bond::ONEANDAHALF == order) { // ignore Aromatization order = Bond::SINGLE; } else if (Bond::TWOANDAHALF == order) { order = Bond::DOUBLE; } else if (Bond::THREEANDAHALF == order) { order = Bond::TRIPLE; } else if (Bond::FOURANDAHALF == order) { order = Bond::QUADRUPLE; } else if (Bond::FIVEANDAHALF == order) { order = Bond::QUINTUPLE; } QueryBondLabels[aj] = (order + 1) | (ring >> 8); } else { // custom user defined functor QueryBondLabels[aj] = NotSet; for (auto& label : labels) { if (Parameters.BondTyper(Parameters.BondCompareParameters, *QueryMolecule, label.ItemIndex, *QueryMolecule, aj, userData)) { // equal bonds + ring ... QueryBondLabels[aj] = label.Value; break; } } if (NotSet == QueryBondLabels[aj]) { // not found -> create new label QueryBondLabels[aj] = ++currentLabelValue; labels.emplace_back(aj, currentLabelValue); } } } #endif Targets.resize(Molecules.size() - 1); size_t i = 0; for (auto it = Molecules.begin() + 1; it != Molecules.end(); it++, i++) { Targets[i].Molecule = *it; // build Target Topology ADD ATOMs size_t j = 0; // current item for (ROMol::ConstAtomIterator a = Targets[i].Molecule->beginAtoms(); a != Targets[i].Molecule->endAtoms(); a++, j++) { Targets[i].Topology.addAtom((*a)->getIdx()); } // build Target Topology ADD BONDs for (ROMol::ConstBondIterator b = Targets[i].Molecule->beginBonds(); b != Targets[i].Molecule->endBonds(); b++) { const Bond* bond = *b; unsigned ii = bond->getBeginAtomIdx(); unsigned jj = bond->getEndAtomIdx(); Targets[i].Topology.addBond((*b)->getIdx(), ii, jj); } #ifdef FAST_SUBSTRUCT_CACHE // fill out RingMatchTables if (!userData // predefined functor - compute RingMatchTable for all // targets && (Parameters.BondCompareParameters.CompleteRingsOnly || Parameters.BondCompareParameters.RingMatchesRingOnly)) { RingMatchTables.addTargetBondRingsIndeces(Targets[i].Molecule); RingMatchTables.computeRingMatchTable(QueryMolecule, Targets[i].Molecule, Parameters); } #endif // fill out match tables size_t nq = QueryMolecule->getNumAtoms(); size_t nt = (*it)->getNumAtoms(); Targets[i].AtomMatchTable.resize(nq, nt); for (size_t aj = 0; aj < nt; aj++) { for (size_t ai = 0; ai < nq; ai++) { Targets[i].AtomMatchTable.set( ai, aj, Parameters.AtomTyper(Parameters.AtomCompareParameters, *QueryMolecule, ai, *Targets[i].Molecule, aj, Parameters.CompareFunctionsUserData)); } } nq = QueryMolecule->getNumBonds(); nt = (*it)->getNumBonds(); Targets[i].BondMatchTable.resize(nq, nt); for (size_t aj = 0; aj < nt; aj++) { for (size_t ai = 0; ai < nq; ai++) { Targets[i].BondMatchTable.set( ai, aj, Parameters.BondTyper(Parameters.BondCompareParameters, *QueryMolecule, ai, *Targets[i].Molecule, aj, Parameters.CompareFunctionsUserData)); } } } Parameters.CompareFunctionsUserData = userData; // restore } struct QueryRings { std::vector BondRings; // amount of rings std::vector AtomRings; // amount of rings QueryRings(const ROMol* query) : BondRings(query->getNumBonds()), AtomRings(query->getNumAtoms()) { for (unsigned int& BondRing : BondRings) { BondRing = 0; } const RingInfo::VECT_INT_VECT& brings = query->getRingInfo()->bondRings(); for (const auto& ring : brings) { for (int ri : ring) { ++BondRings[ri]; } } for (unsigned int& AtomRing : AtomRings) { AtomRing = 0; } const RingInfo::VECT_INT_VECT& arings = query->getRingInfo()->atomRings(); for (const auto& ring : arings) { for (int ri : ring) { ++AtomRings[ri]; } } } inline unsigned getNumberRings(const Bond* bond) const { return BondRings[bond->getIdx()]; } inline unsigned getNumberRings(const Atom* atom) const { return AtomRings[atom->getIdx()]; } }; // namespace RDKit struct WeightedBond { const Bond* BondPtr{nullptr}; unsigned Weight{0}; WeightedBond() {} WeightedBond(const Bond* bond, const QueryRings& r) : BondPtr(bond), Weight(0) { // score ((bond.is_in_ring + atom1.is_in_ring + atom2.is_in_ring) if (r.getNumberRings(bond)) { Weight += 1; } if (r.getNumberRings(bond->getBeginAtom())) { Weight += 1; } if (r.getNumberRings(bond->getEndAtom())) { Weight += 1; } } bool operator<(const WeightedBond& r) { return Weight >= r.Weight; // sort in Z-A order (Rings first) } }; void MaximumCommonSubgraph::makeInitialSeeds() { // build a set of initial seeds as "all" single bonds from query // molecule std::vector excludedBonds(QueryMolecule->getNumBonds(), false); Seeds.clear(); QueryMoleculeMatchedBonds = 0; QueryMoleculeMatchedAtoms = 0; QueryMoleculeSingleMatchedAtom = nullptr; if (!Parameters.InitialSeed.empty()) { // make user defined seed std::unique_ptr initialSeedMolecule( (const ROMol*)SmartsToMol(Parameters.InitialSeed)); // make a set of of seed as indices and pointers to current query // molecule items based on matching results std::vector matching_substructs; SubstructMatch(*QueryMolecule, *initialSeedMolecule, matching_substructs); // loop throw all fragments of Query matched to initial seed for (std::vector::const_iterator ms = matching_substructs.begin(); ms != matching_substructs.end(); ms++) { Seed seed; seed.ExcludedBonds = excludedBonds; seed.MatchResult.resize(Targets.size()); #ifdef VERBOSE_STATISTICS_ON { ++VerboseStatistics.Seed; ++VerboseStatistics.InitialSeed; } #endif // add all matched atoms of the matched query fragment std::map initialSeedToQueryAtom; for (const auto& msb : *ms) { unsigned qai = msb.second; unsigned sai = msb.first; seed.addAtom(QueryMolecule->getAtomWithIdx(qai)); initialSeedToQueryAtom[sai] = qai; } // add all bonds (existed in initial seed !!!) between all matched // atoms in query for (const auto& msb : *ms) { const Atom* atom = initialSeedMolecule->getAtomWithIdx(msb.first); ROMol::OEDGE_ITER beg, end; for (boost::tie(beg, end) = initialSeedMolecule->getAtomBonds(atom); beg != end; beg++) { const Bond& initialBond = *((*initialSeedMolecule)[*beg]); unsigned qai1 = initialSeedToQueryAtom.find(initialBond.getBeginAtomIdx()) ->second; unsigned qai2 = initialSeedToQueryAtom.find(initialBond.getEndAtomIdx())->second; const Bond* b = QueryMolecule->getBondBetweenAtoms(qai1, qai2); if (!seed.ExcludedBonds[b->getIdx()]) { seed.addBond(b); seed.ExcludedBonds[b->getIdx()] = true; } } } seed.computeRemainingSize(*QueryMolecule); if (checkIfMatchAndAppend(seed)) { QueryMoleculeMatchedBonds = seed.getNumBonds(); } } if (Seeds.empty()) { BOOST_LOG(rdWarningLog) << "The provided InitialSeed is not an MCS and will be ignored" << std::endl; } } if (Seeds.empty()) { // create a set of seeds from each query bond // R1 additional performance OPTIMISATION // if(Parameters.BondCompareParameters.CompleteRingsOnly) // disable all mismatched rings, and do not generate initial seeds // from such disabled bonds // for( rings .....) for(i......) // if(mismatched) excludedBonds[i.......] = true; QueryRings r(QueryMolecule); std::vector wb; wb.reserve(QueryMolecule->getNumBonds()); for (RWMol::ConstBondIterator bi = QueryMolecule->beginBonds(); bi != QueryMolecule->endBonds(); bi++) { wb.emplace_back(*bi, r); } for (std::vector::const_iterator bi = wb.begin(); bi != wb.end(); bi++) { // R1 additional performance OPTIMISATION // if(excludedBonds[(*bi)->getIdx()]) // continue; Seed seed; seed.MatchResult.resize(Targets.size()); #ifdef VERBOSE_STATISTICS_ON { ++VerboseStatistics.Seed; ++VerboseStatistics.InitialSeed; } #endif seed.addAtom(bi->BondPtr->getBeginAtom()); seed.addAtom(bi->BondPtr->getEndAtom()); seed.ExcludedBonds = excludedBonds; // all bonds from first to current seed.addBond(bi->BondPtr); excludedBonds[bi->BondPtr->getIdx()] = true; seed.computeRemainingSize(*QueryMolecule); if (checkIfMatchAndAppend(seed)) { ++QueryMoleculeMatchedBonds; } else { // optionally remove all such bonds from all targets TOPOLOGY // where it exists. //.......... // disable (mark as already processed) mismatched bond in all // seeds for (auto& Seed : Seeds) { Seed.ExcludedBonds[bi->BondPtr->getIdx()] = true; } #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.MismatchedInitialSeed; #endif } } } size_t nq = QueryMolecule->getNumAtoms(); for (size_t i = 0; i < nq; i++) { // all query's atoms const Atom* queryMolAtom = QueryMolecule->getAtomWithIdx(i); bool isQueryMolAtomInRing = queryIsAtomInRing(queryMolAtom); unsigned matched = 0; for (std::vector::const_iterator tag = Targets.begin(); tag != Targets.end(); tag++) { size_t nt = tag->Molecule->getNumAtoms(); for (size_t aj = 0; aj < nt; aj++) { if (tag->AtomMatchTable.at(i, aj)) { const Atom* targetMolAtom = tag->Molecule->getAtomWithIdx(aj); bool isTargetMolAtomInRing = queryIsAtomInRing(targetMolAtom); ++matched; if (!(Parameters.BondCompareParameters.CompleteRingsOnly && (isQueryMolAtomInRing || isTargetMolAtomInRing))) { if (!QueryMoleculeSingleMatchedAtom) { QueryMoleculeSingleMatchedAtom = queryMolAtom; } else { QueryMoleculeSingleMatchedAtom = (std::max)( queryMolAtom, QueryMoleculeSingleMatchedAtom, [](const Atom* a, const Atom* b) { if (a->getDegree() != b->getDegree()) { return (a->getDegree() < b->getDegree()); } else if (a->getFormalCharge() != b->getFormalCharge()) { return (a->getFormalCharge() < b->getFormalCharge()); } else if (a->getAtomicNum() != b->getAtomicNum()) { return (a->getAtomicNum() < b->getAtomicNum()); } return (a->getIdx() < b->getIdx()); }); } } break; } } } if (matched >= ThresholdCount) { ++QueryMoleculeMatchedAtoms; } } } namespace { void _DFS(const Graph& g, const boost::dynamic_bitset<>& ringBonds, boost::dynamic_bitset<>& openBonds, unsigned vertex, std::vector apath, std::vector& colors, unsigned fromVertex) { std::pair bonds = boost::out_edges(vertex, g); colors[vertex] = 1; while (bonds.first != bonds.second) { unsigned bIdx = g[*(bonds.first)]; if (!ringBonds[bIdx]) { ++bonds.first; continue; } // find the other vertex: unsigned oVertex = boost::source(*(bonds.first), g); if (oVertex == vertex) { oVertex = boost::target((*bonds.first), g); } ++bonds.first; if (oVertex == fromVertex) { continue; } if (colors[oVertex] == 1) { // closing a cycle for (auto ai = apath.rbegin(); ai != apath.rend() && *ai != oVertex; ++ai) { auto epair = boost::edge(*ai, *(ai + 1), g); CHECK_INVARIANT(epair.second, "edge not found"); openBonds.reset(g[epair.first]); } auto epair = boost::edge(oVertex, *apath.rbegin(), g); CHECK_INVARIANT(epair.second, "edge not found"); openBonds.reset(g[epair.first]); } else if (colors[oVertex] == 0) { std::vector napath = apath; napath.push_back(oVertex); _DFS(g, ringBonds, openBonds, oVertex, napath, colors, vertex); } } colors[vertex] = 2; return; } bool checkIfRingsAreClosed(const Seed& fs) { if (!fs.MoleculeFragment.Bonds.size()) { return true; } bool res = true; const auto& om = fs.MoleculeFragment.Bonds[0]->getOwningMol(); const auto ri = om.getRingInfo(); boost::dynamic_bitset<> ringBonds(om.getNumBonds()); for (const auto bond : fs.MoleculeFragment.Bonds) { if (ri->numBondRings(bond->getIdx())) { ringBonds.set(bond->getIdx()); } } boost::dynamic_bitset<> openBonds = ringBonds; if (openBonds.any()) { std::vector colors(om.getNumAtoms()); for (unsigned bi = 0; bi < openBonds.size(); ++bi) { if (!openBonds[bi]) { continue; } std::pair bonds = boost::edges(fs.Topology); while (bonds.first != bonds.second && fs.Topology[*(bonds.first)] != bi) { ++bonds.first; } CHECK_INVARIANT(bonds.first != bonds.second, "bond not found"); unsigned startVertex = boost::source(*(bonds.first), fs.Topology); std::vector apath = {startVertex}; _DFS(fs.Topology, ringBonds, openBonds, startVertex, apath, colors, om.getNumAtoms() + 1); } res = openBonds.none(); } return res; } bool checkNoLoneRingAtoms(const Seed& fs) { if (!fs.MoleculeFragment.Atoms.size()) { return true; } bool res = true; const auto& om = fs.MoleculeFragment.Atoms[0]->getOwningMol(); const auto ri = om.getRingInfo(); for (const auto& ithRingAtomIndices : ri->atomRings()) { size_t count = 0; for (const auto atom : fs.MoleculeFragment.Atoms) { if (std::find(ithRingAtomIndices.begin(), ithRingAtomIndices.end(), atom->getIdx()) != ithRingAtomIndices.end() && ++count > 1) { break; } } if (count == 1) { res = false; break; } } return res; } } // namespace bool MaximumCommonSubgraph::growSeeds() { bool mcsFound = false; bool canceled = false; unsigned steps = 99999; // steps from last progress callback call. call // it immediately in the beginning // Find MCS -- SDF Seed growing OPTIMISATION (it works in 3 times // faster) while (!Seeds.empty()) { if (getMaxNumberBonds() == QueryMoleculeMatchedBonds) { // MCS == Query break; } ++steps; #ifdef VERBOSE_STATISTICS_ON VerboseStatistics.TotalSteps++; #endif auto si = Seeds.begin(); si->grow(*this); { const Seed& fs = Seeds.front(); // bigger substructure found if (fs.CopyComplete) { bool possibleMCS = false; if ((!Parameters.MaximizeBonds && (fs.getNumAtoms() > getMaxNumberAtoms() || (fs.getNumAtoms() == getMaxNumberAtoms() && fs.getNumBonds() > getMaxNumberBonds())))) { possibleMCS = true; } else if (Parameters.MaximizeBonds && (fs.getNumBonds() > getMaxNumberBonds() || (fs.getNumBonds() == getMaxNumberBonds() && fs.getNumAtoms() > getMaxNumberAtoms()))) { possibleMCS = true; } // #945: test here to see if the MCS actually has all rings closed if (possibleMCS && Parameters.BondCompareParameters.CompleteRingsOnly) { possibleMCS = checkIfRingsAreClosed(fs); } if (possibleMCS && Parameters.AtomCompareParameters.CompleteRingsOnly) { possibleMCS = checkNoLoneRingAtoms(fs); } if (possibleMCS) { mcsFound = true; #ifdef VERBOSE_STATISTICS_ON VerboseStatistics.MCSFoundStep = VerboseStatistics.TotalSteps; VerboseStatistics.MCSFoundTime = nanoClock(); #endif McsIdx.Atoms = fs.MoleculeFragment.Atoms; McsIdx.Bonds = fs.MoleculeFragment.Bonds; McsIdx.AtomsIdx = fs.MoleculeFragment.AtomsIdx; McsIdx.BondsIdx = fs.MoleculeFragment.BondsIdx; if (Parameters.Verbose) { std::cout << VerboseStatistics.TotalSteps << " Seeds:" << Seeds.size() << " MCS " << McsIdx.Atoms.size() << " atoms, " << McsIdx.Bonds.size() << " bonds"; printf(" for %.4lf seconds. bond[0]=%u\n", double(VerboseStatistics.MCSFoundTime - To) / 1000000., McsIdx.BondsIdx[0]); } } } } if (NotSet == si->GrowingStage) { // finished Seeds.erase(si); } if (Parameters.ProgressCallback) { steps = 0; Stat.NumAtoms = getMaxNumberAtoms(); Stat.NumBonds = getMaxNumberBonds(); if (!Parameters.ProgressCallback(Stat, Parameters, Parameters.ProgressCallbackUserData)) { canceled = true; break; } } } if (mcsFound) { // postponed copy of current set of molecules for // threshold < 1. McsIdx.QueryMolecule = QueryMolecule; McsIdx.Targets = Targets; } return !canceled; } // namespace FMCS struct AtomMatch { // for each seed atom (matched) unsigned QueryAtomIdx; unsigned TargetAtomIdx; AtomMatch() : QueryAtomIdx(NotSet), TargetAtomIdx(NotSet) {} }; typedef std::vector AtomMatchSet; std::pair MaximumCommonSubgraph::generateResultSMARTSAndQueryMol( const MCS& mcsIdx) const { // match the result MCS with all targets to check if it is exact match // or template Seed seed; // result MCS seed.ExcludedBonds.resize(mcsIdx.QueryMolecule->getNumBonds(), false); std::vector atomMatchResult(mcsIdx.Targets.size()); std::vector atomIdxMap(mcsIdx.QueryMolecule->getNumAtoms()); std::vector> bondMatchSet( mcsIdx.Bonds.size()); // key is unique BondType std::vector> atomMatchSet( mcsIdx.Atoms.size()); // key is unique atomic number for (auto atom : mcsIdx.Atoms) { atomIdxMap[atom->getIdx()] = seed.getNumAtoms(); seed.addAtom(atom); } for (auto bond : mcsIdx.Bonds) { seed.addBond(bond); } if (!mcsIdx.Bonds.empty()) { unsigned itarget = 0; for (auto tag = mcsIdx.Targets.begin(); tag != mcsIdx.Targets.end(); tag++, itarget++) { match_V_t match; // THERE IS NO Bonds match INFO !!!! bool target_matched = SubstructMatchCustomTable( tag->Topology, *tag->Molecule, seed.Topology, *QueryMolecule, tag->AtomMatchTable, tag->BondMatchTable, &Parameters, &match); if (!target_matched) { continue; } atomMatchResult[itarget].resize(seed.getNumAtoms()); for (match_V_t::const_iterator mit = match.begin(); mit != match.end(); mit++) { unsigned ai = mit->first; // SeedAtomIdx atomMatchResult[itarget][ai].QueryAtomIdx = seed.Topology[mit->first]; atomMatchResult[itarget][ai].TargetAtomIdx = tag->Topology[mit->second]; const Atom* ta = tag->Molecule->getAtomWithIdx(tag->Topology[mit->second]); if (ta && ta->getAtomicNum() != seed.MoleculeFragment.Atoms[ai]->getAtomicNum()) { atomMatchSet[ai][ta->getAtomicNum()] = ta; // add } } // AND BUILD BOND MATCH INFO unsigned bi = 0; for (auto bond = mcsIdx.Bonds.begin(); bond != mcsIdx.Bonds.end(); bond++, bi++) { unsigned i = atomIdxMap[(*bond)->getBeginAtomIdx()]; unsigned j = atomIdxMap[(*bond)->getEndAtomIdx()]; unsigned ti = atomMatchResult[itarget][i].TargetAtomIdx; unsigned tj = atomMatchResult[itarget][j].TargetAtomIdx; const Bond* tb = tag->Molecule->getBondBetweenAtoms(ti, tj); if (tb && (*bond)->getBondType() != tb->getBondType()) { bondMatchSet[bi][tb->getBondType()] = tb; // add } } } } // Generate result's SMARTS // create molecule from MCS for MolToSmarts() auto* mol = new RWMol(); const RingInfo* ri = mcsIdx.QueryMolecule->getRingInfo(); unsigned ai = 0; // SeedAtomIdx for (auto atom = mcsIdx.Atoms.begin(); atom != mcsIdx.Atoms.end(); atom++, ai++) { QueryAtom a; if (Parameters.AtomTyper == MCSAtomCompareIsotopes || Parameters.AtomCompareParameters .MatchIsotope) { // do '[0*]-[0*]-[13*]' for CC[13NH2] a.setQuery(makeAtomIsotopeQuery((int)(*atom)->getIsotope())); } else { // generate [#6] instead of C or c ! a.setQuery(makeAtomNumQuery((*atom)->getAtomicNum())); // for all atomMatchSet[ai] items add atom query to template like // [#6,#17,#9, ... ] for (std::map::const_iterator am = atomMatchSet[ai].begin(); am != atomMatchSet[ai].end(); am++) { a.expandQuery(makeAtomNumQuery(am->second->getAtomicNum()), Queries::COMPOSITE_OR); if (Parameters.AtomCompareParameters.MatchChiralTag && (am->second->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW || am->second->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW)) { a.setChiralTag(am->second->getChiralTag()); } } } if (Parameters.AtomCompareParameters.RingMatchesRingOnly) { ATOM_EQUALS_QUERY* q = makeAtomInRingQuery(); q->setNegation(!ri->numAtomRings((*atom)->getIdx())); a.expandQuery(q, Queries::COMPOSITE_AND, true); } mol->addAtom(&a, true, false); } unsigned bi = 0; // Seed Idx for (auto bond = mcsIdx.Bonds.begin(); bond != mcsIdx.Bonds.end(); bond++, bi++) { QueryBond b; unsigned beginAtomIdx = atomIdxMap[(*bond)->getBeginAtomIdx()]; unsigned endAtomIdx = atomIdxMap[(*bond)->getEndAtomIdx()]; b.setBeginAtomIdx(beginAtomIdx); b.setEndAtomIdx(endAtomIdx); b.setQuery(makeBondOrderEqualsQuery((*bond)->getBondType())); // add OR template if need for (std::map::const_iterator bm = bondMatchSet[bi].begin(); bm != bondMatchSet[bi].end(); bm++) { b.expandQuery(makeBondOrderEqualsQuery(bm->second->getBondType()), Queries::COMPOSITE_OR); if (Parameters.BondCompareParameters.MatchStereo && bm->second->getStereo() > Bond::STEREOANY) { b.setStereo(bm->second->getStereo()); } } if (Parameters.BondCompareParameters.RingMatchesRingOnly) { BOND_EQUALS_QUERY* q = makeBondIsInRingQuery(); q->setNegation(!ri->numBondRings((*bond)->getIdx())); b.expandQuery(q, Queries::COMPOSITE_AND, true); } mol->addBond(&b, false); } return std::make_pair(MolToSmarts(*mol, true), mol); } bool MaximumCommonSubgraph::addFusedBondQueries(const MCS& mcsIdx, RWMol* rwMol) const { const RingInfo* ri = mcsIdx.QueryMolecule->getRingInfo(); unsigned bi = 0; // Seed Idx bool haveFusedBondQuery = false; std::map> bondRingMembership; const VECT_INT_VECT& br = ri->bondRings(); std::vector mcsRingSize(br.size(), 0); for (size_t ringIdx = 0; ringIdx < br.size(); ++ringIdx) { for (int bondIdx : br[ringIdx]) { bondRingMembership[bondIdx].insert(ringIdx); } } for (auto Bond : mcsIdx.Bonds) { unsigned int bondIdx = Bond->getIdx(); if (!ri->numBondRings(bondIdx)) { continue; } for (size_t ringIdx : bondRingMembership[bondIdx]) { ++mcsRingSize[ringIdx]; } } for (auto bond = mcsIdx.Bonds.begin(); bond != mcsIdx.Bonds.end(); ++bond, ++bi) { unsigned int bondIdx = (*bond)->getIdx(); if (!ri->numBondRings(bondIdx)) { continue; } haveFusedBondQuery = true; for (int ringIdx : bondRingMembership[bondIdx]) { if (mcsRingSize[ringIdx] < br[ringIdx].size()) { continue; } rwMol->getBondWithIdx(bi)->expandQuery( makeBondInRingOfSizeQuery(br[ringIdx].size()), Queries::COMPOSITE_AND, true); } } return haveFusedBondQuery; } bool MaximumCommonSubgraph::createSeedFromMCS(size_t newQueryTarget, Seed& newSeed) { Seed mcs; mcs.ExcludedBonds.resize(McsIdx.QueryMolecule->getNumBonds(), false); std::vector mcsAtomIdxMap(McsIdx.QueryMolecule->getNumAtoms()); for (std::vector::const_iterator atom = McsIdx.Atoms.begin(); atom != McsIdx.Atoms.end(); atom++) { mcsAtomIdxMap[(*atom)->getIdx()] = mcs.addAtom((*atom)); } for (std::vector::const_iterator bond = McsIdx.Bonds.begin(); bond != McsIdx.Bonds.end(); bond++) { mcs.addBond((*bond)); } const Target& newQuery = McsIdx.Targets[newQueryTarget]; match_V_t match; bool target_matched = SubstructMatchCustomTable( newQuery.Topology, *newQuery.Molecule, mcs.Topology, *McsIdx.QueryMolecule, newQuery.AtomMatchTable, newQuery.BondMatchTable, &Parameters, &match); if (!target_matched) { return false; } AtomMatchSet atomMatchResult(mcs.getNumAtoms()); newSeed.ExcludedBonds.resize(newQuery.Molecule->getNumBonds(), false); for (match_V_t::const_iterator mit = match.begin(); mit != match.end(); mit++) { unsigned ai = mit->first; // SeedAtomIdx in mcs seed atomMatchResult[ai].QueryAtomIdx = mcs.Topology[mit->first]; atomMatchResult[ai].TargetAtomIdx = newQuery.Topology[mit->second]; const Atom* ta = newQuery.Molecule->getAtomWithIdx(newQuery.Topology[mit->second]); newSeed.addAtom(ta); } for (std::vector::const_iterator bond = McsIdx.Bonds.begin(); bond != McsIdx.Bonds.end(); bond++) { unsigned i = mcsAtomIdxMap[(*bond)->getBeginAtomIdx()]; unsigned j = mcsAtomIdxMap[(*bond)->getEndAtomIdx()]; unsigned ti = atomMatchResult[i].TargetAtomIdx; unsigned tj = atomMatchResult[j].TargetAtomIdx; const Bond* tb = newQuery.Molecule->getBondBetweenAtoms(ti, tj); newSeed.addBond(tb); } newSeed.computeRemainingSize(*newQuery.Molecule); return true; } MCSResult MaximumCommonSubgraph::find(const std::vector& src_mols) { clear(); MCSResult res; if (src_mols.size() < 2) { throw std::runtime_error( "FMCS. Invalid argument. mols.size() must be at least 2"); } if (Parameters.Threshold > 1.0) { throw std::runtime_error( "FMCS. Invalid argument. Parameter Threshold must be 1.0 or " "less."); } ThresholdCount = (unsigned)ceil((src_mols.size()) * Parameters.Threshold) - 1; // minimal required number of matched targets if (ThresholdCount < 1) { // at least one target ThresholdCount = 1; } if (ThresholdCount > src_mols.size() - 1) { // max all targets ThresholdCount = src_mols.size() - 1; } // AtomCompareParameters.CompleteRingsOnly implies // BondCompareParameters.CompleteRingsOnly if (Parameters.AtomCompareParameters.CompleteRingsOnly) { Parameters.BondCompareParameters.CompleteRingsOnly = true; } // Selecting CompleteRingsOnly option also enables // --ring-matches-ring-only. ring--ring and chain bonds only match chain // bonds. if (Parameters.BondCompareParameters.CompleteRingsOnly) { Parameters.BondCompareParameters.RingMatchesRingOnly = true; } if (Parameters.AtomCompareParameters.CompleteRingsOnly) { Parameters.AtomCompareParameters.RingMatchesRingOnly = true; } unsigned i = 0; boost::dynamic_bitset<> faked_ring_info(src_mols.size()); for (const auto& src_mol : src_mols) { Molecules.push_back(src_mol.get()); if (!Molecules.back()->getRingInfo()->isInitialized()) { Molecules.back()->getRingInfo()->initialize(); // but do not fill out !!! faked_ring_info.set(i); } ++i; } // sort source set of molecules by their 'size' and assume the smallest // molecule as a query std::stable_sort(Molecules.begin(), Molecules.end(), molPtr_NumBondLess); bool areSeedsEmpty = false; for (size_t i = 0; i < Molecules.size() - ThresholdCount && !areSeedsEmpty && !res.Canceled; ++i) { init(); if (Targets.empty()) { break; } MCSFinalMatchCheckFunction tff = Parameters.FinalMatchChecker; if (FinalMatchCheckFunction == Parameters.FinalMatchChecker) { Parameters.FinalMatchChecker = nullptr; // skip final match check for } // initial seed to allow future growing // of it makeInitialSeeds(); Parameters.FinalMatchChecker = tff; // restore final functor if (Parameters.Verbose) { std::cout << "Query " << MolToSmiles(*QueryMolecule) << " " << QueryMolecule->getNumAtoms() << "(" << QueryMoleculeMatchedAtoms << ") atoms, " << QueryMolecule->getNumBonds() << "(" << QueryMoleculeMatchedBonds << ") bonds\n"; } areSeedsEmpty = Seeds.empty(); res.Canceled = !(areSeedsEmpty || growSeeds()); // verify what MCS is equal to one of initial seed for chirality match if ((FinalMatchCheckFunction == Parameters.FinalMatchChecker && 1 == getMaxNumberBonds()) || 0 == getMaxNumberBonds()) { McsIdx = MCS(); // clear makeInitialSeeds(); // check all possible initial seeds if (!areSeedsEmpty) { const Seed& fs = Seeds.front(); if (1 == getMaxNumberBonds() || !(Parameters.BondCompareParameters.CompleteRingsOnly && fs.MoleculeFragment.Bonds.size() == 1 && queryIsBondInRing(fs.MoleculeFragment.Bonds.front()))) { McsIdx.QueryMolecule = QueryMolecule; McsIdx.Atoms = fs.MoleculeFragment.Atoms; McsIdx.Bonds = fs.MoleculeFragment.Bonds; McsIdx.AtomsIdx = fs.MoleculeFragment.AtomsIdx; McsIdx.BondsIdx = fs.MoleculeFragment.BondsIdx; } } if (!McsIdx.QueryMolecule && QueryMoleculeSingleMatchedAtom) { McsIdx.QueryMolecule = QueryMolecule; McsIdx.Atoms = std::vector{QueryMoleculeSingleMatchedAtom}; McsIdx.Bonds = std::vector(); McsIdx.AtomsIdx = std::vector{0}; McsIdx.BondsIdx = std::vector(); } } else if (i + 1 < Molecules.size() - ThresholdCount) { Seed seed; if (createSeedFromMCS(i, seed)) { // MCS is matched with new query Seeds.push_back(seed); } std::swap(Molecules[0], Molecules[i + 1]); // change query molecule for threshold < 1. } } res.NumAtoms = getMaxNumberAtoms(); if (!res.NumAtoms && QueryMoleculeSingleMatchedAtom) { res.NumAtoms = 1; } res.NumBonds = getMaxNumberBonds(); if (res.NumBonds > 0 || QueryMoleculeSingleMatchedAtom) { std::pair smartsQueryMolPair = generateResultSMARTSAndQueryMol(McsIdx); res.SmartsString = smartsQueryMolPair.first; res.QueryMol = ROMOL_SPTR(smartsQueryMolPair.second); if (Parameters.BondCompareParameters.MatchFusedRingsStrict) { addFusedBondQueries(McsIdx, smartsQueryMolPair.second); } } #ifdef VERBOSE_STATISTICS_ON if (Parameters.Verbose) { unsigned itarget = 0; for (std::vector::const_iterator tag = Targets.begin(); res.NumAtoms > 0 && tag != Targets.end(); tag++, itarget++) { MatchVectType match; bool target_matched = SubstructMatch(*tag->Molecule, *res.QueryMol, match); if (!target_matched) { std::cout << "Target " << itarget + 1 << (target_matched ? " matched " : " MISMATCHED ") << MolToSmiles(*tag->Molecule) << "\n"; } } std::cout << "STATISTICS:\n"; std::cout << "Total Growing Steps = " << VerboseStatistics.TotalSteps << ", MCS found on " << VerboseStatistics.MCSFoundStep << " step"; if (VerboseStatistics.MCSFoundTime - To > 0) { printf(", for %.4lf seconds\n", double(VerboseStatistics.MCSFoundTime - To) / 1000000.); } else { std::cout << ", for less than 1 second\n"; } std::cout << "Initial Seeds = " << VerboseStatistics.InitialSeed << ", Mismatched " << VerboseStatistics.MismatchedInitialSeed << "\n"; std::cout << "Inspected Seeds = " << VerboseStatistics.Seed << "\n"; std::cout << "Rejected by BestSize = " << VerboseStatistics.RemainingSizeRejected << "\n"; std::cout << "SingleBondExcluded = " << VerboseStatistics.SingleBondExcluded << "\n"; #ifdef EXCLUDE_WRONG_COMPOSITION std::cout << "Rejected by WrongComposition = " << VerboseStatistics.WrongCompositionRejected << " [ " << VerboseStatistics.WrongCompositionDetected << " Detected ]\n"; #endif std::cout << "MatchCheck Seeds = " << VerboseStatistics.SeedCheck << "\n"; std::cout //<< "\n" << " MatchCalls = " << VerboseStatistics.MatchCall << "\n" << " MatchFound = " << VerboseStatistics.MatchCallTrue << "\n"; std::cout << " fastMatchCalls = " << VerboseStatistics.FastMatchCall << "\n" << " fastMatchFound = " << VerboseStatistics.FastMatchCallTrue << "\n"; std::cout << " slowMatchCalls = " << VerboseStatistics.MatchCall - VerboseStatistics.FastMatchCallTrue << "\n" << " slowMatchFound = " << VerboseStatistics.SlowMatchCallTrue << "\n"; #ifdef VERBOSE_STATISTICS_FASTCALLS_ON std::cout << "AtomFunctorCalls = " << VerboseStatistics.AtomFunctorCalls << "\n"; std::cout << "BondCompareCalls = " << VerboseStatistics.BondCompareCalls << "\n"; #endif std::cout << " DupCacheFound = " << VerboseStatistics.DupCacheFound << " " << VerboseStatistics.DupCacheFoundMatch << " matched, " << VerboseStatistics.DupCacheFound - VerboseStatistics.DupCacheFoundMatch << " mismatched\n"; #ifdef FAST_SUBSTRUCT_CACHE std::cout << "HashCache size = " << HashCache.keyssize() << " keys\n"; std::cout << "HashCache size = " << HashCache.fullsize() << " entries\n"; std::cout << "FindHashInCache = " << VerboseStatistics.FindHashInCache << "\n"; std::cout << "HashFoundInCache= " << VerboseStatistics.HashKeyFoundInCache << "\n"; std::cout << "ExactMatchCalls = " << VerboseStatistics.ExactMatchCall << "\n" << "ExactMatchFound = " << VerboseStatistics.ExactMatchCallTrue << "\n"; #endif } #endif auto pos = faked_ring_info.find_first(); while (pos != boost::dynamic_bitset<>::npos) { src_mols[pos]->getRingInfo()->reset(); pos = faked_ring_info.find_next(pos); } clear(); return res; } bool MaximumCommonSubgraph::checkIfMatchAndAppend(Seed& seed) { #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.SeedCheck; #endif #ifdef FAST_SUBSTRUCT_CACHE SubstructureCache::HashKey cacheKey; SubstructureCache::TIndexEntry* cacheEntry = nullptr; #endif bool foundInCache = false; bool foundInDupCache = false; { #ifdef DUP_SUBSTRUCT_CACHE if (DuplicateCache.find(seed.DupCacheKey, foundInCache)) { // duplicate found. skip match() but store both seeds, because they will grow by // different paths !!! #ifdef VERBOSE_STATISTICS_ON VerboseStatistics.DupCacheFound++; VerboseStatistics.DupCacheFoundMatch += foundInCache ? 1 : 0; #endif if (!foundInCache) { // mismatched !!! return false; } } foundInDupCache = foundInCache; #endif #ifdef FAST_SUBSTRUCT_CACHE if (!foundInCache) { #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.FindHashInCache; #endif cacheEntry = HashCache.find(seed, QueryAtomLabels, QueryBondLabels, cacheKey); if (cacheEntry) { // possibly found. check for hash collision #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.HashKeyFoundInCache; #endif // check hash collisions (time +3%): for (SubstructureCache::TIndexEntry::const_iterator g = cacheEntry->begin(); !foundInCache && g != cacheEntry->end(); g++) { if (g->m_vertices.size() != seed.getNumAtoms() || g->m_edges.size() != seed.getNumBonds()) { continue; } #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.ExactMatchCall; #endif // EXACT MATCH foundInCache = SubstructMatchCustomTable( (*g), *QueryMolecule, seed.Topology, *QueryMolecule, QueryAtomMatchTable, QueryBondMatchTable, &Parameters); #ifdef VERBOSE_STATISTICS_ON if (foundInCache) { ++VerboseStatistics.ExactMatchCallTrue; } #endif } } } #endif } bool found = foundInCache; if (!found) { found = match(seed); } Seed* newSeed = nullptr; { if (found) { // Store new generated seed, if found in cache or in // all(- threshold) targets { newSeed = &Seeds.add(seed); newSeed->CopyComplete = false; } #ifdef DUP_SUBSTRUCT_CACHE if (!foundInDupCache && seed.getNumBonds() >= 3) { // only seed with a ring // can be duplicated - // do not store very // small seed in cache DuplicateCache.add(seed.DupCacheKey, true); } #endif #ifdef FAST_SUBSTRUCT_CACHE if (!foundInCache) { HashCache.add(seed, cacheKey, cacheEntry); } #endif } else { #ifdef DUP_SUBSTRUCT_CACHE if (seed.getNumBonds() > 3) { DuplicateCache.add(seed.DupCacheKey, false); // opt. cache mismatched duplicates too } #endif } } if (newSeed) { *newSeed = seed; // non-blocking copy for MULTI_THREAD and best CPU // utilization } return found; // new matched seed has been actually added } bool MaximumCommonSubgraph::match(Seed& seed) { unsigned max_miss = Targets.size() - ThresholdCount; unsigned missing = 0; unsigned passed = 0; unsigned itarget = 0; for (std::vector::const_iterator tag = Targets.begin(); tag != Targets.end(); tag++, itarget++) { #ifdef VERBOSE_STATISTICS_ON { ++VerboseStatistics.MatchCall; } #endif bool target_matched = false; if (!seed.MatchResult.empty() && !seed.MatchResult[itarget].empty()) { target_matched = matchIncrementalFast(seed, itarget); } if (!target_matched) { // slow full match match_V_t match; // THERE IS NO Bonds match INFO !!!! target_matched = SubstructMatchCustomTable( tag->Topology, *tag->Molecule, seed.Topology, *QueryMolecule, tag->AtomMatchTable, tag->BondMatchTable, &Parameters, &match); // save current match info if (target_matched) { if (seed.MatchResult.empty()) { seed.MatchResult.resize(Targets.size()); } seed.MatchResult[itarget].init(seed, match, *QueryMolecule, *tag); } else if (!seed.MatchResult.empty()) { seed.MatchResult[itarget].clear(); //.Empty = true; // == fast clear(); } #ifdef VERBOSE_STATISTICS_ON if (target_matched) { ++VerboseStatistics.SlowMatchCallTrue; } #endif } if (target_matched) { if (++passed >= ThresholdCount) { // it's enough break; } } else { // mismatched if (++missing > max_miss) { break; } } } if (missing <= max_miss) { #ifdef VERBOSE_STATISTICS_ON ++VerboseStatistics.MatchCallTrue; #endif return true; } return false; } // call it for each target, if failed perform full match check bool MaximumCommonSubgraph::matchIncrementalFast(Seed& seed, unsigned itarget) { // use and update results of previous match stored in the seed #ifdef VERBOSE_STATISTICS_ON { ++VerboseStatistics.FastMatchCall; } #endif const Target& target = Targets[itarget]; TargetMatch& match = seed.MatchResult[itarget]; if (match.empty()) { return false; } /* // CHIRALITY: FinalMatchCheck: if(Parameters.AtomCompareParameters.MatchChiralTag || Parameters.FinalMatchChecker) { // TEMP match.clear(); return false; } */ bool matched = false; for (unsigned newBondSeedIdx = match.MatchedBondSize; newBondSeedIdx < seed.getNumBonds(); newBondSeedIdx++) { matched = false; bool atomAdded = false; const Bond* newBond = seed.MoleculeFragment.Bonds[newBondSeedIdx]; unsigned newBondQueryIdx = seed.MoleculeFragment.BondsIdx[newBondSeedIdx]; unsigned newBondSourceAtomSeedIdx; // seed's index of atom from which // new bond was added unsigned newBondAnotherAtomSeedIdx; // seed's index of atom on // another end of the bond unsigned i = seed.MoleculeFragment.SeedAtomIdxMap[newBond->getBeginAtomIdx()]; unsigned j = seed.MoleculeFragment.SeedAtomIdxMap[newBond->getEndAtomIdx()]; if (i >= match.MatchedAtomSize) { // this is new atom in the seed newBondSourceAtomSeedIdx = j; newBondAnotherAtomSeedIdx = i; } else { newBondSourceAtomSeedIdx = i; newBondAnotherAtomSeedIdx = j; } unsigned newBondAnotherAtomQueryIdx = seed.MoleculeFragment.AtomsIdx[newBondAnotherAtomSeedIdx]; unsigned newBondSourceAtomQueryIdx = seed.MoleculeFragment.AtomsIdx[newBondSourceAtomSeedIdx]; unsigned newBondSourceAtomTargetIdx = match.TargetAtomIdx [newBondSourceAtomQueryIdx]; // matched to // newBondSourceAtomSeedIdx const Bond* tb = nullptr; unsigned newBondAnotherAtomTargetIdx = NotSet; if (newBondAnotherAtomSeedIdx < match.MatchedAtomSize) { // new bond between old atoms - both are // matched to exact atoms in the target newBondAnotherAtomTargetIdx = match.TargetAtomIdx[newBondAnotherAtomQueryIdx]; tb = target.Molecule->getBondBetweenAtoms( newBondSourceAtomTargetIdx, newBondAnotherAtomTargetIdx); // target bond between Source and // Another atoms if (tb) { // bond exists, check match with query molecule unsigned tbi = tb->getIdx(); unsigned qbi = seed.MoleculeFragment.BondsIdx[newBondSeedIdx]; if (!match.VisitedTargetBonds[tbi]) { // false if target bond is // already matched matched = target.BondMatchTable.at(qbi, tbi); } } } else { // enumerate all bonds from source atom in the target const Atom* atom = target.Molecule->getAtomWithIdx(newBondSourceAtomTargetIdx); ROMol::OEDGE_ITER beg, end; for (boost::tie(beg, end) = target.Molecule->getAtomBonds(atom); beg != end; beg++) { tb = &*((*target.Molecule)[*beg]); if (!match.VisitedTargetBonds[tb->getIdx()]) { newBondAnotherAtomTargetIdx = tb->getBeginAtomIdx(); if (newBondSourceAtomTargetIdx == newBondAnotherAtomTargetIdx) { newBondAnotherAtomTargetIdx = tb->getEndAtomIdx(); } if (newBondAnotherAtomSeedIdx < seed.LastAddedAtomsBeginIdx // RING: old atom, new atom // in matched substructure // must be new in seed || std::find(seed.MoleculeFragment.AtomsIdx.begin() + seed.LastAddedAtomsBeginIdx, seed.MoleculeFragment.AtomsIdx.begin() + newBondAnotherAtomSeedIdx, newBondAnotherAtomQueryIdx) == seed.MoleculeFragment.AtomsIdx.end()) { if (!match.VisitedTargetAtoms[newBondAnotherAtomTargetIdx]) { continue; } } else { if (match.VisitedTargetAtoms[newBondAnotherAtomTargetIdx]) { continue; } } // check AnotherAtom and bond matched = target.AtomMatchTable.at(newBondAnotherAtomQueryIdx, newBondAnotherAtomTargetIdx) && target.BondMatchTable.at( seed.MoleculeFragment.BondsIdx[newBondSeedIdx], tb->getIdx()); if (matched) { atomAdded = true; break; } } } } if (matched) { // update match history if (atomAdded) { // new atom has been added match.MatchedAtomSize++; match.TargetAtomIdx[newBondAnotherAtomQueryIdx] = newBondAnotherAtomTargetIdx; match.VisitedTargetAtoms[newBondAnotherAtomTargetIdx] = true; } match.MatchedBondSize++; match.TargetBondIdx[newBondQueryIdx] = tb->getIdx(); match.VisitedTargetBonds[tb->getIdx()] = true; } else { match.clear(); return false; } } if (match.MatchedAtomSize != seed.getNumAtoms() || match.MatchedBondSize != seed.getNumBonds()) { // number of unique items !!! match.clear(); return false; } // CHIRALITY: FinalMatchCheck if (matched && Parameters.FinalMatchChecker) { std::vector c1; c1.reserve(4096); std::vector c2; c2.reserve(4096); for (unsigned si = 0; si < seed.getNumAtoms(); si++) { // index in the seed topology c1.push_back(si); c2.push_back(match.TargetAtomIdx[seed.Topology[si]]); } matched = Parameters.FinalMatchChecker(c1.data(), c2.data(), *QueryMolecule, seed.Topology, *target.Molecule, target.Topology, &Parameters); // check CHIRALITY if (!matched) { match.clear(); } } #ifdef VERBOSE_STATISTICS_ON if (matched) { #ifdef MULTI_THREAD Guard statlock(StatisticsMutex); #endif ++VerboseStatistics.FastMatchCallTrue; } #endif return matched; } } // namespace FMCS } // namespace RDKit