// // Copyright (C) 2018 Boran Adas, Google Summer of Code // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace RDKit { namespace RDKitFP { std::vector *RDKitFPAtomInvGenerator::getAtomInvariants( const ROMol &mol) const { auto *result = new std::vector(); result->reserve(mol.getNumAtoms()); for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms(); atomIt != mol.endAtoms(); ++atomIt) { unsigned int aHash = ((*atomIt)->getAtomicNum() % 128) << 1 | static_cast((*atomIt)->getIsAromatic()); result->push_back(aHash); } return result; } std::string RDKitFPAtomInvGenerator::infoString() const { return "RDKitFPAtomInvGenerator"; } RDKitFPAtomInvGenerator *RDKitFPAtomInvGenerator::clone() const { return new RDKitFPAtomInvGenerator(); } template OutputType RDKitFPArguments::getResultSize() const { return std::numeric_limits::max(); } template std::string RDKitFPArguments::infoString() const { return "RDKitFPArguments minPath=" + std::to_string(d_minPath) + " maxPath=" + std::to_string(d_maxPath) + " useHs=" + std::to_string(df_useHs) + " branchedPaths=" + std::to_string(df_branchedPaths) + " useBondOrder=" + std::to_string(df_useBondOrder); } template RDKitFPArguments::RDKitFPArguments( unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, bool countSimulation, const std::vector countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature) : FingerprintArguments(countSimulation, countBounds, fpSize, numBitsPerFeature), d_minPath(minPath), d_maxPath(maxPath), df_useHs(useHs), df_branchedPaths(branchedPaths), df_useBondOrder(useBondOrder) { PRECONDITION(minPath != 0, "minPath==0"); PRECONDITION(maxPath >= minPath, "maxPath OutputType RDKitFPAtomEnv::getBitId( FingerprintArguments *, // arguments const std::vector *, // atomInvariants const std::vector *, // bondInvariants const AdditionalOutput *additionalOutput, const bool, // hashResults const std::uint64_t fpSize) const { if (additionalOutput) { OutputType bit_id = d_bitId; if (fpSize) { bit_id %= fpSize; } if (additionalOutput->bitPaths) { (*additionalOutput->bitPaths)[bit_id].push_back(d_bondPath); } if (additionalOutput->atomToBits || additionalOutput->atomCounts) { for (size_t i = 0; i < d_atomsInPath.size(); ++i) { if (d_atomsInPath[i]) { if (additionalOutput->atomToBits) { additionalOutput->atomToBits->at(i).push_back(bit_id); } if (additionalOutput->atomCounts) { additionalOutput->atomCounts->at(i)++; } } } } } return d_bitId; } template std::string RDKitFPEnvGenerator::infoString() const { return "RDKitFPEnvGenerator"; } template std::vector *> RDKitFPEnvGenerator::getEnvironments( const ROMol &mol, FingerprintArguments *arguments, const std::vector *fromAtoms, const std::vector *, // ignoreAtoms const int, // confId const AdditionalOutput *, // additionalOutput const std::vector *atomInvariants, const std::vector *, // bondInvariants const bool // hashResults ) const { PRECONDITION(!atomInvariants || atomInvariants->size() >= mol.getNumAtoms(), "bad atomInvariants size"); auto *rDKitFPArguments = dynamic_cast *>(arguments); std::vector *> result; // get all paths INT_PATH_LIST_MAP allPaths; RDKitFPUtils::enumerateAllPaths( mol, allPaths, fromAtoms, rDKitFPArguments->df_branchedPaths, rDKitFPArguments->df_useHs, rDKitFPArguments->d_minPath, rDKitFPArguments->d_maxPath); // identify query bonds std::vector isQueryBond(mol.getNumBonds(), 0); std::vector bondCache; RDKitFPUtils::identifyQueryBonds(mol, bondCache, isQueryBond); boost::dynamic_bitset<> atomsInPath(mol.getNumAtoms()); for (INT_PATH_LIST_MAP_CI paths = allPaths.begin(); paths != allPaths.end(); paths++) { for (const auto &path : paths->second) { // the bond hashes of the path std::vector bondHashes = RDKitFPUtils::generateBondHashes( mol, atomsInPath, bondCache, isQueryBond, path, rDKitFPArguments->df_useBondOrder, atomInvariants); if (!bondHashes.size()) { continue; } // hash the path to generate a seed: unsigned long seed; if (path.size() > 1) { std::sort(bondHashes.begin(), bondHashes.end()); // finally, we will add the number of distinct atoms in the path at the // end // of the vect. This allows us to distinguish C1CC1 from CC(C)C bondHashes.push_back(static_cast(atomsInPath.count())); seed = gboost::hash_range(bondHashes.begin(), bondHashes.end()); } else { seed = bondHashes[0]; } result.push_back(new RDKitFPAtomEnv( static_cast(seed), atomsInPath, path)); } } return result; } template FingerprintGenerator *getRDKitFPGenerator( unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, AtomInvariantsGenerator *atomInvariantsGenerator, bool countSimulation, const std::vector countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature, bool ownsAtomInvGen) { AtomEnvironmentGenerator *envGenerator = new RDKitFPEnvGenerator(); FingerprintArguments *arguments = new RDKitFPArguments(minPath, maxPath, useHs, branchedPaths, useBondOrder, countSimulation, countBounds, fpSize, numBitsPerFeature); bool ownsAtomInvGenerator = ownsAtomInvGen; if (!atomInvariantsGenerator) { atomInvariantsGenerator = new RDKitFPAtomInvGenerator(); ownsAtomInvGenerator = true; } return new FingerprintGenerator(envGenerator, arguments, atomInvariantsGenerator, nullptr, ownsAtomInvGenerator, false); } template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator *getRDKitFPGenerator(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, AtomInvariantsGenerator *atomInvariantsGenerator, bool countSimulation, const std::vector countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature, bool ownsAtomInvGen); template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator *getRDKitFPGenerator(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, AtomInvariantsGenerator *atomInvariantsGenerator, bool countSimulation, const std::vector countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature, bool ownsAtomInvGen); } // namespace RDKitFP } // namespace RDKit