// // Copyright (C) 2014 Greg Landrum // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace RDKit; int pcmp(const void *a, const void *b) { if ((*(int *)a) < (*(int *)b)) { return -1; } else if ((*(int *)a) > (*(int *)b)) { return 1; } return 0; } int icmp(int a, int b) { if (a < b) { return -1; } else if (a > b) { return 1; } return 0; } class int_compare_ftor { const int *dp_ints{nullptr}; public: int_compare_ftor(){}; int_compare_ftor(const int *ints) : dp_ints(ints){}; int operator()(int i, int j) const { PRECONDITION(dp_ints, "no ints"); unsigned int ivi = dp_ints[i]; unsigned int ivj = dp_ints[j]; if (ivi < ivj) { return -1; } else if (ivi > ivj) { return 1; } else { return 0; } } }; void qs1(const std::vector> &vects) { BOOST_LOG(rdInfoLog) << "sorting (qsort) vectors" << std::endl; for (auto tv : vects) { int *data = &tv.front(); qsort(data, tv.size(), sizeof(int), pcmp); for (unsigned int j = 1; j < tv.size(); ++j) { TEST_ASSERT(tv[j] >= tv[j - 1]); } } BOOST_LOG(rdInfoLog) << "done: " << vects.size() << std::endl; } void hs1(const std::vector> &vects) { BOOST_LOG(rdInfoLog) << "sorting (hanoi sort) vectors" << std::endl; for (const auto &vect : vects) { const int *data = &vect.front(); int_compare_ftor icmp(data); int *indices = (int *)malloc(vect.size() * sizeof(int)); for (unsigned int j = 0; j < vect.size(); ++j) { indices[j] = j; } int *count = (int *)malloc(vect.size() * sizeof(int)); int *changed = (int *)malloc(vect.size() * sizeof(int)); memset(changed, 1, vect.size() * sizeof(int)); RDKit::hanoisort(indices, vect.size(), count, changed, icmp); for (unsigned int j = 1; j < vect.size(); ++j) { TEST_ASSERT(data[indices[j]] >= data[indices[j - 1]]); } free(count); free(indices); free(changed); } BOOST_LOG(rdInfoLog) << "done: " << vects.size() << std::endl; } void test1() { BOOST_LOG(rdInfoLog) << "Testing the hanoi sort" << std::endl; typedef boost::random::mersenne_twister rng_type; typedef boost::uniform_int<> distrib_type; typedef boost::variate_generator source_type; rng_type generator(42u); const unsigned int nVects = 500000; const unsigned int vectSize = 50; const unsigned int nClasses = 15; distrib_type dist(0, nClasses); source_type randomSource(generator, dist); BOOST_LOG(rdInfoLog) << "populating vectors" << std::endl; std::vector> vects(nVects); for (unsigned int i = 0; i < nVects; ++i) { vects[i] = std::vector(vectSize); for (unsigned int j = 0; j < vectSize; ++j) { vects[i][j] = randomSource(); } } // qs1(vects); hs1(vects); BOOST_LOG(rdInfoLog) << "Done" << std::endl; }; class atomcomparefunctor { Canon::canon_atom *d_atoms{nullptr}; public: atomcomparefunctor(){}; atomcomparefunctor(Canon::canon_atom *atoms) : d_atoms(atoms){}; int operator()(int i, int j) const { PRECONDITION(d_atoms, "no atoms"); unsigned int ivi, ivj; // always start with the current class: ivi = d_atoms[i].index; ivj = d_atoms[j].index; if (ivi < ivj) { return -1; } else if (ivi > ivj) { return 1; } ivi = d_atoms[i].atom->getAtomicNum(); ivj = d_atoms[j].atom->getAtomicNum(); if (ivi < ivj) { return -1; } else if (ivi > ivj) { return 1; } return 0; } }; class atomcomparefunctor2 { Canon::canon_atom *d_atoms{nullptr}; public: atomcomparefunctor2(){}; atomcomparefunctor2(Canon::canon_atom *atoms) : d_atoms(atoms){}; int operator()(int i, int j) const { PRECONDITION(d_atoms, "no atoms"); unsigned int ivi, ivj; // always start with the current class: ivi = d_atoms[i].index; ivj = d_atoms[j].index; if (ivi < ivj) { return -1; } else if (ivi > ivj) { return 1; } // start by comparing degree ivi = d_atoms[i].atom->getDegree(); ivj = d_atoms[j].atom->getDegree(); if (ivi < ivj) { return -1; } else if (ivi > ivj) { return 1; } // move onto atomic number ivi = d_atoms[i].atom->getAtomicNum(); ivj = d_atoms[j].atom->getAtomicNum(); if (ivi < ivj) { return -1; } else if (ivi > ivj) { return 1; } return 0; } }; void test2() { BOOST_LOG(rdInfoLog) << "Testing hanoi with a functor." << std::endl; // make sure that hanoi works with a functor and "molecule data" { std::string smi = "FC1C(Cl)C1C"; RWMol *m = SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); std::vector indices(m->getNumAtoms()); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { atoms[i].atom = m->getAtomWithIdx(i); atoms[i].index = 0; indices[i] = i; } atomcomparefunctor ftor(&atoms.front()); int *data = &indices.front(); int *count = (int *)malloc(atoms.size() * sizeof(int)); int *changed = (int *)malloc(atoms.size() * sizeof(int)); memset(changed, 1, atoms.size() * sizeof(int)); RDKit::hanoisort(data, atoms.size(), count, changed, ftor); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { // std::cerr< 0) { TEST_ASSERT(atoms[indices[i]].atom->getAtomicNum() >= atoms[indices[i - 1]].atom->getAtomicNum()); if (atoms[indices[i]].atom->getAtomicNum() != atoms[indices[i - 1]].atom->getAtomicNum()) { TEST_ASSERT(count[indices[i]] != 0); } else { TEST_ASSERT(count[indices[i]] == 0); } } else { TEST_ASSERT(count[indices[i]] != 0); } } delete m; free(count); free(changed); } BOOST_LOG(rdInfoLog) << "Done" << std::endl; }; void test3() { BOOST_LOG(rdInfoLog) << "Testing basic partition refinement." << std::endl; // basic partition refinement { std::string smi = "FC1C(Cl)CCC1C"; RWMol *m = SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m, atoms, true); atomcomparefunctor ftor(&atoms.front()); RDKit::Canon::canon_atom *data = &atoms.front(); int *count = (int *)malloc(atoms.size() * sizeof(int)); int *order = (int *)malloc(atoms.size() * sizeof(int)); int activeset; int *next = (int *)malloc(atoms.size() * sizeof(int)); int *changed = (int *)malloc(atoms.size() * sizeof(int)); memset(changed, 1, atoms.size() * sizeof(int)); char *touched = (char *)malloc(atoms.size() * sizeof(char)); memset(touched, 0, atoms.size() * sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(), order, count, data); RDKit::Canon::ActivatePartitions(atoms.size(), order, count, activeset, next, changed); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr< atoms(m->getNumAtoms()); initCanonAtoms(*m, atoms, true); atomcomparefunctor2 ftor(&atoms.front()); RDKit::Canon::canon_atom *data = &atoms.front(); int *count = (int *)malloc(atoms.size() * sizeof(int)); int *order = (int *)malloc(atoms.size() * sizeof(int)); int activeset; int *next = (int *)malloc(atoms.size() * sizeof(int)); int *changed = (int *)malloc(atoms.size() * sizeof(int)); memset(changed, 1, atoms.size() * sizeof(int)); char *touched = (char *)malloc(atoms.size() * sizeof(char)); memset(touched, 0, atoms.size() * sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(), order, count, data); RDKit::Canon::ActivatePartitions(atoms.size(), order, count, activeset, next, changed); RDKit::Canon::RefinePartitions(*m, data, ftor, false, order, count, activeset, next, changed, touched); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getAtomWithIdx(i); std::vector nbrs(at->getDegree()); unsigned int nbridx = 0; ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = dp_mol->getAtomBonds(at); while (beg != end) { const Bond *bond = (*dp_mol)[*beg]; nbrs[nbridx] = static_cast(100 * bond->getBondTypeAsDouble()) + dp_atoms[bond->getOtherAtomIdx(i)].index; ++beg; ++nbridx; } std::sort(nbrs.begin(), nbrs.end()); for (nbridx = 0; nbridx < at->getDegree(); ++nbridx) { res += (nbridx + 1) * 1000 + nbrs[nbridx]; } return res; } int basecomp(int i, int j) const { PRECONDITION(dp_atoms, "no atoms"); unsigned int ivi, ivj; // always start with the current class: ivi = dp_atoms[i].index; ivj = dp_atoms[j].index; if (ivi < ivj) { return -1; } else if (ivi > ivj) { return 1; } // start by comparing degree ivi = dp_atoms[i].atom->getDegree(); ivj = dp_atoms[j].atom->getDegree(); if (ivi < ivj) { return -1; } else if (ivi > ivj) { return 1; } // move onto atomic number ivi = dp_atoms[i].atom->getAtomicNum(); ivj = dp_atoms[j].atom->getAtomicNum(); if (ivi < ivj) { return -1; } else if (ivi > ivj) { return 1; } return 0; } public: bool df_useNbrs{false}; atomcomparefunctor3(){}; atomcomparefunctor3(Canon::canon_atom *atoms, const ROMol &m) : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false){}; int operator()(int i, int j) const { PRECONDITION(dp_atoms, "no atoms"); PRECONDITION(dp_mol, "no molecule"); int v = basecomp(i, j); if (v) { return v; } unsigned int ivi, ivj; if (df_useNbrs) { ivi = dp_atoms[i].index + 1 + getAtomNeighborhood(i); ivj = dp_atoms[j].index + 1 + getAtomNeighborhood(j); // std::cerr<<" "< ivj) { return 1; } } return 0; } }; void test4() { BOOST_LOG(rdInfoLog) << "Testing partition refinement with neighbors." << std::endl; // partition refinement with neighbors { std::string smi = "FC1C(Cl)CCC1C"; RWMol *m = SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m, atoms, true); atomcomparefunctor3 ftor(&atoms.front(), *m); RDKit::Canon::canon_atom *data = &atoms.front(); int *count = (int *)malloc(atoms.size() * sizeof(int)); int *order = (int *)malloc(atoms.size() * sizeof(int)); int activeset; int *next = (int *)malloc(atoms.size() * sizeof(int)); int *changed = (int *)malloc(atoms.size() * sizeof(int)); memset(changed, 1, atoms.size() * sizeof(int)); char *touched = (char *)malloc(atoms.size() * sizeof(char)); memset(touched, 0, atoms.size() * sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(), order, count, data); RDKit::Canon::ActivatePartitions(atoms.size(), order, count, activeset, next, changed); // std::cerr<<"1----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr<getNumAtoms(); ++i) { // std::cerr< 0) { TEST_ASSERT(ftor(order[i], order[i - 1]) >= 0); } } delete m; free(count); free(order); free(next); free(changed); free(touched); } { std::string smi = "FC1C(CO)CCC1CC"; RWMol *m = SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m, atoms, true); atomcomparefunctor3 ftor(&atoms.front(), *m); RDKit::Canon::canon_atom *data = &atoms.front(); int *count = (int *)malloc(atoms.size() * sizeof(int)); int *order = (int *)malloc(atoms.size() * sizeof(int)); int activeset; int *next = (int *)malloc(atoms.size() * sizeof(int)); int *changed = (int *)malloc(atoms.size() * sizeof(int)); memset(changed, 1, atoms.size() * sizeof(int)); char *touched = (char *)malloc(atoms.size() * sizeof(char)); memset(touched, 0, atoms.size() * sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(), order, count, data); RDKit::Canon::ActivatePartitions(atoms.size(), order, count, activeset, next, changed); RDKit::Canon::RefinePartitions(*m, data, ftor, false, order, count, activeset, next, changed, touched); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms(); ++i) { // std::cerr< 0) { // std::cerr<<" ftor: "<= 0); } } delete m; free(count); free(order); free(next); free(changed); free(touched); } { std::string smi = "FC1C(CC)CCC1CC"; RWMol *m = SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m, atoms, true); atomcomparefunctor3 ftor(&atoms.front(), *m); RDKit::Canon::canon_atom *data = &atoms.front(); int *count = (int *)malloc(atoms.size() * sizeof(int)); int *order = (int *)malloc(atoms.size() * sizeof(int)); int activeset; int *next = (int *)malloc(atoms.size() * sizeof(int)); int *changed = (int *)malloc(atoms.size() * sizeof(int)); memset(changed, 1, atoms.size() * sizeof(int)); char *touched = (char *)malloc(atoms.size() * sizeof(char)); memset(touched, 0, atoms.size() * sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(), order, count, data); RDKit::Canon::ActivatePartitions(atoms.size(), order, count, activeset, next, changed); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms(); ++i) { // std::cerr< 0) { // std::cerr<<" ftor: "<= 0); } } // here we can't manage to get everything unique TEST_ASSERT(order[0] == 4 && count[4] == 2); TEST_ASSERT(order[1] == 9 && count[9] == 0); TEST_ASSERT(order[2] == 0 && count[0] == 1); TEST_ASSERT(order[3] == 3 && count[3] == 2); TEST_ASSERT(order[4] == 8 && count[8] == 0); TEST_ASSERT(order[5] == 5 && count[5] == 2); TEST_ASSERT(order[6] == 6 && count[6] == 0); TEST_ASSERT(order[7] == 2 && count[2] == 2); TEST_ASSERT(order[8] == 7 && count[7] == 0); TEST_ASSERT(order[9] == 1 && count[1] == 1); delete m; free(count); free(order); free(next); free(changed); free(touched); } BOOST_LOG(rdInfoLog) << "Done" << std::endl; }; void test5() { BOOST_LOG(rdInfoLog) << "testing canonicalization via tie breaking." << std::endl; // canonicalization via tie breaking { std::string smi = "FC1C(CC)CCC1CC"; RWMol *m = SmilesToMol(smi); TEST_ASSERT(m); std::vector atoms(m->getNumAtoms()); initCanonAtoms(*m, atoms, true); atomcomparefunctor3 ftor(&atoms.front(), *m); RDKit::Canon::canon_atom *data = &atoms.front(); int *count = (int *)malloc(atoms.size() * sizeof(int)); int *order = (int *)malloc(atoms.size() * sizeof(int)); int activeset; int *next = (int *)malloc(atoms.size() * sizeof(int)); int *changed = (int *)malloc(atoms.size() * sizeof(int)); memset(changed, 1, atoms.size() * sizeof(int)); char *touched = (char *)malloc(atoms.size() * sizeof(char)); memset(touched, 0, atoms.size() * sizeof(char)); RDKit::Canon::CreateSinglePartition(atoms.size(), order, count, data); RDKit::Canon::ActivatePartitions(atoms.size(), order, count, activeset, next, changed); // std::cerr<<"----------------------------------"<getNumAtoms();++i){ // std::cerr<getNumAtoms();++i){ // std::cerr<getNumAtoms(); ++i) { // std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m, atomRanks); boost::dynamic_bitset<> seen(m->getNumAtoms()); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { TEST_ASSERT(!seen[atomRanks[i]]); seen.set(atomRanks[i], 1); } // std::copy(atomRanks.begin(),atomRanks.end(),std::ostream_iterator(std::cerr," ")); // std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m, atomRanks); boost::dynamic_bitset<> seen(m->getNumAtoms()); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { // std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m, atomRanks); boost::dynamic_bitset<> seen(m->getNumAtoms()); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { // std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m, atomRanks); boost::dynamic_bitset<> seen(m->getNumAtoms()); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { // std::cerr< atomRanks; RDKit::Canon::rankMolAtoms(*m, atomRanks, false); boost::dynamic_bitset<> seen(m->getNumAtoms()); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { // std::cerr<<" "< atomRanks; RDKit::Canon::rankMolAtoms(*m, atomRanks, false); boost::dynamic_bitset<> seen(m->getNumAtoms()); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { TEST_ASSERT(!seen[atomRanks[i]]); seen.set(atomRanks[i], 1); } delete m; } BOOST_LOG(rdInfoLog) << "Done" << std::endl; }; namespace { ROMol *_renumber(const ROMol *m, std::vector &nVect, const std::string & /*inSmiles*/) { ROMol *nm = MolOps::renumberAtoms(*m, nVect); TEST_ASSERT(nm); TEST_ASSERT(nm->getNumAtoms() == m->getNumAtoms()); TEST_ASSERT(nm->getNumBonds() == m->getNumBonds()); // MolOps::assignStereochemistry(*nm, true, true); // for (unsigned int ii = 0; ii < nm->getNumAtoms(); ++ii) { // if (nm->getAtomWithIdx(ii)->hasProp("_CIPCode")) { // TEST_ASSERT(m->getAtomWithIdx(nVect[ii])->hasProp("_CIPCode")); // std::string ocip = // m->getAtomWithIdx(nVect[ii])->getProp("_CIPCode"); // std::string ncip = // nm->getAtomWithIdx(ii)->getProp("_CIPCode"); // if (ocip != ncip) { // std::cerr << " cip mismatch: " << inSmiles << std::endl; // std::cerr << " " << nVect[ii] << ": " << ocip << " -> " << ii // << ": " << ncip << std::endl; // std::cerr << " " << MolToSmiles(*nm, true) << std::endl; // } // TEST_ASSERT(ocip == ncip); // } // } return nm; } void _renumberTest(const ROMol *m, std::string inSmiles, unsigned int numRenumbers) { PRECONDITION(m, "no molecule"); // std::cerr<<">>>>>>>>>>>>>>>>>>>>>>>>>>>"< idxV(m->getNumAtoms()); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { idxV[i] = i; } for (unsigned int i = 0; i < numRenumbers; ++i) { // std::cerr<<"---------------------------------------------------"< nVect(idxV); std::shuffle(nVect.begin(), nVect.end(), std::mt19937(0xf00d)); // for(unsigned int j=0;jgetNumAtoms();++j){ // std::cerr<<"Renumber: "<"<setProp(common_properties::_StereochemDone, 1); std::string smi = MolToSmiles(*nm, true); if (smi != osmi) { std::cerr << " input: " << inSmiles << ", Renumbering round: " << i << std::endl; std::cerr << osmi << std::endl; std::cerr << smi << std::endl; m->setProp("_Name", "orig"); std::cerr << MolToMolBlock(*m) << std::endl; nm->setProp("_Name", "renumber"); std::cerr << MolToMolBlock(*nm) << std::endl; for (unsigned int j = 0; j < m->getNumAtoms(); ++j) { std::cerr << "Renumber: " << nVect[j] << "->" << j << std::endl; } } delete nm; TEST_ASSERT(smi == osmi); } } void _renumberTest2(const ROMol *m, std::string inSmiles, unsigned int numRenumbers) { PRECONDITION(m, "no molecule"); unsigned int nAtoms = m->getNumAtoms(); std::vector idxV(m->getNumAtoms()); for (unsigned int i = 0; i < m->getNumAtoms(); ++i) { idxV[i] = i; } for (unsigned int i = 0; i < numRenumbers; ++i) { std::vector nVect(idxV); std::shuffle(nVect.begin(), nVect.end(), std::mt19937(0xf00d)); ROMol *nm = _renumber(m, nVect, inSmiles); UINT_VECT ranks(nAtoms); Canon::rankMolAtoms(*nm, ranks, true); char *ranksSet = (char *)malloc(nAtoms * sizeof(char)); memset(ranksSet, 0, nAtoms * sizeof(char)); for (unsigned int rank : ranks) { ranksSet[rank] = 1; } for (unsigned int i = 0; i < nAtoms; i++) { if (ranksSet[i] != 1) { std::cerr << "Molecule has non unique ranks: " << MolToSmiles(*nm, true) << ", Renumbering round: " << i << std::endl; for (unsigned int i = 0; i < nAtoms; i++) { std::cerr << "AtomIdx: " << i << " Rank: " << ranks[i] << std::endl; } } TEST_ASSERT(ranksSet[i] == 1); } delete nm; free(ranksSet); } } } // namespace void test7a() { BOOST_LOG(rdInfoLog) << "testing some specific ordering problems" << std::endl; std::string rdbase = getenv("RDBASE"); std::string smi1, smi2; { std::string fName = rdbase + "/Code/GraphMol/test_data/canon_reorder1.mol"; RWMol *m = MolFileToMol(fName, false, false); TEST_ASSERT(m); MolOps::sanitizeMol(*m); std::vector atomRanks; // std::cerr <<"\n\n\n\n\n\n\n\n\n\n\n\n>--------------" << std::endl; RDKit::Canon::rankMolAtoms(*m, atomRanks, false); // std::cerr <<"---------------" << std::endl; // for(unsigned int i=0;igetNumAtoms();++i){ // std::cerr<<" "< atomRanks; // std::cerr <<">--------------" << std::endl; RDKit::Canon::rankMolAtoms(*m, atomRanks, false); // std::cerr <<"---------------" << std::endl; // for(unsigned int i=0;igetNumAtoms();++i){ // std::cerr<<" "< atomRanks; // std::cerr<(std::cerr," ")); // std::cerr< atomRanks; // std::cerr<(std::cerr," ")); // std::cerr< atomRanks[5]); TEST_ASSERT(atomRanks[4] > atomRanks[5]); delete m; } { // make sure we aren't breaking ties std::string smi = "C[C@](C)(Cl)I"; RWMol *m = SmilesToMol(smi, 0, 0); TEST_ASSERT(m); MolOps::sanitizeMol(*m); std::vector atomRanks; // std::cerr<(std::cerr," ")); // std::cerr< atomRanks; // std::cerr<(std::cerr," ")); // std::cerr< atomRanks[1]); TEST_ASSERT(atomRanks[0] < atomRanks[9]); TEST_ASSERT(atomRanks[2] == atomRanks[6]); TEST_ASSERT(atomRanks[7] == atomRanks[11]); TEST_ASSERT(atomRanks[3] == atomRanks[5]); TEST_ASSERT(atomRanks[2] > atomRanks[3]); TEST_ASSERT(atomRanks[2] > atomRanks[11]); TEST_ASSERT(atomRanks[3] < atomRanks[11]); delete m; } { // this one was a chiral ranking problem std::string smi = "COC(C)CC(C)(C)O"; RWMol *m = SmilesToMol(smi, 0, 0); TEST_ASSERT(m); MolOps::sanitizeMol(*m); std::vector atomRanks; // std::cerr<(std::cerr," ")); // std::cerr< atomRanks[8]); TEST_ASSERT(atomRanks[5] > atomRanks[2]); delete m; } { // are double bonds being handled correctly? std::string smi = "OC[C@H](F)C=O"; RWMol *m = SmilesToMol(smi, 0, 0); TEST_ASSERT(m); MolOps::sanitizeMol(*m); std::vector atomRanks; // std::cerr<(std::cerr," ")); // std::cerr< atomRanks; // std::cerr<(std::cerr," ")); // std::cerr< atomRanks[5]); TEST_ASSERT(atomRanks[1] > atomRanks[4]); delete m; } { // are double bonds being handled correctly? std::string smi = "CC[C@](C)(CF)C=O"; RWMol *m = SmilesToMol(smi, 0, 0); TEST_ASSERT(m); MolOps::sanitizeMol(*m); std::vector atomRanks; // std::cerr<(std::cerr," ")); // std::cerr< atomRanks[6]); TEST_ASSERT(atomRanks[1] < atomRanks[4]); delete m; } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void test10() { BOOST_LOG(rdInfoLog) << "testing unique ranks in w.r.t. renumbering." << std::endl; unsigned int i = 0; while (smis[i] != "EOS") { std::string smiles = smis[i++]; // std::cerr<< ">>>Molecule: " << smiles << std::endl; ROMol *m = SmilesToMol(smiles); TEST_ASSERT(m); MolOps::assignStereochemistry(*m, true); _renumberTest2(m, smiles, 1); delete m; } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void test11() { BOOST_LOG(rdInfoLog) << "testing mol fragments." << std::endl; { std::string smi = "C[C@H]([C@H](c1ccccc1)O)N2CCCCC2.C[C@@H]([C@H](c1ccccc1)O)N2CCCCC2"; ROMol *m = SmilesToMol(smi); TEST_ASSERT(m); std::vector vfragsmi; std::vector> frags; unsigned int numFrag = MolOps::getMolFrags(*m, frags); for (unsigned i = 0; i < numFrag; ++i) { std::string smii = MolFragmentToSmiles(*m, frags[i], nullptr, nullptr, nullptr, true); // std::cout << "Test "<< smii << std::endl; vfragsmi.push_back(smii); } std::string smi1 = MolToSmiles(*m, true); delete m; smi = "C[C@@H]([C@H](c1ccccc1)O)N2CCCCC2.C[C@H]([C@H](c1ccccc1)O)N2CCCCC2"; m = SmilesToMol(smi); TEST_ASSERT(m); std::string smi2 = MolToSmiles(*m, true); delete m; // std::cout << smi1 << "\n" << smi2 << std::endl; TEST_ASSERT(smi1 == smi2); } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void test12() { BOOST_LOG(rdInfoLog) << "testing protein round-tripping." << std::endl; std::string rdbase = getenv("RDBASE"); { std::string fName = rdbase + "/Code/GraphMol/FileParsers/test_data/2FVD.pdb"; ROMol *m = PDBFileToMol(fName); TEST_ASSERT(m); std::string smi1 = MolToSmiles(*m, true); delete m; m = SmilesToMol(smi1); TEST_ASSERT(m); std::string smi2 = MolToSmiles(*m, true); delete m; // std::cout << smi1 << "\n" << smi2 << std::endl; TEST_ASSERT(smi1 == smi2); } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void testGithub1567() { BOOST_LOG(rdInfoLog) << "testing github #1567: Non-canonical result from MolFragmentToSmiles()" << std::endl; { ROMol *m1 = SmilesToMol("CC1CN(Cc2cccc(C)c2)C1"); TEST_ASSERT(m1); int m1Ats_a[6] = {1, 12, 3, 4, 5, 11}; std::vector m1Ats(m1Ats_a, m1Ats_a + 6); int m1Bnds_a[5] = {12, 11, 3, 4, 13}; std::vector m1Bnds(m1Bnds_a, m1Bnds_a + 5); std::string smi1 = MolFragmentToSmiles(*m1, m1Ats, &m1Bnds); ROMol *m2 = SmilesToMol("CN(CCC)Cc1cccc(C)c1"); TEST_ASSERT(m2); int m2Ats_a[6] = {3, 2, 1, 5, 6, 12}; std::vector m2Ats(m2Ats_a, m2Ats_a + 6); int m2Bnds_a[5] = {2, 1, 4, 5, 12}; std::vector m2Bnds(m2Bnds_a, m2Bnds_a + 5); std::string smi2 = MolFragmentToSmiles(*m2, m2Ats, &m2Bnds); TEST_ASSERT(smi1 == smi2); delete m1; delete m2; } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; } void testCanonicalDiastereomers() { // FIX: this is another one that we dno't currently handle properly #if 0 BOOST_LOG(rdInfoLog) << "testing diastereomer problem." << std::endl; auto m1 = "F[C@@H](Cl)[C@H](F)Cl"_smiles; auto m2 = "F[C@H](Cl)[C@@H](F)Cl"_smiles; auto smi1 = MolToSmiles(*m1); auto smi2 = MolToSmiles(*m2); TEST_ASSERT(smi1 != smi2); BOOST_LOG(rdInfoLog) << "Finished" << std::endl; #endif } void testRingsAndDoubleBonds() { // FIX: we don't currently handle this case properly #if 0 BOOST_LOG(rdInfoLog) << "testing some particular ugly para-stereochemistry examples." << std::endl; std::vector smis = {"C/C=C/C=C/C=C/C=C/C", "C/C=C1/C[C@H](O)C1", "C/C=C1/CC[C@H](O)CC1"}; for (const auto smi : smis) { SmilesParserParams ps; ps.sanitize = false; ps.removeHs = false; std::unique_ptr mol(SmilesToMol(smi, ps)); TEST_ASSERT(mol); mol->setProp(common_properties::_StereochemDone, 1); mol->updatePropertyCache(); MolOps::setBondStereoFromDirections(*mol); std::cerr << " " << MolToSmiles(*mol) << std::endl; _renumberTest(mol.get(), smi, 500); std::cerr << " " << MolToSmiles(*mol) << std::endl; } BOOST_LOG(rdInfoLog) << "Finished" << std::endl; #endif } int main() { RDLog::InitLogs(); #if 1 test1(); test2(); test3(); test4(); test5(); test6(); test7a(); test9(); test10(); test11(); test12(); test7(); test8(); testGithub1567(); #endif testRingsAndDoubleBonds(); testCanonicalDiastereomers(); return 0; }