// Copyright 2021 Chiral Ltd.
// Licensed under the Apache-2.0 license (https://opensource.org/licenses/Apache-2.0)
// This file may not be copied, modified, or distributed
// except according to those terms.
//! Tests on molecules from the ChEMBL database
use std::io::prelude::*;
use graph_symmetry::ext::molecule;
use graph_symmetry::core;
/// Path of the ChEMBL data file consumed by the `chembl` test.
///
/// Empty by default; it must be set to an existing file before running the
/// test, otherwise the test panics. The test skips the first line as a header
/// and expects every other line to hold four tab-separated fields, the second
/// of which is read as a SMILES string.
static CHEMBL_FILE: &str = "";
/// Opens `filename` and returns an iterator over its lines.
///
/// The file is read through a `BufReader`; every item produced by the
/// returned iterator is an `std::io::Result<String>` for one line.
///
/// # Errors
///
/// Returns the error reported by `File::open` when the file cannot be opened.
fn read_lines<P>(filename: P) -> std::io::Result<std::io::Lines<std::io::BufReader<std::fs::File>>>
where
    P: AsRef<std::path::Path>,
{
    let file = std::fs::File::open(filename)?;
    // `lines()` comes from `BufRead`, brought into scope by `std::io::prelude`.
    Ok(std::io::BufReader::new(file).lines())
}
/// Returns the longest `.`-separated component of a SMILES string.
///
/// Length is measured in bytes. When several components share the maximum
/// length, the one appearing last in the input is returned. An input without
/// any `.` (including the empty string) is returned unchanged.
fn clean_smiles(smiles_origin: &str) -> String {
    smiles_origin
        .split('.')
        // `max_by_key` yields the last of several equally long components,
        // the same choice a stable ascending sort followed by "take last" makes.
        .max_by_key(|part| part.len())
        // `split` always produces at least one item, so the fallback is never used.
        .unwrap_or("")
        .to_owned()
}
/// Runs canonical numbering and symmetry perception on every molecule listed
/// in `CHEMBL_FILE` and prints each case where the two orbit sets differ.
///
/// The test is `#[ignore]`d and must be run explicitly. A mismatch is only
/// printed; it does not fail the test.
///
/// # Panics
///
/// Panics when `CHEMBL_FILE` does not exist or cannot be opened.
#[test]
#[ignore]
fn chembl() {
    if !std::path::Path::new(CHEMBL_FILE).exists() {
        panic!("Can't find the chembl data file! Please set CHEMBL_FILE correctly!")
    }
    // An unreadable file must fail the test rather than let it pass vacuously.
    let lines = read_lines(CHEMBL_FILE).expect("Can't open the chembl data file!");

    let mut count: usize = 0; // 1-based number of the line being processed
    let mut count_large_molecule: usize = 0;
    let skip: usize = 0; // raise to resume a run after the given line number
    for line in lines {
        count += 1;
        if count == 1 {
            continue; // skip headline
        }
        if count <= skip {
            continue;
        }
        // Lines that cannot be read are skipped silently (best effort).
        let chembl_line = match line {
            Ok(text) => text,
            Err(_) => continue,
        };
        let parts: Vec<&str> = chembl_line.split('\t').collect();
        if parts.len() != 4 {
            println!("Parsing Error on {}", chembl_line);
            continue;
        }

        // The second column holds the SMILES string.
        let smiles: String = clean_smiles(&String::from(parts[1]));
        let mol = molecule::Molecule::from_smiles(&smiles);
        if mol.atoms.len() == 0 {
            continue;
        }
        if mol.atoms.len() > 250 {
            println!("large molecule");
            count_large_molecule += 1;
            continue; // ignore large molecule
        }
        println!("\n{}\n{}\n{}", count, smiles, mol.smiles_with_index(&smiles, &vec![]));

        // Element types are inferred from the signature of
        // `canonical_numbering_and_symmetry_perception`.
        let mut orbits_partitioned: Vec<_> = vec![];
        let mut orbits_symmetry: Vec<_> = vec![];
        let mut numbering: Vec<_> = vec![];
        molecule::canonical_numbering_and_symmetry_perception(
            &mol.atoms,
            &mut orbits_partitioned,
            &mut orbits_symmetry,
            &mut numbering,
        );
        if !core::orbit_ops::orbits_equal(&orbits_partitioned, &orbits_symmetry) {
            // Sort both sets before printing them in the mismatch report.
            core::orbit_ops::orbits_sort(&mut orbits_partitioned);
            core::orbit_ops::orbits_sort(&mut orbits_symmetry);
            println!("GIAP failed {}:\n{}\nGIAP orbits {:?}\nCNAP orbits {:?}\n", count, mol.smiles_with_index(&smiles, &vec![]), orbits_partitioned, orbits_symmetry);
        }
    }
    println!("large molecule count: {}", count_large_molecule);
}