"""Time three DendroPy operations over the tree pairs listed in ``sim_trees``.

Input
    ``sim_trees``: one tree pair per line, written as two tab-separated
    Newick strings.

Output
    One CSV per benchmark (``dendropy-contract-times.csv``,
    ``dendropy-postord-times.csv``, ``dendropy-rfs-times.csv``).  Each row is
    ``<len(tree1)>,<mean milliseconds per iteration>``.

Every timed iteration re-parses its tree(s) from the Newick text, so parsing
cost is part of each reported figure.
"""
from random import sample
import time

from dendropy import TaxonNamespace, Tree
from dendropy.calculate import treecompare

# Number of timed repetitions per tree pair; reported times are the mean.
num_iter = 100

# The contraction benchmark draws labels from "0" .. str(NUM_TAXA - 1).
# NOTE(review): assumes every tree in sim_trees carries exactly these taxon
# labels -- confirm against the generator of sim_trees.
NUM_TAXA = 200
NUM_SUB_TAXA = NUM_TAXA // 2


def _load_tree_pairs(path):
    """Return a list of ``[newick_1, newick_2]`` pairs read from *path*.

    Blank lines are skipped.  Any other line that does not hold exactly two
    tab-separated fields raises ``ValueError`` naming the offending line, so
    bad input is reported before any benchmark output is written.
    """
    pairs = []
    with open(path, "r") as handle:
        for line_no, line in enumerate(handle, start=1):
            if not line.strip():
                continue
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) != 2:
                raise ValueError(
                    f"{path}:{line_no}: expected 2 tab-separated trees, "
                    f"found {len(fields)} field(s)"
                )
            pairs.append(fields)
    return pairs


def _mean_ms_since(start, iterations):
    """Return the mean per-iteration time in ms elapsed since *start*.

    *start* must be a ``time.perf_counter()`` reading.
    """
    return (time.perf_counter() - start) * 1000 / iterations


trees = _load_tree_pairs("sim_trees")

print("Contract")
with open("dendropy-contract-times.csv", "w") as f:
    for t1_str, _t2_str in trees:
        # Draw all label subsets up front so sampling is not timed.
        subsample_list = [
            [str(label) for label in sample(range(NUM_TAXA), NUM_SUB_TAXA)]
            for _ in range(num_iter)
        ]
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()
        for i in range(num_iter):
            tns = TaxonNamespace()
            tree1 = Tree.get(data=t1_str, schema="newick", taxon_namespace=tns)
            tree1.extract_tree_with_taxa_labels(subsample_list[i])
        # Stop the clock before formatting the row so len() is not timed.
        mean_ms = _mean_ms_since(start_time, num_iter)
        f.write(f"{len(tree1)},{mean_ms}\n")

print("Postord")
with open("dendropy-postord-times.csv", "w") as f:
    for t1_str, _t2_str in trees:
        start_time = time.perf_counter()
        for _ in range(num_iter):
            tns = TaxonNamespace()
            tree1 = Tree.get(data=t1_str, schema="newick", taxon_namespace=tns)
            # Materialise the iterator so the full traversal is performed.
            list(tree1.postorder_node_iter())
        mean_ms = _mean_ms_since(start_time, num_iter)
        f.write(f"{len(tree1)},{mean_ms}\n")

print("rfs")
with open("dendropy-rfs-times.csv", "w") as f:
    for t1_str, t2_str in trees:
        start_time = time.perf_counter()
        for _ in range(num_iter):
            # Both trees share one namespace so their taxa are comparable.
            tns = TaxonNamespace()
            tree1 = Tree.get(data=t1_str, schema="newick", taxon_namespace=tns)
            tree2 = Tree.get(data=t2_str, schema="newick", taxon_namespace=tns)
            tree1.encode_bipartitions()
            tree2.encode_bipartitions()
            treecompare.false_positives_and_negatives(tree1, tree2)
        mean_ms = _mean_ms_since(start_time, num_iter)
        f.write(f"{len(tree1)},{mean_ms}\n")