#!/usr/bin/env python import gzip import os import sys import unittest import gemmi from common import full_path, get_path_for_tempfile def is_written_to_pdb(line, via_cif): if line[:6] in ['COMPND', 'SOURCE', 'MDLTYP', 'AUTHOR', 'REVDAT', 'JRNL ', 'SEQADV', 'HET ', 'HETNAM', 'FORMUL', 'SITE ', 'MASTER', 'CONECT']: return False # ORIGX is written only if it is a non-identity matrix # SCALE is written only if it is non-default if line[:5] in ['ORIGX', 'SCALE']: return False if line[:6] == 'REMARK' and via_cif and line[7:10] not in [' 2', '350']: return False return True # $ zgrep -P '(^HEADER|CRYST1|SSBOND|ATOM.*SG)' pdb5cfg.ent.gz SSBOND_FRAGMENT = """\ HEADER LYASE 08-JUL-15 5CFG SSBOND 1 CYS A 138 CYS A 138 1555 2555 2.48 CRYST1 86.530 45.120 77.980 90.00 105.15 90.00 C 1 2 1 4 ATOM 166 SG CYS A 65 17.771 -16.223 -10.059 1.00 39.15 S ATOM 396 SG CYS A 93 10.163 -20.624 -11.577 1.00 25.81 S ATOM 444 SG CYS A 99 12.757 -34.484 -2.237 1.00 44.66 S ATOM 744 SG CYS A 138 0.524 -16.872 -1.123 1.00 38.43 S ATOM 1309 SG CYS A 208 10.668 -20.500 -14.891 1.00 25.60 S ATOM 2024 SG CYS A 296 14.668 -4.407 -16.359 1.00 25.70 S ATOM 2128 SG CYS A 310 24.141 -22.158 -17.213 1.00 20.07 S """ # noqa: W291 - trailing whitespace SHORT_SSBOND = """\ SSBOND 1 CYS A 6 CYS A 11 ATOM 37 SG CYS A 6 38.416 25.985 18.085 1.00 14.07 16 S ATOM 69 SG CYS A 11 36.989 25.994 19.570 1.00 14.23 16 S """ # fragment of $CCP4/examples/toxd/toxd_mod_p1.pdb without chain names BLANK_CHAIN_FRAGMENT = """\ CRYST1 50.000 50.00 50.00 90.00 90.00 90.00 ATOM 1 N GLN 1 16.207 -7.425 8.244 1.00 35.23 ATOM 2 CA GLN 1 16.175 -6.019 8.805 1.00 35.13 ATOM 3 C GLN 1 14.890 -5.752 9.585 1.00 33.49 ATOM 4 O GLN 1 14.563 -6.517 10.541 1.00 35.33 ATOM 5 N PRO 2 14.235 -4.626 9.270 1.00 30.05 ATOM 6 CA PRO 2 12.967 -4.255 9.939 1.00 26.63 ATOM 7 C PRO 2 11.955 -5.328 9.522 1.00 24.15 ATOM 8 O PRO 2 12.217 -6.028 8.540 1.00 22.83 ATOM 9 CB PRO 2 12.617 -2.895 9.427 1.00 26.18 ATOM 10 CG PRO 2 13.342 -2.761 8.127 1.00 28.07 ATOM 11 CD PRO 2 14.574 -3.663 8.212 1.00 29.12 ATOM 12 N ARG 3 10.892 -5.446 10.311 1.00 21.86 ATOM 13 CA ARG 3 9.875 -6.426 10.119 1.00 19.82 """ # from https://github.com/project-gemmi/gemmi/issues/37 AMBER_FRAGMENT = """\ ATOM 7 CB VAL 1 -14.375 -11.856 27.866 1.00 0.00 ATOM 8 HB VAL 1 -14.217 -11.118 27.080 1.00 0.00 ATOM 9 CG1 VAL 1 -13.033 -12.232 28.471 1.00 0.00 ATOM 10 HG11 VAL 1 -12.398 -12.673 27.702 1.00 0.00 """ FRAGMENT_WITH_HG = """\ HETATM 4406 HG HG P 693 28.820 31.751 40.919 0.20 25.99 HETATM 4407 HG1 HG P 694 27.455 32.086 39.686 0.20 35.18 """ # from $CCP4/examples/data/insulin.pdb UNORDERED_ALTLOC_FRAGMENT = """\ ATOM 54 CB THR A 8 21.486 49.557 34.680 1.00 17.33 C ANISOU 54 CB THR A 8 2677 1737 2168 -147 348 -77 C ATOM 55 N ASER A 9 22.340 46.718 36.111 0.50 15.13 N ANISOU 55 N ASER A 9 2397 1624 1726 -74 278 -66 N ATOM 56 CA ASER A 9 22.426 45.878 37.287 0.50 14.83 C ANISOU 56 CA ASER A 9 2219 1729 1687 -83 202 -92 C ATOM 57 C ASER A 9 23.045 44.544 36.849 0.50 13.99 C ANISOU 57 C ASER A 9 2072 1701 1540 -107 221 -92 C ATOM 58 O ASER A 9 23.264 44.329 35.667 0.50 13.72 O ANISOU 58 O ASER A 9 2090 1725 1397 -111 331 -63 O ATOM 59 CB ASER A 9 21.034 45.674 37.848 0.50 15.28 C ANISOU 59 CB ASER A 9 2236 1828 1740 0 251 -242 C ATOM 60 OG ASER A 9 20.347 46.901 38.012 0.50 19.98 O ANISOU 60 OG ASER A 9 2799 2271 2521 150 548 90 O ATOM 61 N BSER A 9 22.313 46.728 36.138 0.50 14.71 N ANISOU 61 N BSER A 9 2322 1562 1703 -78 282 -86 N ATOM 62 CA BSER A 9 22.511 45.877 37.303 0.50 14.08 C ANISOU 62 CA BSER A 9 2078 1645 1625 -106 203 -118 C ATOM 63 C BSER A 9 22.951 44.499 36.837 0.50 13.55 C ANISOU 63 C BSER A 9 1971 1640 1538 -117 232 -119 C ATOM 64 O BSER A 9 22.899 44.194 35.639 0.50 12.89 O ANISOU 64 O BSER A 9 1807 1651 1438 -89 415 -90 O ATOM 65 CB BSER A 9 21.244 45.783 38.133 0.50 14.46 C ANISOU 65 CB BSER A 9 1987 1655 1851 -61 227 -231 C ATOM 66 OG BSER A 9 20.199 45.110 37.441 0.50 13.08 O ANISOU 66 OG BSER A 9 1812 1758 1398 -16 450 56 O ATOM 67 N VAL A 10 23.342 43.662 37.798 1.00 13.44 N ANISOU 67 N VAL A 10 2079 1653 1371 -123 133 -148 N """ # noqa: W291 - trailing whitespace def read_lines_and_remove(path): with open(path) as f: out_lines = f.readlines() os.remove(path) return out_lines class TestMol(unittest.TestCase): def test_residue(self): res = gemmi.Residue() self.assertEqual(res.label_seq, None) self.assertEqual(res.seqid.num, None) res.label_seq = 1 self.assertEqual(res.label_seq, 1) res.seqid.num = 2 self.assertEqual(res.seqid.num, 2) res.label_seq = None self.assertEqual(res.label_seq, None) res.seqid.num = None self.assertEqual(res.seqid.num, None) for name in ['HOH', 'hoh', 'DOD', 'h2o', 'H2O', 'WAT']: res.name = name self.assertTrue(res.is_water()) for name in ['SO4', '', 'HO', 'hoho', 'oho']: res.name = name self.assertFalse(res.is_water()) def test_read_5i55(self): cell = gemmi.read_structure(full_path('5i55.cif')).cell self.assertAlmostEqual(cell.a, 29.46) self.assertAlmostEqual(cell.b, 10.51) self.assertAlmostEqual(cell.c, 29.71) self.assertEqual(cell.alpha, 90) self.assertAlmostEqual(cell.beta, 111.98) self.assertEqual(cell.gamma, 90) def test_read_5i55_again(self): block = gemmi.cif.read(full_path('5i55.cif'))[0] st = gemmi.make_structure_from_block(block) self.assertEqual(st.info['_entry.id'], '5I55') center = st[0].calculate_center_of_mass() # PyMOL>print cmd.centerofmass() pymol_ctr = [15.468438991742687, 4.8312495347721045, 20.607400844016833] self.assertTrue(center.dist(gemmi.Position(*pymol_ctr)) < 1e-7) chain, = st[0] a, b, c, d = chain.subchains() ent_a = st.get_entity_of(a) self.assertEqual(ent_a.name, '1') self.assertEqual(ent_a.entity_type, gemmi.EntityType.Polymer) self.assertEqual(ent_a.polymer_type, gemmi.PolymerType.PeptideL) ent_b = st.get_entity_of(b) self.assertEqual(ent_b.entity_type, gemmi.EntityType.NonPolymer) self.assertEqual(ent_b.polymer_type, gemmi.PolymerType.Unknown) ent_d = st.get_entity('4') self.assertEqual(ent_d.subchains, ['D']) self.assertEqual(ent_d.entity_type, gemmi.EntityType.Water) self.assertEqual(ent_d.polymer_type, gemmi.PolymerType.Unknown) output_block = st.make_mmcif_document().sole_block() cnames = block.get_mmcif_category_names() cnames_out = [name for name in output_block.get_mmcif_category_names() if len(output_block.find_mmcif_category(name)) > 0] common_categories = [name for name in cnames_out if name in cnames] common_categories.sort() cc = ['_atom_site.', '_atom_type.', '_cell.', '_chem_comp.', '_diffrn.', '_diffrn_detector.', '_diffrn_radiation.', '_diffrn_source.', '_entity.', '_entity_poly.', '_entity_poly_seq.', '_entry.', '_exptl.', '_exptl_crystal.', '_pdbx_database_status.', '_pdbx_struct_assembly.', '_pdbx_struct_assembly_gen.', '_pdbx_struct_oper_list.', '_refine.', '_reflns.', '_software.', '_struct.', '_struct_asym.', '_struct_conf.', '_struct_conf_type.', '_struct_conn.', '_struct_conn_type.', '_struct_keywords.', '_struct_ref.', '_struct_ref_seq.', '_symmetry.'] self.assertEqual(common_categories, cc) for name in common_categories: cat_in = block.get_mmcif_category(name) cat_out = output_block.get_mmcif_category(name) for tag, values_out in cat_out.items(): if tag == 'ccp4_link_id': continue values_in = cat_in[tag] self.assertEqual(len(values_in), len(values_out)) for (a, b) in zip(values_in, values_out): try: if a == b or abs(float(a) - float(b)) < 2e-4: continue except ValueError: pass self.assertTrue(name+tag in ['_struct_conf.id', '_chem_comp.type']) for name_out in cnames_out: self.assertTrue(name_out in cnames) def test_5i55_predefined_removals(self, clear_entities=False): st = gemmi.read_structure(full_path('5i55.cif')) if clear_entities: self.assertEqual(len(st.entities), 4) st.entities = gemmi.EntityList() self.assertEqual(len(st.entities), 0) lys12 = st[0]['A']['12']['LYS'] count_b = sum(a.altloc == 'B' for a in lys12) model = st[0] # one author-chain and 4 label-chains: AA, 2 x ligand, waters self.assertEqual(len(model), 1) self.assertEqual(len(model.subchains()), 4) n_waters = len(model.get_subchain('D')) n_sites = model.count_atom_sites() occ_sum = model.count_occupancies() self.assertEqual(n_sites, occ_sum + count_b) st.remove_waters() self.assertEqual(len(model.subchains()), 3) self.assertEqual(model.count_atom_sites(), n_sites - n_waters) self.assertEqual(model.count_occupancies(), occ_sum - n_waters) st.remove_empty_chains() self.assertEqual(len(model.subchains()), 3) n_res = len(model['A']) st.remove_ligands_and_waters() self.assertEqual(len(model['A']), n_res - 2) mse = model['A'][0] self.assertEqual(len(mse), 8) mse.trim_to_alanine() self.assertEqual([a.name for a in mse], ['N', 'CA', 'C', 'O', 'CB']) model['A'].trim_to_alanine() # ALA has 5 atoms, except the last one which has OXT (hence +1) expected_count = sum(4 + (r.name != 'GLY') for r in model['A']) + 1 self.assertEqual(model.count_occupancies(), expected_count) st.remove_alternative_conformations() self.assertEqual(model.count_occupancies(), expected_count - 5/2.) self.assertTrue(not any(a.has_altloc() for a in lys12)) def test_5i55_predefined_removals2(self): self.test_5i55_predefined_removals(clear_entities=True) def test_rnase_predefined_removals(self, add_entities=False): st = gemmi.read_structure(full_path('rnase_frag.pdb')) if add_entities: self.assertEqual(len(st.entities), 0) st.assign_subchains() st.ensure_entities() self.assertEqual(len(st.entities), 4) model = st[0] nres_a = len(model['A']) nres_b = len(model['B']) st.remove_ligands_and_waters() # removes SO4 from each chain self.assertEqual(len(model['A']), nres_a - 1) self.assertEqual(len(model['B']), nres_b - 1) self.assertEqual(len(model['W']), 0) self.assertEqual(len(model), 3) st.remove_empty_chains() self.assertEqual(len(model), 2) def test_rnase_predefined_removals2(self): self.test_rnase_predefined_removals(add_entities=True) def test_3dg1(self): st = gemmi.read_structure(full_path('3dg1_final.cif')) self.assertEqual(st.info['_entry.id'], '3DG1') self.assertEqual(len(st[0]), 1) chain = st[0]['A'] for res in chain[-2:]: self.assertEqual(res.name, 'HOH') for res in chain: for atom in res: n_images = st.cell.is_special_position(atom.pos) self.assertEqual(atom.occ * (n_images + 1), 1.0) def test_software_category(self): doc = gemmi.cif.read_file(full_path('3dg1_final.cif')) input_block = doc.sole_block() st = gemmi.make_structure_from_block(input_block) output_block = st.make_mmcif_document().sole_block() software = output_block.get_mmcif_category('_software') del software['date'] assert software['version'][0] is False software['version'][0] = None self.assertEqual(input_block.get_mmcif_category('_software'), software) def test_5moo_header(self): st = gemmi.read_structure(full_path('5moo_header.pdb')) block = st.make_mmcif_document().sole_block() refine = block.get_mmcif_category('_refine') self.assertEqual(refine['ls_d_res_high'], ['1.44', '1.43']) self.assertEqual(refine['pdbx_starting_model'], ['4I8H'] * 2) self.assertEqual(list(block.find_values('_diffrn.ambient_temp')), ['295', '295']) def check_1pfe(self, st): self.assertAlmostEqual(st.cell.a, 39.374) self.assertEqual(st.cell.gamma, 120) self.assertEqual(st.name, '1PFE') self.assertEqual(st.spacegroup_hm, 'P 63 2 2') self.assertEqual(st.info['_entry.id'], '1PFE') self.assertEqual(len(st), 1) self.assertEqual(len(st[0]), 2) label_to_auth_name = {sub.subchain_id(): ch.name for ch in st[0] for sub in ch.subchains()} self.assertEqual(label_to_auth_name, dict(A='A', B='B', C='A', D='B', E='B', F='A', G='B')) self.assertEqual(len(st[0]['A']['1']), 1) chain_a = st[0]['A'] self.assertEqual(chain_a[0].label_seq, 1) self.assertEqual(chain_a['1'][0].seqid.num, 1) b3 = st[0]['B']['3'] self.assertEqual(len(b3), 2) self.assertEqual(repr(b3[0]), repr(st[0]['B'][2])) self.assertEqual(b3[0].name, 'N2C') self.assertEqual(b3[-1].name, 'NCY') chain_c = st[0].get_subchain('C') self.assertEqual(len(chain_c), 1) res_cl = list(chain_c)[0] self.assertEqual(res_cl.name, 'CL') self.assertEqual(len(res_cl), 1) atom_cl = res_cl.find_atom('CL', '*') self.assertAlmostEqual(atom_cl.occ, 0.17) self.assertEqual(atom_cl.element.name, 'Cl') return st def test_previous_next_residue(self): st = gemmi.read_structure(full_path('1pfe.cif.gz'), merge_chain_parts=False) chain_b = st[0]['B'] res = chain_b['6']['ALA'] res = chain_b.next_residue(res) self.assertEqual(res.name, 'NCY') res = chain_b.next_residue(res) self.assertEqual(res.name, 'MVA') self.assertIsNone(chain_b.next_residue(res)) res = chain_b.previous_residue(res) self.assertEqual(res.name, 'NCY') res = chain_b.previous_residue(res) self.assertEqual(res.name, 'ALA') res = chain_b.next_residue(chain_b['7']['N2C']) self.assertEqual(res.name, 'MVA') self.assertEqual(chain_b.previous_residue(res).name, 'NCY') self.assertIsNone(chain_b.previous_residue(chain_b[0])) def test_read_1pfe_cif(self): st = gemmi.read_structure(full_path('1pfe.cif.gz')) self.check_1pfe(st) # write structure to cif and read it back out_name = get_path_for_tempfile(suffix='.cif') st.make_mmcif_document().write_file(out_name) st2 = gemmi.read_structure(out_name) os.remove(out_name) self.check_1pfe(st2) def test_read_1pfe_json(self): st = gemmi.read_structure(full_path('1pfe.json')) self.check_1pfe(st) def test_read_1orc(self): st = gemmi.read_structure(full_path('1orc.pdb')) self.assertEqual(st.resolution, 1.54) self.assertAlmostEqual(st.cell.a, 34.77) self.assertEqual(st.cell.alpha, 90) self.assertEqual(len(st.ncs), 0) model = st[0] self.assertEqual(len(model), 1) self.assertEqual(len(model.subchains()), 2) A = model['A'] waters = A.get_waters() self.assertEqual(len(waters), 57) # FORMUL 2 HOH *57(H2 O) self.assertTrue(all(res.name == 'HOH' for res in waters)) self.assertTrue(A['3']) self.assertFalse(A['0']) self.assertEqual([res.seqid.num for res in A if res.seqid.icode != ' '], [56] * 5) self.assertEqual(len(A['55']), 1) self.assertEqual(len(A['55B']), 0) self.assertEqual(len(A['56B']), 1) self.assertEqual(A['56'][0].seqid.icode, ' ') self.assertEqual(A['56c'][0].seqid.icode, 'C') result = gemmi.align_sequence_to_polymer(st.entities[0].full_sequence, A.get_polymer(), gemmi.PolymerType.Unknown) self.assertEqual(result.cigar_str(), '2I64M5I') def write_and_read(self, st, via_cif): if via_cif: st.setup_entities() st.assign_label_seq_id() doc = st.make_mmcif_document() st = gemmi.make_structure_from_block(doc[0]) out_name = get_path_for_tempfile() st.write_pdb(out_name) return read_lines_and_remove(out_name) def test_read_write_1orc(self, via_cif=False): path = full_path('1orc.pdb') with open(path) as f: expected = [line for line in f if is_written_to_pdb(line, via_cif)] st = gemmi.read_structure(path) out_lines = self.write_and_read(st, via_cif) self.assertEqual(expected, out_lines) def test_read_write_1orc_via_cif(self): self.test_read_write_1orc(via_cif=True) def test_read_write_1lzh(self, via_cif=False): path = full_path('1lzh.pdb.gz') mode = 'rt' if sys.version_info >= (3,) else 'r' with gzip.open(path, mode=mode) as f: expected = [line for line in f if is_written_to_pdb(line, via_cif)] out_lines = self.write_and_read(gemmi.read_structure(path), via_cif) self.assertEqual(expected[0], out_lines[0]) # TITLE lines differ because the text is broken at different word self.assertEqual(expected[3:], out_lines[3:]) def test_read_write_1lzh_via_cif(self): self.test_read_write_1lzh(via_cif=True) def test_ncs_in_1lzh(self): st = gemmi.read_structure(full_path('1lzh.pdb.gz')) self.assertEqual(len(st.ncs), 1) A, B = st[0] for ra, rb in zip(A, B): pa = ra['CA'][0].pos pb = rb['CA'][0].pos image_of_pb = st.ncs[0].apply(pb) self.assertTrue(pa.dist(image_of_pb) < 0.01) def test_read_write_5cvz_final(self, via_cif=False): path = full_path('5cvz_final.pdb') with open(path) as f: expected = [line.rstrip() for line in f if is_written_to_pdb(line, via_cif) # SCALE is not written b/c CRYST1 has more precision. and line[:5] != 'SCALE'] st = gemmi.read_structure(path) if via_cif: # input file w/o TER record -> subchains not setup automatically st.setup_entities() doc = st.make_mmcif_document() st = gemmi.make_structure_from_block(doc[0]) out_lines = self.write_and_read(st, via_cif=False) if via_cif: out_lines = [line for line in out_lines # input file has no REMARK 2, but it gets generated # from REMARK 3 when going pdb->cif->pdb if line[:10] != 'REMARK 2' and line[:5] != 'TER '] self.assertEqual(expected, [line.rstrip() for line in out_lines]) def test_read_write_5cvz_final_via_cif(self): self.test_read_write_5cvz_final(via_cif=True) def test_read_write_4oz7(self, via_cif=False): path = full_path('4oz7.pdb') with open(path) as f: expected = [line for line in f if is_written_to_pdb(line, via_cif)] st = gemmi.read_structure(path, merge_chain_parts=False) out_lines = self.write_and_read(st, via_cif) self.assertEqual(expected, out_lines) def test_read_write_4oz7_via_cif(self): self.test_read_write_4oz7(via_cif=True) def test_pdb_element_names(self): pdb_line = "HETATM 4154 MG MG A 341 1.384 19.340 11.968" \ " 1.00 67.64 MG" for line in [pdb_line, pdb_line.strip(' MG'), pdb_line[:-2] + ' ']: st = gemmi.read_pdb_string(line) residue = st[0].sole_residue('A', gemmi.SeqId(341, ' ')) mg_atom = residue.sole_atom('MG') self.assertEqual(mg_atom.element.name, 'Mg') self.assertAlmostEqual(mg_atom.b_iso, 67.64, delta=1e-6) mg_atom.element = gemmi.Element('Cu') self.assertEqual(mg_atom.element.name, 'Cu') def test_pdb_misaligned_element(self): pdb_line = "ATOM 7 S SUB A 7 34.489 -14.293 34.343" \ " 0.29 43.77 S" for line in [pdb_line, pdb_line + '\n', pdb_line + '\r\n']: st = gemmi.read_pdb_string(line) atom = st[0].sole_residue('A', gemmi.SeqId('7')).sole_atom('S') self.assertEqual(atom.element.name, 'S') def test_pdb_element_names_from_amber(self): st = gemmi.read_pdb_string(AMBER_FRAGMENT) residue = st[0][''][0] self.assertEqual(residue.sole_atom('CB').element, gemmi.Element('C')) self.assertEqual(residue.sole_atom('HB').element, gemmi.Element('H')) self.assertEqual(residue.sole_atom('CG1').element, gemmi.Element('C')) self.assertEqual(residue.sole_atom('HG11').element, gemmi.Element('H')) chain = gemmi.read_pdb_string(FRAGMENT_WITH_HG)[0]['P'] self.assertEqual(chain[0].sole_atom('HG').element, gemmi.Element('Hg')) self.assertEqual(chain[1].sole_atom('HG1').element, gemmi.Element('Hg')) def test_4hhh_frag(self): path = full_path('4hhh_frag.pdb') with open(path) as f: frag = f.read() st = gemmi.read_pdb_string(frag) in_headers = frag.splitlines() out_headers = st.make_pdb_headers().splitlines() self.assertEqual(in_headers[0], out_headers[0]) # the difference 4555 vs 2555 doesn't matter for us self.assertEqual(in_headers[1], out_headers[1].replace(' 4555 ', ' 2555 ')) self.assertEqual(in_headers[2], out_headers[2]) def test_blank_mmcif(self): input_block = gemmi.cif.Block('empty') st = gemmi.make_structure_from_block(input_block) self.assertEqual(st.name, 'empty') output_block = st.make_mmcif_document().sole_block() self.assertEqual(output_block.get_mmcif_category_names(), []) def test_blank_chain(self): st = gemmi.read_pdb_string(BLANK_CHAIN_FRAGMENT) out_name = get_path_for_tempfile() st.write_minimal_pdb(out_name) out = read_lines_and_remove(out_name) # CRYST1 differs (50.000 not 50.00 and added P1). # ATOM lines have added element names. trimmed_out = [line[:66] for line in out[1:]] self.assertEqual(trimmed_out, BLANK_CHAIN_FRAGMENT.splitlines()[1:]) def test_ncs(self): st = gemmi.read_structure(full_path('5cvz_final.pdb')) self.assertEqual(st.resolution, 3.29) first_atom = st[0].sole_residue('A', gemmi.SeqId(17, ' '))[0] ne2 = st[0].sole_residue('A', gemmi.SeqId('63')).sole_atom('NE2') direct_dist = first_atom.pos.dist(ne2.pos) self.assertAlmostEqual(direct_dist, 34.89, delta=1e-2) nearest_image = st.cell.find_nearest_image(first_atom.pos, ne2.pos) nearest_dist = nearest_image.dist() self.assertAlmostEqual(nearest_dist, 8.02, delta=1e-2) # test __getitem__(splice) - unrelated to NCS (sometimes we put # unrelated tests together to avoid the same file again) chain = st[0]['A'] res = chain.next_residue(chain[:5][0]) self.assertTrue(res is chain[1]) def test_ssbond(self): st = gemmi.read_pdb_string(SSBOND_FRAGMENT) out = st.make_pdb_headers() self.assertEqual(out.splitlines(), SSBOND_FRAGMENT.splitlines()[:3]) def test_ssbond_again(self): st = gemmi.read_pdb_string(SSBOND_FRAGMENT) doc = st.make_mmcif_document() st2 = gemmi.make_structure_from_block(doc[0]) out = st2.make_pdb_headers() self.assertEqual(out.splitlines(), SSBOND_FRAGMENT.splitlines()[:3]) def test_short_ssbond(self): st = gemmi.read_pdb_string(SHORT_SSBOND) out = st.make_pdb_headers() self.assertEqual(out.splitlines()[0], SHORT_SSBOND.splitlines()[0] + " 1555 1555 2.06 ") def test_add_remove(self): st = gemmi.read_pdb_string(SSBOND_FRAGMENT) st.add_model(st[0]) st.renumber_models() res = st[0].sole_residue('A', gemmi.SeqId('310')) self.assertEqual(len(res), 1) res.remove_atom('SG', ' ') self.assertEqual(len(res), 0) res = st[1].sole_residue('A', gemmi.SeqId('310')) self.assertEqual(len(res), 1) self.assertEqual(len(st[0]['A']), 7) del st[0]['A'][3] self.assertEqual(len(st[0]['A']), 6) self.assertEqual(len(st), 2) self.assertEqual(st[0].name, '1') del st['1'] self.assertEqual(len(st), 1) self.assertEqual(st[0].name, '2') st.renumber_models() self.assertEqual(st[0].name, '1') st.add_model(st[0]) st.add_model(st[0]) st.renumber_models() self.assertEqual(st[0].name, '1') self.assertEqual(st[-1].name, '3') del st[:-1] self.assertEqual(len(st), 1) self.assertEqual(st[0].name, '3') del st[0] self.assertEqual(len(st), 0) def test_remove2(self): st = gemmi.read_structure(full_path('1pfe.cif.gz')) model = st[0] self.assertEqual(len(model), 2) b = model['B'] self.assertEqual(b[0].name, 'DSN') del b['1']['DSN'] self.assertEqual(b[0].name, 'ALA') del b[0] self.assertEqual(b[0].name, 'N2C') # test append_residues() self.assertEqual(len(b), 20) b.append_residues(b[:5], min_sep=10) self.assertEqual(len(b), 25) # test append_residues() with empty chain new_chain = gemmi.Chain('X') new_chain.append_residues(b[:5], min_sep=1) self.assertEqual(len(new_chain), 5) # test adding and removing chains model.add_chain(new_chain, unique_name=False) model.add_chain(new_chain, unique_name=True) model.add_chain(new_chain) self.assertEqual([chain.name for chain in model], list('ABXCX')) del model[2:] model.add_chain(new_chain, unique_name=True) self.assertEqual([chain.name for chain in model], list('ABX')) del model[-1] del model['A'] self.assertEqual(len(model), 1) self.assertEqual(model[0].name, 'B') doc = st.make_mmcif_document() ref_seq = doc[0].get_mmcif_category('_struct_ref_seq') self.assertEqual(ref_seq['pdbx_strand_id'], ['B']) def test_first_conformer(self): model = gemmi.read_structure(full_path('1pfe.cif.gz'))[0] b = model['B'] self.assertEqual([res.name for res in b if not res.is_water()], ['DSN', 'ALA', 'N2C', 'NCY', 'MVA', 'DSN', 'ALA', 'NCY', 'N2C', 'MVA', 'QUI', 'QUI']) self.assertEqual([res.name for res in b.first_conformer() if not res.is_water()], ['DSN', 'ALA', 'N2C', 'MVA', 'DSN', 'ALA', 'NCY', 'MVA', 'QUI', 'QUI']) polymer = b.get_polymer() self.assertEqual([res.name for res in polymer], ['DSN', 'ALA', 'N2C', 'NCY', 'MVA', 'DSN', 'ALA', 'NCY', 'N2C', 'MVA']) self.assertEqual([res.name for res in polymer.first_conformer()], ['DSN', 'ALA', 'N2C', 'MVA', 'DSN', 'ALA', 'NCY', 'MVA']) self.assertEqual(len(polymer), 10) self.assertEqual(polymer.length(), 8) # The bond between 4 MVA(v) and 5 DSN(s) is betwenn C and OG # (so it's not a peptide bond as expected). Depending on the heuristic # used to determine gaps, this sequence could have a gap in the middle. self.assertEqual(polymer.make_one_letter_sequence(), 'sAXvsAXv') self.assertEqual([res.name for res in b.get_ligands()], ['QUI', 'QUI']) res1 = model.sole_residue('A', gemmi.SeqId('1')) self.assertEqual([atom.name for atom in res1.first_conformer()], [atom.name for atom in res1 if atom.altloc != 'B']) def test_different_altloc_order(self): st = gemmi.read_pdb_string(UNORDERED_ALTLOC_FRAGMENT) chain = st[0]['A'] cb = chain['9']['SER']['CB'] cb_numbers = [atom.serial for atom in cb] self.assertEqual(cb_numbers, [59, 65]) self.assertEqual(cb[0].serial, 59) self.assertEqual(cb[1].serial, 65) self.assertEqual(cb[-1].serial, 65) self.assertEqual(cb[-2].serial, 59) self.assertEqual(chain.count_atom_sites(), 14) self.assertEqual(chain.count_occupancies(), 8) st.remove_alternative_conformations() self.assertEqual(chain.count_atom_sites(), 8) def test_extract_sequence_info(self): st = gemmi.read_structure(full_path('5cvz_final.pdb')) st.add_entity_types() polymer = st[0][0].get_polymer() self.assertEqual(polymer.check_polymer_type(), gemmi.PolymerType.PeptideL) expected = ('AAATSLVYDTCYVTLTERATTSFQRQSFPTLKGMGDRAFQVVAFTIQGVS' 'AAPLMYNARLYNPGDTDSVHATGVQLMGTVPRTVRLTPRVGQNNWFFGNT' 'EEAETILAIDGLVSTKGANAPSNTVIVTGCFRLAPSELQSS') self.assertEqual(polymer.make_one_letter_sequence(), expected) def test_string_align(self): result = gemmi.align_string_sequences(list('AABCC'), list('ABC'), [True]) self.assertEqual(result.score, 0) self.assertEqual(result.cigar_str(), '1I3M1I') self.assertEqual(result.add_gaps('AABCC', 1), 'AABCC') self.assertEqual(result.add_gaps('ABC', 2), '-ABC-') self.assertEqual(result.calculate_identity(), 100.) self.assertEqual(result.calculate_identity(1), 60.) self.assertEqual(result.calculate_identity(2), 100.) self.assertEqual(result.match_string, ' ||| ') result = gemmi.align_string_sequences(list('SIMILARITY'), list('PILLAR'), []) self.assertEqual(result.match_count, 4) self.assertEqual(result.cigar_str(), '3M1I3M3I') def test_hemoglobin_alignment(self): # based on example from # http://biopython.org/DIST/docs/tutorial/Tutorial.html hba_human = ("MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQ" "VKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTL" "AAHLPAEFTPAVHASLDKFLASVSTVLTSKYR") hbb_human = ("MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAV" "MGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNV" "LVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH") hba_seq = gemmi.expand_protein_one_letter_string(hba_human) hbb_seq = gemmi.expand_protein_one_letter_string(hbb_human) id_score = gemmi.AlignmentScoring() id_score.match = 1 id_score.mismatch = 0 id_score.gapo = 0 id_score.gape = 0 result = gemmi.align_string_sequences(hba_seq, hbb_seq, [], id_score) # "80 different alignments with the score 72" self.assertEqual(result.score, 72) blosum62 = gemmi.prepare_blosum62_scoring() blosum62.gapo = -9 result = gemmi.align_string_sequences(hba_seq, hbb_seq, [], blosum62) # BioPython equivalent is: # pairwise2.align.globalds(seq1.seq, seq2.seq, blosum62, -10, -1) self.assertEqual(result.score, 290) def test_assembly(self): st = gemmi.read_structure(full_path('1pfe.cif.gz'), merge_chain_parts=False) model = st[0] self.assertEqual([ch.name for ch in model], ['A', 'B', 'A', 'B', 'A', 'B']) a_mass = sum(ch.calculate_mass() for ch in model if ch.name == 'A') b_mass = sum(ch.calculate_mass() for ch in model if ch.name == 'B') model_mass = model.calculate_mass() self.assertAlmostEqual(model_mass, a_mass + b_mass) self.assertEqual(len(st.assemblies), 1) asem = st.assemblies[0] bio = gemmi.make_assembly(asem, model, gemmi.HowToNameCopiedChain.Short) self.assertEqual([ch.name for ch in bio], ['A', 'B', 'C', 'D']) self.assertAlmostEqual(bio.calculate_mass(), 2 * model_mass) self.assertAlmostEqual(bio[0].calculate_mass(), a_mass) self.assertAlmostEqual(bio[1].calculate_mass(), b_mass) self.assertAlmostEqual(bio[2].calculate_mass(), a_mass) self.assertAlmostEqual(bio[3].calculate_mass(), b_mass) bio = gemmi.make_assembly(asem, model, gemmi.HowToNameCopiedChain.AddNumber) self.assertEqual([ch.name for ch in bio], ['A1', 'B1', 'A2', 'B2']) def test_assembly_naming(self): st = gemmi.read_structure(full_path('4oz7.pdb')) model = st[0] a1 = st.assemblies[1] bio = gemmi.make_assembly(a1, model, gemmi.HowToNameCopiedChain.AddNumber) self.assertEqual([ch.name for ch in bio], ['B1']) bio = gemmi.make_assembly(a1, model, gemmi.HowToNameCopiedChain.Dup) self.assertEqual([ch.name for ch in bio], ['B']) bio = gemmi.make_assembly(a1, model, gemmi.HowToNameCopiedChain.Short) self.assertEqual([ch.name for ch in bio], ['B']) if __name__ == '__main__': unittest.main()