#!/usr/bin/env python3
"""Generate a Rust ``phf`` lookup-table module from an aaindex record file."""

from argparse import ArgumentParser, FileType
import os
from typing import IO, Optional

# Per-table ``(mean, stdev)`` pairs, keyed by the upper-cased output file stem.
# They are emitted verbatim as the ``<NAME>_MEAN`` / ``<NAME>_STDEV`` constants.
MAGIC_VALUES: dict[str, tuple[float, float]] = {
    "ALPHA_HELIX": (1.0, 0.273970801363941),
    "BETA_SHEET": (1.0285, 0.35896065243979),
    "BETA_TURN": (0.9915, 0.357718814154358),
    "HYDROGENBOND": (0.85, 1.01365674663566),
    "HYDROPHOBICITY_NEU1": (0.057, 0.685318174281115),
    "HYDROPHOBICITY_NEU2": (-0.003, 0.211898560636924),
    "HYDROPHOBICITY_NEU3": (0.0945, 0.184457989797135),
    "ISOELECTRIC": (6.0265, 1.72439344408403),
    "POLAR_GRANTHAM": (8.325, 2.62237964452136),
    "POLAR_RADZICKA": (0.2135, 0.879040812476872),
    "POLAR_ZIMMERMAN": (13.594, 21.3592018577474),
    "VOLUME": (145.195, 40.0461543097462),
}


def main() -> None:
    """Parse the command line and write ``<name>.rs`` in the working directory."""
    parser = ArgumentParser()
    parser.add_argument(
        "data",
        type=FileType("r", encoding="utf-8"),
        help="aaindex file to parse",
    )
    args = parser.parse_args()

    stem, _ = os.path.splitext(os.path.basename(args.data.name))
    # The stem ends up inside Rust identifiers, so hyphens become underscores.
    # NOTE(review): the first three characters are dropped, presumably a fixed
    # prefix on the input file names -- confirm against the real inputs.
    stem = stem.replace("-", "_")[3:]

    # argparse's FileType opens the input but never closes it; do so here.
    with args.data, open(f"{stem}.rs", "w", encoding="utf-8") as out:
        run(args.data, out)


def run(data: IO, out: IO) -> None:
    """Translate the first ``I`` record of *data* into Rust source on *out*.

    The table name is the upper-cased base name of ``out.name`` without its
    extension, and must be a key of ``MAGIC_VALUES``.

    Nothing is written when the input contains no ``I`` line, i.e. when a
    line starting with ``//`` or the end of the input is reached first.

    Raises:
        ValueError: if the ``I`` line is not followed by two value rows, a
            row has fewer columns than the ``I`` line has pairs, a pair is
            not of the form ``X/Y``, or a value is not a valid float.
        KeyError: if the table name has no entry in ``MAGIC_VALUES``.
    """
    name, _ = os.path.splitext(os.path.basename(out.name))
    name = name.upper()

    aa_index = _parse_index(data.readlines())
    if aa_index is None:
        return
    if name not in MAGIC_VALUES:
        raise KeyError(f"no mean/stdev registered in MAGIC_VALUES for {name!r}")

    # Render completely before writing so a bad value cannot leave a
    # half-written Rust file behind.
    out.write(_render(name, aa_index))


def _parse_index(lines: list[str]) -> Optional[dict[str, str]]:
    """Return the residue -> raw value mapping of the first ``I`` record, or None."""
    for pos, line in enumerate(lines):
        if line.startswith("//"):
            return None
        if not line.startswith("I"):
            continue

        # Drop the leading "I" tag; the remaining tokens are "X/Y" pairs.
        pairs = line.split()[1:]
        value_rows = [row.split() for row in lines[pos + 1:pos + 3]]
        if len(value_rows) < 2:
            raise ValueError(
                "truncated record: the I line must be followed by two rows of values"
            )
        first_row, second_row = value_rows
        if len(first_row) < len(pairs) or len(second_row) < len(pairs):
            raise ValueError(
                f"expected at least {len(pairs)} values per row, "
                f"got {len(first_row)} and {len(second_row)}"
            )

        aa_index: dict[str, str] = {}
        for pair, first_value, second_value in zip(pairs, first_row, second_row):
            # The first residue of a pair reads from the first value row,
            # the second residue from the second row.
            first, second = pair.split("/")
            aa_index[first] = first_value
            aa_index[second] = second_value
        return aa_index

    # End of input without an I line: treated like a record that ended early.
    return None


def _render(name: str, aa_index: dict[str, str]) -> str:
    """Build the complete Rust module text for table *name*."""
    mean, stdev = MAGIC_VALUES[name]
    rust = [
        "// Autogenerated by aaindex2rust.py, do not change manually",
        f"// {name} amino acid featurisation",
        "",
        "use phf::phf_map;",
        "",
        "use crate::encodings::get_value;",
        "",
        # phf::Map has no default type parameters; the keys emitted below are
        # char literals and the values are float literals.
        f"static {name}_MAP: phf::Map<char, f64> = phf_map! {{",
    ]
    rust.extend(
        f"    '{aa}' => {float(raw_weight):0.2f},"
        for aa, raw_weight in aa_index.items()
    )
    rust.extend(
        [
            "};",
            "",
            f"const {name}_MEAN: f64 = {mean};",
            f"const {name}_STDEV: f64 = {stdev};",
            "",
            "pub fn get(c: char) -> f64 {",
            "    get_value(",
            f"        &{name}_MAP,",
            "        c,",
            f"        {name}_MEAN,",
            f"        {name}_STDEV,",
            "        true,",
            "    )",
            "}",
        ]
    )
    return "\n".join(rust) + "\n"


if __name__ == "__main__":
    main()