use strict; # Script to generate source code for a classifier based on a # config-file. Useful for having default classifiers statically in # memory. It generates a constant classifier that should be placed in # its own source-file. # This interpreter has stricter requirements on the input format than # the one in the C-code, since it is only for internal use. It also # doesn't do any syntax checks, so it will only work on valid config # files. # The script should be run from the same directory as it is stored, # because it depends on the subdir 'rsa/' to gengerate reference SASA # values for RSA output. The file classifier_protor.c in the # source-directory was generated using the command # # perl config2c.pl protor ../share/protor.config # # where 'protor' will be the variable name prefix and the name used in # output generated from this classifier. It is also referred to in # freesasa.h. # Ideally RSA values should be stored in the configuration file, in a # separate section. This will be added in the future. my %types; my %atoms; my %residues; my %rsa; my $name; my $n_classes = 0; my $n_residues = 0; my $atom_flag = 0; my $type_flag = 0; (scalar @ARGV == 2) or die "usage : $0 \n"; my $prefix = shift @ARGV; my $config_file = shift @ARGV; open(my $input, "<$config_file") or die "Can't open $config_file. $!"; while (<$input>) { next if (/^#/); chomp; $_ =~ s/^(.*)#.*/$1/; # strip comments next if (/^\s*$/); if (/^types:/) { $type_flag = 1; $atom_flag = 0; next; } if (/^atoms:/) { $type_flag = 0; $atom_flag = 1; next; } if (/^name:/) { $_ =~ s/name:\s+(\S+)/$1/; $name = $_; next; } if ($type_flag) { my ($name,$radius,$class) = split /\s+/, $_; my $class_code; $types{$name}{radius} = $radius; if ($class =~ /apolar/i) {$class_code = 'FREESASA_ATOM_APOLAR';} elsif ($class =~ /polar/i) {$class_code = 'FREESASA_ATOM_POLAR';} else {die "Only classes 'polar' and 'apolar' allowed";} $types{$name}{class} = $class_code; } if ($atom_flag) { my ($res,$atom,$type) = split /\s+/, $_; $atoms{$res}{$atom} = $type; if (! exists $residues{$res}) { $residues{$res} = $n_residues; ++$n_residues; } } } close($input); # generate RSA values my @pdb = `ls rsa/*.pdb`; foreach my $p (@pdb) { chomp $p; my $name = substr($p,4,3); my @rsa_atoms = `cat $p`; my (@pol_atoms, @apol_atoms); foreach (@rsa_atoms) { next if (! /^ATOM/); next if (! (substr($_,25,1) eq '2')); next if (/H\s*$/); #skip hydrogen my $atom = substr($_,12,4); next if (substr($_,1,1) eq "H"); my $c; $atom =~ s/\s//g; if (exists $atoms{$name}{$atom}) { $c = $types{$atoms{$name}{$atom}}{class}; } elsif (exists $atoms{'ANY'}{$atom}) { $c = $types{$atoms{'ANY'}{$atom}}{class}; } else { die "Atom $name $atom not in classifier"; } push(@apol_atoms, $atom) if ($c =~ /FREESASA_ATOM_APOLAR/); push(@pol_atoms, $atom) if ($c =~ /FREESASA_ATOM_POLAR/); } my $select_total = '--select="res2, resi 2"'; my $select_bb = '--select="bb, resi 2 and name c+n+o+ca"'; my $select_sc = '--select="sc, resi 2 and not name c+n+o+ca"'; my ($select_apol, $select_pol); if (scalar @apol_atoms > 0) { $select_apol = '--select="apol, resi 2 and name ' . join('+',@apol_atoms) . '"'; } if (scalar @pol_atoms > 0) { $select_pol = '--select="pol, resi 2 and name ' . join('+',@pol_atoms) . '"'; } my @data = `../src/freesasa $p -c $config_file -n 1000 $select_total $select_bb $select_sc $select_apol $select_pol`; my %subarea; $subarea{pol} = $subarea{apol} = 0; $subarea{name} = $name; foreach (@data) { if (/^res2 :\ +(\d+.\d+)/) { $subarea{total} = $1; } if (/^bb :\ +(\d+.\d+)/) { $subarea{bb} = $1; } if (/^sc :\ +(\d+.\d+)/) { $subarea{sc} = $1; } if (/^pol :\ +(\d+.\d+)/) { $subarea{pol} = $1; } if (/^apol :\ +(\d+.\d+)/) { $subarea{apol} = $1; } } $rsa{$name} = "\{\"$name\", $subarea{total}, $subarea{bb}, ". "$subarea{sc}, $subarea{pol}, $subarea{apol}, 0\}"; } my @res_array = sort keys %residues; print "#include \"classifier.h\"\n\n"; print "/* Autogenerated code from the script config2c.pl */\n\n"; print "static const char *$prefix\_residue_name[] = {"; print "\"$_\", "foreach (@res_array); print "};\n"; foreach my $res (@res_array) { my @atom_names = keys %{$atoms{$res}}; print "static const char *$prefix\_$res\_atom_name[] = {"; print "\"$_\", " foreach (@atom_names); print "};\n"; print "static double $prefix\_$res\_atom_radius[] = {"; print $types{$atoms{$res}{$_}}{radius},", " foreach (@atom_names); print "};\n"; print "static int $prefix\_$res\_atom_class[] = {"; print $types{$atoms{$res}{$_}}{class},", " foreach (@atom_names); print "};\n"; print "static struct classifier_residue $prefix\_$res\_cfg = {\n"; print " ", scalar keys %{$atoms{$res}}, ",\n"; print " \"$res\",\n"; print " (char**) $prefix\_$res\_atom_name,\n"; print " (double*) $prefix\_$res\_atom_radius,\n"; print " (freesasa_atom_class*) $prefix\_$res\_atom_class,\n"; if (exists $rsa{$res}) { print " $rsa{$res},"; } else { print " {NULL, 0, 0, 0, 0, 0},"; } print"\n};\n\n" } print "static struct classifier_residue *$prefix\_residue_cfg[] = {\n "; foreach my $res (@res_array) { print "&$prefix\_$res\_cfg, "; } print "};\n\n"; print "const freesasa_classifier freesasa_$prefix\_classifier = {\n"; print " $n_residues,"; print " (char**) $prefix\_residue_name,\n"; print " \"$name\",\n"; print " (struct classifier_residue **) $prefix\_residue_cfg,\n"; print "};\n\n";