#!/usr/bin/perl -w # # This script checks that everything that exists is documented, # and vice versa. # # It does some pretty grotty ad-hoc parsing of both Rust sources, # and the markdown in the reference manual. # # At the bottom of the file, in __DATA__, is a table of expected # exceptions. # # # With --json it outputs information about what is found where, # as a JSON document. (The schema is not currently documented.) # (In this mode it doesn't perform most of the checks.) # # Implementation note: # # (I experimented with a more formal approach, including having the # `bizarre` version of d-d-macros collect the fixed strings that a keyword # was compared with during parsing. It was difficult to properly # categorise the keyword and the mechanism was confusing and invasive.) use strict; use JSON qw(to_json); our ($whole, $file); our $print_progress = 1; sub progress (@) { if ($print_progress) { print @_; } } sub open_file ($) { ($file) = @_; open F, '<', $file or die "$file: $!"; progress " reading $file\n"; } sub end_file () { F->error and die "$file: $!"; } sub read_file ($) { open_file $_[0]; local $/ = undef; $whole = // die "$file: $!"; end_file(); } our %grokked; our %ref_lines; sub grokked ($$@) { my ($info, $note, @out) = @_; progress " grokked $info [$note]\n @out\n\n"; foreach my $kw (@out) { $grokked{$kw} .= $note; } } sub grok_match ($$$$) { my ($file, $tag, $note, $unreach) = @_; read_file($file); my @out; my $any; while ($whole =~ m{ ( \ + ) // .* \#\# \ maint/check-keywords-documented \ $tag \ \#\# .* \n ( (?: \1 .* \n ) + ) }gmx) { $any++; my $arm = $2; $arm =~ s{ \=\> .* $unreach .* \n }{}x or die "$tag arm not => $unreach"; $arm !~ m{\=\>} or die "----- ARM -----\n$arm\n----- ARM -----\n". "multiple arms pasted together, parsing failed?"; push @out, $arm =~ m{ SD :: (\w+) }gmx; } die "// ## maint/check-keywords-documented $tag ## not found" unless $any; grokked $tag, $note, map { lc } @out; } sub grok_not_in_bool () { grok_match('macros/boolean.rs', 'NotInBool', 'e', qr{void::unreachable}); } sub grok_bool_only () { grok_match('macros/expand.rs', 'BoolOnly', 'b', qr{append_bool_only}); } sub grok_enum () { read_file('macros/syntax.rs'); $whole =~ m{ ^ pub \ enum \ SubstDetails\b .* \{ \n ( (?: \ {4} .* \n | \n )* ) ^ \} $ }mx or die "pub enum SubstDetails not found"; my $enum_body = $1; my @out = $enum_body =~ m{^ \ {4} (\w+) \b}gmx; grokked 'enum variants', 'v', map { lc } @out; } sub grok_cases () { open_file('macros/paste.rs'); my (@outk, @outh); while () { next unless m/^define_cases\! *\{$/..m/^\}$/; push @outk, m{\"(\w+)\"}g; push @outh, m{\bAs(\w+)\b}g; } grokked 'define_cases (kwd)', 'c', @outk; grokked 'define_cases (heck)', 'h', @outh; end_file(); } sub grok_dbg_allkw () { open_file('macros/dbg_allkw.rs'); my (@out_kw, @out_bool); while () { my $dollar_re; my $out; if (m/^\s* expand\!/x) { $dollar_re = qr{\$}; $out = \@out_kw; } elsif (m/^\s* (bool)\!/x) { $dollar_re = qr{}; $out = \@out_bool; } else { next; } my $name_re = qr{\#?\w+}; die "couldn't grok dbg_allkw macro invocation ($dollar_re) $_ ?" unless m/^\ * \w+\! \ * \{ \ * c, \ * $dollar_re ($name_re) \ * \}$/x || m/^\ * \w+\! \ * \{ \ * c, \ * $dollar_re \{ ($name_re) [^{}]* \} \ * \}$/x; next if $1 =~ m{^\#}; push @$out, $1; } grokked 'dbg_all_keywords (expand!)', 'd', @out_kw; grokked 'dbg_all_keywords (bool!)', 'D', @out_bool; end_file(); } sub grok_document () { open_file 'doc/reference.md'; my $in; my %out; my @case_cols; my $lno = 0; my $last_heading_lno; while () { $lno++; if (m{\#\# maint/check-keywords-documented (\w+) \#\#}) { $in = $1; next; } if (m{^\#\# }) { $in = undef; $last_heading_lno = $lno; next; } next unless defined $in; my $out = \@{ $out{$in} }; my $found = sub { my ($div_prefix_char, $lno, @l) = @_; @l = grep { $_ ne 'CASE_CHANGE' } @l; push @$out, @l; foreach my $kw (@l) { push @{ $ref_lines{"$div_prefix_char:$kw"} }, $lno; } }; if ($in eq 'cases') { next unless m{^\|}; my @cols = split m{\|}; if (!@case_cols) { @case_cols = @cols; next; } next if m{^\|-----}; foreach my $i (0..$#cols) { my @l = $cols[$i] =~ m{\`(\w+)\`}g; if ($case_cols[$i] =~ m{\`heck\`}) { push @{ $out{heck} }, @l; } elsif ($case_cols[$i] =~ m{\`CASE_CHANGE\`}) { $found->('x', $last_heading_lno, @l); } } } elsif ($in eq 'conditions') { next unless m{^\#\#\# }; $found->('c', $lno, m{\`(\w+)[^\`]*\`}g); } elsif ($in eq 'expansions') { next unless m{^\#\#\# }; # remove inner { }, repeatedly while (s{ ( \{ [^{}]* ) \{ [^{}]* \} ( .* \} ) }{$1 $2}x) { } $found->('x', $lno, m{\`\$(\w+)\`}g); $found->('x', $lno, m{\`\$\{(\w+)[^\}]+\}\`}g); } else { die "bad maint/check-keywords-documented $in !"; } } end_file(); my $grokked = sub { my ($note, $in) = @_; grokked $in, $note, @{ $out{$in} }; }; $grokked->('E', 'expansions'); $grokked->('C', 'cases'); $grokked->('H', 'heck'); $grokked->('B', 'conditions'); } our @discrepant; our %note_descriptions; sub correspondences () { my %expected; while () { s{^\s+}{}; s{\s+$}{}; next if m{^\#}; next unless m{\S}; if (m{^\: \s+ (\S) \s+ (\S.*)}x) { $note_descriptions{$1} = $2; next; } my ($exp, @l) = split /\s+/; die unless @l; foreach my $kw (@l) { die "duplicate exception for $kw " if $expected{$kw}; $expected{$kw} = $exp; } } foreach my $kw (sort keys %grokked) { $_ = $grokked{$kw}; # Expected combinations, which are not troubling: next if m{^vbDB$}; # ordinary bool-only keyword next if m{^vedE$}; # ordinary expansion-only keyword next if m{^vdDEB$}; # ordinary keyword, both bool and expansion next if m{^hH$}; # heck keyword (only) next if m{^cCC?$}; # case changing (possibly also example) next if m{^chCH$}; # case changing and heck keyword # This one is unusual, is it in the expected exception table? my $exp = $expected{$kw}; if ($exp) { delete $expected{$kw}; next if $_ eq $exp; } push @discrepant, { Kw => $kw, Got => $_, Exp => $exp }; } foreach my $kw (keys %expected) { push @discrepant, { Kw => $kw, Exp => $expected{$kw} }; } } sub print_divider () { print "------------------------------------------------------------\n"; } sub print_discrepancies() { print "discrepancies\n"; foreach my $disc (@discrepant) { printf " keyword %-20s found %s", $disc->{Kw}, $disc->{Got}; if ($disc->{Exp}) { printf " expected %s", $disc->{Exp}; } print "\n"; } print_divider(); flush STDOUT; print STDERR <{Kw}; my $print_notes = sub { my ($heading, $notes, $if_none) = @_; if (!$notes) { printf STDERR "%s\n", $if_none; return; } printf STDERR "%s\n", $heading; if ($notes) { foreach my $note (split //, $notes) { printf STDERR " %s\n", $note_descriptions{$note} // "UNDESCRIBED `$note`"; } foreach my $note (keys %note_descriptions) { next if $notes =~ m/$note/; # elide "not:" for case changing stuff if there's no # hint that this is going to be relevant next if $note =~ m/[hc]/i and $notes !~ m/[hc]/i; printf STDERR " not: %s\n", $note_descriptions{$note}; } } else { printf STDERR " None.\n"; } }; $print_notes->( "Information and properties found in docs and source code:", $disc->{Got}, 'Keyword does not exist.' ); $print_notes->( "Entry in exceptions table -- expected to find:", $disc->{Exp}, "No entry in exceptions table.\n", ); printf STDERR "To suppress, add exceptions with note chars `%s`.\n", $disc->{Got} if $disc->{Got}; printf STDERR "|\n"; } printf STDERR < \%grokked, ref_lines => \%ref_lines, }) or die $!; } our $mode = 'check'; while (@ARGV) { $_ = shift @ARGV; if (m/^--json$/) { $mode = 'json'; } else { die "unsupported option $_"; } } my $mode_fn = ${*::}{"mode_$mode"} or die; $mode_fn->(); __DATA__ # Expected exceptions table # Each line is [ ...] # (The lines starting `:` are used for text in the discrepancy reports.) # # Note characters have the following meanings: # Found in source code: : v enum SubstDetails variant, in syntax.rs : e expansion-only variant, ie NotInBool, according to boolean.rs : c case change keyword, according to from define_cases! in paste.rs : h heck name for a case change, according to define_cases! in paste.rs : b BoolOnly variant, according to expand.rs # Found in reference documentation: : E Documented as expansion keyword : C Documented as case change keyword : H Documented as heck name of a case change : B Documented as condition (BoolOnly keyword) # Found in dbg_all_keywords: : d $dbg_all_keywords dumps the expansion : D $dbg_all_keywords dumps as a boolean # we use placeholders in the docs, and handle case changing # with its own combined SubstDetails variant ve changecase # These are combined SD variants dDEB fvis fdefvis tvis v vis dDEB fmeta vmeta tmeta v xmeta # Doc has both ${for fields ...} and ${for variants ...}, so E is repeated veEE for # User-defined keywords v userdefined