#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Sync examples between the project's "examples" directory and
# documentation targets.
require 'tomlrb'
require_relative('lib/config-hash')
require_relative('lib/parse')

# Find Cargo.toml
lastdir = nil
dir = Dir.pwd
while lastdir != dir
  if File.exist?(path = File.join(dir, 'Cargo.toml'))
    Object.const_set(:CARGO_TOML, path)
    break
  else
    lastdir = dir
    dir = File.dirname(dir)
  end
end

if ! Object.const_defined?(:CARGO_TOML)
  raise 'Failed to find Cargo.toml'
end

CRATE_NAME = Tomlrb.parse(File.read(CARGO_TOML), symbolize_keys: true)[:package][:name]

PREFIXES = {
  '.rs' => %r{[ \t]*//[/!] ?},
  '.md' => %r{}
}
SUPPORTED_FILE_REGEXP = /\.(?:rs|md)\z/
IGNORE_PATTERNS = /\A(?:\.|\.\.)\z|~\z/
DEFAULT_CODE_CHUNK_ATTRS = {
  strip: ['ignore-blocks', 'inner-comments', 'license-header',
          'leading-blank-lines', 'trailing-blank-lines',
          'contents-after-ellipsis-comments']
}
LICENSE_HEADER_TEXT = File.read(File.join(__dir__, 'data', 'license-header.rs'))

# Given the `block` submatch from a BLOCK_REGEXP match, return the part inside
# the outermost pair of braces with extraneous indentation removed.
def normalize_block_contents(s)
  # Remove the enclosing braces and strip the common-prefix indent (on
  # non-blank lines) after expanding tabs to spaces.
  lines = s.lines().map { |l| l.gsub("\t", ' ' * 8) }
  strip_length = lines.select { |l| l =~ /[^\n ]/ }.map { |l| l =~ /[^ ]/ }.min || 0
  strip_regexp = Regexp.new('\A[ ]{0,%u}' % strip_length)
  lines.map { |l| l.sub(strip_regexp, '').chop }.join("\n")
end

# Fetch the contents of a brace-enclosed block (if the input truly is
# brace-enclosed).
#
# Preserves non-brace-enclosed strings as they are; strips all newlines
# immediately following the opening brace.
def block_inner(s)
  if s.kind_of?(MatchData)
    s = s[:tcb_contents] || s[:block]
  end
  st = s[0] == '{' ? 1 : 0
  st += 1 while s[st] =~ /\n/
  en = s[-1] == '}' ? -2 : -1
  s[st..en]
end

# Remove everything that follows an ellipsis comment inside a block.
def strip_block_contents_after_ellipsis_comments(s)
  out = s.dup
  ofs = 0
  while ! (m = Rust::R::ELLIPSIS_COMMENT.match(out, ofs)).nil?
    strip_begin = m.end(0)
    ofs = strip_begin

    # Find the containing block
    block_start = out.rindex('{', m.begin(0))
    next if block_start.nil?
    next if (m = Rust::R::BLOCK.match(out, block_start)).nil?

    # Preserve whitespace before the closing brace
    block_inner_end = m.begin(0) + /\s*}\z/m.match(m[0]).begin(0) + 1
    out.slice!(strip_begin...block_inner_end)
  end
  out
end

def strip!(output, what)
  for item in what
    case item
    when 'leading-blank-lines'
      output.sub!(/\A\n*/, '')
    when 'trailing-blank-lines'
      output.sub!(/\n*\z/, '')
    when 'ignore-blocks', 'ignored-blocks'
      output.gsub!(Rust::R::tagged_block(/\s*ignore\s*/), '')
    when 'license-header'
      output.sub!(LICENSE_HEADER_TEXT, '')
    when 'inner-comments'
      output.gsub!(Rust::R::INNER_COMMENT, '')
    when 'main'
      output.gsub!(Rust::R::fn('main'), '')
    when 'contents-after-ellipsis-comments'
      output.replace(strip_block_contents_after_ellipsis_comments(output))
    else
      raise "Unknown strip item: #{item}"
    end
  end
end

# Retrieve a code sample from a file, transforming it appropriately for an
# idiomatic Rust code sample.  If `id` is non-`nil`, searches for and includes
# only the named block (as tagged with a comment "//` id=#{id}" or
# "/*` id=#{id}*/").
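#
# A minimal illustrative call (the path and id here are hypothetical; the
# attribute names mirror the fenced-code-block attributes handled below):
#
#   fetch_external_code(file: 'examples/demo.rs', id: 'intro',
#                       strip: DEFAULT_CODE_CHUNK_ATTRS[:strip])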
def fetch_external_code(attrs)
  input = File.read(attrs[:file])
  output = input.dup
  tokens = Rust::Parser.parse_string(input)
  extern_crates = tokens.select { |tok| tok.type == :extern_crate }
  can_open_code_main = attrs.key?(:strip) && ! attrs[:strip].include?('main')

  # If there's only a single `extern crate` declaration, and it's the local
  # project and has not been aliased, expunge it.
  if extern_crates.length == 1 &&
     extern_crates.first.value.effective_name == CRATE_NAME
    output.sub!(/#{Regexp.escape(extern_crates.first.string)}\s*\n/, '')
    can_open_code_main = true unless attrs.fetch(:preserve, []).include?('main')
  end

  main_fn = Rust::R.fn('main').match(input)

  # Strip various things
  strip!(output, attrs[:strip] || [])

  if attrs.key?(:id)
    id_tag = Rust::R::id_tag(attrs[:id])
    # If a tagged block was requested, fetch it
    if ! (m = Rust::R::tagged_block(id_tag).match(input)).nil?
      contents = normalize_block_contents(block_inner(m))
      output.sub(main_fn[0], contents)
    else
      $stderr.puts("No such tagged block \"#{attrs[:id]}\" in #{attrs[:file]}")
      return nil
    end
  elsif can_open_code_main
    output.sub(main_fn[0], normalize_block_contents(block_inner(main_fn[:block])))
  else
    output
  end
end

class TextChunk < String
end

class CodeChunk
  # Contents of the code chunk -- the actual code, with prefix comments and
  # opening/closing lines removed.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :content

  # Attributes specified on the code chunk's opening line.  Per the CommonMark
  # spec, the first word after the opening fence has been assigned to the
  # `:language` attribute.
  #
  # @!attribute [r]
  # @return [Hash]
  attr_reader :attributes

  # Common prefix found on all lines of the original code block.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :prefix

  # Raw code block, including all prefixes, opening and closing lines, etc.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :orig

  # Opening line of the raw code block, including prefix, code fence, and any
  # attributes.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :open_line

  # Closing line of the raw code block, including prefix and code fence.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :close_line

  def initialize(content:, file:, line:, open_line:, close_line:, orig:,
                 prefix: '', attributes: {})
    @file = file
    @line = line
    @content = content
    @attributes = attributes
    @open_line = open_line
    @close_line = close_line
    @prefix = prefix
    @orig = orig
  end

  def attributes_to_s
    '%s %s' % [@attributes[:language],
               @attributes.reject { |k, _| k == :language }.
                 map { |k, v| '%s=%s' % [k.to_s, v.inspect] }.join(' ')]
  end

  # Fetch the code to be used for this block.  If the chunk's attributes name
  # an external source file, load and transform the code from that file;
  # otherwise fall back to the chunk's literal contents.
  def fetch_code()
    if @attributes.key?(:file)
      if (out = fetch_external_code(@attributes)).nil?
        $stderr.puts("Failed to load external code for code chunk at #{self.location_string}")
        @content
      else
        out
      end
    else
      if @attributes.key?(:id)
        $stderr.puts("WARNING: no source file specified for fenced code block \"#{@attributes[:id]}\" at #{@file}:#{@line}")
      end
      @content
    end
  end

  def to_s
    if ! @attributes.key?(:file) && ! @attributes.key?(:id)
      @orig
    else
      code = fetch_code()
      if code.nil?
        @orig
      else
        "#{@open_line}#{code.strip}\n#{@close_line}".
          gsub(/^(?!\s*#{Regexp.escape(@prefix.strip)})/, @prefix).
          gsub(/[ ]+$/, '')
      end
    end
  end

  def location_string
    '%s:%u' % [@file, @line]
  end

  def inspect
    '#<%s:%#x %s:%u %s>' % [self.class, self.object_id, @file, @line,
                            @attributes.map { |k, v| '%s=%s' % [k.to_s, v.inspect] }.join(' ')]
  end
end
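# For reference, a fenced code block that the chunkers below operate on looks
# roughly like the following (illustrative only; the path is hypothetical).
# The first word of the info string becomes the :language attribute and the
# remainder is parsed as TOML:
#
#   ```rust file="examples/demo.rs"
#   fn main() {
#       // replaced from examples/demo.rs on the next sync
#   }
#   ```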
def chunk(filename, opts = {})
  input = File.read(filename)
  opts = opts.merge(chunk_opts_for(filename))
  chunk_string(input, opts)
end

# Divide the contents of a file containing Markdown syntax into a series of
# alternating TextChunk and CodeChunk instances.
#
# @param [String] input Markdown input string.
def chunk_string(input, opts = {})
  chunks = []
  line = 0
  prefix = opts[:prefix] || %r{}
  open_line_regexp = /^(?<prefix>#{prefix})(?<indent>[ ]{0,3})(?<fence>`{3,}|~{3,})\s*(?<info_string>[^\n]*)\n/
  while ! input.empty?
    pre, open_line, open_post = input.partition(open_line_regexp)
    unless pre.empty?
      chunks << TextChunk.new(pre)
      line += pre.count("\n")
    end
    line += open_line.count("\n")

    if open_line.empty?
      post = open_post
    else
      open_match = Regexp.last_match
      indent = open_match[:indent]
      fence = open_match[:fence]
      info_string = open_match[:info_string].strip
      open_prefix = open_match[:prefix]

      # We've got the open-fence of the code block; now grab the body and
      # close-fence.
      #
      # The closing fence uses the same character as the opening fence, and
      # must be at least the same number of characters.  It may not be
      # followed by anything but spaces on the same line, but may be indented
      # up to three spaces regardless of the indent on the opening fence.
      close_line_regexp = /(?<prefix>#{prefix})[ ]{0,3}#{fence}#{fence[0]}*\s*(?=\n|$)/
      body, close_line, post = open_post.partition(close_line_regexp)
      close_match = Regexp.last_match
      code_line = line
      line += body.count("\n") + close_line.count("\n")

      if close_match.nil?
        raise 'Unclosed code block at %s:%u' % [opts[:filename], code_line]
      end

      orig = open_line + body + close_line
      close_prefix = close_match[:prefix]

      attrs_ary = info_string.split($;, 2)
      attrs = opts.fetch(:attrs) { ConfigHash.new(defaults: DEFAULT_CODE_CHUNK_ATTRS) }.dup
      attrs[:language] = attrs_ary.shift
      if ! attrs_ary.empty?
        intern_keys = lambda { |hsh|
          hsh.kind_of?(Hash) ? hsh.collect { |k, v| [k.intern, intern_keys[v]] }.to_h : hsh
        }
        begin
          attrs.merge!(intern_keys[Tomlrb.parse(attrs_ary.shift)])
        rescue Tomlrb::ParseError => err
          $stderr.puts('Error while parsing info string for code block at %s:%u: %s' %
                       [opts[:filename], code_line, err.message])
          raise err
        end
      end

      unindent_regexp = Regexp.new('[ ]{0,%u}' % indent.length)
      body.gsub!(/^#{prefix}#{unindent_regexp}/, '')

      # If the line prefixes differ, we've probably parsed something
      # incorrectly.
      if open_prefix != close_prefix
        raise 'Prefix mismatch: open fence had prefix %s, close fence had prefix %s' %
              [open_prefix.inspect, close_prefix.inspect]
      end

      chunks << CodeChunk.new(content: body,
                              file: opts[:filename],
                              line: code_line,
                              open_line: open_line,
                              close_line: close_line,
                              prefix: open_prefix,
                              attributes: attrs,
                              orig: orig)
    end

    input = post
  end

  Chunks.new(chunks)
end
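# Rough sketch of driving the chunkers by hand (the README.md path is
# hypothetical):
#
#   chunks = chunk('README.md')   # => Chunks of TextChunk / CodeChunk
#   chunks.select { |c| c.kind_of?(CodeChunk) }.each { |c| puts c.location_string }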
class Chunks < Array
  def to_s
    self.collect { |c| c.to_s }.join('')
  end
end

def chunk_opts_for(fname)
  {filename: fname, prefix: PREFIXES[File.extname(fname)]}
end

def collect_code_chunks(fname, opts, out = [])
  if File.directory?(fname)
    for entry in Dir.foreach(fname).reject { |e| e !~ SUPPORTED_FILE_REGEXP }
      collect_code_chunks(File.join(fname, entry), opts, out)
    end
  elsif File.file?(fname)
    out.concat(chunk(fname, opts).select { |c| c.kind_of?(CodeChunk) })
  end
  out
end

class MissingAttributeError < ::RuntimeError
  def initialize(attr, chunks)
    @attr = attr
    @chunks = chunks
    super('Missing attribute `%s` on %u code block%s' %
          [@attr.to_s, @chunks.length, @chunks.length == 1 ? '' : 's'])
  end

  def to_s
    if @chunks.length == 1
      'Missing attribute `%s` on code block at %s' % [@attr.to_s, @chunks.first.location_string]
    else
      "Missing attribute `%s` on %u code blocks:\n  %s" %
        [@attr.to_s, @chunks.length,
         @chunks.collect { |c| c.location_string }.join("\n  ")]
    end
  end
end

# Determine the number of elements in the shared prefix of a set of arrays.
#
# @param [Array<Array>] toks
def common_prefix_len(toks)
  n = toks.map { |t| t.length }.min()
  is_prefix = lambda { |_n| toks[1..-1].all? { |t| t[0..._n] == toks.first[0..._n] } }
  while n > 1 && ! is_prefix[n]
    n -= 1
  end
  n
end

# Template for a newly-created example source file: the first %s receives the
# shared preamble (`extern crate`/`use` lines, etc.), the second the body of
# `fn main`.
SOURCE_FILE_TEMPLATE = <<EOF
%s
fn main() {
%s
}
EOF

# Combine the code chunks that reference a single (missing) example source
# file into the contents of a new file.
def combine_chunks_for_file(chunks)
  chunks_ary = chunks.collect { |c| Rust::Parser.parse_string(c.content) }
  have_ids = chunks.all? { |c| c.attributes.key?(:id) }

  pre, main =
    if chunks.length > 1
      chunks_ary = chunks_ary.map { |c| c.map { |t| t.string } }
      prefix_len = common_prefix_len(chunks_ary)
      prefix = chunks_ary.first[0...prefix_len]
      chunks_ary.each { |t| t.shift(prefix_len) }

      pre = prefix.join('')
      main = if have_ids
               (0...chunks.length).collect do |i|
                 id = chunks[i].attributes[:id]
                 "//` id=#{id} { \n" + chunks_ary[i].join('') + "//` }\n"
               end
             else
               chunks_ary.collect { |c| c.join('') }
             end.join("\n").gsub(/^/, '    ')
      [pre, main]
    else
      c = chunks_ary.first
      raise 'FIXME'
    end

  SOURCE_FILE_TEMPLATE % [pre, main]
end

def query?(prompt)
  $stderr.write("#{prompt} ")
  r = $stdin.gets() =~ /\A[yY]\n\z/
  $stderr.puts("\n")
  r
end

def transform!(fname, opts)
  chunks_by_file = collect_code_chunks(fname, opts).
    reject { |c| c.attributes[:file].nil? }.
    group_by { |c| c.attributes[:file] }

  # Offer to write out missing example files.
  for src, chunks in chunks_by_file.reject { |f, _| File.exist?(f) }
    if query?("External file \"#{src}\" is missing. Create it?")
      output = combine_chunks_for_file(chunks)
      File.open(src, 'w') do |io|
        io.write(output)
      end
    end
  end

  if File.directory?(fname)
    for entry in Dir.foreach(fname).reject { |e| e =~ IGNORE_PATTERNS }
      transform!(File.join(fname, entry), opts)
    end
  elsif File.file?(fname) && fname =~ SUPPORTED_FILE_REGEXP
    chunks = chunk(fname, opts)
    if opts[:output].nil?
      fnew = fname + '.new'
      File.open(fnew, 'w') { |io| io.write(chunks.to_s) }
      diff = IO.popen(['diff', '-u', '--color=always', fname, fnew]) { |io| io.read() }
      if diff.empty?
        if opts[:verbose]
          $stderr.puts('%-32s -> no changes' % fname)
        end
        File.unlink(fnew)
      else
        $stderr.write(diff)
        if query?('Apply these changes?')
          File.rename(fnew, fname)
        else
          File.unlink(fnew)
        end
      end
    else
      opts[:output].write(chunks.to_s)
    end
  end
end
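# When executed directly (rather than being require'd), the script acts as a
# small CLI.  Illustrative invocations, with SCRIPT standing in for this file
# and the paths purely hypothetical:
#
#   SCRIPT README.md                        # offer to sync fenced blocks in README.md
#   SCRIPT -E -o out.rs examples/demo.rs    # extract a transformed sample to out.rs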
if caller[0].nil?
  require 'optparse'

  opts = {
    output: nil,
    verbose: false,
    attrs: ConfigHash.new(defaults: DEFAULT_CODE_CHUNK_ATTRS)
  }
  mode = :substitute

  OptionParser.new do |o|
    o.banner = 'Usage: %s [OPTION]... TARGET...' % $0
    o.on('-E', '--extract', 'Extract code samples from each TARGET') { mode = :extract }
    o.on('-O', '--option KEY=VALUE',
         'Pre-load an attribute\'s default for fenced code blocks') do |s|
      k, v = s.split('=', 2)
      opts[:attrs][k.intern] = v
    end
    o.on('-o', '--output=FILE', 'Write all output to FILE') do |out|
      opts[:output] =
        if out == '-'
          $stdout
        else
          File.open(out, 'w')
        end
    end
    o.on('-v', '--verbose', 'Be more chatty when processing files.') { opts[:verbose] = true }
    o.on('-h', '--help', 'Show this help') { $stdout.puts(o); exit(0) }
  end.parse!

  if mode == :substitute
    ARGV.each do |filename|
      transform!(filename, opts)
    end
  elsif mode == :extract
    ARGV.each do |filename|
      (opts[:output] || $stdout).write(fetch_external_code(opts[:attrs].merge(file: filename)))
    end
  else
    raise 'Unknown mode: %s' % mode.inspect
  end
end