#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Sync examples between the project's "examples" directory and
# documentation targets.
require 'tomlrb'
require_relative('lib/config-hash')
require_relative('lib/parse')

# Find Cargo.toml
lastdir = nil
dir = Dir.pwd
while lastdir != dir
  if File.exist?(path = File.join(dir, 'Cargo.toml'))
    Object.const_set(:CARGO_TOML, path)
    break
  else
    lastdir = dir
    dir = File.dirname(dir)
  end
end

if ! Object.const_defined?(:CARGO_TOML)
  raise 'Failed to find Cargo.toml'
end

CRATE_NAME = Tomlrb.parse(File.read(CARGO_TOML), symbolize_keys: true)[:package][:name]

PREFIXES = {
  '.rs' => %r{[ \t]*//[/!] ?},
  '.md' => %r{}
}
SUPPORTED_FILE_REGEXP = /\.(?:rs|md)\z/
IGNORE_PATTERNS = /\A(?:\.|\.\.)\z|~\z/
DEFAULT_CODE_CHUNK_ATTRS = {
  strip: ['ignore-blocks', 'inner-comments', 'license-header',
          'leading-blank-lines', 'trailing-blank-lines',
          'contents-after-ellipsis-comments']
}
LICENSE_HEADER_TEXT = File.read(File.join(__dir__, 'data', 'license-header.rs'))

# Given the `block` submatch from a BLOCK_REGEXP match, return the part inside
# the outermost pair of braces with extraneous indentation removed.
def normalize_block_contents(s)
  # Remove the enclosing braces and strip the common-prefix indent (on
  # non-blank lines) after expanding tabs to spaces.
  lines = s.lines().map { |l| l.gsub("\t", ' ' * 8) }
  strip_length = lines.select { |l| l =~ /[^\n ]/ }.map { |l| l =~ /[^ ]/ }.min || 0
  strip_regexp = Regexp.new('\A[ ]{0,%u}' % strip_length)
  lines.map { |l| l.sub(strip_regexp, '').chop }.join("\n")
end

# Fetch the contents of a brace-enclosed block (if the input truly is
# brace-enclosed).
#
# Preserves non-brace-enclosed strings as they are; strips all newlines
# immediately following the opening brace.
def block_inner(s)
  if s.kind_of?(MatchData)
    s = s[:tcb_contents] || s[:block]
  end
  st = s[0] == '{' ? 1 : 0
  st += 1 while s[st] =~ /\n/
  en = s[-1] == '}' ? -2 : -1
  s[st..en]
end

# Remove everything that follows an ellipsis comment inside a block.
def strip_block_contents_after_ellipsis_comments(s)
  out = s.dup
  ofs = 0
  while ! (m = Rust::R::ELLIPSIS_COMMENT.match(out, ofs)).nil?
    strip_begin = m.end(0)
    ofs = strip_begin

    # Find the containing block
    block_start = out.rindex('{', m.begin(0))
    next if block_start.nil?
    next if (m = Rust::R::BLOCK.match(out, block_start)).nil?

    # Preserve whitespace before the closing brace
    block_inner_end = m.begin(0) + /\s*}\z/m.match(m[0]).begin(0) + 1
    out.slice!(strip_begin...block_inner_end)
  end
  out
end

def strip!(output, what)
  for item in what
    case item
    when 'leading-blank-lines'
      output.sub!(/\A\n*/, '')
    when 'trailing-blank-lines'
      output.sub!(/\n*\z/, '')
    when 'ignore-blocks', 'ignored-blocks'
      output.gsub!(Rust::R::tagged_block(/\s*ignore\s*/), '')
    when 'license-header'
      output.sub!(LICENSE_HEADER_TEXT, '')
    when 'inner-comments'
      output.gsub!(Rust::R::INNER_COMMENT, '')
    when 'main'
      output.gsub!(Rust::R::fn('main'), '')
    when 'contents-after-ellipsis-comments'
      output.replace(strip_block_contents_after_ellipsis_comments(output))
    else
      raise "Unknown strip item: #{item}"
    end
  end
end

# Retrieve a code sample from a file, transforming it appropriately for an
# idiomatic Rust code sample.  If `id` is non-`nil`, searches for and includes
# only the named block (as tagged with a comment "//` id=#{id}" or
# "/*` id=#{id}*/").
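#
# A minimal illustrative call (the path and id here are hypothetical; the
# attribute names mirror the fenced-code-block attributes handled below):
#
#   fetch_external_code(file: 'examples/demo.rs', id: 'intro',
#                       strip: DEFAULT_CODE_CHUNK_ATTRS[:strip])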
def fetch_external_code(attrs)
  input = File.read(attrs[:file])
  output = input.dup
  tokens = Rust::Parser.parse_string(input)
  extern_crates = tokens.select { |tok| tok.type == :extern_crate }
  can_open_code_main = attrs.key?(:strip) && ! attrs[:strip].include?('main')

  # If there's only a single `extern crate` declaration, and it's the local
  # project and has not been aliased, expunge it.
  if extern_crates.length == 1 &&
     extern_crates.first.value.effective_name == CRATE_NAME
    output.sub!(/#{Regexp.escape(extern_crates.first.string)}\s*\n/, '')
    can_open_code_main = true unless attrs.fetch(:preserve, []).include?('main')
  end

  main_fn = Rust::R.fn('main').match(input)

  # Strip various things
  strip!(output, attrs[:strip] || [])

  if attrs.key?(:id)
    id_tag = Rust::R::id_tag(attrs[:id])
    # If a tagged block was requested, fetch it
    if ! (m = Rust::R::tagged_block(id_tag).match(input)).nil?
      contents = normalize_block_contents(block_inner(m))
      output.sub(main_fn[0], contents)
    else
      $stderr.puts("No such tagged block \"#{attrs[:id]}\" in #{attrs[:file]}")
      return nil
    end
  elsif can_open_code_main
    output.sub(main_fn[0], normalize_block_contents(block_inner(main_fn[:block])))
  else
    output
  end
end

class TextChunk < String
end

class CodeChunk
  # Contents of the code chunk -- the actual code, with prefix comments and
  # opening/closing lines removed.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :content

  # Attributes specified on the code chunk's opening line.  Per the CommonMark
  # spec, the first word after the opening fence has been assigned to the
  # `:language` attribute.
  #
  # @!attribute [r]
  # @return [Hash]
  attr_reader :attributes

  # Common prefix found on all lines of the original code block.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :prefix

  # Raw code block, including all prefixes, opening and closing lines, etc.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :orig

  # Opening line of the raw code block, including prefix, code fence, and any
  # attributes.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :open_line

  # Closing line of the raw code block, including prefix and code fence.
  #
  # @!attribute [r]
  # @return [String]
  attr_reader :close_line

  def initialize(content:, file:, line:, open_line:, close_line:, orig:,
                 prefix: '', attributes: {})
    @file = file
    @line = line
    @content = content
    @attributes = attributes
    @open_line = open_line
    @close_line = close_line
    @prefix = prefix
    @orig = orig
  end

  def attributes_to_s
    '%s %s' % [@attributes[:language],
               @attributes.reject { |k, _| k == :language }.
                 map { |k, v| '%s=%s' % [k.to_s, v.inspect] }.join(' ')]
  end

  # Fetch the code to be used for this block.  If the chunk's attributes name
  # an external source file, load and transform the code from that file;
  # otherwise fall back to the chunk's literal contents.
  def fetch_code()
    if @attributes.key?(:file)
      if (out = fetch_external_code(@attributes)).nil?
        $stderr.puts("Failed to load external code for code chunk at #{self.location_string}")
        @content
      else
        out
      end
    else
      if @attributes.key?(:id)
        $stderr.puts("WARNING: no source file specified for fenced code block \"#{@attributes[:id]}\" at #{@file}:#{@line}")
      end
      @content
    end
  end

  def to_s
    if ! @attributes.key?(:file) && ! @attributes.key?(:id)
      @orig
    else
      code = fetch_code()
      if code.nil?
        @orig
      else
        "#{@open_line}#{code.strip}\n#{@close_line}".
          gsub(/^(?!\s*#{Regexp.escape(@prefix.strip)})/, @prefix).
          gsub(/[ ]+$/, '')
      end
    end
  end

  def location_string
    '%s:%u' % [@file, @line]
  end

  def inspect
    '#<%s:%#x %s:%u %s>' % [self.class, self.object_id, @file, @line,
                            @attributes.map { |k, v| '%s=%s' % [k.to_s, v.inspect] }.join(' ')]
  end
end
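# For reference, a fenced code block that the chunkers below operate on looks
# roughly like the following (illustrative only; the path is hypothetical).
# The first word of the info string becomes the :language attribute and the
# remainder is parsed as TOML:
#
#   ```rust file="examples/demo.rs"
#   fn main() {
#       // replaced from examples/demo.rs on the next sync
#   }
#   ```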
def chunk(filename, opts = {})
  input = File.read(filename)
  opts = opts.merge(chunk_opts_for(filename))
  chunk_string(input, opts)
end

# Divide the contents of a file containing Markdown syntax into a series of
# alternating TextChunk and CodeChunk instances.
#
# @param [String] input Markdown input string.
def chunk_string(input, opts = {})
  chunks = []
  line = 0
  prefix = opts[:prefix] || %r{}
  open_line_regexp = /^(?<prefix>#{prefix})(?<indent>[ ]{0,3})(?<fence>`{3,}|~{3,})\s*(?<info_string>[^\n]*)\n/
  while ! input.empty?
    pre, open_line, open_post = input.partition(open_line_regexp)
    unless pre.empty?
      chunks << TextChunk.new(pre)
      line += pre.count("\n")
    end
    line += open_line.count("\n")

    if open_line.empty?
      post = open_post
    else
      open_match = Regexp.last_match
      indent = open_match[:indent]
      fence = open_match[:fence]
      info_string = open_match[:info_string].strip
      open_prefix = open_match[:prefix]

      # We've got the open-fence of the code block; now grab the body and
      # close-fence.
      #
      # The closing fence uses the same character as the opening fence, and
      # must be at least the same number of characters.  It may not be
      # followed by anything but spaces on the same line, but may be indented
      # up to three spaces regardless of the indent on the opening fence.
      close_line_regexp = /(?<prefix>#{prefix})[ ]{0,3}#{fence}#{fence[0]}*\s*(?=\n|$)/
      body, close_line, post = open_post.partition(close_line_regexp)
      close_match = Regexp.last_match
      code_line = line
      line += body.count("\n") + close_line.count("\n")

      if close_match.nil?
        raise 'Unclosed code block at %s:%u' % [opts[:filename], code_line]
      end

      orig = open_line + body + close_line
      close_prefix = close_match[:prefix]

      attrs_ary = info_string.split($;, 2)
      attrs = opts.fetch(:attrs) { ConfigHash.new(defaults: DEFAULT_CODE_CHUNK_ATTRS) }.dup
      attrs[:language] = attrs_ary.shift
      if ! attrs_ary.empty?
        intern_keys = lambda { |hsh|
          hsh.kind_of?(Hash) ? hsh.collect { |k, v| [k.intern, intern_keys[v]] }.to_h : hsh
        }
        begin
          attrs.merge!(intern_keys[Tomlrb.parse(attrs_ary.shift)])
        rescue Tomlrb::ParseError => err
          $stderr.puts('Error while parsing info string for code block at %s:%u: %s' %
                       [opts[:filename], code_line, err.message])
          raise err
        end
      end

      unindent_regexp = Regexp.new('[ ]{0,%u}' % indent.length)
      body.gsub!(/^#{prefix}#{unindent_regexp}/, '')

      # If the line prefixes differ, we've probably parsed something
      # incorrectly.
      if open_prefix != close_prefix
        raise 'Prefix mismatch: open fence had prefix %s, close fence had prefix %s' %
              [open_prefix.inspect, close_prefix.inspect]
      end

      chunks << CodeChunk.new(content: body,
                              file: opts[:filename],
                              line: code_line,
                              open_line: open_line,
                              close_line: close_line,
                              prefix: open_prefix,
                              attributes: attrs,
                              orig: orig)
    end

    input = post
  end

  Chunks.new(chunks)
end
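# Rough sketch of driving the chunkers by hand (the README.md path is
# hypothetical):
#
#   chunks = chunk('README.md')   # => Chunks of TextChunk / CodeChunk
#   chunks.select { |c| c.kind_of?(CodeChunk) }.each { |c| puts c.location_string }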
class Chunks < Array
  def to_s
    self.collect { |c| c.to_s }.join('')
  end
end

def chunk_opts_for(fname)
  {filename: fname, prefix: PREFIXES[File.extname(fname)]}
end

def collect_code_chunks(fname, opts, out = [])
  if File.directory?(fname)
    for entry in Dir.foreach(fname).reject { |e| e !~ SUPPORTED_FILE_REGEXP }
      collect_code_chunks(File.join(fname, entry), opts, out)
    end
  elsif File.file?(fname)
    out.concat(chunk(fname, opts).select { |c| c.kind_of?(CodeChunk) })
  end
  out
end

class MissingAttributeError < ::RuntimeError
  def initialize(attr, chunks)
    @attr = attr
    @chunks = chunks
    super('Missing attribute `%s` on %u code block%s' %
          [@attr.to_s, @chunks.length, @chunks.length == 1 ? '' : 's'])
  end

  def to_s
    if @chunks.length == 1
      'Missing attribute `%s` on code block at %s' % [@attr.to_s, @chunks.first.location_string]
    else
      "Missing attribute `%s` on %u code blocks:\n  %s" %
        [@attr.to_s, @chunks.length,
         @chunks.collect { |c| c.location_string }.join("\n  ")]
    end
  end
end

# Determine the number of elements in the shared prefix of a set of arrays.
#
# @param [Array<Array>] toks
def common_prefix_len(toks)
  n = toks.map { |t| t.length }.min()
  is_prefix = lambda { |_n| toks[1..-1].all? { |t| t[0..._n] == toks.first[0..._n] } }
  while n > 1 && ! is_prefix[n]
    n -= 1
  end
  n
end

# Template for a newly-created example source file: the first %s receives the
# shared preamble (`extern crate`/`use` lines, etc.), the second the body of
# `fn main`.
SOURCE_FILE_TEMPLATE = <<EOF
%s
fn main() {
%s
}
EOF

# Combine the code chunks that reference a single (missing) example source
# file into the contents of a new file.
def combine_chunks_for_file(chunks)
  chunks_ary = chunks.collect { |c| Rust::Parser.parse_string(c.content) }
  have_ids = chunks.all? { |c| c.attributes.key?(:id) }

  pre, main =
    if chunks.length > 1
      chunks_ary = chunks_ary.map { |c| c.map { |t| t.string } }
      prefix_len = common_prefix_len(chunks_ary)
      prefix = chunks_ary.first[0...prefix_len]
      chunks_ary.each { |t| t.shift(prefix_len) }

      pre = prefix.join('')
      main = if have_ids
               (0...chunks.length).collect do |i|
                 id = chunks[i].attributes[:id]
                 "//` id=#{id} { \n" + chunks_ary[i].join('') + "//` }\n"
               end
             else
               chunks_ary.collect { |c| c.join('') }
             end.join("\n").gsub(/^/, '    ')
      [pre, main]
    else
      c = chunks_ary.first
      raise 'FIXME'
    end

  SOURCE_FILE_TEMPLATE % [pre, main]
end

def query?(prompt)
  $stderr.write("#{prompt} ")
  r = $stdin.gets() =~ /\A[yY]\n\z/
  $stderr.puts("\n")
  r
end

def transform!(fname, opts)
  chunks_by_file = collect_code_chunks(fname, opts).
    reject { |c| c.attributes[:file].nil? }.
    group_by { |c| c.attributes[:file] }

  # Offer to write out missing example files.
  for src, chunks in chunks_by_file.reject { |f, _| File.exist?(f) }
    if query?("External file \"#{src}\" is missing. Create it?")
      output = combine_chunks_for_file(chunks)
      File.open(src, 'w') do |io|
        io.write(output)
      end
    end
  end

  if File.directory?(fname)
    for entry in Dir.foreach(fname).reject { |e| e =~ IGNORE_PATTERNS }
      transform!(File.join(fname, entry), opts)
    end
  elsif File.file?(fname) && fname =~ SUPPORTED_FILE_REGEXP
    chunks = chunk(fname, opts)
    if opts[:output].nil?
      fnew = fname + '.new'
      File.open(fnew, 'w') { |io| io.write(chunks.to_s) }
      diff = IO.popen(['diff', '-u', '--color=always', fname, fnew]) { |io| io.read() }
      if diff.empty?
        if opts[:verbose]
          $stderr.puts('%-32s -> no changes' % fname)
        end
        File.unlink(fnew)
      else
        $stderr.write(diff)
        if query?('Apply these changes?')
          File.rename(fnew, fname)
        else
          File.unlink(fnew)
        end
      end
    else
      opts[:output].write(chunks.to_s)
    end
  end
end
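# When executed directly (rather than being require'd), the script acts as a
# small CLI.  Illustrative invocations, with SCRIPT standing in for this file
# and the paths purely hypothetical:
#
#   SCRIPT README.md                        # offer to sync fenced blocks in README.md
#   SCRIPT -E -o out.rs examples/demo.rs    # extract a transformed sample to out.rs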
if caller[0].nil?
  require 'optparse'

  opts = {
    output: nil,
    verbose: false,
    attrs: ConfigHash.new(defaults: DEFAULT_CODE_CHUNK_ATTRS)
  }
  mode = :substitute

  OptionParser.new do |o|
    o.banner = 'Usage: %s [OPTION]... TARGET...' % $0
    o.on('-E', '--extract', 'Extract code samples from each TARGET') { mode = :extract }
    o.on('-O', '--option KEY=VALUE',
         'Pre-load an attribute\'s default for fenced code blocks') do |s|
      k, v = s.split('=', 2)
      opts[:attrs][k.intern] = v
    end
    o.on('-o', '--output=FILE', 'Write all output to FILE') do |out|
      opts[:output] =
        if out == '-'
          $stdout
        else
          File.open(out, 'w')
        end
    end
    o.on('-v', '--verbose', 'Be more chatty when processing files.') { opts[:verbose] = true }
    o.on('-h', '--help', 'Show this help') { $stdout.puts(o); exit(0) }
  end.parse!

  if mode == :substitute
    ARGV.each do |filename|
      transform!(filename, opts)
    end
  elsif mode == :extract
    ARGV.each do |filename|
      (opts[:output] || $stdout).write(fetch_external_code(opts[:attrs].merge(file: filename)))
    end
  else
    raise 'Unknown mode: %s' % mode.inspect
  end
end