module ReLemon # Error raised when problems are detected in the input grammar. class GrammarError < ::RuntimeError attr_reader :file, :line, :column def initialize(msg, file, line, column = nil) @file = file if column.nil? @line = line.line @column = line.column else @line = line @column = column end super('%s:%s:%s: %s' % [@file, @line, @column, msg]) end end # Base parser class providing common functionality for both re2c and # Lemon parsers. module Parser # Convenient representation of a position in an input file. class FilePosition @line = nil @column = nil @offset = nil # Line number. # # @!attribute [r] # @return [Integer] attr_reader :line # Column number. # # @!attribute [r] # @return [Integer] attr_reader :column # Byte offset from the start of the file. # @!attribute [r] # @return [Integer] attr_reader :offset # Initialize the file-position object, optionally specifying the initial # position within the file. # # @param [Integer] _line Initial line value. # @param [Integer] _column Initial column number. # @param [Integer] _offset Initial byte offset. def initialize(_line = nil, _column = nil, _offset = nil) @line = _line || 1 @column = _column || 0 @offset = _offset || 0 end # Create an updated position object by examining a string of consumed # data and updating internal variables in a duplicated FilePosition # as necessary. # # @param [String] consumed_data Input data between the current position # and the desired new position in the input. def update(consumed_data) self.dup.update!(consumed_data) end # Update the position in-place by examining a string of consumed data and # updating internal variables as necessary. # # @param [String] consumed_data Input data between the current position # and the desired new position in the input. def update!(consumed_data) @offset += consumed_data.length @line += (nl = consumed_data.count("\n")) @column = if nl > 0 /(?<=\n)?[^\n]*\z/.match(consumed_data)[0].length else @column + consumed_data.length end self end def inspect '#<%s:%#x %s>' % [self.class, self.object_id, self.to_s] end def to_s 'l. %u/c. %u' % [@line, @column] end end # Token-instance data structure. TokenInstance = ::Struct.new(:type, :value, :string, :position) # Stores the information necessary to recognize, fetch, and create # a TokenInstance for a given token type in the input file. class TokenDefinition # Fetch procedure used if none is explicitly passed to {#initialize}. # This implementation simply uses the regular-expression passed to # {#initialize}. DEFAULT_FETCH_PROC = lambda { |input, m| input.slice!(m.regexp) } @type = nil @match_regexp = nil @fetch_proc = nil # The type of token that this object defines. # # @!attribute [r] # @return [Symbol] attr_reader :type # The regular expression that, when it matches against the beginning of # an input string, will cause a parser to call this definition's # `fetch` method. # # @!attribute [r] # @return [Regexp] attr_reader :match_regexp # Initialize a token definition, specifying the token type, initial # regular expression, and (optionally) a block or proc to be used for # fetching the data for this type of token. # # @param [Symbol] _type Token-type being defined. # # @param [Regexp] _regexp Regular expression that matches partial-initial # or full instances of this token type. If it matches only part of # the full token string, a proc (`_fetch_proc`) or block must be # passed to `#initialize`. # # @param [Proc,nil] _fetch_proc If non-`nil`, a procedure that accepts an # input string and the MatchData from the definition's match regexp, # and returns the initial part of the input string that represents # this token. It is expected that the proc will modify the input # in-place, e.g. using {String#slice!}. def initialize(_type, _regexp, _fetch_proc = nil, &block) raise APIUsageError('TokenDefinition#initialize can only accept one block argument') if block_given? and not _fetch_proc.nil? if _fetch_proc.nil? _fetch_proc = block if block_given? _fetch_proc ||= DEFAULT_FETCH_PROC end @type = _type @match_regexp = _regexp raise APIUsageError.new('Invalid `fetch_proc` argument for %s#%s' % [self.class.name, __method__]) unless _fetch_proc.kind_of?(Proc) or _fetch_proc.kind_of?(Regexp) @fetch_proc = case _fetch_proc when Proc _fetch_proc when Regexp proc { |input, m| input.slice!(_fetch_proc) } end end # Attempt to fetch an instance of this token type from the given input # string. If the definition matches, removes the matching initial part # of the input, updates the given position, and returns a corresponding # TokenInstance; otherwise returns `nil`. # # @param [String] input Remaining input data. # # @param [FilePosition] _position Current position in the input file. # # @return [TokenInstance,nil] Instance of this token type found at the # start of `input`, or `nil` if no such instance was found. def fetch(input, _position, _binding = self) if not (m = @match_regexp.match(input)).nil? tok_position = _position.dup pre_length = input.length value = _binding.instance_exec(input, m, &@fetch_proc) removed = value value, removed = value if value.kind_of?(Array) unless value.nil? # Modify the buffer if the fetch proc didn't. input.slice!(0, removed.length) if input.length == pre_length _position.update!(removed) # update file position return TokenInstance.new(@type, value, removed, tok_position) end else nil end end end # State data for a parser. class ParseState INPUT_TRIM_REGEXP = /\A[ \t\v\n]+/ @input_string = nil @buffer = nil @filename = nil @position = nil @prev_token = nil # Grammar context into which parsed data is stored. # # @!attribute [r] # @return [ReLemon::Context] attr_reader :context # Name of the file being parsed, if applicable. # # @!attribute [r] # @return [String] attr_reader :filename # Portion of `input_string` that has yet to be tokenized or parsed. # # @!attribute [r] # @return [String] attr_reader :buffer # The full, unmodified string on which a parse was requested. # # @!attribute [r] # @return [String] attr_reader :input_string # Token returned by `next_token()` for the PREVIOUS run of # e.g. `handle_token()`. # # @!attribute [r] # @return [TokenInstance] attr_reader :prev_token def remove_junk_from_top() unless (removed = @buffer.slice!(self.class.const_get(:INPUT_TRIM_REGEXP))).nil? @position.update!(removed) end end # Whether the state's unparsed-data buffer is empty. # # @attribute [r] # @return [Boolean] def empty? @buffer.empty? end # Initialize a new parse-state object. # # @param [String] _input_string String to parse. # # @param [Hash] opts Options hash. # @option opts [ReLemon::Context] :context Context to store parsed data in. # @option opts [String] :filename File name to use when reporting errors # @option opts [Integer] :line Initial line number. # @option opts [Integer] :column Initial column number. # @option opts [Integer] :offset Initial byte offset. def initialize(_input_string, **opts) @input_options = opts @input_string = _input_string.clone.freeze @buffer = _input_string.clone @filename = opts[:filename] @position = opts[:position] || FilePosition.new(opts[:line], opts[:column], opts[:offset]) @trim_input = opts.fetch(:trim_input, false) @prev_token = nil @token_set = nil @terminate_parse_requested = false end attr_accessor :token_set def terminate_parse @terminate_parse_requested = true [nil,''] end # Update the parser-state's `line` and `column` values given a string of # consumed data. def update_position(consumed_data) @position.update!(consumed_data) end # Get the line and column number corresponding to the first character in # the parse state's buffer. # # @return [Array] def position() @position end def next_token() remove_junk_from_top() if @trim_input buf = @buffer return nil if buf.empty? o = nil @token_set.each { |defn| break unless (o = defn.fetch(buf, self.position, self)).nil? } if o.nil? lim = [buf.length, 32].min raise GrammarError.new('Parse error on "%s%s"' % [buf[0...lim], lim < buf.length ? '...' : ''], filename, self.position) else if @input_options[:dump_tokens] $stderr.puts(o.inspect) $stderr.flush() end return o end end # Run the parsing algorithm on the input string, and return the context. # @return [ReLemon::Context] def run() while not empty? last_buf = @buffer.clone tok = next_token() break if @terminate_parse_requested raise '>>>' + @buffer + '<<<' if @buffer.length == last_buf.length # It's possible the buffer was empty except for some whitespace, # which may have been discarded by `next_token`. handle_token(tok) unless tok.nil? @prev_token = tok end finish() end end # Define singleton methods on classes/modules that include Parser. def self.included(_module) _module.instance_exec do const_set(:FilePosition, ::ReLemon::Parser::FilePosition) const_set(:ParseState, Class.new(::ReLemon::Parser::ParseState) do |ps| define_method(:initialize) do |*a, **opts| super(*a, **opts) @token_set = _module.const_get(:TOKENS) end end) # Parse a named file. def self.parse_file(file, **options) # begin # oldwd = Dir.pwd # Dir.chdir(File.dirname(file)) self::ParseState.new(File.read(file), options.merge(filename: file)). run() # ensure # Dir.chdir(oldwd) # end end def self.parse_string(buf, **options) self::ParseState.new(buf, options.has_key?(:filename) ? options : options.merge(filename: '(buffer)')). run() end end end # class << self # include IINet::Util::Memoizer # def anchor_regexp(re) # source = re.source # if source =~ /\A\\A/ or source =~ /\A\^/ # re # elsif full # /\A#{source}\z/ # else # /\A#{source}/ # end # end # memoize(:anchor) # end end end