//
// tests/lexer.rs
// The PHiLe Compiler
//
// Created by Arpad Goretity (H2CO3)
// on 31/07/2017
//

#![cfg(test)]
#![deny(missing_debug_implementations, missing_copy_implementations,
        trivial_casts, trivial_numeric_casts,
        unsafe_code,
        unstable_features,
        unused_import_braces, unused_qualifications)]
#![cfg_attr(feature = "cargo-clippy",
            allow(match_same_arms, clone_on_ref_ptr))]
#![cfg_attr(feature = "cargo-clippy",
            deny(wrong_pub_self_convention, used_underscore_binding,
                 stutter, similar_names, pub_enum_variant_names,
                 // TODO(H2CO3): locally allowing non_ascii_literal doesn't work
                 /* non_ascii_literal, */ unicode_not_nfc,
                 /* result_unwrap_used, option_unwrap_used, */ // TODO(H2CO3): fix these
                 option_map_unwrap_or_else, option_map_unwrap_or, filter_map,
                 shadow_unrelated, shadow_reuse, shadow_same,
                 int_plus_one, string_add_assign, if_not_else,
                 invalid_upcast_comparisons,
                 cast_sign_loss, cast_precision_loss,
                 cast_possible_wrap, cast_possible_truncation,
                 mutex_integer, mut_mut, items_after_statements,
                 print_stdout, mem_forget, maybe_infinite_iter))]

#[macro_use]
extern crate quickcheck;
#[macro_use]
extern crate lazy_static;
extern crate unicode_xid;
extern crate unicode_segmentation;
extern crate regex;
extern crate rand;
extern crate phile;

use std::char;
use std::str;
use std::vec;
use std::ops;
use std::iter;
use std::ascii::AsciiExt;
use std::fmt::Write;
use quickcheck::{ Arbitrary, Gen };
use unicode_xid::UnicodeXID;
use unicode_segmentation::UnicodeSegmentation;
use regex::Regex;
use phile::error::Error;
use phile::lexer;
use phile::util::grapheme_count;


//
// Types for generating correct and incorrect source code
//

// Append a lexeme onto `source`'s buffer
trait Lexeme: Arbitrary {
    fn render(&self, buf: &mut String, end: &mut lexer::Location) -> lexer::TokenKind;
}

// Metadata about a token, without the actual underlying lexeme
#[derive(Debug, Clone)]
struct TokenHeader {
    slice: ops::Range<usize>,
    range: lexer::Range,
    kind:  lexer::TokenKind,
}

type ValidSourceFile = Vec<ValidToken>;

// Represents some well-formed multi-file input to the lexer
#[derive(Debug, Clone, PartialEq, Eq)]
struct ValidSource {
    files: Vec<ValidSourceFile>,
}

impl ValidSource {
    fn render(&self) -> (Vec<String>, Vec<TokenHeader>) {
        let mut strings = Vec::with_capacity(self.files.len());
        let mut tokens = Vec::with_capacity(self.files.len() * 100);

        for (i, file) in self.files.iter().enumerate() {
            let mut buf = String::with_capacity(file.len() * 16);
            let mut start = lexer::Location { src_idx: i, line: 1, column: 1 };

            for token in file {
                let mut end = start;
                let old_len = buf.len();
                let kind = token.render(&mut buf, &mut end);
                let new_len = buf.len();
                let slice = old_len..new_len;
                let range = lexer::Range { start, end };
                tokens.push(TokenHeader { slice, range, kind });
                start = end;
            }

            strings.push(buf);
        }

        (strings, tokens)
    }
}

impl Arbitrary for ValidSource {
    fn arbitrary<G: Gen>(g: &mut G) -> Self {
        // Generate a random number of source "files", but at least 1
        // Fill each file with a random number of correct lexemes, but at least 1
        let files = (0..g.gen_range(1, 8)).map(|_| {
            let gen_size = g.size() / 8;

            (0..g.gen_range(1, gen_size)).flat_map(|_| {
                let ws_gens: &[fn(&mut G) -> ValidToken] = &[
                    |g| ValidToken::Whitespace(ValidWhitespace::arbitrary(g)),
                    |g| ValidToken::LineComment(ValidLineComment::arbitrary(g)),
                ];
                let non_ws_gens: &[fn(&mut G) -> ValidToken] = &[
                    |g| ValidToken::Word(ValidWord::arbitrary(g)),
                    |g| ValidToken::Number(ValidNumber::arbitrary(g)),
                    |g| ValidToken::Punct(ValidPunct::arbitrary(g)),
                    |g| ValidToken::String(ValidString::arbitrary(g)),
                ];

                let ws_gen = *g.choose(ws_gens).unwrap();
                let non_ws_gen = *g.choose(non_ws_gens).unwrap();

                iter::once(ws_gen(g)).chain(iter::once(non_ws_gen(g)))
            }).collect()
        }).collect();

        ValidSource { files }
    }

    // First, check which source file the bug is in,
    // then try to shrink that specific file.
    // Token/lexeme sequences generated by Arbitrary always
    // contain a whitespace or a line comment, followed by
    // some other kind of token/lexeme. Therefore, we can
    // reliably shrink the sequence by picking pairs of
    // tokens starting on even indices. For each such pair,
    // generate the Cartesian product of the two lists of
    // shrunk lexemes, one for the whitespace, one for the
    // following non-whitespace.
    fn shrink(&self) -> Box<Iterator<Item=Self>> {
        let it = self.files.iter().flat_map(|tokens| {
            tokens.chunks(2).flat_map(move |chunk| {
                chunk[0].shrink().flat_map(move |ws| {
                    chunk[1].shrink().flat_map(move |non_ws| {
                        iter::once(ValidSource {
                            files: vec![vec![ws.clone(), non_ws.clone()]]
                        })
                    })
                })
            })
        });

        Box::new(it.collect::<Vec<_>>().into_iter())
    }
}

// Poor man's dynamic dispatch for non-object-safe traits: enums!
#[derive(Debug, Clone, PartialEq, Eq)]
enum ValidToken {
    Whitespace(ValidWhitespace),
    LineComment(ValidLineComment),
    Word(ValidWord),
    Number(ValidNumber),
    Punct(ValidPunct),
    String(ValidString),
}

impl Lexeme for ValidToken {
    fn render(&self, buf: &mut String, end: &mut lexer::Location) -> lexer::TokenKind {
        use ValidToken::*;

        match *self {
            Whitespace(ref ws)  => ws.render(buf, end),
            LineComment(ref lc) => lc.render(buf, end),
            Word(ref word)      => word.render(buf, end),
            Number(ref num)     => num.render(buf, end),
            Punct(ref punct)    => punct.render(buf, end),
            String(ref s)       => s.render(buf, end),
        }
    }
}

impl Arbitrary for ValidToken {
    fn arbitrary<G: Gen>(g: &mut G) -> Self {
        use ValidToken::*;

        let token_gens: &[fn(&mut G) -> Self] = &[
            |g| Whitespace(ValidWhitespace::arbitrary(g)),
            |g| LineComment(ValidLineComment::arbitrary(g)),
            |g| Word(ValidWord::arbitrary(g)),
            |g| Number(ValidNumber::arbitrary(g)),
            |g| String(ValidString::arbitrary(g)),
            |g| Punct(ValidPunct::arbitrary(g)),
        ];
        let gen = *g.choose(token_gens).unwrap();

        gen(g)
    }

    fn shrink(&self) -> Box<Iterator<Item=Self>> {
        use ValidToken::*;

        match *self {
            Whitespace(ref ws)  => Box::new(ws.shrink().map(Whitespace)),
            LineComment(ref lc) => Box::new(lc.shrink().map(LineComment)),
            Word(ref word)      => Box::new(word.shrink().map(Word)),
            Number(ref num)     => Box::new(num.shrink().map(Number)),
            Punct(ref punct)    => Box::new(punct.shrink().map(Punct)),
            String(ref s)       => Box::new(s.shrink().map(String)),
        }
    }
}

//
// A lexeme containing well-formed whitespace characters,
// horizontal as well as vertical.
//
#[derive(Debug, Clone, PartialEq, Eq)]
struct ValidWhitespace {
    buf: String,
}

impl Arbitrary for ValidWhitespace {
    fn arbitrary<G: Gen>(g: &mut G) -> Self {
        // generate at least one valid whitespace character
        let buf = (0..g.gen_range(1, 16)).map(|_| {
            let chars = if g.gen() { HOR_WS } else { VER_WS };
            *g.choose(chars).unwrap()
        }).collect();

        ValidWhitespace { buf }
    }

    fn shrink(&self) -> Box<Iterator<Item=Self>> {
        let num_chars = self.buf.chars().count();

        // do not return empty string as valid whitespace
        if num_chars <= 1 {
            return quickcheck::empty_shrinker()
        }

        // leave out each character, one-by-one
        let iter = (0..num_chars).map(|i| {
            let buf = self.buf
                .chars()
                .enumerate()
                .filter_map(|(j, c)| if j == i { None } else { Some(c) })
                .collect();

            ValidWhitespace { buf }
        });

        Box::new(iter.collect::<Vec<_>>().into_iter())
    }
}

impl Lexeme for ValidWhitespace {
    fn render(&self, buf: &mut String, end: &mut lexer::Location) -> lexer::TokenKind {
        for g in self.buf.graphemes(true) {
            if g.contains(VER_WS) {
                end.column = 1;
                end.line += 1;
            } else {
                end.column += 1;
            }
        }

        *buf += &self.buf;

        lexer::TokenKind::Whitespace
    }
}

//
// A lexeme representing a line comment ending with a newline.
//
#[derive(Debug, Clone, PartialEq, Eq)]
struct ValidLineComment {
    buf: String,
}

impl Arbitrary for ValidLineComment {
    fn arbitrary<G: Gen>(g: &mut G) -> Self {
        // Generate 0 or more valid non-newline extended grapheme clusters.
        // Generating the newline character has to be done first because the
        // map iterator mutably borrows the PRNG for the rest of the fn body.
        let newline = *g.choose(VER_WS).unwrap();
        let it = (0..g.gen_range(0, 16)).map(|_| {
            loop {
                let ch: char = g.gen();

                if !is_ver_ws(ch) {
                    break ch
                }
            }
        });
        let buf = iter::once('#').chain(it).chain(iter::once(newline)).collect();

        ValidLineComment { buf }
    }

    fn shrink(&self) -> Box<Iterator<Item=Self>> {
        // leave out each character, one-by-one,
        // except the leading '#' and the trailing newline
        let it = (1..self.buf.chars().count() - 1).map(|i| {
            let buf: String = self.buf
                .chars()
                .enumerate()
                .filter_map(|(j, c)| if j == i { None } else { Some(c) })
                .collect();

            assert!(buf.starts_with('#'));
            assert!(is_ver_ws(buf.chars().last().unwrap()));

            ValidLineComment { buf }
        });

        Box::new(it.collect::<Vec<_>>().into_iter())
    }
}

impl Lexeme for ValidLineComment {
    fn render(&self, buf: &mut String, end: &mut lexer::Location) -> lexer::TokenKind {
        // A line comment represented by this type always ends in a newline.
        assert!(self.buf.starts_with('#'));
        assert!(is_ver_ws(self.buf.chars().last().unwrap()));

        end.line += 1;
        end.column = 1;

        *buf += &self.buf;

        lexer::TokenKind::Comment
    }
}

//
// A lexeme representing words, that is, identifiers and keywords
//
#[derive(Debug, Clone, PartialEq, Eq)]
struct ValidWord {
    buf: String,
}

impl Arbitrary for ValidWord {
    fn arbitrary<G: Gen>(g: &mut G) -> Self {
        // Generate exactly one valid XID_Start char,
        // followed by any number of valid XID_Continue ones.
        let first = loop {
            let ch: char = g.gen();

            if is_ident_start(ch) {
                break iter::once(ch)
            }
        };
        let rest = (0..g.gen_range(0, 16)).map(|_| {
            loop {
                let ch: char = g.gen();

                if is_ident_cont(ch) {
                    break ch
                }
            }
        });

        ValidWord { buf: first.chain(rest).collect() }
    }

    fn shrink(&self) -> Box<Iterator<Item=Self>> {
        // The first char always has to be a XID_Start, so don't touch it.
        // Leave out the rest of the chars, one by one.
        let it = (1..self.buf.chars().count()).map(|i| {
            let buf: String = self.buf
                .chars()
                .enumerate()
                .filter_map(|(j, c)| if j == i { None } else { Some(c) })
                .collect();

            assert!(is_ident_start(buf.chars().next().unwrap()));

            ValidWord { buf }
        });

        Box::new(it.collect::<Vec<_>>().into_iter())
    }
}

impl Lexeme for ValidWord {
    fn render(&self, buf: &mut String, end: &mut lexer::Location) -> lexer::TokenKind {
        // Identifiers must not contain whitespace, including newlines
        assert!(!self.buf.contains(char::is_whitespace));

        end.column += grapheme_count(&self.buf); // because we contain no newlines
        *buf += &self.buf;

        lexer::TokenKind::Word
    }
}

//
// A type representing valid numeric (integer and floating-point) literals
//
#[derive(Debug, Clone, PartialEq, Eq)]
struct ValidNumber {
    buf: String,
    kind: ValidNumberKind,
    prefix_len: usize, // this many leading bytes to be preserved by shrink()
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ValidNumberKind {
    DecimalInt,
    BinaryInt,
    OctalInt,
    HexInt,
    FloatingPoint,
}

impl ValidNumber {
    fn arbitrary_int<G: Gen>(g: &mut G, prefix: &str, digits: &[char], kind: ValidNumberKind) -> Self {
        let range = 0..g.gen_range(1, 16);
        let digits_iter = range.map(|_| *g.choose(digits).unwrap());
        let buf = prefix.chars().chain(digits_iter).collect();
        let prefix_len = prefix.len();

        ValidNumber { buf, kind, prefix_len }
    }

    fn arbitrary_decimal_int<G: Gen>(g: &mut G) -> Self {
        Self::arbitrary_int(
            g,
            "",
            &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
            ValidNumberKind::DecimalInt,
        )
    }

    fn arbitrary_binary_int<G: Gen>(g: &mut G) -> Self {
        let prefix = if g.gen() { "0b" } else { "0B" };
        Self::arbitrary_int(
            g,
            prefix,
            &['0', '1'],
            ValidNumberKind::BinaryInt,
        )
    }

    fn arbitrary_octal_int<G: Gen>(g: &mut G) -> Self {
        let prefix = if g.gen() { "0o" } else { "0O" };
        Self::arbitrary_int(
            g,
            prefix,
            &['0', '1', '2', '3', '4', '5', '6', '7'],
            ValidNumberKind::OctalInt,
        )
    }

    fn arbitrary_hex_int<G: Gen>(g: &mut G) -> Self {
        let prefix = if g.gen() { "0x" } else { "0X" };
        Self::arbitrary_int(
            g,
            prefix,
            &[
                '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                'a', 'b', 'c', 'd', 'e', 'f',
                'A', 'B', 'C', 'D', 'E', 'F',
            ],
            ValidNumberKind::HexInt,
        )
    }

    fn decimal_digits<G: Gen>(g: &mut G) -> vec::IntoIter<char> {
        let digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];

        (0..g.gen_range(1, 16))
            .map(|_| *g.choose(&digits).unwrap())
            .collect::<Vec<_>>()
            .into_iter()
    }

    fn arbitrary_float<G: Gen>(g: &mut G) -> Self {
        let integral = Self::decimal_digits(g);
        let fraction = Self::decimal_digits(g);
        let dot = iter::once('.');
        let exponent: Box<Iterator<Item=char>> = if g.gen() {
            let e = iter::once(if g.gen() { 'e' } else { 'E' });
            let ds = Self::decimal_digits(g);
            let sign: Box<Iterator<Item=char>> = if g.gen() {
                Box::new(iter::once(if g.gen() { '+' } else { '-' }))
            } else {
                Box::new(iter::empty())
            };

            Box::new(e.chain(sign).chain(ds))
        } else {
            Box::new(iter::empty())
        };
        let buf = integral.chain(dot).chain(fraction).chain(exponent).collect();
        let kind = ValidNumberKind::FloatingPoint;
        let prefix_len = 0;

        ValidNumber { buf, kind, prefix_len }
    }

    fn shrink_int(&self) -> Box<Iterator<Item=Self>> {
        assert_ne!(self.kind, ValidNumberKind::FloatingPoint);
        assert!(self.prefix_len < self.buf.len());

        let prefix = &self.buf[..self.prefix_len];
        let payload = &self.buf[self.prefix_len..];

        // do not return empty string as a valid integer literal
        if payload.chars().count() <= 1 {
            return quickcheck::empty_shrinker()
        }

        // Remove digits one-by-one
        let it = (0..payload.chars().count()).map(|i| {
            let digits = payload
                .chars()
                .enumerate()
                .filter_map(|(j, c)| if j == i { None } else { Some(c) });

            ValidNumber {
                buf: prefix.chars().chain(digits).collect(),
                kind: self.kind,
                prefix_len: self.prefix_len,
            }
        });

        Box::new(it.collect::<Vec<_>>().into_iter())
    }
}

impl Arbitrary for ValidNumber {
    fn arbitrary<G: Gen>(g: &mut G) -> Self {
        let funcs: &[fn(&mut G) -> Self] = &[
            Self::arbitrary_decimal_int,
            Self::arbitrary_binary_int,
            Self::arbitrary_octal_int,
            Self::arbitrary_hex_int,
            Self::arbitrary_float,
        ];
        let func = g.choose(funcs).unwrap();

        func(g)
    }

    fn shrink(&self) -> Box<Iterator<Item=Self>> {
        use ValidNumberKind::*;

        match self.kind {
            DecimalInt | BinaryInt | OctalInt | HexInt => self.shrink_int(),
            FloatingPoint => quickcheck::empty_shrinker(), // too hard
        }
    }
}

impl Lexeme for ValidNumber {
    fn render(&self, buf: &mut String, end: &mut lexer::Location) -> lexer::TokenKind {
        // Identifiers must not contain whitespace, including newlines
        assert!(!self.buf.contains(char::is_whitespace));

        end.column += grapheme_count(&self.buf); // because we contain no newlines
        *buf += &self.buf;

        lexer::TokenKind::Numeric
    }
}

//
// Type for generating an operator or other punctuation character
//
#[derive(Debug, Clone, PartialEq, Eq)]
struct ValidPunct {
    value: &'static str,
}

// No shrink(); operators are atomic
impl Arbitrary for ValidPunct {
    fn arbitrary<G: Gen>(g: &mut G) -> Self {
        ValidPunct { value: *g.choose(PUNCTUATION).unwrap() }
    }
}

impl Lexeme for ValidPunct {
    fn render(&self, buf: &mut String, end: &mut lexer::Location) -> lexer::TokenKind {
        assert!(!self.value.contains(char::is_whitespace));
        assert!(self.value.is_ascii());

        end.column += grapheme_count(self.value);
        *buf += self.value;

        lexer::TokenKind::Punctuation
    }
}

//
// Type for generating valid string literals
//
#[derive(Debug, Clone, PartialEq, Eq)]
struct ValidString {
    chars: Vec<CharacterLiteral>,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CharacterLiteral {
    Unescaped(char),
    Quote,
    Backslash,
    Cr,
    Lf,
    Tab,
    Hex(u8),
    Unicode(char),
}

impl Arbitrary for ValidString {
    fn arbitrary<G: Gen>(g: &mut G) -> Self {
        use CharacterLiteral::*;

        // generate a possibly empty string, 25% of which are escape sequences
        let it = (0..g.gen_range(0, 16)).map(|_| {
            if g.gen::<u8>() < 0x40 {
                let char_gens: &[fn(&mut G) -> CharacterLiteral] = &[
                    |_| Quote,
                    |_| Backslash,
                    |_| Cr,
                    |_| Lf,
                    |_| Tab,
                    |g| Hex(g.gen()),
                    |g| Unicode(g.gen()),
                ];
                let char_gen = *g.choose(char_gens).unwrap();

                return char_gen(g);
            }

            // Otherwise, generate any valid Unicode scalar except " and \
            loop {
                match g.gen::<char>() {
                    '"' | '\\' => {},
                    ch => break Unescaped(ch),
                }
            }
        });

        ValidString { chars: it.collect() }
    }

    fn shrink(&self) -> Box<Iterator<Item=Self>> {
        let it = (0..self.chars.len()).map(|i| {
            let chars = self.chars
                .iter()
                .enumerate()
                .filter_map(|(j, &c)| if j == i { None } else { Some(c) })
                .collect();

            ValidString { chars }
        });

        Box::new(it.collect::<Vec<_>>().into_iter())
    }
}

impl Lexeme for ValidString {
    fn render(&self, buf: &mut String, end: &mut lexer::Location) -> lexer::TokenKind {
        use CharacterLiteral::*;

        let old_len = buf.len();

        buf.reserve(self.chars.len() * 2);
        buf.push('"');

        for ch in &self.chars {
            match *ch {
                Unescaped(uch) => buf.push(uch),
                Backslash      => *buf += r#"\\"#,
                Quote          => *buf += r#"\""#,
                Cr             => *buf += r#"\r"#,
                Lf             => *buf += r#"\n"#,
                Tab            => *buf += r#"\t"#,
                Hex(byte)      => write!(buf, "\\x{:02x}", byte).unwrap(),
                Unicode(uch)   => write!(buf, "\\u{{{:x}}}", uch as u32).unwrap(),
            }
        }

        buf.push('"');

        for g in buf[old_len..].graphemes(true) {
            // if one char is vertical whitespace, then all of them must be so
            if g.contains(VER_WS) {
                assert!(g.chars().all(|ch| VER_WS.contains(&ch)));
                end.line += 1;
                end.column = 1;
            } else {
                end.column += 1;
            }
        }

        lexer::TokenKind::String
    }
}

// TODO(H2CO3): test invalid lexemes (e.g. unterminated string
// literals, string literals with invalid escapes or disallowed
// [e.g. deprecated, surrogate, ...] characters, lonely integer
// base prefixes without following digits [0b, 0x, etc.],
// floating-point literals with missing parts [e.g. .5, 1.])
// using QuickCheck. (Currently there are only manually-written
// unit tests.)

//
// Constants and functions for generating pseudorandom strings
//

// Horizontal whitespace
static HOR_WS: &[char] = &[
    '\u{0009}',
    '\u{0020}',
    '\u{00A0}',
    '\u{1680}',
    '\u{2000}',
    '\u{2001}',
    '\u{2002}',
    '\u{2003}',
    '\u{2004}',
    '\u{2005}',
    '\u{2006}',
    '\u{2007}',
    '\u{2008}',
    '\u{2009}',
    '\u{200A}',
    '\u{202F}',
    '\u{205F}',
    '\u{3000}',
];

// Vertical whitespace, a.k.a. newline characters
static VER_WS: &[char] = &[
    '\u{000A}',
    '\u{000B}',
    '\u{000C}',
    '\u{000D}',
    '\u{0085}',
    '\u{2028}',
    '\u{2029}',
];

// Valid operators
static PUNCTUATION: &[&str] = &[
     "<->",  "<->!",  "<->?",  "<->*",  "<->+",
    "!<->", "!<->!", "!<->?", "!<->*", "!<->+",
    "?<->", "?<->!", "?<->?", "?<->*", "?<->+",
    "*<->", "*<->!", "*<->?", "*<->*", "*<->+",
    "+<->", "+<->!", "+<->?", "+<->*", "+<->+",

    "...", "..", ".", "<=", ">=", "<", ">", "==", "!=",
    "->", "=>", "=", "::", ":", "(", ")", "[", "]", "{", "}",
    "+", "-", "*", "/", "%", "&", "|", "~", "?", ",", ";",
];

const CHAR_MAX_BYTES: usize = 4;

fn is_hor_ws(ch: char) -> bool {
    HOR_WS.contains(&ch)
}

fn is_ver_ws(ch: char) -> bool {
    VER_WS.contains(&ch)
}

// Regex uses an outdated (Unicode 7.0) definition of XID_{Start,Continue},
// whereas the UnicodeXID crate conforms to Unicode 9.0. This makes
// unit tests fail for some code points, e.g. U+17828.
// See https://github.com/rust-lang/regex/issues/391.
// TODO(H2CO3): this is both horribly slow, absolutely disgusting,
// and doesn't actually test _anything_, because the same regex that
// is used for lexing is also utilized in generating test cases.
// Consequently, we must rewrite these two functions using UnicodeXID
// once the regex crate is updated to a more recent version of Unicode...
// When that is done, we can do away with the regex and lazy_static
// imports in the test crate, and with the lazy_static dev dependency.
#[allow(non_upper_case_globals)]
fn is_ident_start(ch: char) -> bool {
    // UnicodeXID::is_xid_start(ch) || ch == '_'

    lazy_static! {
        static ref re: Regex = Regex::new(r"^[_\p{XID_Start}]$").unwrap();
    }

    re.is_match(ch.encode_utf8(&mut [0; CHAR_MAX_BYTES]))
}

#[allow(non_upper_case_globals)]
fn is_ident_cont(ch: char) -> bool {
    // UnicodeXID::is_xid_continue(ch)

    lazy_static! {
        static ref re: Regex = Regex::new(r"^\p{XID_Continue}$").unwrap();
    }

    re.is_match(ch.encode_utf8(&mut [0; CHAR_MAX_BYTES]))
}

fn shrunk_transitive_closure<T, I>(it: I) -> vec::IntoIter<T>
    where T: Arbitrary,
          I: Iterator<Item=T> {

    let result = it.flat_map(|item| {
        let shrunk_items = item.shrink();
        iter::once(item).chain(shrunk_transitive_closure(shrunk_items))
    });

    result.collect::<Vec<_>>().into_iter()
}

//
// Actual Unit Tests
//

#[test]
fn shrink_valid_whitespace_items() {
    let ws = ValidWhitespace {
        buf: " \r\n".to_owned()
    };

    let actual: Vec<_> = shrunk_transitive_closure(iter::once(ws))
        .map(|lexeme| lexeme.buf)
        .collect();

    let expected = [
        " \r\n",
        "\r\n", "\n", "\r",
        " \n",  "\n", " ",
        " \r",  "\r", " ",
    ];

    assert_eq!(actual, expected);
}

#[test]
fn shrink_valid_whitespace_size() {
    fn num_shrunk(len: usize) -> usize {
        match len {
            0 => panic!("Valid whitespace must be at least one char long"),
            1 => 1,
            _ => 1 + len * num_shrunk(len - 1),
        }
    }

    let mut g = quickcheck::StdGen::new(rand::OsRng::new().unwrap(), 100);

    for _ in 0..100 {
        let ws = ValidWhitespace::arbitrary(&mut g);
        let num_chars = ws.buf.chars().count();

        // this blows up exponentially, so only check for length < 6
        if num_chars >= 6 { continue }

        let num_actual = shrunk_transitive_closure(iter::once(ws.clone())).count();
        let num_expected = num_shrunk(num_chars);

        assert_eq!(num_actual, num_expected, "{:?}", ws);
    }
}

#[test]
fn shrink_valid_line_comment_items() {
    let comment = ValidLineComment {
        buf: "#XYZ\n".to_owned(),
    };

    let actual: Vec<_> = shrunk_transitive_closure(iter::once(comment))
        .map(|lexeme| lexeme.buf)
        .collect();

    let expected = [
        "#XYZ\n",
        "#YZ\n",  "#Z\n", "#\n", "#Y\n", "#\n",
        "#XZ\n",  "#Z\n", "#\n", "#X\n", "#\n",
        "#XY\n",  "#Y\n", "#\n", "#X\n", "#\n",
    ];

    assert_eq!(actual, expected);
}

#[test]
fn shrink_valid_line_comment_size() {
    fn num_shrunk(len: usize) -> usize {
        match len {
            0 | 1 => panic!("Valid line comment must be at least 2 chars long (#\n)"),
            2     => 1,
            _     => 1 + (len - 2) * num_shrunk(len - 1),
        }
    }

    let mut g = quickcheck::StdGen::new(rand::OsRng::new().unwrap(), 100);

    for _ in 0..100 {
        let comment = ValidLineComment::arbitrary(&mut g);
        let num_chars = comment.buf.chars().count();

        // this blows up exponentially, so only check for length < 8
        if num_chars >= 8 { continue }

        let num_actual = shrunk_transitive_closure(iter::once(comment.clone())).count();
        let num_expected = num_shrunk(num_chars);

        assert_eq!(num_actual, num_expected, "{:?}", comment);
    }
}

#[test]
fn shrink_valid_word_items() {
    let word = ValidWord {
        buf: "a1b2".to_owned(),
    };

    let actual: Vec<_> = shrunk_transitive_closure(iter::once(word))
        .map(|lexeme| lexeme.buf)
        .collect();

    let expected = [
        "a1b2",
        "ab2", "a2", "a", "ab", "a",
        "a12", "a2", "a", "a1", "a",
        "a1b", "ab", "a", "a1", "a",
    ];

    assert_eq!(actual, expected);
}

#[test]
fn shrink_valid_word_size() {
    fn num_shrunk(len: usize) -> usize {
        match len {
            0 => panic!("Valid word must be at least one char long"),
            1 => 1,
            _ => 1 + (len - 1) * num_shrunk(len - 1),
        }
    }

    let mut g = quickcheck::StdGen::new(rand::OsRng::new().unwrap(), 100);

    for _ in 0..100 {
        let word = ValidWord::arbitrary(&mut g);
        let num_chars = word.buf.chars().count();

        // this blows up exponentially, so only check for length < 6
        if num_chars >= 6 { continue }

        let num_actual = shrunk_transitive_closure(iter::once(word.clone())).count();
        let num_expected = num_shrunk(num_chars);

        assert_eq!(num_actual, num_expected, "{:?}", word);
    }
}

#[test]
fn shrink_valid_number_items() {
    let numbers = vec![
        (
            ValidNumber {
                buf: "908".to_owned(),
                kind: ValidNumberKind::DecimalInt,
                prefix_len: 0,
            },
            vec![
                "908",
                "08", "8", "0",
                "98", "8", "9",
                "90", "0", "9",
            ],
        ),
        (
            ValidNumber {
                buf: "0b110".to_owned(),
                kind: ValidNumberKind::BinaryInt,
                prefix_len: 2,
            },
            vec![
                "0b110",
                "0b10", "0b0", "0b1",
                "0b10", "0b0", "0b1",
                "0b11", "0b1", "0b1",
            ],
        ),
        (
            ValidNumber {
                buf: "0o704".to_owned(),
                kind: ValidNumberKind::OctalInt,
                prefix_len: 2,
            },
            vec![
                "0o704",
                "0o04", "0o4", "0o0",
                "0o74", "0o4", "0o7",
                "0o70", "0o0", "0o7",
            ],
        ),
        (
            ValidNumber {
                buf: "0x30f".to_owned(),
                kind: ValidNumberKind::HexInt,
                prefix_len: 2,
            },
            vec![
                "0x30f",
                "0x0f", "0xf", "0x0",
                "0x3f", "0xf", "0x3",
                "0x30", "0x0", "0x3",
            ],
        ),
        (
            ValidNumber {
                buf: "123.456e+78".to_owned(),
                kind: ValidNumberKind::FloatingPoint,
                prefix_len: 0,
            },
            vec!["123.456e+78"],
        ),
    ];

    for (number, expected) in numbers {
        let actual: Vec<_> = shrunk_transitive_closure(iter::once(number))
            .map(|lexeme| lexeme.buf)
            .collect();

        assert_eq!(actual, expected);
    }
}

#[test]
fn shrink_valid_punctuation() {
    for &value in PUNCTUATION {
        let it = iter::once(ValidPunct { value });
        let items: Vec<_> = shrunk_transitive_closure(it)
            .map(|lexeme| lexeme.value)
            .collect();

        assert_eq!(items, [value]);
    }
}

#[test]
fn shrink_valid_string_items() {
    use CharacterLiteral::*;

    let s = ValidString {
        chars: vec![Quote, Unicode('\u{0170}'), Unescaped('@')]
    };

    let actual: Vec<_> = shrunk_transitive_closure(iter::once(s))
        .map(|lexeme| lexeme.chars)
        .collect();

    let expected = [
        vec![Quote, Unicode('\u{0170}'), Unescaped('@')],

        vec![Unicode('\u{0170}'), Unescaped('@')],
        vec![Unescaped('@')],
        vec![],
        vec![Unicode('\u{0170}')],
        vec![],

        vec![Quote, Unescaped('@')],
        vec![Unescaped('@')],
        vec![],
        vec![Quote],
        vec![],

        vec![Quote, Unicode('\u{0170}')],
        vec![Unicode('\u{0170}')],
        vec![],
        vec![Quote],
        vec![],
    ];

    assert_eq!(actual, expected);
}

#[test]
fn shrink_valid_string_size() {
    fn num_shrunk(len: usize) -> usize {
        match len {
            0 => 1,
            _ => 1 + len * num_shrunk(len - 1),
        }
    }

    let mut g = quickcheck::StdGen::new(rand::OsRng::new().unwrap(), 100);

    for _ in 0..100 {
        let s = ValidString::arbitrary(&mut g);
        let num_chars = s.chars.len();

        // this blows up exponentially, so only check for length < 6
        if num_chars >= 6 { continue }

        let num_actual = shrunk_transitive_closure(iter::once(s.clone())).count();
        let num_expected = num_shrunk(num_chars);

        assert_eq!(num_actual, num_expected, "{:#?}", s);
    }
}

#[test]
fn shrink_valid_source() {
    let source = ValidSource {
        files: vec![
            vec![
                ValidToken::Whitespace(ValidWhitespace { buf: " \r".to_owned() }),
                ValidToken::Number(
                    ValidNumber {
                        kind: ValidNumberKind::DecimalInt,
                        buf: "10".to_owned(),
                        prefix_len: 0
                    }
                ),
                ValidToken::LineComment(ValidLineComment { buf: "#a\n".to_owned() }),
                ValidToken::Word(ValidWord { buf: "XYZ".to_owned() }),
            ],
        ]
    };

    let expected = vec![
        // Results of shrinking the first two of lexemes
        ValidSource {
            files: vec![
                vec![
                    ValidToken::Whitespace(ValidWhitespace { buf: "\r".to_owned() }),
                    ValidToken::Number(
                        ValidNumber {
                            kind: ValidNumberKind::DecimalInt,
                            buf: "0".to_owned(),
                            prefix_len: 0
                        }
                    ),
                ],
            ],
        },
        ValidSource {
            files: vec![
                vec![
                    ValidToken::Whitespace(ValidWhitespace { buf: "\r".to_owned() }),
                    ValidToken::Number(
                        ValidNumber {
                            kind: ValidNumberKind::DecimalInt,
                            buf: "1".to_owned(),
                            prefix_len: 0
                        }
                    ),
                ],
            ],
        },
        ValidSource {
            files: vec![
                vec![
                    ValidToken::Whitespace(ValidWhitespace { buf: " ".to_owned() }),
                    ValidToken::Number(
                        ValidNumber {
                            kind: ValidNumberKind::DecimalInt,
                            buf: "0".to_owned(),
                            prefix_len: 0
                        }
                    ),
                ],
            ],
        },
        ValidSource {
            files: vec![
                vec![
                    ValidToken::Whitespace(ValidWhitespace { buf: " ".to_owned() }),
                    ValidToken::Number(
                        ValidNumber {
                            kind: ValidNumberKind::DecimalInt,
                            buf: "1".to_owned(),
                            prefix_len: 0
                        }
                    ),
                ],
            ],
        },

        // Results of shrinking the second pair of lexemes
        ValidSource {
            files: vec![
                vec![
                    ValidToken::LineComment(ValidLineComment { buf: "#\n".to_owned() }),
                    ValidToken::Word(ValidWord { buf: "XZ".to_owned() }),
                ],
            ],
        },
        ValidSource {
            files: vec![
                vec![
                    ValidToken::LineComment(ValidLineComment { buf: "#\n".to_owned() }),
                    ValidToken::Word(ValidWord { buf: "XY".to_owned() }),
                ],
            ],
        },
    ];

    let actual: Vec<_> = source.shrink().collect();

    assert_eq!(actual, expected);
}

#[test]
fn no_sources() {
    let sources: &[&str] = &[];
    assert!(lexer::lex(sources).unwrap().is_empty());
}

#[test]
fn single_empty_source() {
    assert!(lexer::lex(&[""]).unwrap().is_empty());
}

#[test]
fn multiple_empty_sources() {
    assert!(lexer::lex(&["", ""]).unwrap().is_empty());
}

#[test]
fn ascii_digit_is_numeric() {
    let lexemes: &[&str] = &[
        "0b0", "0b1", "0B0", "0B1", "0b00", "0b01", "0B00", "0B11",
        "0o0", "0o7", "0O0", "0O6", "0o00", "0o45", "0O32", "0O14",
        "0x0", "0xf", "0X0", "0XD", "0x00", "0x0A", "0X00", "0X0b", "0xC", "0Xf",
        "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
        "010", "0099", "2468", "13570", "34021",
        "0.0", "00.0", "0.00", "00.00", "1.0", "0.1", "123.468", "10.09",
        "0.0e0", "1.0e0", "0.1e0", "9.9e9", "1.9e5", "00.00e00", "123.456e012",
        "0.0E0", "2.0E0", "0.3E0", "4.5E9", "6.7E5", "00.00E00", "123.456E012",
        "0.0e+0", "6.0e+0", "0.4e+0", "3.6e+9", "1.3e+5", "00.00e+00", "123.456e+012",
        "0.0E+0", "7.0E+0", "0.3E+0", "4.5E+9", "3.5E+5", "00.00E+00", "456.123E+024",
        "0.0e-0", "8.0e-0", "0.2e-0", "5.4e-9", "5.7e-5", "00.00e-00", "246.753e-034",
        "0.0E-0", "9.0E-0", "0.1E-0", "6.3E-9", "7.9E-5", "00.00E-00", "846.195E-029",
        "53.46e24", "39.81E31", "65.43e+21", "75.64E+13", "13.37e-37", "42.24E-31",
    ];

    for &lexeme in lexemes {
        let range = lexer::Range {
            start: lexer::Location { src_idx: 0, line: 1, column: 1 },
            end:   lexer::Location { src_idx: 0, line: 1, column: 1 + grapheme_count(lexeme) },
        };
        let sources = &[lexeme];
        let tokens = lexer::lex(sources).unwrap();
        let token = tokens[0];

        assert_eq!(tokens.len(), 1);
        assert_eq!(token.kind, lexer::TokenKind::Numeric);
        assert_eq!(token.value, lexeme);
        assert_eq!(token.range, range);
    }
}

#[test]
fn unicode_digit_is_not_numeric() {
    // Characters that are classified as "digit" in Unicode,
    // but should not be accepted by the lexer as such.
    let non_numeric_digits: &[&str] = &[
        "\u{0660}",
        "\u{0967}",
        "\u{0E52}",
        "\u{0967}",
        "\u{1814}",
        "\u{FF15}",
    ];

    // They all express a single grapheme cluster.
    let err_range = lexer::Range {
        start: lexer::Location { src_idx: 0, line: 1, column: 1 },
        end:   lexer::Location { src_idx: 0, line: 1, column: 2 },
    };

    for &lexeme in non_numeric_digits {
        let sources = &[lexeme];
        let result = lexer::lex(sources);

        match result {
            Err(Error::Syntax { ref message, range }) => {
                assert_eq!(message, "Invalid token");
                assert_eq!(range, err_range);
            },
            Err(other) => panic!("Lexer returned a non-syntax error: {}", other),
            Ok(tokens) => panic!("Lexer unexpectedly accepted {}: {:#?}", lexeme, tokens),
        }
    }
}

#[test]
fn identifier_with_inner_unicode_digit() {
    let identifiers: &[&str] = &[
        "x\u{0660}",                // Latin Letter followed by Digit
        "_\u{0967}",                // Underscore followed by Digit
        "E\u{0300}\u{0e52}",        // Latin letter + Combining Grave Accent + Digit
        "\u{0687}\u{08EA}\u{0967}", // Arabic + Nonspacing Mark followed by Digit
        "\u{4FA1}\u{1814}",         // Chinese followed by Digit
        "\u{0938}\u{A8F1}\u{FF15}", // Devanagari + Combining Avagraha + Digit
    ];

    for &ident in identifiers {
        let range = lexer::Range {
            start: lexer::Location { src_idx: 0, line: 1, column: 1 },
            end:   lexer::Location { src_idx: 0, line: 1, column: 1 + grapheme_count(ident) },
        };
        let sources = &[ident];
        let tokens = lexer::lex(sources).unwrap();
        let token = tokens[0];

        assert_eq!(tokens.len(), 1);
        assert_eq!(token.kind, lexer::TokenKind::Word);
        assert_eq!(token.value, ident);
        assert_eq!(token.range, range);
    }
}

#[test]
fn all_valid_punctuation() {
    for &punct in PUNCTUATION {
        let range = lexer::Range {
            start: lexer::Location { src_idx: 0, line: 1, column: 1 },
            end:   lexer::Location { src_idx: 0, line: 1, column: 1 + grapheme_count(punct) },
        };
        let sources = &[punct];
        let tokens = lexer::lex(sources).unwrap();
        let token = tokens[0];

        assert_eq!(tokens.len(), 1);
        assert_eq!(token.kind, lexer::TokenKind::Punctuation);
        assert_eq!(token.value, punct);
        assert_eq!(token.range, range);
    }
}

#[test]
fn windows_newline_in_comment() {
    let src = ValidSource {
        files: vec![
            vec![
                ValidToken::LineComment(
                    ValidLineComment {
                        buf: "#abc\r\n".to_owned()
                    }
                ),
            ]
        ]
    };
    let (srcs, _) = src.render();
    let actual = lexer::lex(&srcs).unwrap();

    assert_eq!(actual.len(), 1);
    assert_eq!(actual[0].kind, lexer::TokenKind::Comment);
}

#[test]
fn interesting_valid_sources() {
    let test_cases: &[_] = &[
        // Some identifiers and keywords
        ("foo",     vec!["foo"]),
        ("bar qux", vec!["bar", " ", "qux"]),
        ("if",      vec!["if"]),
        ("then",    vec!["then"]),
        ("else",    vec!["else"]),
        ("match",   vec!["match"]),
        ("true",    vec!["true"]),
        ("false",   vec!["false"]),
        ("as",      vec!["as"]),
        ("nil",     vec!["nil"]),
        ("struct",  vec!["struct"]),
        ("class",   vec!["class"]),
        ("enum",    vec!["enum"]),
        ("fn",      vec!["fn"]),
        ("impl",    vec!["impl"]),
        ("let",     vec!["let"]),

        // line comments without a trailing newline
        ("#", vec!["#"]),
        ("# baz", vec!["# baz"]),
        ("#\t", vec!["#\t"]),

        // without a decimal point, it's not a floating-point literal
        ("123e456", vec!["123", "e456"]),

        // <digits>.<letters>() can be a method call;
        // <letters>.digits can be a tuple field access
        ("850.method()", vec!["850", ".", "method", "(", ")"]),
        ("thing.01",     vec!["thing", ".", "01"]),

        // numbers followed directly by a letter are allowed _for now_. Soon they won't be.
        ("99foo",       vec!["99", "foo"]),
        ("03.14barqux", vec!["03.14", "barqux"]),

        // Lonely 0[bBoOxX] prefixes are _currently_ recognized as a zero and a letter. Soon they won't be.
        ("0b", vec!["0", "b"]),
        ("0B", vec!["0", "B"]),
        ("0o", vec!["0", "o"]),
        ("0O", vec!["0", "O"]),
        ("0x", vec!["0", "x"]),
        ("0X", vec!["0", "X"]),

        // .. and ... after a number is a range, not a floating-point literal
        ("10..99", vec!["10", "..", "99"]),
        ("32...85", vec!["32", "...", "85"]),
        ("25.7..0.34", vec!["25.7", "..", "0.34"]),
        ("3.14...2.71", vec!["3.14", "...", "2.71"]),

        // Parentheses
        ("()[]{}", vec!["(", ")", "[", "]", "{", "}"]),
        ("{[(([{}]))]}", vec!["{", "[", "(", "(", "[", "{", "}", "]", ")", ")", "]", "}"]),

        // Operators that are repetitions of the same character
        ("......", vec!["...", "..."]),
        (".....",  vec!["...", ".."]),
        ("....",   vec!["...", "."]),
        ("::::",   vec!["::", "::"]),
        (":::",    vec!["::", ":"]),
        ("?:",     vec!["?", ":"]),
        ("?::",    vec!["?", "::"]),
        ("====",   vec!["==", "=="]),
        ("===",    vec!["==", "="]),

        (". .....",  vec![".", " ", "...", ".."]),
        (".. ....",  vec!["..", " ", "...", "."]),
        (". .. ...", vec![".", " ", "..", " ", "..."]),
        (".. .. ..", vec!["..", " ", "..", " ", ".."]),
        (".. . ...", vec!["..", " ", ".", " ", "..."]),
        (".. ...",   vec!["..", " ", "..."]),
        ("... ..",   vec!["...", " ", ".."]),
        (".. ..",    vec!["..", " ", ".."]),
        (". ...",    vec![".", " ", "..."]),
        (". . . .",  vec![".", " ", ".", " ", ".", " ", "."]),
        (": :::",    vec![":", " ", "::", ":"]),
        (": ::",     vec![":", " ", "::"]),
        ("? :",      vec!["?", " ", ":"]),
        ("? : :",    vec!["?", " ",  ":", " ", ":"]),
        ("=== =",    vec!["==", "=", " ", "="]),
        ("= ==",     vec!["=", " ", "=="]),

        // no decrement/increment, separate logical/bitwise operators,
        // nor shifts (left/logical right/arithmetic right)
        ("--", vec!["-", "-"]),
        ("++", vec!["+", "+"]),
        ("&&", vec!["&", "&"]),
        ("||", vec!["|", "|"]),
        ("<<>>>", vec!["<", "<", ">", ">", ">"]),

        // no compound assignment operators
        (
            "+=-=*=/=%=&=|=~=",
            vec!["+", "=", "-", "=", "*", "=", "/", "=", "%", "=", "&", "=", "|", "=", "~", "="]
        ),

        // Operators sharing common characters
        ("->-<->->=><=>=", vec!["->", "-", "<->", "->", "=>", "<=", ">="]),

        // more complex/realistic sequences of tokens
        ("abc()", vec!["abc", "(", ")"]),
        ("def(true, 83)", vec!["def", "(", "true", ",", " ", "83", ")"]),
        ("qux.lol[index]", vec!["qux", ".", "lol", "[", "index", "]"]),
        ("let x_0 = \"0\";", vec!["let", " ", "x_0", " ", "=", " ", r#""0""#, ";"]),

        (
            "fn recent_followers(start_date: Date) -> [User] {\n    []\n}",
            vec![
                "fn", " ", "recent_followers", "(", "start_date", ":", " ", "Date", ")",
                " ", "->", " ", "[", "User", "]", " ", "{", "\n    ", "[", "]", "\n", "}",
            ]
        ),

        // some programmers just can't be bothered to write spaces between binary ops
        (
            "1.2+999*0.314e+1+2+2718.2818e-3-4",
            vec!["1.2", "+", "999", "*", "0.314e+1", "+", "2", "+", "2718.2818e-3", "-", "4"]
        ),
    ];

    for &(source, ref expected) in test_cases {
        let sources = &[source];
        let actual: Vec<_> = lexer::lex(sources)
            .unwrap()
            .iter()
            .map(|token| token.value)
            .collect();

        assert_eq!(actual, *expected);
    }
}

#[test]
fn invalid_source() {
    let lexemes: &[&str] = &[
        // Invalid punctuation
        "^",
        "!",
        "@",
        "'",
        "$",
        "\\",
        "`",
        "´",
        "–",
        "—",
        "…",
        "˚",
        "∞",
        "™",
        "¡",
        "¬",
        "§",
        "¶",
        "•",
        "≈",
        "˜",
        "√",
        "©",
        "˙",
        "¨",
        "“",
        "”",
        "‘",
        "’",
        "«",
        "»",
        "±",

        // Invalid string literals
        r#""abc def"#,      // Unterminated, no right "
        r#""\x0z""#,        // Invalid character in escape sequence #0
        r#""\u{abcdefg}""#, // Invalid character in escape sequence #1
        r#""\q""#,          // Invalid character in escape sequence #2
        r#""\U{20}""#,      // Invalid character in escape sequence #3
        r#""\X0a""#,        // Invalid character in escape sequence #4
        r#""\x"#,           // Escape sequence too short #0
        r#""\x""#,          // Escape sequence too short #1
        r#""\x9""#,         // Escape sequence too short #2
        r#""\u""#,          // Missing delimiter in Unicode escape #0
        r#""\u{""#,         // Missing delimiter in Unicode escape #1
        r#""\u{}""#,        // Escape sequence too short #3
        r#"\"#,             // Unterminated escape sequence

        // Tricky Unicode (some borrowed from QuickCheck)
        "\u{e0001}",        // Tag
        "\u{e0020}",        // Tag Space
        "\u{e000}",         // Private Use, etc.
        "\u{e001}",
        "\u{ef8ff}",
        "\u{f0000}",
        "\u{ffffd}",
        "\u{ffffe}",
        "\u{fffff}",
        "\u{100000}",
        "\u{10fffd}",
        "\u{10fffe}",
        "\u{10ffff}",       // Highest valid code point
    ];

    for lexeme in lexemes {
        match lexer::lex(&[lexeme]) {
            Ok(tokens) => panic!("Invalid lexeme '{}' was unexpectedly recognized as {:?}", lexeme, tokens),
            Err(Error::Syntax { message, range }) => {
                let expected_range = lexer::Range {
                    start: lexer::Location { src_idx: 0, line: 1, column: 1 },
                    end:   lexer::Location { src_idx: 0, line: 1, column: 1 + grapheme_count(lexeme) },
                };
                assert_eq!(message, "Invalid token");
                assert_eq!(range, expected_range);
            },
            Err(err) => panic!("Lexer must always return a syntax error; got: {}", err),
        }
    }
}

#[cfg_attr(feature = "cargo-clippy", allow(needless_pass_by_value))]
fn quickcheck_valid_lexeme<T: Lexeme>(lexeme: T) -> bool {
    let mut source = String::new();
    let start = lexer::Location { src_idx: 0, line: 1, column: 1 };
    let mut end = start;
    let expected_kind = lexeme.render(&mut source, &mut end);
    let range = lexer::Range { start, end };
    let sources = [source];
    let tokens = lexer::lex(&sources).unwrap();

    assert_eq!(tokens.len(), 1);
    assert_eq!(tokens[0].kind, expected_kind);
    assert_eq!(tokens[0].range, range);
    assert_eq!(tokens[0].value, sources[0]);

    true
}

quickcheck! {
    #[allow(trivial_casts)]
    fn random_sources(sources: Vec<String>) -> bool {
        match lexer::lex(&sources) {
            Err(Error::Syntax { message, range }) => {
                assert_eq!(message, "Invalid token");
                assert!(!sources[range.start.src_idx].is_empty());
                true
            },
            Err(_) => false, // lexer must always produce a syntax error on failure
            Ok(tokens) => {
                sources.iter().all(String::is_empty) && tokens.is_empty()
                ||
                sources.iter().any(|s| !s.is_empty()) && !tokens.is_empty()
            },
        }
    }

    //
    // Test each valid token kind before more complex tests
    //
    #[allow(trivial_casts)]
    fn valid_whitespace(lexeme: ValidWhitespace) -> bool {
        quickcheck_valid_lexeme(lexeme)
    }

    #[allow(trivial_casts)]
    fn valid_line_comment(lexeme: ValidLineComment) -> bool {
        quickcheck_valid_lexeme(lexeme)
    }

    #[allow(trivial_casts)]
    fn valid_word(lexeme: ValidWord) -> bool {
        quickcheck_valid_lexeme(lexeme)
    }

    #[allow(trivial_casts)]
    fn valid_number(lexeme: ValidNumber) -> bool {
        quickcheck_valid_lexeme(lexeme)
    }

    #[allow(trivial_casts)]
    fn valid_string(lexeme: ValidString) -> bool {
        quickcheck_valid_lexeme(lexeme)
    }

    #[allow(trivial_casts)]
    fn valid_punct(lexeme: ValidPunct) -> bool {
        quickcheck_valid_lexeme(lexeme)
    }

    #[allow(trivial_casts)]
    fn any_valid_token(token: ValidToken) -> bool {
        quickcheck_valid_lexeme(token)
    }

    #[allow(trivial_casts)]
    fn valid_source(source: ValidSource) -> bool {
        let (sources, expected) = source.render();
        let actual = lexer::lex(&sources).unwrap();

        assert_eq!(actual.len(), expected.len());

        for (act_tok, exp_tok) in actual.iter().zip(expected) {
            let src_idx = exp_tok.range.start.src_idx;
            let slice = exp_tok.slice.clone();

            assert_eq!(act_tok.kind, exp_tok.kind);
            assert_eq!(act_tok.value, &sources[src_idx][slice]);
            assert_eq!(act_tok.range, exp_tok.range);
        }

        true
    }
}