#![allow(clippy::uninlined_format_args)]
use std::{fs, path::Path};

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use str_indices::{chars, lines, lines_crlf, lines_lf, utf16};

fn all(c: &mut Criterion) {
    let root = Path::new(env!("CARGO_MANIFEST_DIR")).join("benches/text");
    let read_text =
        |name: &str| fs::read_to_string(root.join(name)).expect("cannot find benchmark text at");
    // Load benchmark strings.
    let test_strings = vec![
        ("en_0001", "E".into()),
        ("en_0010", read_text("en_10.txt")),
        ("en_0100", read_text("en_100.txt")),
        ("en_1000", read_text("en_1000.txt")),
        ("en_10000", read_text("en_1000.txt").repeat(10)),
        ("jp_0003", "日".into()),
        ("jp_0102", read_text("jp_102.txt")),
        ("jp_1001", read_text("jp_1001.txt")),
        ("jp_10000", read_text("jp_1001.txt").repeat(10)),
    ];

    let line_strings = vec![
        ("lines_100", read_text("lines.txt")),
        ("lines_1000", read_text("lines.txt").repeat(10)),
        ("lines_10000", read_text("lines.txt").repeat(100)),
    ];

    //---------------------------------------------------------
    // Chars.

    // chars::count()
    {
        let mut group = c.benchmark_group("chars::count");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                bench.iter(|| {
                    black_box(chars::count(text));
                })
            });
        }
    }
    {
        // Equivalent implementations using stdlib functions,
        // for performance comparisons.
        let mut group = c.benchmark_group("chars::count_std");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                bench.iter(|| {
                    black_box(text.chars().count());
                })
            });
        }
    }

    // chars::from_byte_idx()
    {
        let mut group = c.benchmark_group("chars::from_byte_idx");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                let idx = text.len();
                bench.iter(|| {
                    black_box(chars::from_byte_idx(text, idx));
                })
            });
        }
    }
    {
        // Equivalent implementations using stdlib functions,
        // for performance comparisons.
        let mut group = c.benchmark_group("chars::from_byte_idx_std");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(format!("std::{}", text_name), |bench| {
                let idx = text.len();
                bench.iter(|| {
                    black_box({
                        let mut byte_idx = idx;
                        // Find the beginning of the code point.
                        while !text.is_char_boundary(byte_idx) {
                            byte_idx -= 1;
                        }
                        // Count the number of chars until the
                        // char that begins at `byte_idx`.
                        text[..byte_idx].chars().count()
                    })
                })
            });
        }
    }

    // chars::to_byte_idx()
    {
        let mut group = c.benchmark_group("chars::to_byte_idx");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                let idx = chars::count(text);
                bench.iter(|| {
                    black_box(chars::to_byte_idx(text, idx));
                })
            });
        }
    }
    {
        // Equivalent implementations using stdlib functions,
        // for performance comparisons.
        let mut group = c.benchmark_group("chars::to_byte_idx_std");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(format!("std::{}", text_name), |bench| {
                let idx = chars::count(text) - 1; // Minus 1 so we can unwrap below.
                bench.iter(|| {
                    black_box(text.char_indices().nth(idx).unwrap().0);
                })
            });
        }
    }

    //---------------------------------------------------------
    // UTF16.

    // utf16::count()
    {
        let mut group = c.benchmark_group("utf16::count");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                bench.iter(|| {
                    black_box(utf16::count(text));
                })
            });
        }
    }

    // utf16::count_surrogates()
    {
        let mut group = c.benchmark_group("utf16::count_surrogates");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                bench.iter(|| {
                    black_box(utf16::count_surrogates(text));
                })
            });
        }
    }

    // utf16::from_byte_idx()
    {
        let mut group = c.benchmark_group("utf16::from_byte_idx");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                let idx = text.len();
                bench.iter(|| {
                    black_box(utf16::from_byte_idx(text, idx));
                })
            });
        }
    }

    // utf16::to_byte_idx()
    {
        let mut group = c.benchmark_group("utf16::to_byte_idx");
        for (text_name, text) in test_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                let idx = utf16::count(text);
                bench.iter(|| {
                    black_box(utf16::to_byte_idx(text, idx));
                })
            });
        }
    }

    //---------------------------------------------------------
    // Lines (unicode).
    let unicode_line_breaks = [
        ("LF", "\u{000A}"),
        ("VT", "\u{000B}"),
        ("FF", "\u{000C}"),
        ("CR", "\u{000D}"),
        ("NEL", "\u{0085}"),
        ("LS", "\u{2028}"),
        ("PS", "\u{2029}"),
        ("CRLF", "\u{000D}\u{000A}"),
    ];

    // lines::count_breaks()
    {
        let mut group = c.benchmark_group("lines::count_breaks");
        for (text_name, text) in line_strings.iter() {
            for (break_name, line_break) in unicode_line_breaks {
                let text = text.replace('\n', line_break);
                group.throughput(Throughput::Bytes(text.len() as u64));
                group.bench_function(format!("{text_name}_{break_name}"), |bench| {
                    bench.iter(|| {
                        black_box(lines::count_breaks(&text));
                    })
                });
            }
        }
    }

    // lines::from_byte_idx()
    {
        let mut group = c.benchmark_group("lines::from_byte_idx");
        for (text_name, text) in line_strings.iter() {
            for (break_name, line_break) in unicode_line_breaks {
                let text = text.replace('\n', line_break);
                group.throughput(Throughput::Bytes(text.len() as u64));
                group.bench_function(format!("{text_name}_{break_name}"), |bench| {
                    let idx = text.len();
                    bench.iter(|| {
                        black_box(lines::from_byte_idx(&text, idx));
                    })
                });
            }
        }
    }

    // lines::to_byte_idx()
    {
        let mut group = c.benchmark_group("lines::to_byte_idx");
        for (text_name, text) in line_strings.iter() {
            for (break_name, line_break) in unicode_line_breaks {
                let text = &text.replace('\n', line_break);
                group.throughput(Throughput::Bytes(text.len() as u64));
                group.bench_function(format!("{text_name}_{break_name}"), |bench| {
                    let idx = lines::count_breaks(text) + 1;
                    bench.iter(|| {
                        black_box(lines::to_byte_idx(text, idx));
                    })
                });
            }
        }
    }

    //---------------------------------------------------------
    // Lines (LF).

    // lines_lf::count_breaks()
    {
        let mut group = c.benchmark_group("lines_lf::count_breaks");
        for (text_name, text) in line_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                bench.iter(|| {
                    black_box(lines_lf::count_breaks(text));
                })
            });
        }
    }
    {
        // Version implemented with stdlib functions,
        // for performance comparisons.  Note: this
        // isn't exactly identical in behavior, since
        // stdlib ignores document-final line breaks.
        // But it should be close enough for perf
        // comparisons.
        let mut group = c.benchmark_group("lines_lf::count_breaks_std");
        for (text_name, text) in line_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                bench.iter(|| {
                    black_box(text.lines().count());
                })
            });
        }
    }

    // lines_lf::from_byte_idx()
    {
        let mut group = c.benchmark_group("lines_lf::from_byte_idx");
        for (text_name, text) in line_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                let idx = text.len();
                bench.iter(|| {
                    black_box(lines_lf::from_byte_idx(text, idx));
                })
            });
        }
    }

    // lines_lf::to_byte_idx()
    {
        let mut group = c.benchmark_group("lines_lf::to_byte_idx");
        for (text_name, text) in line_strings.iter() {
            group.throughput(Throughput::Bytes(text.len() as u64));
            group.bench_function(*text_name, |bench| {
                let idx = lines_lf::count_breaks(text) + 1;
                bench.iter(|| {
                    black_box(lines_lf::to_byte_idx(text, idx));
                })
            });
        }
    }

    //---------------------------------------------------------
    // Lines (CRLF).
    let crlf_line_breaks = [
        ("LF", "\u{000A}"),
        ("CR", "\u{000D}"),
        ("CRLF", "\u{000D}\u{000A}"),
    ];

    // lines_crlf::count_breaks()
    {
        let mut group = c.benchmark_group("lines_crlf::count_breaks");
        for (text_name, text) in line_strings.iter() {
            for (break_name, line_break) in crlf_line_breaks {
                let text = &text.replace('\n', line_break);
                group.throughput(Throughput::Bytes(text.len() as u64));
                group.bench_function(format!("{text_name}_{break_name}"), |bench| {
                    bench.iter(|| {
                        black_box(lines_crlf::count_breaks(text));
                    })
                });
            }
        }
    }

    // lines_crlf::from_byte_idx()
    {
        let mut group = c.benchmark_group("lines_crlf::from_byte_idx");
        for (text_name, text) in line_strings.iter() {
            for (break_name, line_break) in crlf_line_breaks {
                let text = &text.replace('\n', line_break);
                group.throughput(Throughput::Bytes(text.len() as u64));
                group.bench_function(format!("{text_name}_{break_name}"), |bench| {
                    let idx = text.len();
                    bench.iter(|| {
                        black_box(lines_crlf::from_byte_idx(text, idx));
                    })
                });
            }
        }
    }

    // lines_crlf::to_byte_idx()
    {
        let mut group = c.benchmark_group("lines_crlf::to_byte_idx");
        for (text_name, text) in line_strings.iter() {
            for (break_name, line_break) in crlf_line_breaks {
                let text = &text.replace('\n', line_break);
                group.throughput(Throughput::Bytes(text.len() as u64));
                group.bench_function(format!("{text_name}_{break_name}"), |bench| {
                    let idx = lines_crlf::count_breaks(text) + 1;
                    bench.iter(|| {
                        black_box(lines_crlf::to_byte_idx(text, idx));
                    })
                });
            }
        }
    }
}

//-------------------------------------------------------------

// Declare the `benches` group with Criterion's default configuration and
// `all` as its only target, then generate the benchmark binary's `main`.
criterion_group! {
    name = benches;
    config = Criterion::default();
    targets = all
}
criterion_main!(benches);