use colored::*;
use rayon::prelude::*;
use std::time::Instant;
//use std::alloc;
//use cap::Cap;

//#[global_allocator]
//static ALLOCATOR: Cap<alloc::System> = Cap::new(alloc::System, usize::max_value());

fn main() {
    //ALLOCATOR.set_limit(60 * 1024 * 1024).unwrap();
    let arguments: Vec<String> = std::env::args().collect();

    let help_documentation = "
------------
--help: Print this help message
-f: The file pithy will read from. Required.
--sentences: The number of sentences for pithy to return. Defaults to 3.
--approximate: Will return a decent approximation of the summary. Good for extremely long texts where you don't care about precision.
--context: A second file to use as context for the summary. Experimental. May not actually affect the final results, but can improve biased summaries.
--bias: Slash-separated (i.e. \"/\") list of words to bias the summary towards. Very experimental; try lots of synonyms. If you are using pithy on a large text, increase chunk_size to 2500-5000 to get relevant results. Note that this doesn't work in approximate mode.
--bias_strength: The strength of the bias. Defaults to 6.
--by_section: If set, pithy splits the text into sections and summarises each section separately. Defaults to false.
--chunk_size: The number of sentences to read at a time. Defaults to 500 if unspecified.
--force_all: If set, pithy reads the text all at once. Can be quite slow once you go past the 7k mark. Defaults to false.
--force_chunk: If set, pithy splits the text into chunks regardless of how large it is. Should be used in combination with chunk_size and by_section.
--ngrams: If set, pithy uses ngrams rather than words. It's usually crap, but you might use it as a last resort for non-spaced languages that you can't pre-tokenise. Defaults to false.
--min_length: The minimum sentence length before filtering. Defaults to 50.
--max_length: The maximum sentence length before filtering. Defaults to 1500.
--separator: The separator used to split the text into sentences. Defaults to '.'. You can type newline to separate by newlines.
--clean_whitespace: If set, removes sentences with excessive whitespace. Useful for pdfs and copy-pastes from websites.
--clean_nonalphabetic: If set, removes sentences with too many non-alphabetic characters.
--clean_caps: If set, removes sentences with too many capital letters. Useful if the text contains a lot of references or indices.
--length_penalty: The length penalty. Defaults to 0.6. Decrease to favour longer sentences, increase to favour shorter ones.
--density: Experimental setting. Defaults to 3. Setting it lower seems to bias pithy's summaries towards more common words; setting it higher seems to bias them towards rarer but more informative words.
--no_context: If set, the context surrounding each sentence isn't printed. Defaults to false.
--relevance: If set, the sentences are sorted by their relevance rather than their order in the original text. Defaults to false.
--nobar: If set, the progress bar is not printed. Defaults to false because progress bars are cool.
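    //Flag parsing below follows one convention: a flag's value is the argument
    //immediately after it. A hypothetical helper capturing the pattern (shown
    //for clarity only, not used below):
    //
    //    fn arg_value<'a>(args: &'a [String], flag: &str) -> Option<&'a String> {
    //        args.iter().position(|x| x == flag).and_then(|i| args.get(i + 1))
    //    }
    //
    //e.g. for ["pithy", "-f", "book.txt"], arg_value(&arguments, "-f") would be Some("book.txt").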
------------
pithy 0.1.0 - an absurdly fast, strangely accurate, summariser
------------
Quick example: pithy -f your_file_here.txt --sentences 4
";

    if arguments.contains(&"--help".to_string()) || arguments.len() == 1 {
        println!("{}", help_documentation);
        return;
    }
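    let by_section = arguments.contains(&"--by_section".to_string());
    let approximate = arguments.contains(&"--approximate".to_string());

    let chunk_size = if arguments.contains(&"--chunk_size".to_string()) {
        Some(
            arguments
                .get(arguments.iter().position(|x| x == "--chunk_size").unwrap() + 1)
                .expect("No chunk size provided")
                .parse::<usize>()
                .unwrap(),
        )
    } else {
        None
    };

    let second_file = if arguments.contains(&"--context".to_string()) {
        Some(
            arguments
                .get(arguments.iter().position(|x| x == "--context").unwrap() + 1)
                .expect("No context file provided"),
        )
    } else {
        None
    };

    let bias_list = if arguments.contains(&"--bias".to_string()) {
        Some(
            arguments
                .get(arguments.iter().position(|x| x == "--bias").unwrap() + 1)
                .expect("No bias list provided")
                .split('/')
                .map(|x| x.to_string())
                .collect::<Vec<String>>(),
        )
    } else {
        None
    };

    let bias_strength = if arguments.contains(&"--bias_strength".to_string()) {
        Some(
            arguments
                .get(
                    arguments
                        .iter()
                        .position(|x| x == "--bias_strength")
                        .unwrap()
                        + 1,
                )
                .expect("No bias strength provided")
                .parse::<f32>()
                .unwrap(),
        )
    } else {
        Some(6.0)
    };

    let filename = arguments
        .get(arguments.iter().position(|x| x == "-f").unwrap() + 1)
        .expect("No filename provided");

    let number_of_sentences_to_return = if arguments.contains(&"--sentences".to_string()) {
        arguments
            .get(arguments.iter().position(|x| x == "--sentences").unwrap() + 1)
            .expect("No number of sentences provided")
            .parse::<usize>()
            .unwrap()
    } else {
        3
    };

    let force_all = arguments.contains(&"--force_all".to_string());
    let force_chunk = arguments.contains(&"--force_chunk".to_string());

    //"." is the default separator; passing the literal word "newline" selects "\n".
    let separator = if arguments.contains(&"--separator".to_string()) {
        let arg = arguments
            .get(arguments.iter().position(|x| x == "--separator").unwrap() + 1)
            .expect("No separator provided");
        if arg == "newline" {
            "\n"
        } else {
            arg.as_str()
        }
    } else {
        "."
    };

    let ngrams = arguments.contains(&"--ngrams".to_string());

    let min_length = if arguments.contains(&"--min_length".to_string()) {
        arguments
            .get(arguments.iter().position(|x| x == "--min_length").unwrap() + 1)
            .expect("No minimum length provided")
            .parse::<usize>()
            .unwrap()
    } else {
        50
    };

    let max_length = if arguments.contains(&"--max_length".to_string()) {
        arguments
            .get(arguments.iter().position(|x| x == "--max_length").unwrap() + 1)
            .expect("No maximum length provided")
            .parse::<usize>()
            .unwrap()
    } else {
        1500
    };

    let relevance = arguments.contains(&"--relevance".to_string());
    let no_context = arguments.contains(&"--no_context".to_string());
    let clean_whitespace = arguments.contains(&"--clean_whitespace".to_string());
    let clean_nonalphabetic = arguments.contains(&"--clean_nonalphabetic".to_string());
    let clean_caps = arguments.contains(&"--clean_caps".to_string());

    let length_penalty = if arguments.contains(&"--length_penalty".to_string()) {
        arguments
            .get(
                arguments
                    .iter()
                    .position(|x| x == "--length_penalty")
                    .unwrap()
                    + 1,
            )
            .expect("No length penalty provided")
            .parse::<f32>()
            .unwrap()
    } else {
        0.6
    };

    let density = if arguments.contains(&"--density".to_string()) {
        arguments
            .get(arguments.iter().position(|x| x == "--density").unwrap() + 1)
            .expect("No density provided")
            .parse::<f32>()
            .unwrap()
    } else {
        3.0
    };

    //true when --nobar is passed; note that the library call below takes the opposite (show-bar) flag.
    let no_bar = arguments.contains(&"--nobar".to_string());

    //For instance, with the flags documented in the help text above, a call like
    //  pithy -f moby_dick.txt --sentences 5 --bias whale/ocean --chunk_size 2500
    //parses to number_of_sentences_to_return = 5, bias_list = Some(["whale", "ocean"])
    //and chunk_size = Some(2500); everything else keeps its default.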
    let now = Instant::now();
    let raw_text = std::fs::read_to_string(filename).expect("Could not open the file");

    //api: add_raw_text(name: String, raw_text: String, separator: &str, min_length: usize, max_length: usize, ngrams: bool)
    let mut summariser = pithy::Summariser::new();
    summariser.add_raw_text(
        filename.clone(),
        raw_text,
        separator,
        min_length,
        max_length,
        ngrams,
    );

    if let Some(context_file) = second_file {
        let raw_text =
            std::fs::read_to_string(context_file).expect("Could not open the context file");
        summariser.add_raw_text(
            context_file.clone(),
            raw_text,
            separator,
            min_length,
            max_length,
            ngrams,
        );
    }

    //api: clean_sentences(excessive_whitespace: bool, excessive_punctuation_and_nums: bool, excessive_caps: bool)
    if clean_whitespace || clean_nonalphabetic || clean_caps {
        summariser.clean_sentences(clean_whitespace, clean_nonalphabetic, clean_caps);
    }

    //api: top_sentences(number_of_sentences_to_return: usize, return_summaries_for_each: bool, chunk_size: Option<usize>, force_sum_all: bool, length_penalty: f32, ...)
    let mut summary = if !approximate {
        summariser.top_sentences(
            number_of_sentences_to_return,
            by_section,
            chunk_size,
            force_all,
            length_penalty,
            force_chunk,
            density,
            bias_list,
            bias_strength,
            !no_bar,
            filename.clone(),
        )
    } else {
        summariser.approximate_top_sentences(
            number_of_sentences_to_return,
            density,
            length_penalty,
        )
    };

    if !no_bar {
        println!("Summarising took {} seconds", now.elapsed().as_secs_f32());
    }

    //Unless --relevance is set, restore the summary sentences to their original order in the text.
    if !relevance {
        summary.par_sort_unstable_by(|a, b| a.index.partial_cmp(&b.index).unwrap());
    }

    //Pretty-print the summary, retrieving each sentence's neighbours by index.
    let mut sentence_indices = summariser.sentences.keys().cloned().collect::<Vec<_>>();
    sentence_indices.sort_unstable();
    //summariser.semirandom_walk(summary[0].index, 5);
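    //Sentence indices can have gaps, because length filtering and the clean_*
    //options remove sentences. A summary sentence's context is therefore the
    //closest surviving index on either side: e.g. if the surviving indices are
    //[2, 5, 9], the context of sentence 5 is sentences 2 and 9.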
    for sentence in &summary {
        let index_number = sentence.index;
        if !no_context {
            //There might be missing indices, so the previous sentence is the one
            //with the closest surviving index below this sentence's index, and
            //the next sentence the one with the closest surviving index above it.
            let previous_sentence = sentence_indices
                .iter()
                .filter(|x| **x < index_number)
                .last()
                .and_then(|idx| summariser.sentences.get(idx))
                .map(|s| s.text.clone())
                .unwrap_or_default();
            let next_sentence = sentence_indices
                .iter()
                .find(|x| **x > index_number)
                .and_then(|idx| summariser.sentences.get(idx))
                .map(|s| s.text.clone())
                .unwrap_or_default();
            print!(
                "\n{}\n{}{}{}{}{}{}",
                sentence.index,
                separator,
                previous_sentence.italic(),
                separator,
                sentence.text.bold().red(),
                separator,
                next_sentence.italic()
            );
        } else {
            println!(
                "{}",
                sentence.index.to_string().underline().italic().magenta()
            );
            println!("{}", sentence.text.bold().cyan());
            println!();
        }
    }
    println!();

    //If the bar is turned off, concatenate sentence.text and write it to stdout,
    //so that pithy can be used in pipes:
    //use std::io::Write;
    //if no_bar {
    //    let stdout = std::io::stdout();
    //    let lock = stdout.lock();
    //    let mut w = std::io::BufWriter::new(lock);
    //    let mut output = String::new();
    //    for sentence in summary {
    //        output.push_str(&sentence.text);
    //        output.push_str(separator);
    //    }
    //    w.write_all(output.as_bytes()).unwrap();
    //}
}
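//Hypothetical shell usage once the pipe-friendly block above is enabled:
//  pithy -f notes.txt --sentences 3 --nobar --no_context | less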