//! An example of using syntect for code analysis. //! Basically a fancy lines of code count program that works //! for all languages Sublime Text supports and also counts things //! like number of functions and number of types defined. //! //! Another thing it does that other line count programs can't always //! do is properly count comments in embedded syntaxes. For example //! JS, CSS and Ruby comments embedded in ERB files. use syntect::parsing::{SyntaxSet, ParseState, ScopeStackOp, ScopeStack}; use syntect::highlighting::{ScopeSelector, ScopeSelectors}; use syntect::easy::{ScopeRegionIterator}; use std::path::Path; use std::io::{BufRead, BufReader}; use std::fs::File; use walkdir::{DirEntry, WalkDir}; use std::str::FromStr; #[derive(Debug)] struct Selectors { comment: ScopeSelector, doc_comment: ScopeSelectors, function: ScopeSelector, types: ScopeSelectors, } impl Default for Selectors { fn default() -> Selectors { Selectors { comment: ScopeSelector::from_str("comment - comment.block.attribute").unwrap(), doc_comment: ScopeSelectors::from_str("comment.line.documentation, comment.block.documentation").unwrap(), function: ScopeSelector::from_str("entity.name.function").unwrap(), types: ScopeSelectors::from_str("entity.name.class, entity.name.struct, entity.name.enum, entity.name.type").unwrap(), } } } #[derive(Debug, Default)] struct Stats { selectors: Selectors, files: usize, functions: usize, types: usize, lines: usize, chars: usize, code_lines: usize, comment_lines: usize, comment_chars: usize, comment_words: usize, doc_comment_lines: usize, doc_comment_words: usize, } fn print_stats(stats: &Stats) { println!(); println!("################## Stats ###################"); println!("File count: {:>6}", stats.files); println!("Total characters: {:>6}", stats.chars); println!(); println!("Function count: {:>6}", stats.functions); println!("Type count (structs, enums, classes): {:>6}", stats.types); println!(); println!("Code lines (traditional SLOC): {:>6}", stats.code_lines); println!("Total lines (w/ comments & blanks): {:>6}", stats.lines); println!("Comment lines (comment but no code): {:>6}", stats.comment_lines); println!("Blank lines (lines-blank-comment): {:>6}", stats.lines-stats.code_lines-stats.comment_lines); println!(); println!("Lines with a documentation comment: {:>6}", stats.doc_comment_lines); println!("Total words written in doc comments: {:>6}", stats.doc_comment_words); println!("Total words written in all comments: {:>6}", stats.comment_words); println!("Characters of comment: {:>6}", stats.comment_chars); } fn is_ignored(entry: &DirEntry) -> bool { entry.file_name() .to_str() .map(|s| s.starts_with('.') && s.len() > 1 || s.ends_with(".md")) .unwrap_or(false) } fn count_line(ops: &[(usize, ScopeStackOp)], line: &str, stack: &mut ScopeStack, stats: &mut Stats) { stats.lines += 1; let mut line_has_comment = false; let mut line_has_doc_comment = false; let mut line_has_code = false; for (s, op) in ScopeRegionIterator::new(ops, line) { stack.apply(op).unwrap(); if s.is_empty() { // in this case we don't care about blank tokens continue; } if stats.selectors.comment.does_match(stack.as_slice()).is_some() { let words = s.split_whitespace().filter(|w| w.chars().all(|c| c.is_alphanumeric() || c == '.' || c == '\'')).count(); if stats.selectors.doc_comment.does_match(stack.as_slice()).is_some() { line_has_doc_comment = true; stats.doc_comment_words += words; } stats.comment_chars += s.len(); stats.comment_words += words; line_has_comment = true; } else if !s.chars().all(|c| c.is_whitespace()) { line_has_code = true; } if stats.selectors.function.does_match(stack.as_slice()).is_some() { stats.functions += 1; } if stats.selectors.types.does_match(stack.as_slice()).is_some() { stats.types += 1; } } if line_has_comment && !line_has_code { stats.comment_lines += 1; } if line_has_doc_comment { stats.doc_comment_lines += 1; } if line_has_code { stats.code_lines += 1; } } fn count(ss: &SyntaxSet, path: &Path, stats: &mut Stats) { let syntax = match ss.find_syntax_for_file(path).unwrap_or(None) { Some(syntax) => syntax, None => return }; stats.files += 1; let mut state = ParseState::new(syntax); let f = File::open(path).unwrap(); let mut reader = BufReader::new(f); let mut line = String::new(); let mut stack = ScopeStack::new(); while reader.read_line(&mut line).unwrap() > 0 { { let ops = state.parse_line(&line, ss).unwrap(); stats.chars += line.len(); count_line(&ops, &line, &mut stack, stats); } line.clear(); } } fn main() { let ss = SyntaxSet::load_defaults_newlines(); // note we load the version with newlines let args: Vec = std::env::args().collect(); let path = if args.len() < 2 { "." } else { &args[1] }; println!("################## Files ###################"); let mut stats = Stats::default(); let walker = WalkDir::new(path).into_iter(); for entry in walker.filter_entry(|e| !is_ignored(e)) { let entry = entry.unwrap(); if entry.file_type().is_file() { println!("{}", entry.path().display()); count(&ss, entry.path(), &mut stats); } } // println!("{:?}", stats); print_stats(&stats); }