// Integration tests for the `leptess` bindings. Every test reads fixtures from
// `./tests/` (the `di.png` image and the `tessdata` directory) and compares
// results against hard-coded expectations for that image.
extern crate leptess;
extern crate regex;
use leptess::{
    leptonica::{self, BoxGeometry},
    tesseract,
    LepTess,
    Variable,
};
use regex::Regex;
use std::path::Path;

// Loads the fixture image and expects the reported source Y resolution to be 0.
// NOTE(review): presumably 0 because the PNG carries no resolution metadata — confirm.
#[test]
fn test_source_resolution() {
    let mut lt = LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
    lt.set_image("./tests/di.png").unwrap();
    assert_eq!(lt.get_source_y_resolution(), 0);
}

// Recognizes the fixture image as plain text and checks two consecutive lines.
#[test]
fn test_get_text() {
    let mut lt = LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
    lt.set_image("./tests/di.png").unwrap();
    let text = lt.get_utf8_text().unwrap();
    let mut lines = text.lines();
    // `nth(14)` consumes the first 15 lines and yields the one at index 14.
    assert_eq!(
        "We hold these truths to be self-evident, that all men",
        lines.nth(14).unwrap()
    );
    // The iterator has already advanced, so `nth(0)` yields the very next line
    // (index 15 overall); it is equivalent to `next()`.
    assert_eq!(
        "are created equal, that they are endowed by their",
        lines.nth(0).unwrap()
    );
}

// Requests hOCR output for page 0 of the fixture image.
#[test]
fn test_get_hocr_text() {
    let mut lt = LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
    lt.set_image("./tests/di.png").unwrap();
    let text = lt.get_hocr_text(0).unwrap();
    // NOTE(review): the source looks corrupted from the next statement onward.
    // The string literal opened below is closed by the first character of the
    // following line and is followed by a stray `#).unwrap();`, and `re` is used
    // without any visible definition in this function. It looks like markup inside
    // string literals was stripped and at least one `let re = Regex::new(r#"..."#)`
    // statement (possibly a whole test boundary) was lost. Restore this span from
    // version control; it is left untouched here because the missing text cannot
    // be recovered from this file.
    assert!(text.contains("
"#).unwrap();
    assert!(re.is_match(&text));
    assert!(text.contains("CONTENT=\"Declaration\"/>"));
}

// Requests TSV output for page 0 and checks its general shape and content.
#[test]
fn test_get_tsv_text() {
    let mut lt = LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
    lt.set_image("./tests/di.png").unwrap();
    let text = lt.get_tsv_text(0).unwrap();
    // Matches eleven consecutive tab-terminated fields made of digits and/or `-`,
    // followed by anything.
    let re = Regex::new(r"([-0-9]+\t){11}.*").unwrap();
    assert!(re.is_match(&text));
    assert!(text.contains("Declaration"));
}

// Requests LSTM box output for page 0 and checks that it contains box-like entries.
#[test]
fn test_get_lstm_box_text() {
    let mut lt = LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
    lt.set_image("./tests/di.png").unwrap();
    let text = lt.get_lstm_box_text(0).unwrap();
    // Matches an optional single character followed by five space-prefixed
    // unsigned integers.
    let re = Regex::new(r".?( [0-9]+){5}").unwrap();
    assert!(re.is_match(&text));
}

// Requests WordStr box output for page 0 and checks for the `WordStr` marker and
// two known phrases.
#[test]
fn test_get_word_str_box_text() {
    let mut lt = LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
    lt.set_image("./tests/di.png").unwrap();
    let text = lt.get_word_str_box_text(0).unwrap();
    assert!(text.contains("WordStr"));
    assert!(text.contains("becomes necessary for one people to"));
    assert!(text.contains("to throw off such Government, and to provide new"));
}

// Fetches component boxes at `RIL_WORD` level, then restricts recognition to
// individual boxes and checks the text recognized inside each.
#[test]
fn test_ocr_iterate_word() {
    let mut lt = LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
    lt.set_image("./tests/di.png").unwrap();
    let boxes = lt
        .get_component_boxes(leptess::capi::TessPageIteratorLevel_RIL_WORD, true)
        .unwrap();
    // The unconditional `break` at the end of the body means only the first box is
    // inspected; the loop is just a way to borrow element 0.
    for b in &boxes {
        lt.set_rectangle_from_box(&b);
        let text = lt.get_utf8_text().unwrap();
        assert_eq!(
            BoxGeometry { x: 118, y: 5, w: 17, h: 11 },
            b.get_geometry()
        );
        assert_eq!("IN\n", text);
        break;
    }
    // Consume the collection from the start: `nth(5)` yields the box at index 5.
    let mut iter = boxes.into_iter();
    let b = iter.nth(5).unwrap();
    lt.set_rectangle_from_box(&b);
    assert_eq!("The unanimous Declaration\n", lt.get_utf8_text().unwrap());
    // Same iterator, so `nth(14)` skips 14 more boxes and yields index 20 overall.
    let b = iter.nth(14).unwrap();
    lt.set_rectangle_from_box(&b);
    assert_eq!("people\n", lt.get_utf8_text().unwrap());
}

// Same line checks as `test_get_text`, but through the lower-level API: the image
// is read with `leptonica::pix_read` and handed to a `tesseract::TessApi`.
#[test]
fn test_low_lvl_get_text() {
    let path = Path::new("./tests/di.png");
    let img = leptonica::pix_read(path).unwrap();
    let mut api = tesseract::TessApi::new(Some("./tests/tessdata"), "eng").unwrap();
    api.set_image(&img);
    let text = api.get_utf8_text().unwrap();
    let mut lines = text.lines();
    // Line at index 14, then the line immediately after it (`nth(0)` == `next()`).
    assert_eq!(
        "We hold these truths to be self-evident, that all men",
        lines.nth(14).unwrap()
    );
    assert_eq!(
        "are created equal, that they are endowed by their",
        lines.nth(0).unwrap()
    );
}

// Same box checks as `test_ocr_iterate_word`, but through `tesseract::TessApi`,
// where the boxes come from `get_component_images`.
#[test]
fn test_low_lvl_ocr_iterate_word() {
    let path = Path::new("./tests/di.png");
    let img = leptonica::pix_read(path).unwrap();
    let mut api = tesseract::TessApi::new(Some("./tests/tessdata"), "eng").unwrap();
    api.set_image(&img);
    let boxes = api
        .get_component_images(leptess::capi::TessPageIteratorLevel_RIL_WORD, true)
        .unwrap();
    // Only the first box is inspected because of the unconditional `break`.
    for b in &boxes {
        api.set_rectangle_from_box(&b);
        let text = api.get_utf8_text().unwrap();
        assert_eq!(
            BoxGeometry { x: 118, y: 5, w: 17, h: 11 },
            b.get_geometry()
        );
        assert_eq!("IN\n", text);
        break;
    }
    // Box at index 5, then (continuing on the same iterator) the box at index 20.
    let mut iter = boxes.into_iter();
    let b = iter.nth(5).unwrap();
    api.set_rectangle_from_box(&b);
    assert_eq!("The unanimous Declaration\n", api.get_utf8_text().unwrap());
    let b = iter.nth(14).unwrap();
    api.set_rectangle_from_box(&b);
    assert_eq!("people\n", api.get_utf8_text().unwrap());
}

// Constructing a `TessApi` with the data path `tests_foo` is expected to fail with
// `TessInitError { code: -1 }`.
#[test]
fn test_low_lvl_invalid_data_path() {
    let re = tesseract::TessApi::new(Some("tests_foo"), "eng");
    assert_eq!(Some(tesseract::TessInitError { code: -1 }), re.err());
}

// With no explicit data path, construction is expected to succeed once
// `TESSDATA_PREFIX` points at the fixture tessdata directory. Not compiled on Windows.
// NOTE(review): `set_var` changes the environment of the whole test process, so
// other tests running in the same binary can observe it — confirm that is acceptable.
#[cfg(not(windows))]
#[test]
fn test_low_lvl_read_data_path_from_env() {
    std::env::set_var("TESSDATA_PREFIX", "./tests/tessdata");
    tesseract::TessApi::new(None, "eng").unwrap();
}

// Sets `TesseditCharBlacklist` to "aeiou" and expects the line at index 14 to come
// back without those characters.
#[test]
fn test_set_variable() {
    let mut lt = LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
    lt.set_image("./tests/di.png").unwrap();
    lt.set_variable(Variable::TesseditCharBlacklist, "aeiou")
        .unwrap();
    let text = lt.get_utf8_text().unwrap();
    let mut lines = text.lines();
    assert_eq!(
        "W hld ths trths t b slf-vdnt, tht ll mn",
        lines.nth(14).unwrap()
    );
}