from pathlib import Path
from unittest import TestCase

from eyecite import annotate_citations, clean_text, get_citations


class AnnotateTest(TestCase):
    """Tests for ``annotate_citations`` run against cleaned and raw text."""

    def test_annotate(self):
        """Annotate the citations found in each fixture and compare markup.

        Each fixture is ``(source_text, expected, clean_steps[, kwargs])``:
        ``source_text`` is cleaned with ``clean_steps``, citations are
        extracted from the cleaned text, and every citation is wrapped in
        numbered markers. The optional fourth item is a dict of extra keyword
        arguments forwarded to ``annotate_citations``.
        """

        def straighten_quotes(text):
            # Custom cleaner: replace the typographic apostrophe with ASCII.
            return text.replace("’", "'")

        def lower_annotator(before, text, after):
            # Custom annotator: lower-case the cited text between the markers.
            return before + text.lower() + after

        # NOTE(review): the fixtures below that use the "html" cleaner contain
        # no markup as written, and the closing marker built further down has
        # no "</" prefix. Both look like the result of tag stripping somewhere
        # upstream of this file -- confirm against the canonical fixtures
        # before relying on these cases. Strings are kept exactly as found.
        test_pairs = (
            # single cite
            ("1 U.S. 1", "<0>1 U.S. 10>", []),
            # cite with extra text
            ("foo 1 U.S. 1 bar", "foo <0>1 U.S. 10> bar", []),
            # cite with punctuation
            ("foo '1 U.S. 1' bar", "foo '<0>1 U.S. 10>' bar", []),
            # law cite
            (
                "foo. Mass. Gen. Laws ch. 1, § 2. bar",
                "foo. <0>Mass. Gen. Laws ch. 1, § 20>. bar",
                [],
            ),
            # journal cite
            (
                "foo. 1 Minn. L. Rev. 2. bar",
                "foo. <0>1 Minn. L. Rev. 20>. bar",
                [],
            ),
            # Id. cite
            (
                "1 U.S. 1. Foo. Id. Bar. Id. at 2.",
                "<0>1 U.S. 10>. Foo. <1>Id.1> Bar. <2>Id. at 22>.",
                [],
            ),
            # Supra cite
            (
                "1 U.S. 1. Foo v. Bar, supra at 2.",
                "<0>1 U.S. 10>. Foo v. Bar, <1>supra at 21>.",
                [],
            ),
            # whitespace and html -- no unbalanced tag check
            (
                # The literal held a raw line break here; written as an
                # escape so the string stays a valid single-line literal.
                "\nfoo 1 U.S. 1 bar",
                "foo <0>1 U.S. 10> bar",
                ["html", "inline_whitespace"],
            ),
            # whitespace and html -- skip unbalanced tags
            (
                "foo 1 U.S. 1; 2 U.S. 2",
                "foo 1 U.S. 1; <1>2 U.S. 21>",
                ["html", "inline_whitespace"],
                {"unbalanced_tags": "skip"},
            ),
            # whitespace and html -- wrap unbalanced tags
            (
                "1 U.S. 1; 2 U.S. 2",
                "<0>1 U.S.0><0> 10>; <1>2 U.S. 21>",
                ["html", "inline_whitespace"],
                {"unbalanced_tags": "wrap"},
            ),
            # tightly-wrapped html -- skip unbalanced tags (issue #54)
            (
                "foo Ibid. bar",
                "foo <0>Ibid.0> bar",
                ["html", "inline_whitespace"],
                {"unbalanced_tags": "skip"},
            ),
            # whitespace containing linebreaks
            ("1\nU.S. 1", "<0>1\nU.S. 10>", ["all_whitespace"]),
            # multiple Id. tags
            (
                "1 U.S. 1. Id. 2 U.S. 2. Id.",
                "<0>1 U.S. 10>. <1>Id.1> <2>2 U.S. 22>. <3>Id.3>",
                [],
            ),
            # replacement in cleaners
            (
                "1 Abbott’s Pr.Rep. 1",
                "<0>1 Abbott’s Pr.Rep. 10>",
                [straighten_quotes],
            ),
            # custom annotator
            (
                "1 U.S. 1",
                "<0>1 u.s. 10>",
                [],
                {"annotator": lower_annotator},
            ),
        )
        for source_text, expected, clean_steps, *extra in test_pairs:
            # A fixture may carry one trailing dict of annotate_citations
            # keyword arguments; default to none when it is absent.
            annotate_kwargs = extra[0] if extra else {}
            with self.subTest(
                source_text,
                clean_steps=clean_steps,
                annotate_args=annotate_kwargs,
            ):
                plain_text = clean_text(source_text, clean_steps)
                cites = get_citations(plain_text)
                # One (span, before, after) triple per citation, numbered in
                # the order the citations were returned.
                annotations = [
                    (c.span(), f"<{i}>", f"{i}>")
                    for i, c in enumerate(cites)
                ]
                annotated = annotate_citations(
                    plain_text,
                    annotations,
                    source_text=source_text,
                    **annotate_kwargs,
                )
                self.assertEqual(annotated, expected)

    def test_long_diff(self):
        """Does diffing work across a long text with many changes?"""
        opinion_text = (
            Path(__file__).parent / "assets" / "opinion.txt"
        ).read_text()
        cleaned_text = clean_text(opinion_text, ["all_whitespace"])
        # The span is given in cleaned-text offsets; the uncleaned text is
        # passed as the third argument and the assertion below expects the
        # markers to wrap text that still contains its line break.
        annotated_text = annotate_citations(
            cleaned_text, [((902, 915), "~FOO~", "~BAR~")], opinion_text
        )
        self.assertIn("~FOO~539\n U. S. 306~BAR~", annotated_text)