import re
from textwrap import dedent
from unittest import TestCase
from eyecite import clean_text, get_citations
from eyecite.utils import dump_citations
class UtilsTest(TestCase):
    """Tests for the ``clean_text`` and ``dump_citations`` helpers."""

    def test_clean_text(self):
        """Run clean_text over a table of (steps, input, expected) cases."""
        # Each entry: (cleaning steps to apply, raw text, expected result).
        test_pairs = (
            (["inline_whitespace"], " word \t \n word ", " word \n word "),
            (["all_whitespace"], " word \t \n word ", " word word "),
            (["underscores"], "__word__word_", "wordword_"),
            # NOTE(review): this input contains no markup, so the "html"
            # step is barely exercised here -- confirm the intended input.
            (["html"], " word ", " word "),
            (
                ["html", "underscores", "inline_whitespace"],
                " __ word word ",
                " word word ",
            ),
        )
        for steps, text, expected in test_pairs:
            # subTest tags a failure with the case that produced it; on
            # runners that support subtests the remaining cases still run
            # instead of being hidden behind the first failure.
            with self.subTest(steps=steps, text=text):
                print(
                    "Testing clean_text for %s" % text.replace("\n", " "),
                    end=" ",
                )
                result = clean_text(text, steps)
                self.assertEqual(
                    result,
                    expected,
                )
                print("✓")

    def test_clean_text_invalid(self):
        """An unrecognized cleaning step name must raise ValueError."""
        with self.assertRaises(ValueError):
            clean_text("foo", ["invalid"])

    def test_dump_citations(self):
        """Compare dump_citations output, minus colors, to a fixed report."""
        text = "blah. Foo v. Bar, 1 U.S. 2, 3-4 (1999). blah"
        cites = get_citations(text)
        dumped_text = dump_citations(cites, text)
        dumped_text = re.sub(r"\x1B.*?m", "", dumped_text)  # strip colors
        # NOTE(review): the expected report below has no nesting between
        # section headers and their entries -- verify it against the real
        # dump_citations layout. Uniform indentation here is removed by
        # dedent(), and both sides are strip()ped before comparison.
        expected = dedent(
            """
            FullCaseCitation: blah. Foo v. Bar, 1 U.S. 2, 3-4 (1999). blah
            * groups
            * volume='1'
            * reporter='U.S.'
            * page='2'
            * metadata
            * pin_cite='3-4'
            * year='1999'
            * court='scotus'
            * plaintiff='Foo'
            * defendant='Bar'
            * year=1999
            """
        )
        self.assertEqual(dumped_text.strip(), expected.strip())