# Site config for thecut.com (copied from nymag.com.txt)

# Article title: the <h2> whose class contains 'primary'
title: //h2[contains(@class, 'primary')]

# Article body candidates (several page layouts)
body: //*[@itemprop="articleBody"]
body: //div[@id='story']
body: //section[@class='body']

# Byline link
author: //*[@class='by']/a

# Publication date: the text following 'Published' in the date element
date: substring-after(//*[@class='date'], 'Published')

# Skip GDPR warning
http_header(Cookie): nymuc=11111111111

strip_id_or_class: article-toc

# Prevent a line-break in numbered subheadings between the number and the sub-heading text.
# The closing tag is automatically transformed.
# NOTE(review): the find_string / replace_string values (and the tag names in the
# comment above) are missing from this copy of the file - presumably HTML snippets
# lost in extraction. Restore them from the upstream site config; until then this
# rule has no usable values.
find_string:
replace_string:

prune: no
tidy: no

strip: //button
strip: //aside

next_page_link: //div[@class='page-navigation']//li[@class='next']/a

test_url: https://www.thecut.com/2018/06/trump-administration-says-it-has-a-family-reunification-plan.html
test_contains: back to their home country

# Example with numbered sub-headings:
test_url: https://www.thecut.com/article/tipping-rules-etiquette-rules.html