# Content-extraction rules for ZEIT articles (www.zeit.de, see the example
# URL at the bottom of this section). One directive per line; lines starting
# with '#' are comments.

tidy: no
prune: no

# As of 2020, this cookie is required.
# The value can be anything, as long as it is set at all.
http_header(Cookie): zonconsent=2022-09-03T19:45:59.150Z

# Pretending to be the Google bot disables the paywall banner for now.
http_header(user-agent): Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)

# Figures are wrapped in a noscript tag which is itself wrapped in a
# conditional comment. Feed readers will fail to parse this correctly,
# so get rid of the noscript tag altogether.
# NOTE(review): the directive below is truncated in this copy of the file
# (no closing parenthesis, no ':' separator, no replacement value). It is
# kept verbatim; restore the full search/replacement pair from the upstream
# version before relying on it.
replace_string( 1]

# Title and publication date are read from the page's <meta> tags.
title: //meta[@property='og:title']/@content
date: //meta[@name='date']/@content

strip: //span[@class='figure__copyright']
strip: //aside
strip: //nav

# Prevent using the 'zeit' logo if the article has no image.
insert_detected_image: no

# Self advertisements. The matched German phrases are 'Dieser Text '
# ("This text ") and 'Dieser Artikel ' ("This article "); the trailing
# space is part of the match.
strip: //figure[@class='figure-stamp']
strip: //a[contains(@title, 'Dieser Text ')]
strip: //a[contains(@title, 'Dieser Artikel ')]
strip: //span[@class='figure__text']/text()[contains(., 'Dieser Text ')]
strip: //span[@class='figure__text']/text()[contains(., 'Dieser Artikel ')]
strip_id_or_class: ad-container

#######################################
# ZEIT:
#######################################

# Multi-page articles: link to the all-on-one-page view and, separately,
# the "Nächste Seite" ("next page") pagination link.
single_page_link: //li[@class='article-pager__all']//a[@data-ct-label='all']
next_page_link: //a[contains(@class, 'article-pagination') and @data-ct-label='Nächste Seite']

# Author: the byline link, or the text following 'Quelle: ' ("Source: ")
# in the metadata source span.
# NOTE(review): repeated directives are presumably tried in order as
# fallbacks - confirm against the consuming tool.
author: //a[@class='byline__author']/span
author: substring-after(//span[@class='metadata__source'], 'Quelle: ')

# Article body candidates, from the most specific layout to the most generic.
body: //header//img | //div[@class='summary' or contains(@class, 'article-body') or @class='byline']
body: //main/article/div[@itemprop='articleBody']
body: //main/article[1]
body: //div[contains(concat(' ',normalize-space(@class),' '),' article-page ')]

# Page furniture removed from the extracted body: hidden headings, '#'
# anchors, the newsletter form, pagination, video players, FAQ links,
# embed wrappers, the article footer and the Z+ badge.
strip: //h2[@class='visually-hidden']
strip: //a[@href='#']
strip: //form[@id='newsletter-teaser-form']
strip_id_or_class: 'article-pagination article__item'
strip_id_or_class: js-videoplayer
strip: //figure[@data-video-provider="brightcove"]
strip: //a[contains(concat(' ',normalize-space(@class),' '),' faq-link ')]
strip_id_or_class: embed-wrapper__inner
strip_id_or_class: article-footer
strip_id_or_class: zplus-badge
strip: //footer

# Needed for wallabag: without this, everything after the embedded content
# is missing. As a side effect, the YouTube embed is kept in the output ;-)
# Example article:
# https://www.zeit.de/politik/ausland/2023-10/israel-hamas-gaza-reaktionen-usa-eu-scholz
find_string: