# Full-text extraction rules, one directive per line.
# Lines starting with '#' are comments.

# Send a desktop Firefox User-Agent with every request.
http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/123.0

# July 2020 redesign
tidy: no
prune: no

# Metadata: title from the article element's aria-label, author/date from <meta> tags.
title: //article[@data-area="article"]/@aria-label
author: //meta[@name="author"]/@content
date: //meta[@name="date"]/@content

# Body: the lead paragraph inside the article header plus the main article body container.
body: //article[@data-area="article"]//header//div[contains(@class, 'leading-loose')] | //div[@data-article-el="body"]

# Remove subscription advertiser
strip: //div[@data-area='paywall']

# Remove video with lots of inner tags
strip_id_or_class: jwplayer
strip: //div[@data-component="JWPlayer"]

# Remove lazy-loaded images (old method, kept for reference)
#replace_string():
#strip: //img[contains(concat(" ", normalize-space(@class), " "), " lazyload ")]

# New method: neutralise the inline data: placeholder in src, then promote
# data-src to src. Order matters: the first rule must run before the second.
replace_string(src="data): data-src-disabled="data
replace_string(data-src="): src="
# Drop the <noscript> fallback that directly follows each <picture>.
strip: //picture/following-sibling::noscript[1]

# Remove UI chrome and hidden elements
strip_id_or_class: pointer-events-none
strip_id_or_class: border-separator-b
strip: //button
strip: //svg
strip: //*[@data-app-hidden="true"]
#strip: //*[@data-advertisement]
strip: //aside

# Remove 'Mehr zum Thema' ("More on this topic") section
strip: //div[@data-article-el="body"]//section/div[contains(./span/text(), 'Mehr zum Thema')]

# Remove pull quotes
strip: //div[@data-article-el="body"]//div[contains(concat(" ", normalize-space(@class), " "), " my-32 ")]

# Skip JSON-LD: its name field is always the id of the article (a-710880) instead of the title...
skip_json_ld: yes

# NOTE(review): the test_url values below are empty or truncated in this copy
# of the file (one is only the tail ",1518,787602,00.html"). Restore the full
# article URLs from the upstream config before relying on these tests.
test_url:
test_contains: Und sie kann es, weil sie keiner in die Schranken weist.
test_url:,1518,787602,00.html
test_url:
test_url: