# Site config for Washington Post article pages (see the test_url entries at the bottom).
# Format: one "directive: value" per line; lines starting with '#' are comments.
#
# Most images are loaded by client-side JavaScript, so they don't show up when using
# Full-Text RSS or the wallabag UI.
# You may use the wallabagger browser plugin instead.

# --- Article body (candidates are listed in order of preference) ---
body: //div[contains(@class, 'meteredContent')]
body: //div[contains(@class, 'article-body')]
# print view
body: //body[@id='print_facet']//div[@id='content']

# --- Author ---
# The first (disabled) expression below only matches after the page's JS has done its work.
# author:(//article//div[contains(@class, 'author-names')])[1]
author://article//div[contains(@class, 'byline')]//a[contains(@class, 'author-name-link')]
author://meta[@name='DC.creator']/@content
author://span[@class="pb-byline"]
author://h3[@property='dc.creator']//a[@rel='author']
author://a[@class='author-name'][1]

# --- Title and date ---
title://meta[@name='title']/@content
# Keeps only the text before ' at ' in the display-date element.
date: substring-before(//article//div[contains(@class, 'display-date')], ' at ')
title://h1
date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title
date://meta[@name="DC.date.issued"]/@content
date://span[contains(@class,"pb-timestamp")]
date://meta[@name="eomportal-lastUpdate"]/@content

# --- Request headers ---
# GDPR cookies
http_header(Cookie): wp_devicetype=0; rplampr=0a|20181213; wp_ak_hpsw=0|20211124; wp_ak_v_ot=1; wp_ak_ot=1|20211012;

# --- Elements to remove (XPath) ---
strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"]
# NOTE(review): this @id value looks like a class list ("wp-column six end") - confirm
# whether @class was intended before changing it.
strip://div[@id="wp-column six end"]
# strip://div[contains(@class,'hidden')]
strip://div[@id='article-side-rail']
strip://div[@class="module component todays-paper-module curved"]
strip://div[@class="module component live-qa curved img-border"]
strip://div[@class="module component newsletter-signup curved"]
strip://div[@class="module featured-stories component curved img-border"]
strip://h3[@property="dc.creator"]
strip: //div[@data-qa="article-body-ad"]
strip: //p[a[@data-qa="interstitial-link"]]
strip: //strong[contains(., 'Read more consumer tech reviews and analysis')]
strip: //strong[contains(., 'Read more tech reviews and advice from')]
strip: //strong[contains(., 'Read more tech advice and analysis from')]
strip: //strong[contains(., 'Read more:')]
strip: //strong[contains(., 'More from PostEverything:')]
strip: //em[contains(., 'You might also like:')]
strip: //div[@data-testid="core-carousel-container" or @aria-roledescription="carousel"]

# --- Raw HTML string replacements ---
# Swap the 32px-wide image variant for the 916px-wide one.
find_string: .jpg&w=32"
replace_string: .jpg&w=916"

# NOTE(review): this find/replace pair has no values in this copy of the file; the
# original strings may have been lost - confirm against upstream before relying on it.
find_string:
replace_string:

# --- Elements to remove (by id or class substring) ---
strip_id_or_class: carousel
strip_id_or_class: toolbar
strip_id_or_class: module
strip_id_or_class: tooltip
strip_id_or_class: powa-wrapper
strip_id_or_class: powa-blurb-wrapper
strip_id_or_class: sr-only

# If all 'hide-for-print' elements are stripped, the wallabagger and pushtokindle
# browser plugins won't load images, so only the inline-magnet wrappers are removed.
#strip_id_or_class: hide-for-print
strip: //div[contains(@class, 'hide-for-print')]/div[@data-qa='inline-magnet']/parent::div

strip: //svg
strip: //button

# --- Post-processing switches ---
prune: no
tidy: no

# --- Single-page (print) view ---
# Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html
# The if_page_contains line must stay directly below the single_page_link it guards.
single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html?noredirect=on")
if_page_contains: //link[@rel="canonical" and contains(@href, '_story.html')]

# [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html
#single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html")

# --- Test URLs ---
test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1
test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html
test_url: http://www.washingtonpost.com/lifestyle/magazine/the-sorry-fate-of-a-tech-pioneer-halsey-minor-and-historic-virginia-estate-carters-grove/2012/05/30/gJQAwdJG4U_story.html
test_url: https://www.washingtonpost.com/technology/2020/12/10/amazon-halo-band-review/
test_url: https://www.washingtonpost.com/posteverything/wp/2015/05/15/take-off-that-fitbit-exercise-alone-wont-make-you-lose-weight/
test_url: https://www.washingtonpost.com/news/wonk/wp/2015/10/04/what-people-in-1900-thought-the-year-2000-would-look-like/