# Extraction rules: one directive per line, `#` starts a comment line.

# --- Body candidates (XPath) ---
body: //article[contains(@class, "-ArticleWrapper")]
body: //article[@class='article__body'] | (//div[contains(@class, 'article__body-content')])[1]
body: (//div[@id="story-body"])[1]
# for video entries
# 2023-07-24 - Marking line below for removal (can't see these attributes any more)
body: //div[contains(@class, "videoInStory") or @id="meta-information"]

# Remove video block without iframe (which appears after JS processing) - see video entry test URLs
strip: //div[contains(@class, 'MediaPlayerWrapper') and not(.//iframe)]

# --- Title ---
title: //meta[@property="og:title"]/@content
title: //h1[@class="story-header"]

# --- Date ---
date: //article//div[contains(@class, 'author-unit__container')]//span
date: //span[@class="story-date"]/span[@class='date']
# for sport site
# 2023-07-24 - Marking for removal (can't see these attributes any more)
date: //meta[@name='DCTERMS.created']/@content

# --- Author ---
author: //meta[@name="author"]/@content
author: //div[@id='headline']//span[@class='byline-name']

# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055
# 2023-07-24 - Marking for removal (can't see these attributes any more)
body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1']

# --- Elements removed from the extracted content ---
strip: //article//header
strip: //div[contains(@class, "CommentsWrapper")]
strip: //*[contains(@class, 'VisuallyHidden')]
strip: //*[@data-testid='byline-role' or @data-testid='drawer-background' or @data-testid='main-footer']
strip: //*[@data-analytics_group_name='More' or @data-analytics_group_name='Related']
strip_image_src: grey-placeholder.png
strip: //svg
strip: //button

# The three exact-class rules below are disabled; the contains() rule that
# follows them matches any "story-feature" class except 'full-width' ones.
#strip: //div[@class="story-feature narrow"]
#strip: //div[@class="story-feature wide"]
#strip: //div[@class="story-feature dslideshow-enclosure"]
strip: //div[contains(@class, "story-feature") and not(contains(@class, 'full-width'))]
strip: //span[@class="story-date"]
#strip: //div[@class="caption body-narrow-width"]
strip: //div[@class="warning"]//p
strip: //div[@id='page-bookmark-links-head']
strip: //object
strip: //div[contains(@class, "bbccom_advert_placeholder")]
strip: //div[contains(@class, "embedded-hyper")]
strip: //div[contains(@class, 'market-data')]
strip: //a[contains(@class, 'hidden')]
strip: //div[contains(@class, 'hypertabs')]
strip: //div[contains(@class, 'related')]
strip: //form[@id='comment-form']
strip: //div[contains(@class, 'comment-introduction')]
strip: //div[contains(@class, 'share-tools')]
strip: //div[@id='also-related-links']
strip: //div[contains(concat(' ',normalize-space(@class),' '),' drop-capped ')]
strip: //section[@data-component="tag-list"]
strip: //section[@data-component="see-alsos"]
strip: //figcaption
strip_id_or_class: image-and-copyright-container
strip_id_or_class: article-body__pull-quote
strip: //aside[contains(@class, 'sp-pullout')]
strip: //section[contains(@class, 'LinksWrapper')]
strip_id_or_class: share-help
strip_id_or_class: comments_module

# --- String substitutions ---
# Each find_string is paired with the replace_string that follows it.
# NOTE(review): the numeric path segment is presumably the image width - confirm.
find_string: http://ichef.bbci.co.uk/news/200/
replace_string: http://ichef.bbci.co.uk/news/624/
find_string: http://ichef.bbci.co.uk/news/304/
replace_string: http://ichef.bbci.co.uk/news/624/
find_string: http://ichef.bbci.co.uk/news/320/
replace_string: http://ichef.bbci.co.uk/news/660/

replace_string({width}{hidpi}): 624
replace_string(