# Author: zinnober
# Complete rewrite of the faz.net template, as the standard one is broken.
# I tried to consider as many page variants as possible, which was some serious work.

tidy: no
prune: no

# Title
title: //p[@class='Content HeadlineShort']

# Set author (several page variants; the first expression listed strips the leading 'von ')
author: substring-after(//span[@class='Autor'], 'von ')
author: //span[@class='caps last']/span[@class='caps last']
author: //a[@rel='author']

# Set date
date: //span[@class='Datum']
# NOTE(review): this rule previously read "//span[@class='Datum'],/span", which is
# not valid XPath 1.0 (stray comma). Presumably the nested <span> was meant - confirm.
date: //span[@class='Datum']/span

# Fetch full multipage articles
single_page_link: //a[contains(@href, 'printPagedArticle')]

# Content is here (one rule per known page layout)
body: //article[@class='storytelling']
body: //article[@class='article']//div[contains(@class,'body-elements')] | (//div[contains(@class,'header-teaser__image')])[1] | (//div[contains(concat(' ',normalize-space(@class),' '),' header-teaser ')])[last()]
body: //article[1]
body: //div[@class='Artikel']

# Tidy up before article
strip: //div[@id='FAZHeaderNeu']
strip: //h2[@itemprop='headline']
strip: //span[@class='Datum']
strip: //span[@class='Autor']
strip_id_or_class: ArticlePagerTop
strip_id_or_class: header-detail
strip_id_or_class: intro-text
strip: //button[contains(@class,'image-toggle')]

# General cleanup
strip: //div[@class='clear']
strip: //a[@title='Zur Homepage FAZ.NET']
#strip: //iframe
# Targets the " · " (middle dot) separator; the search text had been stored
# mis-encoded and is restored here.
# NOTE(review): no replacement text is visible after the colon - confirm the
# intended replacement value.
replace_string( · ):
strip_id_or_class: TeaserMore
strip_id_or_class: plista_alternativ
strip_id_or_class: paywall
#strip: //button
strip_id_or_class: header-teaser__image-details
strip_id_or_class: tik4-sharing

# Remove tracking and ads
strip_image_src: /l.gif?
# Background-image containers, 1px-wide images, hidden elements and ad slots
strip: //div[contains(@style, 'background-image')]
strip: //img[@width='1']
strip_id_or_class: invisible
strip_id_or_class: Anzeige
strip_id_or_class: billboard

# Remove various text boxes and social media foo
strip_id_or_class: WeitereBeitraege
strip_id_or_class: WBListe
strip_id_or_class: AutorenModul
strip_id_or_class: Community
strip_id_or_class: SocialMediaStatus
strip_id_or_class: RelatedLinkBox
strip_id_or_class: MultimediaNavigation
strip_id_or_class: IndexTitel
strip_id_or_class: cbx-Author-is-in-article-container-info
strip_id_or_class: BigBox
strip_id_or_class: upper-toolbar

# Fix picture caps and pictures (use better resolution and remove clutter)
strip_id_or_class: LightBoxOverlay
strip_id_or_class: exitLarge
strip_id_or_class: PagerBox
strip_id_or_class: Bildnachweis
strip_id_or_class: Bildueberschrift
strip_id_or_class: Bildbeschreibung
strip_id_or_class: ArtikelBild610
strip_id_or_class: MediaLink
strip_id_or_class: FotoBoxInnerLeft
strip_id_or_class: BilderRelatedLinks

# Handle