# Site config for alternatives-economiques.fr
# One directive per line; lines starting with '#' are comments.

# Publication date
date: //header[contains(concat(' ',normalize-space(@class),' '),' o-page__content__head ')]//div[contains(concat(' ',normalize-space(@class),' '),' o-infos ')]//time[contains(concat(' ',normalize-space(@class),' '),' o-infos__date ')]

# Author (XPath union of three candidate locations for the author link)
author: //div[contains(@class, 'o-page__content__body')]//a[contains(@href, '/users/')] | //header//div[contains(@class, "o-page__content__who")]//a | //article//a[contains(@href, '/users/')]

# Content
body: //div[contains(concat(' ',normalize-space(@class),' '),' o-page ')]

# Remove quotes and other non-article elements
strip: //div[contains(concat(' ',normalize-space(@class),' '),' o-page__footer ')]
strip: //noscript
strip_id_or_class: c-same-subject
strip_id_or_class: o-head
strip_id_or_class: c-kiosk--single
strip_id_or_class: o-page__footer
strip_id_or_class: o-page__figure__trigger
strip_id_or_class: o-view-more
strip_id_or_class: c-comments
strip_id_or_class: c-epigraph
strip: //div[contains(@class, 'o-page__content__who')]
strip: //div[contains(@class, 'c-kiosk--single__body')]

# Optional strip, left disabled (removed because the result is clearer this way)
#strip_id_or_class: o-page__content__head

# Login
requires_login: yes
login_uri: https://www.alternatives-economiques.fr/connexion
login_username_field: email
login_password_field: password
# Match the marker class as a whitespace-delimited token (same idiom as the
# date/body rules above) so the check still works if <body> carries additional
# classes; an exact @class="..." comparison would not.
not_logged_in_xpath: //body[contains(concat(' ',normalize-space(@class),' '),' c-header__logger--not-logged ')]

# Test URLs
test_url: https://www.alternatives-economiques.fr/etats-unis-overdoses-tuent-plus-armes-a-feu/00085167
test_url: https://www.alternatives-economiques.fr/nucleaire-france-a-lheure-choix/00087747