# mobile.nytimes.com appears to serve the same pages as www.nytimes.com now,
# so any changes made here should probably also be made to mobile.nytimes.com.txt.
# The same applies to the .nytimes.com config.

# --- Content extraction ---
# Title: the article headline element, with the Open Graph title meta tag
# listed as an alternative.
title: //h1[@class="articleHeadline"]
title: //meta[@property="og:title"]/@content
# Author: read from the content of the "byl" meta tag.
author://meta[@name="byl"]/@content
# Body: several alternative containers are listed, presumably to cover
# different page templates (TODO confirm which templates are still live).
# The first rule is a union: the articleBody section, the article-summary
# paragraph, and header figures whose class contains 'sizeMedium'.
body: //section[@name="articleBody"] | //p[@id="article-summary"] | //header/figure[contains(@class, 'sizeMedium')]
# The concat/normalize-space form matches 'story-body' as a whole class token
# rather than as a substring of some longer class name.
body: //div[contains(concat(' ',normalize-space(@class),' '),' story-body ')]
body://div[@id="article"]
body://*[@itemprop="articleBody"]
# Same whole-token class match, for 'g-body-article-container'.
body: //div[contains(concat(' ',normalize-space(@class),' '),' g-body-article-container ')]
body: //article[@id='story']
# --- Elements removed from the extracted content ---
# strip_id_or_class rules take an id/class name; strip rules take an XPath.
strip_id_or_class:articleTools
strip_id_or_class:readerscomment
# Disabled rule, kept for reference:
#strip://div[contains(@class, "articleInline runaroundLeft")]
strip: //div[contains(@class, "doubleRule")]
# Strip the image credit, which appears as a bold heading
strip: //div[contains(@class, "articleInline")]//h6
# Page furniture: tools, pagination, member/share widgets, ads, and the
# byline/dateline/headline blocks (title and author have their own rules).
strip_id_or_class:enlargeThis
strip_id_or_class:pageLinks
strip_id_or_class:memberTools
strip_id_or_class:articleExtras
strip_id_or_class:singleAd
strip_id_or_class:byline
strip_id_or_class:dateline
strip_id_or_class:articleheadline
strip_id_or_class:articleBottomExtra
strip_id_or_class:shareTools
strip_id_or_class:story-meta
strip_id_or_class:related-coverage
strip_id_or_class:ad-header
strip_id_or_class:bottom-ad
strip_id_or_class:advert_item
strip_id_or_class:burst-app
# Links whose href points into nytimes.com/adx/
strip://a[contains(@href, 'nytimes.com/adx/')]
strip: //nyt_byline
# Spans whose class mentions 'slideshow' or 'video'
strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
# 'More Photos' links inside captions
strip: //p[@class='caption']//a[contains(., 'More Photos')]
strip_id_or_class: ResponsiveAd
# Spans inside figures whose entire text is 'Image' or 'Video'
strip: //figure//span[.='Image' or .='Video']
# The div labelled by "modal-title", plus all buttons
strip: //div[@aria-labelledby="modal-title"]
strip: //button
# In-story ad slots (ids beginning with 'story-ad-')
strip: //div[starts-with(@id, 'story-ad-')]
# Asides, newsletter promos, share tools and complementary regions
strip: //aside
strip: //div[@data-test-id="RecommendedNewsletter"]
strip: //div[@data-test-id="share-tools"]
strip: //div[@role="complementary"]
strip: //section[contains(@role, "complementary")]
strip: //video
# Nested spans (two or three levels deep) whose text contains 'Credit'
strip: //span/span[contains(text(), 'Credit')]
strip: //span/span/span[contains(text(), 'Credit')]
strip: //img[contains(@class, 'post-screenshot')]

# Rules added in August 2022
strip_id_or_class: sponsor-wrapper
# Strip every aria-hidden="true" element except those that are direct
# children of a figcaption, so that figcaption content is preserved.
strip: //*[@aria-hidden="true" and not(./parent::figcaption)]
# Share tools, matched here on any element via data-testid
strip: //*[@data-testid="share-tools"]
# Any element whose id contains '-recirc'
strip: //*[contains(@id, '-recirc')]

# If the HTML has been parsed by JS:
# remove duplicate images (pictures and lazy-image containers that sit inside
# a div styled "visibility: hidden")
strip: //div[contains(@style, "visibility: hidden")]//picture
strip: //div[contains(@style, "visibility: hidden")]//div[@data-testid="lazyimage-container"]
# remove inline messages
strip: //div[@data-testid="inline-message"]

# Further id/class names to strip (no-content regions, hidden blocks,
# related-links blocks and graphic labels, judging by the names).
strip_id_or_class: robots-nocontent
strip_id_or_class: hidden
strip_id_or_class: NYT_MAIN_CONTENT_1_REGION
strip_id_or_class: related-links-block
strip_id_or_class: g-LABELS

# Send a curl user-agent string when requesting pages from this site.
http_header(user-agent): curl/7.83.1

# Turn off the extractor's prune and tidy options for this site
# (see the site-config documentation for what each step does).
prune: no
tidy: no

# Publication date, read from meta tags. Multiple date rules are alternatives
# evaluated in order, so the two original rules keep priority.
date: //meta[@property="article:published"]/@content
date: //meta[@itemprop="datePublished"]/@content
# Fallback: the standard Open Graph article timestamp property, for pages that
# expose neither of the tags above.
date: //meta[@property="article:published_time"]/@content

# Raw string replacements, given as find_string/replace_string pairs.
# 1) Rename the src attribute of the transparent.png placeholder image to
#    ignore-src, so the placeholder is no longer the image source.
find_string: src='https://static01.nyt.com/packages/flash/multimedia/ICONS/transparent.png
replace_string: ignore-src='https://static01.nyt.com/packages/flash/multimedia/ICONS/transparent.png
# 2) Turn data-mediaviewer-src attributes pointing at static01.nyt.com into
#    plain src attributes.
find_string: data-mediaviewer-src='https://static01.nyt.com
replace_string: src='https://static01.nyt.com

# Single-page view: a <link> element whose href contains 'pagewanted=all'.
single_page_link: //link[contains(@href, 'pagewanted=all')]
# mobile.nytimes.com looks the same as regular www.nytimes.com now, so the
# mobile alternate link below is disabled; both old rules are kept for reference.
#single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href
#single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all')

# Remove h6 headings whose class is exactly 'kicker'.
strip://h6[@class = 'kicker']

# --- Test URLs ---
# Each test_url is a sample article; where a test_contains line follows, it
# gives text expected to appear in the extracted content.
test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
test_contains: In this column I want to look at a not uncommon way of writing

test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
test_contains: IF you’ve seen enough of Aaron Sorkin’s theater

test_url: https://www.nytimes.com/interactive/2016/books/review/best-books.html
test_contains: invention and speculation flow together

# Additional sample URLs listed without an expected-text check.
test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html
test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html
test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html
test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html
test_url: https://www.nytimes.com/interactive/2015/12/16/upshot/100000004092329.app.html?_r=2
test_url: https://www.nytimes.com/2022/02/27/business/economy/price-increases-inflation.html
