package com.speechify.client.helpers.content.standard.html.contentExtractionRules.knownPages.knowledge.nyTimes

import com.speechify.client.helpers.content.standard.html.ElementMatcher
import com.speechify.client.helpers.content.standard.html.contentExtractionRules.RelevantContentExtractionRules

/**
 * Content-extraction rules for The New York Times, keyed by `nytimes.com`.
 *
 * The `lazy` delegate defers building the map until first access. It is used here only so that this
 * overview can be declared above the detailed `contentExtractionRules` it refers to
 * (overview-before-detail / what-before-how).
 */
internal val nyTimesContentExtractionRules by lazy {
    mapOf("nytimes.com" to contentExtractionRules)
}

/**
 * Rules for `nytimes.com`. Desktop and mobile are covered in one layout there, so a single rule set is used.
 *
 * Only `ignored` is configured: it is a `CompositeAnyOf` over the matchers declared below.
 */
private val contentExtractionRules = run {
    // Adverts. There seem to be more types of adverts than this one. These appear e.g. on
    //   https://www.nytimes.com/live/2022/08/30/world/ukraine-russia-war-news
    // A copy of that page is also stored here:
    //   https://firebasestorage.googleapis.com/v0/b/speechifymobile.appspot.com/o/multiplatform%2Fimport%2FBOJ3CWKUCLhs6ee1aS8F88enjKx1%2F9d8d4b75-fb76-41d4-ad69-e94d49b63fd4?alt=media&token=c18c0986-d815-4a7f-ba3c-cc13a45b68cb
    val standardAdvert = ElementMatcher.ByAttributeValue("data-testid", "StandardAd")

    // The remaining three matchers were all found on
    //   https://www.nytimes.com/2022/08/30/opinion/trump-barr-justice-department.html
    // stored:
    //   https://firebasestorage.googleapis.com/v0/b/speechifymobile.appspot.com/o/multiplatform%2Fimport%2FBOJ3CWKUCLhs6ee1aS8F88enjKx1%2F3fee1b60-47a6-4653-b50d-fe10c0e88fc7?alt=media&token=2a966d2f-4099-44e0-8b5c-4aca1ae44d36

    // More advert types.
    val topWrapper = ElementMatcher.ById("top-wrapper")

    // And more advert types.
    val bottomWrapper = ElementMatcher.ById("bottom-wrapper")

    // This is hidden in real browsers, and contains the title repeated (not needed, as we already have
    // the title), as well as the URL of the article (no point in reading it).
    val inStoryMasthead = ElementMatcher.ById("in-story-masthead")

    RelevantContentExtractionRules(
        ignored = ElementMatcher.CompositeAnyOf(
            otherMatchers = arrayOf(
                standardAdvert,
                topWrapper,
                bottomWrapper,
                inStoryMasthead,
            ),
        ),
    )
}
