package com.speechify.client.helpers.content.standard.html.contentExtractionRules.knownPages.knowledge.wikipedia

import com.speechify.client.helpers.content.standard.html.ElementMatcher
import com.speechify.client.helpers.content.standard.html.contentExtractionRules.RelevantContentExtractionRules

/**
 * Content-extraction rules for Wikipedia, keyed by a host string: one entry for the desktop site and one for
 * the mobile site.
 *
 * The property is a `lazy` delegate purely for readability of this file: it lets this overview be declared above
 * [desktopRules] and [mobileRules] (overview-before-detail / what-before-how), because the map is only built on
 * first access.
 *
 * NOTE(review): how these keys are matched against a page's actual host (exact match, suffix match, ...) is decided
 * by the consumer of this map, which is not visible in this file - confirm there.
 */
internal val wikipediaContentExtractionRules by lazy {
    /* TODO consider making the rules work for all MediaWiki software (e.g. match `body[class=mediawiki]`) */
    mapOf(
        Pair("wikipedia.org", desktopRules),
        Pair("m.wikipedia.org", mobileRules),
    )
}

/**
 * Rules for the desktop layout of Wikipedia: a single composite matcher listing every element (by id or by CSS
 * class) that is to be ignored. Example page used for most of the observations below:
 * https://en.wikipedia.org/wiki/Speech_synthesis
 */
private val desktopRules = RelevantContentExtractionRules(
    ignored = ElementMatcher.CompositeAnyOf(
        otherMatchers = arrayOf(
            /* The main left-hand menu, and even the banner. */
            ElementMatcher.ById("mw-navigation"),

            /* Requests for funds, content contribution campaigns etc. */
            ElementMatcher.ById("siteNotice"),

            /* Especially the warning box at the top of an article describing its problems, with requests like
               _"Please help improve this article by introducing citations to additional sources."_, and also the
               prompt at the bottom: _"This article about ... is a stub. You can help Wikipedia by expanding it."_ */
            ElementMatcher.ByClass("metadata"),

            /* Especially the right-hand box with the most important info on the term. */
            ElementMatcher.ByClass("infobox"),

            /* The in-text links that look like "[2]", pointing to the boring (😉) references at the bottom of
               articles. */
            ElementMatcher.ByClass("reference"),

            /* The boring (😉) descriptions of the references at the bottom of articles. */
            ElementMatcher.ByClass("reflist"),

            /* These seem to actually be 'hidden but tabbable-into', and contain the navigational
               _"Jump to navigation"_ / _"Jump to search"_ links. */
            ElementMatcher.ByClass("mw-jump-link"),

            /* Various elements which Wikimedia deems not printable, e.g. the "[citation needed]" text, a hidden
               short description, an audio figure etc. */
            ElementMatcher.ByClass("noprint"),

            /* The links to start editing a section. Obviously not to be read. */
            ElementMatcher.ByClass("mw-editsection"),

            /* Seems to be essentially an equivalent of `<figure>`, most of the time with the equivalent of
               `<figcaption>`. */
            ElementMatcher.ByClass("thumb"),

            /* The "Categories:" links at the bottom. */
            ElementMatcher.ByClass("catlinks"),

            /* An invisible footer of
               `Retrieved from <a dir="ltr" href="...">https://en.wikipedia.org/w/index.php?title=...&amp;oldid=...</a>` */
            ElementMatcher.ByClass("printfooter"),
        ),
    ),
)

/**
 * Rules for the mobile layout of Wikipedia: everything from [desktopRules] combined (via `+`) with the
 * mobile-only additions below.
 *
 * The desktop rules already seem to yield almost satisfactory results on mobile, so they are simply reused.
 * Caveat: whether any of the desktop rules are redundant for mobile has not been checked.
 */
private val mobileRules = run {
    val mobileOnlyRules = RelevantContentExtractionRules(
        ignored = ElementMatcher.CompositeAnyOf(
            otherMatchers = arrayOf(
                /* As of 2022-08-29 this is the only way to match the "Article" and "Talk" tabs on mobile (on
                   desktop they are `<nav>`, so the mobile layout likely just lacks accessibility semantics). */
                ElementMatcher.ByClass("minerva__tab-container"),
            ),
        ),
    )

    desktopRules + mobileOnlyRules
}
