package com.speechify.client.helpers.content.standard.html.contentExtractionRules.knownPages.knowledge.harvardBusinessReview

import com.speechify.client.helpers.content.standard.html.ElementMatcher
import com.speechify.client.helpers.content.standard.html.contentExtractionRules.RelevantContentExtractionRules

/*
 * Content-extraction rules for Harvard Business Review, keyed by host name.
 *
 * The `hbr.org` rules were developed for the articles under URLs like:
 * https://hbr.org/2022/08/readable-article-identifier
 *
 * `lazy` is used just so that this file can read overview-before-detail/what-before-how:
 * the rules referenced here are declared further down in this file.
 */
internal val harvardBusinessReviewContentExtractionRules by lazy {
    mapOf(Pair("hbr.org", contentExtractionRules))
}

// A single rule set is enough here: desktop and mobile are covered in one layout on `hbr.org`.
// Unless stated otherwise, every observation below was made (at least) on
// https://hbr.org/2022/08/how-to-handle-office-gossip-when-its-about-you
private val contentExtractionRules = run {
    // Elements matched here are passed to the rules as `ignored`.
    val ignoredElements = ElementMatcher.CompositeAnyOf(
        byTagNameMatchersSet = setOf(
            // The big image on top of the article. It is a <figure> equivalent, because it also
            // contains a caption - on the reference article the caption says
            // _"master1305/Getty Images"_.
            ElementMatcher.ByTagName("hero-image"),
        ),
        otherMatchers = arrayOf(
            // This matcher nicely includes the _"You are reading your last free article for
            // this month."_ banner.
            ElementMatcher.ByClass("hide-for-print"),
            // The text: "Leer en español" and "Ler em português".
            ElementMatcher.ByClass("translate-message"),
            // A hidden (at least on desktop) section with a short subtitle and
            // "by AuthorFirstname Lastname". On the desktop version the author is repeated
            // elsewhere, but the subtitle isn't. Ignoring this section prevents the repeat.
            ElementMatcher.ByClass("article-dek-group"),
            // A link to the catalogue of all articles on this topic, e.g. "Managing Conflicts".
            ElementMatcher.ByClass("slug-container"),
        ),
    )

    // Elements matched here are passed to the rules as `inline`.
    // Both of them are actually displayed inline by that page's CSS, so we do inline too.
    val inlineElements = ElementMatcher.CompositeAnyOf(
        otherMatchers = arrayOf(
            // It's a `<ul>`.
            ElementMatcher.ByClass("article-byline-list"),
            // It's a `<li>`.
            ElementMatcher.ByClass("article-author"),
        ),
    )

    RelevantContentExtractionRules(
        ignored = ignoredElements,
        inline = inlineElements,
    )
}
