// ktlint-disable filename
package com.speechify.client.helpers.content.standard.html

import com.speechify.client.api.content.view.web.WebPageElementAttribute
import com.speechify.client.api.content.view.web.WebPageNode
import com.speechify.client.api.content.view.web.getLowercaseAttr
import com.speechify.client.api.content.view.web.getValueOfLowercaseAttr
import com.speechify.client.helpers.content.standard.html.ElementMatcher.ByTagName
import com.speechify.client.internal.util.extensions.collections.plus
import com.speechify.client.internal.util.extensions.strings.isLowerCase

/**
 * Describes alternative ways of matching an element
 */
internal sealed class ElementMatcher {

    /**
     * Marker Interfaces, as per [Marker interface pattern](https://en.wikipedia.org/wiki/Marker_interface_pattern)
     *
     * This grouping allows a simple way of taking only [NonCompositeMatcher]s as input. This is especially useful for
     * [CompositeAnyOf], or any other [CompositeMatcher]s, to ensure that optimization opportunities
     * in the [NonCompositeMatcher] group (e.g. [ByTagName]) are not lost through composing [CompositeAnyOf] from other
     * [CompositeAnyOf]s or some new uncategorized composite matchers.
     *
     * Usage in type declarations: Please never have an instance that has both interfaces (careful about indirect inheritance!)
     */
    companion object CompositionMarkerInterfaces {
        /**
         * Groups matchers that aren't [CompositeMatcher]. This is the element type accepted by
         * [CompositeAnyOf.fromScalarMatchers].
         */
        interface NonCompositeMatcher

        /**
         * Groups matchers that aren't [NonCompositeMatcher], i.e. matchers built out of other matchers
         * (e.g. [CompositeAnyOf]).
         */
        interface CompositeMatcher
    }

    /**
     * Matches an element by its tag name (AKA the element's
     * [Type](https://developer.mozilla.org/en-US/docs/Learn/CSS/Building_blocks/Selectors/Type_Class_and_ID_Selectors#type_selectors)).
     *
     * Of special interest because, being a `data class`, instances compare by [elementNameLowerCase], which allows
     * constructing efficient lookup sets of them and avoids scanning.
     *
     * @property elementNameLowerCase the tag name to match; must already be lowercase.
     * @throws IllegalArgumentException when [elementNameLowerCase] does not pass the `isLowerCase` check.
     */
    data class ByTagName(val elementNameLowerCase: String) : ElementMatcher(), NonCompositeMatcher {
        init {
            // Fail fast at construction, so that a wrongly-cased name cannot silently never match.
            if (!elementNameLowerCase.isLowerCase()) {
                throw IllegalArgumentException(
                    "$elementNameLowerCase must be specified as lowercase to keep all code consistent.",
                )
            }
        }
    }

    /**
     * Abstract class that groups [ElementMatcher] classes which currently don't have any optimization (like the
     * tag-name can be optimized to a set-lookup), so a collection of such matchers will be scanned.
     *
     * Extracting this group especially allows [CompositeAnyOf] to enforce putting instances into correct properties
     * (not putting the optimized [ByTagName] together with the other matchers).
     *
     * NOTE: Whether a matcher belongs to this group can change, e.g. multiple \[role\] matchers can be implemented
     * with a scan, but in a different implementation a set of roles to match could be prepared, like with tag-names.
     */
    sealed class ScanRequiringMatcher : ElementMatcher() {
        /**
         * Evaluates this matcher against a single element.
         *
         * @param element the element to test.
         * @return `true` when [element] satisfies this matcher, `false` otherwise.
         */
        abstract fun isMatching(element: WebPageNode.Element): Boolean
    }

    /**
     * Matcher by a [HTML class](https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/class).
     *
     * The element's `class` attribute value is treated as a list of whitespace-separated tokens, and the element
     * matches when [className] is one of them. An element without a `class` attribute never matches.
     *
     * Marked as [NonCompositeMatcher] (like [ById], [ByRole] and the attribute matchers), so that it can be passed to
     * [CompositeAnyOf.fromScalarMatchers] together with the other scalar matchers.
     */
    class ByClass(
        /**
         * Note that the matching is [case-sensitive](https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByClassName#usage_notes)
         */
        val className: String,
    ) : ScanRequiringMatcher(), NonCompositeMatcher {
        override fun isMatching(element: WebPageNode.Element): Boolean =
            /* TODO - optimize by turning classes into a map (consider doing it lazily
                 especially to save the work on contents of entire skipped branches)
             */
            element.attributes.getValueOfLowercaseAttr("class")
                ?.asTokensListContains(className) == true
    }

    /**
     * Matches an element whose attribute named [attributeNameLowercase] has exactly the value [value].
     *
     * The comparison of the value is a plain string equality, so an element lacking the attribute does not match.
     *
     * @property attributeNameLowercase name of the attribute to inspect; must already be lowercase.
     * @property value the exact attribute value required for a match.
     * @throws IllegalArgumentException when [attributeNameLowercase] does not pass the `isLowerCase` check.
     */
    class ByAttributeValue(
        val attributeNameLowercase: String,
        val value: String,
    ) : ScanRequiringMatcher(), NonCompositeMatcher {
        init {
            if (!attributeNameLowercase.isLowerCase()) {
                throw IllegalArgumentException(
                    "$attributeNameLowercase must be specified as lowercase to keep all code consistent.",
                )
            }
        }

        override fun isMatching(element: WebPageNode.Element): Boolean {
            val actualValue = element.attributes.getValueOfLowercaseAttr(attributeNameLowercase)
            return actualValue == value
        }
    }

    /**
     * Equivalent to `[attr-name]` CSS matcher. Especially needed because HTML allows valueless, AKA [boolean attributes](https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#boolean-attributes)
     *
     * An element matches when it carries the attribute [attributeNameLowercase], its tag is not listed in
     * [ignoreForTags], and it has none of the [ignoreForElementAttributes].
     *
     * @throws IllegalArgumentException when [attributeNameLowercase] does not pass the `isLowerCase` check.
     */
    class ByAttributePresence(
        val attributeNameLowercase: String,
        /**
         * Tag names for which this matcher must never match. Entries are compared case-insensitively: the element's
         * tag name is lowercased before the lookup, and so are these entries.
         */
        val ignoreForTags: Set<String> = emptySet(),
        /**
         * If the element has any of the specified attributes, the rule will be ignored.
         * This is needed since HTML content is very flexible, and there may be cases where the element has an attribute
         * which marks it as "skip" when parsing the page, but is of interest in combination with a different value or
         * with another attribute.
         *
         * Ex: https://er.educause.edu/articles/2013/12/starting-the-conversation-universitywide-research-data-management-policy
         *  The page has the content inside a `div` with the attribute `aria-label`. Elements with `aria-label` are
         *  normally skipped, but in this case, it is actually the main content. This is identifiable actually by two
         *  other checks:
         *      - aria-label has the value of `content`, meaning that this is the main content
         *      - the element has the role of `main`, meaning that this is the main content
         *
         * This scenario is the same as having the `aria-label` for an `article` tag.
         */
        val ignoreForElementAttributes: Set<WebPageElementAttribute> = emptySet(),
    ) : ScanRequiringMatcher(), NonCompositeMatcher {
        init {
            require(attributeNameLowercase.isLowerCase()) {
                "$attributeNameLowercase must be specified as lowercase to keep all code consistent."
            }
        }

        /**
         * [ignoreForTags] normalized once to lowercase. The element's tag name is lowercased before the lookup, so
         * without this normalization a non-lowercase entry could never take effect.
         */
        private val ignoreForTagsLowercase: Set<String> = ignoreForTags.map { it.lowercase() }.toSet()

        /**
         * Tells whether [element] carries at least one of the [ignoreForElementAttributes].
         */
        private fun hasIgnorableAttribute(element: WebPageNode.Element): Boolean =
            element.attributes.any { it in ignoreForElementAttributes }

        override fun isMatching(element: WebPageNode.Element): Boolean =
            element.attributes.getLowercaseAttr(attributeNameLowercase) != null &&
                element.tagName.lowercase() !in ignoreForTagsLowercase &&
                !hasIgnorableAttribute(element)
    }

    /**
     * Matches an element whose [id](https://developer.mozilla.org/en-US/docs/Web/API/Element/id) attribute equals
     * [idValue] exactly. An element without an `id` attribute does not match.
     */
    class ById(
        /**
         * Note that [_"Identifiers are case-sensitive"_](https://developer.mozilla.org/en-US/docs/Web/API/Element/id)
         */
        val idValue: String,
    ) : ScanRequiringMatcher(), NonCompositeMatcher {
        override fun isMatching(element: WebPageNode.Element): Boolean {
            val elementId = element.attributes.getValueOfLowercaseAttr("id")
            return elementId == idValue
        }
    }

    /**
     * Matches an element by its [HTML role attribute](https://developer.mozilla.org/en-US/docs/Web/Accessibility/ARIA/Roles).
     *
     * The `role` attribute value is read as a token list, and the element matches when [roleName] is one of
     * the tokens. An element without a `role` attribute does not match.
     *
     * @throws IllegalArgumentException when [roleName] does not pass the `isLowerCase` check.
     */
    class ByRole(
        /**
         *  Note you must [_"use ASCII lowercase for all role token values"_](https://www.w3.org/TR/html-aria/#case-sensitivity)
         */
        val roleName: String,
    ) : ScanRequiringMatcher(), NonCompositeMatcher {
        init {
            /* As per [_"use ASCII lowercase for all role token values"_](https://www.w3.org/TR/html-aria/#case-sensitivity) */
            if (!roleName.isLowerCase()) {
                throw IllegalArgumentException("$roleName must be all-lowercase")
            }
        }

        override fun isMatching(element: WebPageNode.Element): Boolean {
            val roleAttributeValue = element.attributes.getValueOfLowercaseAttr("role")
                ?: return false
            /* Using `asTokensListContains` because [_"The attribute value MUST allow a token list as the
             value"](https://w3c.github.io/aria/#host_general_role) */
            return roleAttributeValue.asTokensListContains(roleName)
        }
    }

    /**
     * AKA 'logical Union' of matchers: an element matches when at least one of the contained matchers matches it.
     *
     * @property byTagNameMatchersSet tag-name matchers, kept as a set so that they are checked with one lookup.
     * @property otherMatchers matchers that have no lookup optimization, and are evaluated one by one.
     */
    class CompositeAnyOf(
        val byTagNameMatchersSet: Set<ByTagName>? = null,
        val otherMatchers: Array<ScanRequiringMatcher>? = null,
    ) : ScanRequiringMatcher(), CompositeMatcher {
        override fun isMatching(element: WebPageNode.Element): Boolean {
            val isMatchedByTagName = byTagNameMatchersSet?.let { tagNameMatchers ->
                /* A bit of a lie that this is a `ByTagName` matcher, but probing the set with one saves us from
                 having to create 1-to-1 alike sets of tagNames. The probe is only built when the set is present. */
                ByTagName(element.tagName.lowercase()) in tagNameMatchers
            } ?: false
            if (isMatchedByTagName) {
                return true
            }

            // No tag-name hit, so fall back to scanning the remaining matchers.
            return otherMatchers?.any { it.isMatching(element) } == true
        }

        companion object {
            /**
             * Constructs a [CompositeAnyOf] matcher while maintaining performance optimizations (e.g. of [ByTagName]).
             *
             * [ByTagName] instances end up in [byTagNameMatchersSet], and every [ScanRequiringMatcher] ends up in
             * [otherMatchers].
             *
             * @param scalars the non-composite matchers to combine.
             * @throws IllegalArgumentException when a matcher is neither a [ByTagName] nor a [ScanRequiringMatcher].
             */
            fun fromScalarMatchers(scalars: Sequence<NonCompositeMatcher>): CompositeAnyOf {
                val (tagNameMatchers, scanMatchers) = scalars.partition { candidate ->
                    when (candidate) {
                        is ByTagName -> true
                        is ScanRequiringMatcher -> false
                        else -> throw IllegalArgumentException(
                            "Unknown scalar matcher $candidate that needs to be classified  as " +
                                "${ScanRequiringMatcher::class.simpleName} or support needs to be added to " +
                                "${CompositeAnyOf::class.simpleName}",
                        )
                    }
                }

                return CompositeAnyOf(
                    byTagNameMatchersSet = tagNameMatchers.filterIsInstance<ByTagName>().toSet(),
                    otherMatchers = scanMatchers.filterIsInstance<ScanRequiringMatcher>().toTypedArray(),
                )
            }
        }
    }
}

/**
 * For attribute values that are 'space-separated tokens' (e.g. `class` or `role`).
 *
 * Splits the receiver on the HTML ASCII whitespace characters (space, tab, line feed, form feed, carriage return)
 * and checks whether any resulting segment equals [token] exactly (case-sensitively).
 *
 * Implemented without a [Regex] because this runs for every inspected element, and a pattern built from [token]
 * would have to be compiled on each call.
 *
 * @param token a single token to look for. A value containing whitespace can never be found, since it cannot be
 *  a single token.
 * @return `true` when [token] is one of the whitespace-separated segments of the receiver.
 */
private fun String.asTokensListContains(token: String): Boolean =
    splitToSequence(' ', '\t', '\n', '\u000C', '\r').any { it == token }

/**
 * Combine with another composite matcher while maintaining performance optimizations (e.g. of [ByTagName]).
 *
 * When either side is `null` the other side is returned as-is (so the result is `null` only when both are).
 * Otherwise a new [ElementMatcher.CompositeAnyOf] is created from the tag-name sets of both sides and the
 * other-matcher arrays of both sides, each pair joined with `+`.
 */
internal operator fun ElementMatcher.CompositeAnyOf?.plus(another: ElementMatcher.CompositeAnyOf?):
    ElementMatcher.CompositeAnyOf? =
    when {
        this == null -> another
        another == null -> this
        else -> ElementMatcher.CompositeAnyOf(
            byTagNameMatchersSet = byTagNameMatchersSet + another.byTagNameMatchersSet,
            otherMatchers = otherMatchers + another.otherMatchers,
        )
    }
