// ktlint-disable filename
package com.speechify.client.helpers.content.standard.html.contentExtractionRules.knownPages

import com.speechify.client.helpers.content.standard.html.contentExtractionRules.RelevantContentExtractionRules
import com.speechify.client.helpers.content.standard.html.contentExtractionRules.knownPages.knowledge.harvardBusinessReview.harvardBusinessReviewContentExtractionRules
import com.speechify.client.helpers.content.standard.html.contentExtractionRules.knownPages.knowledge.medium.mediumContentExtractionRules
import com.speechify.client.helpers.content.standard.html.contentExtractionRules.knownPages.knowledge.nyTimes.nyTimesContentExtractionRules
import com.speechify.client.helpers.content.standard.html.contentExtractionRules.knownPages.knowledge.wikipedia.wikipediaContentExtractionRules
import com.speechify.client.internal.util.extensions.strings.toSequenceOfShorterSubstringsAfter

/**
 * Finds the content-extraction rules registered for the host of [sourceUrl].
 *
 * The host is extracted from the URL and looked up in [rules]. If there is no entry for the full host, each
 * candidate produced by `toSequenceOfShorterSubstringsAfter(".")` is tried in order (presumably the host with
 * leading labels progressively removed, e.g. `en.wikipedia.org` -> `wikipedia.org` - confirm against that helper).
 *
 * @param sourceUrl URL of the page; the `scheme://` prefix, userinfo, port, path, query and fragment are all optional.
 * @return the rules of the first candidate that has an entry in [rules], or `null` when none matches.
 */
internal fun getRulesForSourceUrl(sourceUrl: String): RelevantContentExtractionRules? {
    // Only treat "//" as the start of the authority when it is at the very beginning (protocol-relative URL) or
    // directly follows "scheme:". A "//" that shows up later - inside the path or inside a query value such as
    // "?redirect=https://other.site" - must not be mistaken for the scheme separator.
    val authorityMarkerIndex = sourceUrl.indexOf("//")
    val hasAuthorityMarker = authorityMarkerIndex >= 0 &&
        sourceUrl.substring(0, authorityMarkerIndex).let { textBeforeMarker ->
            (textBeforeMarker.isEmpty() || textBeforeMarker.endsWith(':')) &&
                textBeforeMarker.none { it == '/' || it == '?' || it == '#' }
        }
    val urlWithoutScheme =
        if (hasAuthorityMarker) sourceUrl.substring(authorityMarkerIndex + 2) else sourceUrl

    val domainName = urlWithoutScheme
        .substringBefore('/') // shave off path
        .substringBefore('?') // shave off a query that follows the host directly (no path)
        .substringBefore('#') // shave off a fragment that follows the host directly (no path)
        .substringAfterLast('@') // shave off 'user:password@', if there is one
        .substringBefore(':') // shave off port

    val domainCandidates = sequenceOf(domainName) + domainName.toSequenceOfShorterSubstringsAfter(".")

    return domainCandidates.firstNotNullOfOrNull { domainOrShorterToTry -> rules[domainOrShorterToTry] }
}

/**
 * Combined lookup of the extraction rules of every known page, indexed by [getRulesForSourceUrl] with a domain
 * name (or a shorter part of it) as the key.
 *
 * NOTE(review): presumably each operand is a `Map` keyed by domain name, in which case `+` lets a later operand
 * override an earlier one on a duplicate key - confirm against the operand declarations before reordering.
 */
private val rules =
    wikipediaContentExtractionRules +
        harvardBusinessReviewContentExtractionRules +
        nyTimesContentExtractionRules +
        mediumContentExtractionRules
