package com.speechify.client.internal.util.text.groupingToSentences.internal.sentenceTerminators

import com.speechify.client.internal.util.text.groupingToSentences.internal.TextStructureSpanDefinition

/**
 * Armenian full stop (U+0589 ARMENIAN FULL STOP).
 *
 * As per https://en.wikipedia.org/wiki/Full_stop#Armenian
 *
 * Interestingly, [Armenian question-marks and exclamations occur mid-sentence while the sentence is terminated with a full stop.](https://polyglotclub.com/wiki/Language/Armenian/Grammar/Question-Mark-and-Exclamation-Point)
 */
const val armenianFullStop: Char = '։'

/**
 * Burmese full stop (U+104B MYANMAR SIGN SECTION).
 *
 * As per https://en.wikipedia.org/wiki/Full_stop#Southeast_Asian
 */
const val burmeseFullStop: Char = '။'

/**
 * Full stop used in Chinese and Japanese text (U+3002 IDEOGRAPHIC FULL STOP).
 *
 * As per https://en.wikipedia.org/wiki/Full_stop#Chinese_and_Japanese
 */
const val chineseAndJapaneseFullStop: Char = '。'

/**
 * Full-width question mark used in East Asian text (U+FF1F FULLWIDTH QUESTION MARK).
 *
 * As per https://en.wikipedia.org/wiki/Question_mark#Fullwidth_question_mark_in_East_Asian_languages
 */
const val chineseAndJapaneseFullWidthQuestionMark: Char = '？'

/**
 * Full-width exclamation mark used in Chinese and Japanese text (U+FF01 FULLWIDTH EXCLAMATION MARK).
 *
 * As per https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation
 */
const val chineseAndJapaneseFullWidthExclamation: Char = '！'

/**
 * Greek question mark (U+037E GREEK QUESTION MARK).
 *
 * Not to be confused with the ASCII semicolon `;` (U+003B)
 * ([it is even rendered the same way, most of the time](https://en.wikipedia.org/wiki/Question_mark#Greek_question_mark)).
 *
 * The value is written as a Unicode escape on purpose: the two characters cannot be told apart on screen, and
 * U+037E is canonically equivalent to U+003B, so an editor or tool that normalises the source file would silently
 * replace a literal glyph with a plain semicolon.
 *
 * NOTE(review): for the same reason, input text that has been NFC-normalised will contain U+003B instead of U+037E
 * and will not be matched by this constant - confirm whether that matters to callers.
 */
const val greekQuestionMark = '\u037E'

/**
 * Full stop of the Nagari scripts, the danda (U+0964 DEVANAGARI DANDA).
 *
 * As per https://en.wikipedia.org/wiki/Full_stop#Nagari
 */
const val nagariFullStop: Char = '।'

/**
 * Mirrored question mark used in right-to-left scripts (U+061F ARABIC QUESTION MARK).
 *
 * As per https://en.wikipedia.org/wiki/Question_mark#Mirrored_question_mark_in_right-to-left_scripts
 */
const val rightToLeftScriptsQuestionMarkArabic: Char = '؟'

/**
 * Reversed question mark (U+2E2E REVERSED QUESTION MARK).
 *
 * As per https://en.wikipedia.org/wiki/Question_mark#Mirrored_question_mark_in_right-to-left_scripts
 */
const val rightToLeftScriptsQuestionMarkReversed: Char = '⸮'

/**
 * Double danda (U+0965 DEVANAGARI DOUBLE DANDA).
 *
 * As per https://en.wikipedia.org/wiki/Full_stop#Nagari
 */
const val sanskritFullStop: Char = '॥'

/**
 * Shahmukhi full stop (U+06D4 ARABIC FULL STOP).
 *
 * As per https://en.wikipedia.org/wiki/Full_stop#Shahmukhi
 */
const val shahmukhiFullStop: Char = '۔'

/**
 * The [SentenceSpanDefinition] assembled from the terminator characters declared in this file.
 *
 * Each argument is a character class followed by `+`, so it matches one or more consecutive terminators:
 * - the first lists the Chinese/Japanese full stop and the full-width question and exclamation marks;
 * - the second lists `.`, `!`, `?`, `‽`, `…` together with the Armenian, Burmese, Greek, Nagari, right-to-left,
 *   Sanskrit and Shahmukhi marks.
 */
internal val sentenceTerminationSpanDefinition = SentenceSpanDefinition(
    regexOfTerminatorsNotUsingWhitespaceAfter = Regex(
        charArrayOf(
            chineseAndJapaneseFullStop,
            chineseAndJapaneseFullWidthQuestionMark,
            chineseAndJapaneseFullWidthExclamation,
        ).joinToString(separator = "", prefix = "[", postfix = "]+"),
    ),
    regexOfTerminatorUsingTrailingWhitespaceButWithoutMatchingIt = Regex(
        charArrayOf(
            '.',
            '!',
            '?',
            '‽',
            '…',
            armenianFullStop,
            burmeseFullStop,
            greekQuestionMark,
            nagariFullStop,
            rightToLeftScriptsQuestionMarkArabic,
            rightToLeftScriptsQuestionMarkReversed,
            sanskritFullStop,
            shahmukhiFullStop,
        ).joinToString(separator = "", prefix = "[", postfix = "]+"),
    ),
)

/**
 * A [TextStructureSpanDefinition] for sentences, composed from two terminator patterns.
 *
 * The mid-string terminator regex handed to the superclass matches either
 * - `regexOfTerminatorUsingTrailingWhitespaceButWithoutMatchingIt`, but only where the next character is whitespace
 *   (checked with a lookahead, so the whitespace itself is not part of the match), or
 * - `regexOfTerminatorsNotUsingWhitespaceAfter`, with no condition on what follows.
 *
 * Only the [Regex.pattern] of each argument is used; any [RegexOption]s set on the supplied regexes are not
 * carried over into the composed expressions.
 *
 * @param regexOfTerminatorsNotUsingWhitespaceAfter terminators that count regardless of the following character.
 * @param regexOfTerminatorUsingTrailingWhitespaceButWithoutMatchingIt terminators that count mid-string only when
 *  followed by whitespace. The pattern itself must not consume that whitespace.
 */
internal class SentenceSpanDefinition(
    regexOfTerminatorsNotUsingWhitespaceAfter: Regex,
    regexOfTerminatorUsingTrailingWhitespaceButWithoutMatchingIt: Regex,
) : TextStructureSpanDefinition(
    regexOfTerminatorMidString = Regex(
        // Each sub-pattern is wrapped in a non-capturing group so that a top-level `|` inside it cannot capture
        // the lookahead for just its last alternative, or merge with the alternation added here.
        """(?:${regexOfTerminatorUsingTrailingWhitespaceButWithoutMatchingIt.pattern})(?=\s)""" +
            """|""" +
            """(?:${regexOfTerminatorsNotUsingWhitespaceAfter.pattern})""",
    ),
) {

    /**
     * Either terminator pattern, without the trailing-whitespace lookahead.
     *
     * This can be used for testing if sentence termination is present, but not for finding an end of a sentence
     * mid-string.
     */
    private val regexOfTerminatorWithoutMatchingWhitespace = Regex(
        // Grouped for the same reason as the mid-string regex: keep each supplied pattern a single alternative.
        """(?:${regexOfTerminatorUsingTrailingWhitespaceButWithoutMatchingIt.pattern})""" +
            """|""" +
            """(?:${regexOfTerminatorsNotUsingWhitespaceAfter.pattern})""",
    )

    /**
     * Tells whether [input] ends with a sentence terminator.
     *
     * The match is attempted starting at the last character of [input] only, so a terminator pattern that needs
     * more than one character to match will not be recognised here.
     *
     * @return `true` when either terminator pattern matches at the last index of [input]; `false` otherwise,
     *  including when [input] is empty.
     */
    fun isTerminatorAtEndOf(input: CharSequence): Boolean =
        input.isNotEmpty() &&
            regexOfTerminatorWithoutMatchingWhitespace.matchesAt(
                input = input,
                index = input.lastIndex,
            )
}
