package com.speechify.client.api.content.view.speech

/**
 * Named groups of regular-expression source strings, exposed through [patterns].
 *
 * WARNING: Using "()"s has a special meaning in all regexes. Adding one means 'the text to match' and the
 * remaining text will be treated as a non-matching look-around. See #RationaleForRegexCaptureGroupBehavior
 *
 * @property patterns the regex source strings that make up this group.
 **/
internal sealed class SpeechTextPatterns(val patterns: Array<String>) {
    companion object {
        /**
         * A hyphen-minus followed by an en dash. The two glyphs are hard to tell apart but they are
         * different characters (code points 45 and 8211 respectively).
         *
         * When interpolating this into a regex character class, put it FIRST in the class. A "-" that sits
         * between two other class members is read as a range operator: `[\d,-–]` means "a digit, or any
         * character from ',' to '–'", which includes every ASCII letter and '.'.
         */
        const val dashes = "-–"
    }

    /**
     * Handles different styles of in-text citations, see [here](https://en.wikipedia.org/wiki/Vancouver_system)
     * and [here](https://www.bibguru.com/blog/citation-styles-numbers-in-brackets/)
     * - superscript citations, typically appearing right after a comma or full stop, or after a word, e.g.
     * "Researchers have also proposed mathematical definitions of bias,11–13
     * describing methods5 for measuring bias,14–17 and offering approaches for
     * mitigating bias.15,18,19"
     * - square bracket number citations, typically appearing before a comma or full stop, e.g.
     * "The suggestion was first put noted in [1]."
     * "Jones [2] has stated that......."
     * "Recent studies [5], [7], [9], [11] have also described..."
     * "Recent studies [5-11] have also described..."
     * - parentheses number citations
     * "The suggestion was first put noted in (1)."
     * "Jones (2) has stated that......."
     * "Recent studies (5), (7), (9), (11) have also described..."
     * "Recent studies (5-11) have also described..."
     * - parentheses with name and date, e.g. "(Smallbone & Quinton, 2004)"
     *
     * In every numeric pattern the number list is "one or more digits, then any run of digits, commas and
     * dashes". [dashes] is placed first inside the character class so that its "-" stays a literal.
     */
    object InTextCitations : SpeechTextPatterns(
        arrayOf(
            // Superscript citation right after a comma or full stop, e.g.
            // "Researchers have also proposed mathematical definitions of bias,11–13".
            // The leading [^\d] keeps decimal numbers such as "3.14" from matching.
            """[^\d][.,]([\d]+[$dashes\d,]*)""",
            // Superscript citation glued to the end of a word, e.g.
            // "describing methods5 for measuring bias".
            // The negative look-ahead rejects an uppercase letter directly before the digits.
            """(?![A-Z]+)[a-zA-Z]([\d]+[$dashes\d,]*)""",
            // Square bracket number citation followed by a comma or full stop, e.g.
            // "The suggestion was first put noted in [1]."
            // "Recent studies [5], [7], [9], [11] have also described..."
            """[a-zA-Z ](\[[\d]+[$dashes\d,]*\])[,.]+""",
            // Square bracket number citation preceded by a space, e.g.
            // "Jones [2] has stated that......."
            // "Recent studies [5-11] have also described..."
            """[ ](\[[\d]+[$dashes\d,]*\])""",
            // Parentheses number citation followed by a comma or full stop, e.g.
            // "The suggestion was first put noted in (1)."
            // "Recent studies (5), (7), (9), (11) have also described..."
            """[a-zA-Z ](\([\d]+[$dashes\d,]*\))[.,]+""",
            // Parentheses number citation preceded by a space, e.g.
            // "Jones (2) has stated that......."
            // "Recent studies (5-11) have also described..."
            """[ ](\([\d]+[$dashes\d,]*\))""",
            // Parenthetical citation ending in a four-digit number, e.g.
            // "(Smallbone & Quinton, 2004)"
            // "(Wagemann & Weger, 2021)"
            """\([^)]*?\d{4}\)""",
        ),
    )

    /**
     * Text enclosed in round brackets: a "(" followed by at least one character that is neither ")" nor "]",
     * closed by ")" or "]". A "(" directly preceded by one of `: ; ' -` is not matched
     * (presumably so that emoticon-like sequences such as ":(" are left alone; confirm with the author).
     */
    object RoundBrackets : SpeechTextPatterns(
        arrayOf(
            // Only the first "^" of a character class negates it; a second one would be a literal caret
            // and would stop bracketed text such as "(x^2)" from matching.
            """(?<![:;'-])\([^)\]]+[\)\]]""",
        ),
    )

    /**
     * Text enclosed in square brackets: a "[" followed by at least one character that is neither "]" nor ")",
     * closed by "]" or ")".
     */
    object SquareBrackets : SpeechTextPatterns(
        arrayOf(
            // See RoundBrackets: no second "^" inside the negated class, so "[x^2]" is matched as well.
            """\[[^\])]+[\]\)]""",
        ),
    )

    /** Text enclosed in curly braces: "{", at least one character other than "}", then "}". */
    object Braces : SpeechTextPatterns(
        arrayOf(
            """\{[^}]+\}""",
        ),
    )

    /**
     * URLs for a fixed set of schemes. Each pattern is the scheme, "://", and a run of URL characters whose
     * last character may not be one of `? ! : , . ;`, so trailing sentence punctuation is left out of the match.
     */
    object Urls : SpeechTextPatterns(
        /*
        * WARNING: Using "()"s has a special meaning in all regexes. Adding one means 'the text to match' and the
        * remaining text will be treated as a non-matching look-around. See #RationaleForRegexCaptureGroupBehavior
        *  */
        arrayOf(
            // http or https urls, e.g. https://jsbin.com/cutiduluxe
            "https?",
            // file, e.g. file://192.168.1.57/~User/2ndFile.html
            "file",
            // ftp urls, e.g. ftp://user@host/foo/bar.txt
            "ftp",
        ).map { scheme -> """$scheme://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]""" }.toTypedArray(),
    )
}
