package com.speechify.client.helpers.content.standard.book

import com.speechify.client.api.content.view.book.BookPageTextContentItem
import com.speechify.client.internal.text.CharDirectionality
import com.speechify.client.internal.text.directionality

/**
 * # Background
 * Supporting RTL languages in general is simpler than it might seem at first. The "directionality" of the text only
 * influences how it ought to be presented to the reader - importantly, all text in all languages is serialized in the
 * same first-to-last order. This means that for a given String, all of our algorithms for scanning and splitting
 * sentences are totally unaffected by whether the language reads left-to-right or right-to-left.
 *
 * # The Problem
 * For PDFs specifically, our job in parsing is to reverse-engineer the original source text from the content and
 * positions of the [WrappedBookPageTextContentItem]s that were rendered to the page.  Removing the assumption that the
 * page's content is read LTR makes this much more difficult, because we now have to infer the directionality of
 * 1. groups within the page,
 * 2. lines within each group, and
 * 3. items within each line
 * before we can gather text approximating the original source, whereas previously we could reliably sort left-to-right
 * at each stage.
 *
 * We assume that the layout of the page content roughly follows the
 * [Unicode Bidirectional Algorithm](https://unicode.org/reports/tr9/), which is the industry standard.
 *
 * # Design Goals
 * 1. We seek an 80/20 solution to RTL support. There are various edge cases in the UBA layout algorithm itself, and
 *    many more in PDF rendering and the composition of documents in general. At time of writing, the standard we aim
 *    to meet is that
 *    1) totally-RTL documents work as well as totally-LTR documents currently do
 *    2) mixed-direction documents don't blow up horribly, but may have bugs in sections where mixed-direction content
 *       appears in the same sentence.
 * 2. Minimize regression risk to LTR documents. We accomplish this by avoiding structural changes to the parsing
 *    pipeline and defaulting to LTR in situations where directionality is ambiguous.
 *
 * # Solution Approach
 * 1. (#RTLSortCollectionOfPageElementsAccordingToDominantDirection)
 *    Any time we sort groups/lines/items on the page, we first determine the correct horizontal sort order using a
 *    simple heuristic computed over the entire collection of things to be sorted.
 * 2. Any other time we apply geometric comparisons between elements on a page, we consider the directions of the
 *    elements and/or their parent element as appropriate.
 * 3. Use a rudimentary UBA-aware algorithm to determine the final granular ordering of items within each line, relying
 *    on the dominant directionality inferred for the line in (1) above.
 */

/**
 * A high-level representation of the dominant direction in which a line of text would be presented to a reader.
 *
 * For a String, this is well-defined by the [Unicode Bidirectional Algorithm](https://unicode.org/reports/tr9).
 *
 * However, it becomes useful to represent this concept explicitly in contexts when we have text-like objects that do
 * not carry well-defined directionality semantics. For instance, consider a collection of PDF text fragments that
 * together form a line, or groups of lines forming columns that must be read in a particular direction. In these cases,
 * their original ordering in the source text can only be reconstructed by inference over their geometric
 * relationships.
 */
internal enum class TextDirection {
    /** The content is presented to the reader from left to right. */
    LeftToRight,
    /** The content is presented to the reader from right to left. */
    RightToLeft,
}

/**
 * The dominant directionality of the text contained in this item.
 *
 * This simply delegates to the `textDirection` of the item's underlying text.
 */
internal val BookPageTextContentItem.textDirection: TextDirection?
    get() {
        val itemText = text.text
        return itemText.textDirection
    }

/**
 * Returns the dominant directionality for the content contained in this line, or `null` when it cannot be
 * determined.
 *
 * - An empty line yields `null`: there is nothing to infer a direction from.
 * - A single-item line defers to the directionality of that item's source, which may itself be `null`.
 * - For two or more items, we trust that prior to constructing this [TextLine] we had already ordered the content
 *   according to the inferred dominant direction, so the direction is evident from comparing the horizontal
 *   positions of the first and last items. Equal positions default to [TextDirection.LeftToRight].
 */
internal val TextLine.textDirection
    get(): TextDirection? = when (this.texts.size) {
        // `first()` throws on an empty collection, so report "unknown" rather than crash on a degenerate line.
        0 -> null

        1 -> this.texts.first().source.textDirection

        // If we have more than one item, we can assume they were already ordered according to the correct dominant
        // directionality, which will be evident in comparing the horizontal positions of the first and last items.
        else -> when {
            this.texts.first().normalizedBox.centerX > this.texts.last().normalizedBox.centerX ->
                TextDirection.RightToLeft
            else -> TextDirection.LeftToRight
        }
    }

/**
 * Returns the dominant directionality for the content contained in this line, or `null` when it cannot be
 * determined.
 *
 * - A line without chunks yields `null`: there is nothing to infer a direction from.
 * - A single-chunk line defers to the directionality of that chunk, which may itself be `null`.
 * - For two or more chunks, we trust that prior to constructing this [Line] we had already ordered the content
 *   according to the inferred dominant direction, so the direction is evident from comparing the horizontal
 *   positions of the first and last chunks. Equal positions default to [TextDirection.LeftToRight].
 */
internal val Line.textDirection
    get(): TextDirection? = when (chunks.size) {
        // `first()` throws on an empty collection, so report "unknown" rather than crash on a degenerate line.
        0 -> null

        1 -> this.chunks.first().textDirection

        // If we have more than one item, we can assume they were already ordered according to the correct dominant
        // directionality, which will be evident in comparing the horizontal positions of the first and last items.
        else -> when {
            this.chunks.first().normalizedBox.centerX > this.chunks.last().normalizedBox.centerX ->
                TextDirection.RightToLeft
            else -> TextDirection.LeftToRight
        }
    }

/**
 * Returns the dominant directionality for the content contained in this group.
 *
 * This is the direction of the first line (in iteration order) whose own direction is known, or `null` when no
 * line in the group has a known direction.
 */
internal val TextGroup.textDirection
    get(): TextDirection? {
        for (line in lines) {
            val lineDirection = line.textDirection
            if (lineDirection != null) return lineDirection
        }
        return null
    }

/**
 * Simple heuristic for inferring the dominant text direction from a group of possibly-unordered fragments of text
 * that nonetheless originate from some logical unit within a document (e.g. page, column, section, line, etc).
 *
 * The fragments are visited lazily in sequence order; the direction of the first fragment whose string has a
 * known direction wins.
 *
 * @param texts the fragments to inspect.
 * @param block extracts the string content of a fragment.
 * @return the first non-null direction found, or `null` when no fragment yields one.
 */
internal fun <T> inferDominantTextDirectionFromPossiblyUnorderedTextFragments(
    texts: Sequence<T>,
    block: (T) -> String,
): TextDirection? = texts.firstNotNullOfOrNull { fragment -> block(fragment).textDirection }

/**
 * [List] convenience overload of the heuristic for inferring the dominant text direction from a group of
 * possibly-unordered fragments of text that nonetheless originate from some logical unit within a document
 * (e.g. page, column, section, line, etc).
 *
 * The list is viewed as a sequence and handed to the [Sequence]-based overload.
 *
 * @param texts the fragments to inspect.
 * @param block extracts the string content of a fragment.
 * @return whatever the [Sequence]-based overload returns for the same fragments.
 */
internal fun <T> inferDominantTextDirectionFromPossiblyUnorderedTextFragments(
    texts: List<T>,
    block: (T) -> String,
): TextDirection? = inferDominantTextDirectionFromPossiblyUnorderedTextFragments(
    texts = texts.asSequence(),
    block = block,
)

/**
 * Returns the dominant text direction for a string using a rough approximation typically used for this purpose:
 * - the direction of the first strongly-directional character, if one exists;
 * - otherwise [TextDirection.LeftToRight] if at least one weakly-directional character was seen;
 * - otherwise `null`, leaving it to the caller to decide how to handle it.
 */
internal val String.textDirection
    get(): TextDirection? {
        var sawWeakCharacter = false
        for (character in this) {
            when (character.directionality) {
                // The first strong character settles the question immediately.
                CharDirectionality.StrongLeftToRight -> return TextDirection.LeftToRight
                CharDirectionality.StrongRightToLeft -> return TextDirection.RightToLeft

                // NOTE: AFAICT, all the "weak" directional characters are written left-to-right, so remembering
                // that we saw one is enough to fall back to left-to-right at the end.
                CharDirectionality.Weak -> {
                    sawWeakCharacter = true
                }

                // Neutral chars carry no direction information, so we ignore them here.
                //
                // NOTE: leaving handling of the explicit formatting control characters out of scope for now, because:
                // 1. it's not clear how to handle them in PDF context, and
                // 2. it seems more common that the PDF renderer would emit separate items for RTL/LTR spans, rather
                //    than encoding directionality using these invisible control chars.
                CharDirectionality.Neutral, CharDirectionality.ExplicitFormatting -> Unit
            }
        }
        return if (sawWeakCharacter) TextDirection.LeftToRight else null
    }

/**
 * Returns a list containing the same items, reordered as necessary to revert re-orderings that the Unicode
 * Bidirectional Algorithm might have done during rendering, attempting to reconstruct the original ordering of the
 * source text.
 *
 * When the line's own direction is unknown, left-to-right is assumed as the dominant direction.
 *
 * For example, consider the following scenario in which
 * - numbers appear in a line of RTL text, AND
 * - each character is a separate item (very common for RTL)
 * - (pretend for the moment that English should be read RTL)
 *
 * Original source text:
 *
 *     I have 1000 apples.
 *
 * Display (because numbers always read LTR!!):
 *
 *     .selppa 1000 evah I
 *
 * Reconstructed source text based solely on dominant direction and geometry:
 *
 *     I have 0001 apples.
 *
 * Reconstructed source text based on dominant direction, geometry, and UBA-layout rules:
 *
 *     I have 1000 apples.
 */
internal fun TextLine.itemsInMostLikelySourceTextOrderAccountingForBidirectionalTextRendering():
    List<WrappedBookPageTextContentItem> {
    val lineItems = texts
    val dominantDirection = textDirection ?: TextDirection.LeftToRight
    return reverseRunsOfItemsWithNonDominantDirectionThatWillBeBackwardsIfDerivedFromAVisualTextRendering(
        items = lineItems,
        dominantTextDirection = dominantDirection,
        itemToTextDirection = { item -> item.source.textDirection },
    )
}

/**
 * Reorders a list of items that is already ordered according to the [dominantTextDirection], but possibly containing
 * artifacts in which runs of items with non-dominant directionality appear in the reverse order. We can detect these
 * situations by assuming the original rendering was done using the Unicode Bidirectional Algorithm, and thus
 * reconstruct the source text by reversing these runs of items with non-dominant direction.
 *
 * Note that we don't have to do anything to the actual text within each item - the whole reason this exists is that
 * with PDFs we end up with page content that is rendered with bidirectional layout, but the text that was originally
 * serialized first-to-last in the source document must be reconstructed.
 *
 * Items without a direction (`null`) are emitted in place when no non-dominant run is pending. While a run is
 * pending they are kept with it, positioned after the non-dominant items collected so far.
 *
 * @param items the items, already sorted according to [dominantTextDirection].
 * @param dominantTextDirection the direction the overall ordering of [items] follows.
 * @param itemToTextDirection yields the direction of a single item, or `null` when it has none.
 * @return a new list with the same items in which each run of non-dominant items has been reversed.
 */
internal fun <T> reverseRunsOfItemsWithNonDominantDirectionThatWillBeBackwardsIfDerivedFromAVisualTextRendering(
    items: List<T>,
    dominantTextDirection: TextDirection,
    itemToTextDirection: (T) -> TextDirection?,
): List<T> {
    // Items whose final position is settled, in output order.
    val reordered = mutableListOf<T>()
    // The non-dominant run currently being collected; it is flushed as soon as a dominant item shows up.
    val pendingRun = mutableListOf<T>()

    // Rely on the prior order of elements in the line to tell us the dominant ordering
    for (item in items) {
        when (itemToTextDirection(item)) {
            // A dominant item ends any pending run: emit the run, then the item itself.
            dominantTextDirection -> {
                reordered.addAll(pendingRun)
                pendingRun.clear()
                reordered.add(item)
            }

            // Indeterminate (neutral) items continue the current run if there is one, else stay in dominant order.
            null -> if (pendingRun.isEmpty()) reordered.add(item) else pendingRun.add(item)

            // Non-dominant items are pushed to the front of the run, which reverses their relative order.
            else -> pendingRun.add(0, item)
        }
    }

    // A run that reaches the end of the line is emitted as-is.
    reordered.addAll(pendingRun)
    return reordered
}
