package com.speechify.client.helpers.content.standard.book

import com.speechify.client.api.content.view.standard.StandardBlock
import com.speechify.client.api.content.view.standard.StandardBlocks
import com.speechify.client.internal.services.ml.ParsedBookPageTextGroup
import com.speechify.client.internal.services.ml.models.TextGroupType
import com.speechify.client.internal.util.collections.mapFirst

/**
 * Concatenates the blocks of every page into a single flat list, keeping the page order and the
 * order of the blocks within each page.
 *
 * NOTE(anson): here's where we would smooth out paragraphs that span page breaks
 *
 * @param pageBlocks the blocks of each page, one entry per page.
 * @return all blocks of all pages, in order.
 */
internal fun combineBlocksForPages(pageBlocks: List<StandardBlocks>): List<StandardBlock> {
    val combined = mutableListOf<StandardBlock>()
    for (page in pageBlocks) {
        combined.addAll(page.blocks.toList())
    }
    return combined
}

/**
 * Lazily converts parsed page text groups into [StandardBlock]s, preserving the group order.
 *
 * Mapping by [TextGroupType]:
 * - `ListItem` becomes a non-numbered [StandardBlock.List] holding a single paragraph item.
 * - `Formula`, `Unknown` and `Table` produce no block; `Picture` produces no block either unless
 *   [isScannedBook] is `true`, in which case its text is emitted as a paragraph.
 * - `Footer`, `Footnote`, `Header`, `Paragraph` and `Caption` map to the block of the same name;
 *   `SectionTitle`, `Title` and `SectionHeader` all map to [StandardBlock.Heading].
 * - Any other type produces no block.
 *
 * @param isScannedBook whether the page comes from a scanned book; controls whether the text of
 * `Picture` groups is kept.
 * @return a sequence with zero or one block per input group.
 */
internal fun List<ParsedBookPageTextGroup>.toStandardBlocks(isScannedBook: Boolean): Sequence<StandardBlock> {
    // True when the text starts with a bullet point character followed by whitespace.
    // Anything shorter than two characters cannot be a bulleted list item.
    fun isListItemWithBulletPoint(text: String): Boolean =
        text.length >= 2 && text[0].isBulletPoint() && text[1].isWhitespace()

    // Builds the cleaned-up text of a group. Shared by every branch that emits text, so the
    // bullet handling lives in one place: a leading "<bullet><whitespace>" prefix (2 characters)
    // is dropped from the items that `mapFirst` hands to the lambda before the lines are joined.
    fun cleanedTextOf(group: ParsedBookPageTextGroup) =
        group.labeledBookPageTextContentItems
            .mapFirst { item ->
                if (isListItemWithBulletPoint(item.text.text)) {
                    item.copy(text = item.text.slice(2, item.text.length))
                } else {
                    item
                }
            }
            .let { items -> Line.groupsFrom(items) }
            .joinLinesApplyingCleanupHeuristics()

    return asSequence().flatMap { group ->
        when {
            group.textGroupType is TextGroupType.ListItem -> {
                val text = cleanedTextOf(group)
                sequenceOf(StandardBlock.List(isNumbered = false, items = arrayOf(StandardBlock.Paragraph(text))))
            }
            // Ignore the text content of Picture (if it is not a scanned book), Formula, Unknown and Table.
            group.textGroupType is TextGroupType.Picture && !isScannedBook ||
                group.textGroupType is TextGroupType.Formula ||
                group.textGroupType is TextGroupType.Unknown ||
                group.textGroupType is TextGroupType.Table ->
                emptySequence()

            else -> {
                val text = cleanedTextOf(group)
                val standardBlock = when (group.textGroupType) {
                    TextGroupType.Footer -> StandardBlock.Footer(text)
                    TextGroupType.Footnote -> StandardBlock.Footnote(text)
                    TextGroupType.Header -> StandardBlock.Header(text)
                    TextGroupType.SectionTitle, TextGroupType.Title, TextGroupType.SectionHeader ->
                        StandardBlock.Heading(text)
                    TextGroupType.Paragraph -> StandardBlock.Paragraph(text)
                    TextGroupType.Caption -> StandardBlock.Caption(text)
                    // Only reachable for scanned pages: non-scanned pictures were filtered out above.
                    TextGroupType.Picture -> StandardBlock.Paragraph(text)
                    else -> null
                }

                standardBlock?.let { sequenceOf(it) } ?: emptySequence()
            }
        }
    }
}

/**
 * Lazily wraps each [LineGroup] into a [ParsedBookPageTextGroup], assigning it a [TextGroupType].
 *
 * The type is decided in this order of precedence: membership in the headers, then the footers,
 * then [footnotes]; a group found in none of them is typed from its own [LineGroup.type]
 * (`Heading` becomes `Title`, `Paragraph` stays `Paragraph`).
 *
 * @param headersAndFooters the line groups already identified as headers and footers.
 * @param footnotes the line groups already identified as footnotes.
 * @return one parsed text group per input line group, in the same order, carrying the chunks of
 * all of the group's lines.
 */
internal fun List<LineGroup>.toParsedBookPageTextGroup(
    headersAndFooters: HeadersAndFooters,
    footnotes: List<LineGroup>,
): Sequence<ParsedBookPageTextGroup> {
    // Picks the text group type for a single line group; the first matching rule wins.
    fun classify(candidate: LineGroup) = when {
        headersAndFooters.headers.contains(candidate) -> TextGroupType.Header
        headersAndFooters.footers.contains(candidate) -> TextGroupType.Footer
        footnotes.contains(candidate) -> TextGroupType.Footnote
        else -> when (candidate.type) {
            Line.Type.Heading -> TextGroupType.Title
            Line.Type.Paragraph -> TextGroupType.Paragraph
        }
    }

    return asSequence().map { lineGroup ->
        ParsedBookPageTextGroup(
            textGroupType = classify(lineGroup),
            labeledBookPageTextContentItems = lineGroup.lines.flatMap { line -> line.chunks },
        )
    }
}
