package com.speechify.client.helpers.content.standard.book.heuristics.v2.stages

import com.speechify.client.helpers.content.standard.book.heuristics.v2.models.ContentBlock
import com.speechify.client.helpers.content.standard.book.heuristics.v2.models.LinePosition
import com.speechify.client.helpers.content.standard.book.heuristics.v2.models.Logger
import com.speechify.client.helpers.content.standard.book.heuristics.v2.models.ParsingPipelineStage
import com.speechify.client.helpers.content.standard.book.isAlmostEqual
import com.speechify.client.internal.util.text.groupingToWords.wordCount
import kotlin.math.min

/**
 * Minimum number of words a block must contain to be considered part of a column.
 *
 * The threshold is applied to both neighbouring blocks in `UnionBlocksColumn.isColumnNext`:
 * if either block has fewer words, the pair is not treated as a column boundary.
 * This helps differentiate real columns from image captions or table headers.
 */
private const val MIN_WORDS_IN_THE_COLUMN = 4

/**
 * Pipeline stage that detects blocks laid out side by side as columns and merges
 * every detected group of column blocks into a single [ContentBlock].
 *
 * Only blocks with [LinePosition.HORIZONTAL] lines take part in the merge; blocks with
 * [LinePosition.VERTICAL] lines are passed through untouched and appended after the
 * (possibly merged) horizontal blocks.
 *
 * @param logger optional logger that receives the plain text of the resulting blocks.
 */
internal class UnionBlocksColumn(private val logger: Logger?) :
    ParsingPipelineStage<List<ContentBlock>, List<ContentBlock>> {
    /** Working buffer for the current [process] call; reset at the start of every call. */
    private val possibleColumnBlocks = mutableListOf<ContentBlock>()

    /**
     * Merges horizontal blocks that form columns.
     *
     * @param input blocks of a page in their current order.
     * @return a new list with the merged horizontal blocks first, followed by the vertical blocks.
     *   NOTE(review): blocks whose `linesPosition` is neither HORIZONTAL nor VERTICAL (if such
     *   values exist) are not part of the result - confirm this is intended.
     */
    override fun process(input: List<ContentBlock>): List<ContentBlock> {
        // Drop whatever a previous call left behind, otherwise blocks of earlier inputs
        // would be merged into (and returned with) the current result.
        possibleColumnBlocks.clear()

        // Ignore vertical blocks
        val verticalBlocks = input.filter { it.linesPosition == LinePosition.VERTICAL }
        val horizontalBlocks = input.filter { it.linesPosition == LinePosition.HORIZONTAL }

        possibleColumnBlocks.addAll(horizontalBlocks)

        mergeMultiColumnBlocks()

        logUnionBlocks()

        // `+` creates a fresh list, so the returned value is independent of the internal buffer.
        return possibleColumnBlocks + verticalBlocks
    }

    /** The algorithm searches for a sign that a new column has started,
     *   then tries to find the start of the previous column and merges the blocks.
     *   For example:
     *   ....(1) ....(2)
     *   ....(1)
     *  The algorithm will detect the new column (2) and merge it with the previous one.
     */
    private fun mergeMultiColumnBlocks() {
        var currentIndex = 1

        while (currentIndex < possibleColumnBlocks.size) {
            val current = possibleColumnBlocks[currentIndex]
            val previous = possibleColumnBlocks[currentIndex - 1]

            if (isColumnNext(previous, current)) {
                val startOfColumnIndex = findPossibleStartOfColumn(currentIndex)
                val endOfColumnIndex = findPossibleEndOfColumn(currentIndex)

                // Build the merged block while the sublist view is still valid,
                // i.e. before the backing list is structurally modified.
                val mergedBlock = buildContentBlock(
                    possibleColumnBlocks.subList(startOfColumnIndex, endOfColumnIndex + 1),
                )

                // Replace the whole [start, end] range with the single merged block.
                val mergedCount = endOfColumnIndex - startOfColumnIndex + 1
                repeat(mergedCount) { possibleColumnBlocks.removeAt(startOfColumnIndex) }
                possibleColumnBlocks.add(startOfColumnIndex, mergedBlock)

                // Continue right after the merged block; it may itself be followed by another column.
                currentIndex = startOfColumnIndex + 1
            } else {
                currentIndex++
            }
        }
    }

    /**
     * Tells whether [current] starts a new column next to [prev].
     *
     * That is the case when [current] begins strictly to the right of [prev]'s right edge,
     * [current]'s top is strictly less than [prev]'s bottom, and both blocks contain
     * at least [MIN_WORDS_IN_THE_COLUMN] words (the threshold could be reconsidered).
     */
    private fun isColumnNext(prev: ContentBlock, current: ContentBlock): Boolean {
        if (prev.box.right >= current.box.left) return false
        if (prev.box.bottom <= current.box.top) return false
        if (prev.plainText.wordCount() < MIN_WORDS_IN_THE_COLUMN) return false
        if (current.plainText.wordCount() < MIN_WORDS_IN_THE_COLUMN) return false
        return true
    }

    /**
     * Walks backwards from the block before the detected column pair and returns the index of the
     * first block that belongs to the column group.
     *
     * The walk stops at the first block whose top is less than the smaller top of the pair,
     * unless the two tops are almost equal (tolerance 0.0001).
     *
     * @param columnIndex index of the block that was detected as the start of a new column.
     */
    private fun findPossibleStartOfColumn(columnIndex: Int): Int {
        val minTop = min(possibleColumnBlocks[columnIndex].box.top, possibleColumnBlocks[columnIndex - 1].box.top)
        var i = columnIndex - 2
        while (i >= 0) {
            val block = possibleColumnBlocks[i]
            if (block.box.top < minTop && !block.box.top.isAlmostEqual(minTop, 0.0001)) break
            i--
        }
        return i + 1
    }

    /**
     * Walks forward from the block after the detected column pair and returns the index of the
     * last block that belongs to the column group.
     *
     * The walk stops at the first block whose top is less than the smaller top of the pair, or whose
     * left edge is less than the smaller right edge of the pair.
     *
     * @param columnIndex index of the block that was detected as the start of a new column.
     */
    private fun findPossibleEndOfColumn(columnIndex: Int): Int {
        val minRight = min(possibleColumnBlocks[columnIndex].box.right, possibleColumnBlocks[columnIndex - 1].box.right)
        val minTop = min(possibleColumnBlocks[columnIndex].box.top, possibleColumnBlocks[columnIndex - 1].box.top)
        var i = columnIndex + 1
        while (i < possibleColumnBlocks.size) {
            val block = possibleColumnBlocks[i]
            if (block.box.top < minTop) break
            if (minRight > block.box.left) break
            i++
        }
        return i - 1
    }

    /**
     * Creates one block out of the lines of all [blocks], in the given order.
     * The lines position is taken from the first block.
     *
     * @param blocks non-empty list of blocks to merge; an empty list makes `first()` throw.
     */
    internal fun buildContentBlock(blocks: List<ContentBlock>): ContentBlock {
        val lines = blocks.flatMap { it.lines }
        return ContentBlock(lines, blocks.first().linesPosition)
    }

    /** Writes the plain text of every resulting horizontal block to the [logger], if there is one. */
    private fun logUnionBlocks() {
        logger?.log("Union blocks:")
        possibleColumnBlocks.forEach { logger?.log(it.plainText) }
    }
}
