package com.speechify.client.helpers.content.index

import com.speechify.client.api.content.ContentIndex
import com.speechify.client.api.content.ContentStats
import com.speechify.client.api.content.EstimatedCount
import com.speechify.client.api.content.view.book.BookView
import com.speechify.client.api.content.view.book.coGetPages
import com.speechify.client.api.util.orThrow
import com.speechify.client.internal.util.collections.flows.SharedFlowThatFinishes
import com.speechify.client.internal.util.collections.flows.flowFromAsyncGetFlow
import com.speechify.client.internal.util.collections.median
import com.speechify.client.internal.util.coroutines.ChildCoroutineErrorBehavior
import com.speechify.client.internal.util.extensions.collections.flows.mapConcurrentlyPreservingOrderReturningIntermediateSparseLists
import com.speechify.client.internal.util.extensions.collections.flows.shareFinishingAlwaysReturningLastItemIn
import com.speechify.client.internal.util.text.groupingToWords.wordCount
import kotlinx.coroutines.flow.map
import kotlin.random.Random

/**
 * Maximum number of pages that [ApproximateBookIndexV1] samples when estimating content stats.
 *
 * Books with at most this many pages have every page sampled. For larger books, pages 0 and 1 are always
 * sampled and the remaining `PAGES_TO_CHECK - 2` pages are picked pseudo-randomly.
 */
const val PAGES_TO_CHECK = 6

/**
 * A [ContentIndex] for [BookView]s built on sampling and approximation: at most [PAGES_TO_CHECK] pages are read
 * and the stats of the whole book are extrapolated from them, so large books never need to be processed in full.
 */
internal class ApproximateBookIndexV1(override val book: BookView) : BaseBookContentIndex(book) {

    /**
     * Emits a refined [ContentStats] estimate each time more of the sampled pages have been counted.
     */
    override val contentStatsFlow: SharedFlowThatFinishes<ContentStats> = flowFromAsyncGetFlow {
        val totalPageCount = book.getMetadata().numberOfPages
        val sampledPages = book.coGetPages(pageIndexesToSample(totalPageCount)).orThrow()

        sampledPages.toList()
            // A concurrency of 2 gives precedence to the first and second pages over the randomly picked ones,
            // in order to speed up the listening experience.
            .mapConcurrentlyPreservingOrderReturningIntermediateSparseLists(maxConcurrency = 2) { page ->
                val textItems = page.getUnstableTextContentApproximatelyOrdered().orThrow()
                PageTextualCount(
                    wordCount = textItems.sumOf { it.text.text.wordCount() },
                    charCount = textItems.sumOf { it.text.text.length },
                )
            }
            .map { countsSoFar -> countsSoFar.toContentStats(contentAllPagesCount = totalPageCount) }
    }.shareFinishingAlwaysReturningLastItemIn(
        onError = ChildCoroutineErrorBehavior.PROPAGATE_TO_PARENT,
        scope = scope,
    )

    /**
     * Chooses the indexes of the pages to sample for a book of [totalPageCount] pages.
     *
     * Books with at most [PAGES_TO_CHECK] pages get every index. Larger books get indexes 0 and 1 followed by
     * `PAGES_TO_CHECK - 2` pseudo-randomly picked indexes from the remaining pages.
     */
    private fun pageIndexesToSample(totalPageCount: Int): Array<Int> {
        if (totalPageCount <= PAGES_TO_CHECK) {
            // Small book: the page count is already within the sampling budget, so take every page.
            return (0 until totalPageCount).toList().toTypedArray()
        }

        // Random pages are more likely to be representative of the document than the first / last few pages.
        // Seeding the generator with the page count means the same pages are sampled every time, which keeps
        // the UX consistent: the same duration is shown on every run.
        //
        // Pages 0 and 1 are excluded from the random pool because they are always included explicitly below;
        // they are needed to start listening to a freshly imported item.
        val randomlyPickedIndexes = (2 until totalPageCount)
            .shuffled(Random(totalPageCount))
            .take(PAGES_TO_CHECK - 2)

        return (listOf(0, 1) + randomlyPickedIndexes).toTypedArray()
    }
}

/**
 * Word and character totals of a single sampled page.
 *
 * @property wordCount sum of the word counts of the page's text items.
 * @property charCount sum of the text lengths of the page's text items.
 */
internal data class PageTextualCount(
    val wordCount: Int,
    val charCount: Int,
)

/**
 * Turns the per-page counts collected so far into [ContentStats] for the whole book.
 *
 * If the receiver holds at least [contentAllPagesCount] entries, every page has been counted: the totals are
 * the plain sums and the confidence is `1.0`. Otherwise the totals are extrapolated as the median per-page
 * count multiplied by [contentAllPagesCount], with the fraction of pages counted as the confidence.
 *
 * @param contentAllPagesCount total number of pages in the book.
 */
private fun List<PageTextualCount>.toContentStats(contentAllPagesCount: Int): ContentStats {
    val everyPageCounted = size >= contentAllPagesCount
    if (everyPageCounted) {
        val totalWords = sumOf { it.wordCount }
        val totalChars = sumOf { it.charCount }
        return ContentStats(
            estimatedWordCount = EstimatedCount(count = totalWords, confidence = 1.0),
            estimatedCharCount = EstimatedCount(count = totalChars, confidence = 1.0),
        )
    }

    // Only part of the book was counted: extrapolate from the median page.
    val countedFraction = size.toDouble() / contentAllPagesCount
    val medianWordsPerPage = map { it.wordCount }.median()
    val medianCharsPerPage = map { it.charCount }.median()
    return ContentStats(
        estimatedWordCount = EstimatedCount(
            count = medianWordsPerPage * contentAllPagesCount,
            confidence = countedFraction,
        ),
        estimatedCharCount = EstimatedCount(
            count = medianCharsPerPage * contentAllPagesCount,
            confidence = countedFraction,
        ),
    )
}
