package com.speechify.client.api.content.txt

import com.speechify.client.api.content.Content
import com.speechify.client.api.content.ContentCursor
import com.speechify.client.api.content.ContentElementReference
import com.speechify.client.api.content.ContentText
import com.speechify.client.api.content.ContentTextPosition
import com.speechify.client.api.content.ContentTextUtils
import com.speechify.client.api.content.TextElementContentSlice
import com.speechify.client.api.content.view.txt.KBytes
import com.speechify.client.api.content.view.txt.PlainTextView
import com.speechify.client.api.util.Callback
import com.speechify.client.api.util.Result
import com.speechify.client.api.util.characterLength
import com.speechify.client.api.util.fromCo
import com.speechify.client.api.util.io.File
import com.speechify.client.api.util.io.coGetBytes
import com.speechify.client.api.util.io.coGetSizeInBytes
import com.speechify.client.api.util.isUtf8CharBoundary
import com.speechify.client.api.util.successfully
import com.speechify.client.api.util.tryCollectTo
import com.speechify.client.api.util.tryFold
import com.speechify.client.internal.sync.AtomicInt
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.flow.map
import kotlinx.coroutines.sync.Semaphore
import kotlinx.coroutines.sync.withPermit
import kotlin.js.JsExport
import kotlin.js.JsName

/**
 * A [PlainTextView] over the contents of a [File], read in chunks of at most [readChunkSize] bytes.
 *
 * # Note
 *
 * This implementation only supports **UTF-8 encoded files**; any other encoding will just result in mangled text,
 * with undecodable characters replaced by �
 */
@JsExport
data class FilePlainTextView internal constructor(private val file: File, private val readChunkSize: Int) :
    PlainTextView, Content {

    /** Creates a view over [file] using the default read chunk size ([KBytes.FOUR]). */
    @JsName("from")
    constructor(file: File) : this(file, KBytes.FOUR)

    override val start: ContentCursor
        get() = ContentElementReference.forRoot().start

    override val end: ContentCursor
        get() = ContentElementReference.forRoot().end

    // atomic so iOS doesn't crash
    private val length: AtomicInt = AtomicInt(-1) // -1 means un-init
    private val lock: Semaphore = Semaphore(1)

    /**
     * Reports the length of the file's text, measured in [String.length] units of the decoded chunks.
     *
     * The cached value is returned when available; otherwise the file is read to compute it.
     */
    override fun getLength(callback: Callback<Int>) = callback.fromCo {
        when (val length = length.get()) {
            -1 -> getLength()
            else -> length.successfully()
        }
    }

    // We can't actually use file::getSizeInBytes because UTF-8 is a variable-width encoding, which means that
    // byte length >= character length.
    // So we must read the entire file to get the correct character count

    // Because reading the entire file can be quite expensive we cache this result and make sure only one task
    // ever reads it
    private suspend fun getLength(): Result<Int> = lock.withPermit {
        // Re-check under the lock: another task may have filled the cache while we were waiting for the permit.
        when (val length = this.length.get()) {
            -1 -> validChunks()
                .tryFold(0) { acc, (_, length) -> acc + length }
                .orReturn { return@withPermit it }
                .also { this.length.set(it) }
            else -> length
        }.successfully()
    }

    /**
     * WARNING: This implementation does not work properly for a non-zero [start], because [start] is used as a
     * byte offset into the file while it is expressed as a character index, and the two only coincide when every
     * preceding character is encoded as a single byte.
     * Please use getSlice(0, size) to get the entire content of the file for any calculations.
     */
    override fun getSlice(
        start: Int,
        end: Int,
        callback: Callback<ContentText>,
    ) = callback.fromCo {
        getSlice(start, end)
    }

    /** Produces a cursor positioned at [charIndex] inside the root element. */
    override fun getCursor(charIndex: Int, callback: Callback<ContentCursor>) = callback.fromCo {
        ContentTextPosition(ContentElementReference.forRoot(), charIndex).successfully()
    }

    /**
     * Reads chunks starting at [start] until at least `end - start` characters were decoded, wraps each chunk in a
     * [TextElementContentSlice] with consecutive ranges, concatenates them and slices the result to `[start, end)`.
     */
    private suspend fun getSlice(start: Int, end: Int): Result<ContentText> {
        val expectedLength = end - start
        // Running start index of the next chunk; advanced as each chunk is mapped.
        var startIndex = start
        return validChunks(startAt = start, stopAfterReadingNChars = expectedLength)
            .map { r ->
                r.map { (text, _) ->
                    TextElementContentSlice(
                        elementReference = ContentElementReference.forRoot(),
                        range = startIndex to startIndex + text.length,
                        text,
                    )
                        .also { startIndex += text.length }
                }
            }
            .tryCollectTo(mutableListOf())
            .map(ContentTextUtils::concat)
            .map { it.slice(start, end) }
    }

    /**
     * Lazily reads the file in chunks of at most [readChunkSize] bytes and emits each chunk decoded as UTF-8,
     * paired with the length of the decoded text. Chunks are cut so that a multi-byte character is never split
     * between two emissions.
     *
     * If reading the file size or a chunk fails, the failure is emitted and the flow completes.
     *
     * @param startAt offset at which reading begins. NOTE: it is used directly as a **byte** offset.
     * @param stopAfterReadingNChars no further chunk is read once at least this many characters were emitted.
     */
    private fun validChunks(
        startAt: Int = 0,
        stopAfterReadingNChars: Int = Int.MAX_VALUE,
    ): Flow<Result<Pair<String, Int>>> = flow {
        var byteCursor = startAt
        var totalReadChars = 0
        val totalFileLen = file.coGetSizeInBytes().orReturn { return@flow emit(it) }
        while (byteCursor < totalFileLen && totalReadChars < stopAfterReadingNChars) {
            // If the garbage collector behaves, we won't ever have the entire file in memory
            // (for files with more than 4k bytes)
            val buffer = file
                .coGetBytes(byteCursor, (byteCursor + readChunkSize).coerceAtMost(totalFileLen))
                .orReturn { return@flow emit(it) }

            if (buffer.isEmpty()) return@flow

            // We have to find the start of the last character in this byte array, so we can make sure not
            // to slice it in two, creating 2 invalid utf8 characters

            // [lastIndex, lastIndex - 5] will cover all the possible lengths of utf8 multibyte characters
            // NOTE: the range is inclusive of both ends
            val startOfLastChar = (buffer.lastIndex downTo (buffer.lastIndex - 5).coerceAtLeast(0))
                .find { buffer[it].isUtf8CharBoundary() }

            val candidateEnd = if (startOfLastChar == null) {
                // we could not find the start of the last character, this means this byte array is not
                // valid utf-8 anyway, so we pretend the entire byte array is a valid string.
                //
                // `ByteArray::decodeToString` will replace invalid characters with the replacement character
                // but that's the best we can do.
                //
                // Returning an error and terminating is also an option but that just means the user will be
                // greeted to an error instead of content when some content might still be readable
                //
                // Either way, this use case is unsupported, so any behaviour other than crashing is acceptable
                buffer.size
            } else {
                val endOfLastChar = startOfLastChar + buffer[startOfLastChar].characterLength()
                when {
                    // if the last character fits inside this byte array we decode it as well
                    endOfLastChar <= buffer.size -> endOfLastChar
                    // if the file doesn't have enough bytes to complete this character
                    // we decode the full buffer to avoid an infinite loop
                    // (byteCursor + endOfLastChar is the absolute offset where the character would end)
                    byteCursor + endOfLastChar > totalFileLen -> buffer.size
                    // otherwise we leave it for the next iteration of the loop
                    else -> startOfLastChar
                }
            }

            // Always consume at least one byte of a non-empty buffer, otherwise the cursor would never advance
            // (e.g. when a single character is longer than the chunk that was read).
            val decodeTo = if (candidateEnd > 0) candidateEnd else buffer.size

            val text = buffer.decodeToString(endIndex = decodeTo)
            emit((text to text.length).successfully())

            totalReadChars += text.length
            byteCursor += decodeTo
        }
    }
}
