package com.speechify.client.api.audio

import com.speechify.client.api.adapters.localsynthesis.LocalSynthesisVoice
import com.speechify.client.api.content.view.speech.Speech
import com.speechify.client.api.content.view.speech.SpeechSentence
import com.speechify.client.api.content.view.speech.windowedToBatchesOfAimedCharsCountSum
import com.speechify.client.api.diagnostics.Log
import com.speechify.client.api.util.Callback
import com.speechify.client.api.util.Result
import com.speechify.client.api.util.orThrow
import com.speechify.client.helpers.content.standard.ContentSequenceCharacteristics
import com.speechify.client.helpers.content.standard.getAimedCharsCountInUtteranceOverrideOrNullSafely
import com.speechify.client.internal.coroutines.fromNonCancellableAPIs.suspendCancellableCoroutineForNonCancellableAPIWithSDKResultByDetachRetainingResult
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.asFlow
import kotlinx.coroutines.flow.flatMapConcat
import kotlin.js.JsExport

/* The interfaces below are used to expose only specific members of the [Voice] component, while keeping the others
   internal to the SDK, so that the SDK remains free to evolve them.
*/

/**
 * A component that is associated with a specific voice (as identified by [VoiceMetadata]).
 */
@JsExport
interface HasVoiceIdentity {
    /** The metadata identifying the voice that this component is associated with. */
    val metadata: VoiceMetadata
}

/**
 * A component whose associated voice is a 'local synthesis' one, and which therefore carries more details on
 * the respective voice (in [localSynthesisVoice]).
 */
@JsExport
interface HasLocalVoiceIdentity {
    /** The local-synthesis voice that this component is associated with. */
    val localSynthesisVoice: LocalSynthesisVoice
}

/**
 * The [VoiceBase] is a richer interface for a voice that supports synthesis of a stream of content, allowing
 * implementers to make use of this for better decisions (strategy for how much to buffer, how to batch content for
 * synthesis, varying them for different positions in the stream, varying synthesis of a given sentence based on
 * clues from previous sentences, etc.).
 *
 * TODO - Phase out the discrete [Voice.synthesize] by making the [VoiceBase.synthesizingColdFlow] exportable
 * (need a boundary-exportable type for Flow ?) and the main point of extension. Then squash [VoiceBase]
 * into [Voice] #TODOMakeVoiceWorkOnStreamInPublicAPI - see [VoiceWithStreamSupportBase] (to be removed when the
 * TODO is addressed) for how we work around the current need to have [Voice.synthesize].
 * If needed, a `VoiceWithDiscreteSynthesisOnly` base class could be provided for ease of implementation.
 */
@JsExport
abstract class VoiceBase : HasVoiceIdentity {

    /**
     * The specification of the voice that this component synthesizes with. It is the source from which [metadata]
     * is derived.
     */
    abstract val voiceSpec: VoiceSpecOfAvailableVoice

    /**
     * The voice's metadata, computed from [voiceSpec] on first access and then reused (it is a `lazy` delegate).
     */
    override val metadata: VoiceMetadata by lazy {
        voiceSpec.toVoiceMetadata()
    }

    /**
     * Produces a flow of [Utterance]s synthesized from the sentences of [inputContentFlow].
     *
     * NOTE: The result's [Utterance]s will encompass batches of the [inputContentFlow].
     *
     * @param inputContentFlow the sentences to synthesize.
     * @param shouldOptimizeForQuickStart presumably a hint that a fast first utterance is preferred - how it is
     *  honored is up to the implementation (TODO confirm against implementers).
     * @param contentSequenceCharacteristics characteristics of the content sequence, for implementations to take
     *  into account when synthesizing.
     * @return the flow of synthesized [Utterance]s.
     */
    internal abstract fun synthesizingColdFlow(
        inputContentFlow: Flow<SpeechSentence>,
        shouldOptimizeForQuickStart: Boolean,
        contentSequenceCharacteristics: ContentSequenceCharacteristics,
    ): Flow<Utterance>
}

/**
The number of characters that should be aimed for in a single utterance, meaning that if the text chunks received so far
from the [com.speechify.client.helpers.content.standard.ContentProvider] contain fewer characters than this number, then
another chunk will be pulled.

NOTE: There's a danger of affecting audio-downloads if we ever decrease this. To be able to do so, we would need to address
the TODO #TODOPersistentCacheBecomesUnusableIfAimedCharsCountInBatchIsReduced

NOTE - there is also a tradeoff here:
- keeping this size small is important e.g. for Google Docs (any dynamic-content whose availability is
controlled by the User), because otherwise the user will experience an abrupt scroll away from the place
of reading (when the user has autoscroll on; if they don't, there will instead be a
misinforming prompt that the user should scroll down to read). See
[commit of "fix(playback/buffering): Further fixing dynamic-content ..."](https://github.com/SpeechifyInc/multiplatform-sdk/pull/804/commits/57a539604dc45a19338bda9a19e180fe4a515827)
A way of doing this just for the dynamic content could be #TODOVaryAimedCharsCountOnIsPullingContentRequiringUserInteraction
- but having the count larger prevents local synthesis from producing disproportionately long pauses between
sentences when the speech is fast (this happens at least on MacOS' Chrome) -
#LocalSynthesisPausesWhichIsAudibleOnFastReading. Local synthesis exhibits this because it has significant pauses of a
fixed length (at least on MacOS, Chrome), and their fixedness stands out disproportionately during fast speech.
The current number is still not ideal for local synthesis, but it was chosen based on this being [_"not so
important (local voices are not the revenue stream)"_](https://speechifyworkspace.slack.com/archives/C049LC42B2Q/p1671620111378889?thread_ts=1671618624.256859&cid=C049LC42B2Q).
TODO one way this could be resolved is by passing more information in the flow than just Sentences:
If the flow contained information on how much the consumer can safely pull without requesting a new chunk (e.g. 'end of chunk' tokens)
then it could try to fill a larger count, but only up to such a token. It would only move past such a
token when requested to produce another synthesis while the content before the token is not yet a complete sentence.
TODO another improvement, a temporary one, would be to consider #TODOConsiderIncreasingAimedCharsCountInBatch
 Note however that there would need to be certainty about the increase, because later deciding to decrease it would
 render users' downloaded audio broken and they would need to re-download, unless #TODOPersistentCacheBecomesUnusableIfAimedCharsCountInBatchIsReduced
 is addressed first.
 */
internal const val defaultAimedCharsCountInBatch: Int = 150

/**
 * A Voice carries metadata about the speaker, together with the capability to "speak" any content by synthesizing
 * an Utterance and manipulating the Player it provides.
 *
 * A Voice also contains a cache. To avoid occupying memory unnecessarily, an instance should be tied to a specific
 * document (which is where cache-hits happen), be dereferenced when that document is closed (so that it can be
 * garbage collected), and a new instance should be created for a different document.
 */
@JsExport
abstract class Voice : VoiceBase() {
    /**
     * Flow-based API that synthesizes [Utterance]s on demand from the incoming [SpeechSentence]s. Working with
     * Flows gives finer control over the granularity of buffering, synthesis, and caching.
     *
     * The sentences are grouped into batches sized by the aimed-chars-count override taken from
     * [contentSequenceCharacteristics] (falling back to [defaultAimedCharsCountInBatch]); each batch is then
     * synthesized in order and its utterances are emitted. A failed synthesis is thrown into the flow.
     *
     * Ideally this would be unified with the callback-based [synthesize] API, but that has to wait until there is
     * a way to expose a Flow-like construct to consumers.
     */
    @OptIn(ExperimentalCoroutinesApi::class)
    override fun synthesizingColdFlow(
        inputContentFlow: Flow<SpeechSentence>,
        shouldOptimizeForQuickStart: Boolean,
        contentSequenceCharacteristics: ContentSequenceCharacteristics,
    ): Flow<Utterance> {
        val aimedCharsCount =
            contentSequenceCharacteristics.getAimedCharsCountInUtteranceOverrideOrNullSafely()
                ?: defaultAimedCharsCountInBatch

        val sentenceBatches = inputContentFlow.windowedToBatchesOfAimedCharsCountSum(
            aimedCharsCountInBatch = aimedCharsCount,
        )

        // One synthesis per batch, concatenated so that the utterances keep the order of the input.
        return sentenceBatches.flatMapConcat { sentencesBatch ->
            val utterances = synthesize(sentencesWithAtLeastOne = sentencesBatch).orThrow()
            utterances.asFlow()
        }
    }

    /** Wraps the batch of sentences into a [Speech] and synthesizes it. */
    private suspend fun synthesize(sentencesWithAtLeastOne: List<SpeechSentence>): Result<Array<Utterance>> {
        val speech = Speech(sentencesWithAtLeastOne = sentencesWithAtLeastOne)
        return synthesize(speech)
    }

    /* #InternalForTests */

    /** Suspending adapter over the callback-based [synthesize]. */
    internal suspend fun synthesize(speech: Speech): Result<Array<Utterance>> {
        return suspendCancellableCoroutineForNonCancellableAPIWithSDKResultByDetachRetainingResult(
            onCancellationLeavingJobRunning = {
                /* #TODOImplementCancellations - TODO - cancellations should be implemented by:
                     * propagating `suspend` APIs wherever we are in native Kotlin
                     * using boundary versions of [kotlinx.coroutines.CompletableDeferred] on the boundaries.
                     NOTE: Until then, `suspendCancellableCoroutine` is still preferable to `suspendCoroutine`,
                     because it prevents a freeze on `cancelAndJoin` (and discarding a speech synthesis is fine
                     when the Job that would have used it was cancelled).
                 */
                Log.d(
                    { "A wasted speech synthesis: Cancellation of synthesis is not implemented" },
                    sourceAreaId = "Voice.synthesize",
                )
            },
        ) { continuation ->
            synthesize(speech, continuation::resume)
        }
    }

    /**
     * Produces the array of utterances that the client is to play in order, delivering it through [callback].
     *
     * Ideally this would be unified with the Flow-based [synthesizingColdFlow] API, but that has to wait until
     * there is a way to expose a Flow-like construct to consumers.
     */
    abstract fun synthesize(
        speech: Speech,
        callback: Callback<Array<Utterance>>,
    )
}
