package com.speechify.client.api.audio

import com.speechify.client.api.content.ContentCursor
import com.speechify.client.api.content.ContentText
import com.speechify.client.api.content.view.speech.Speech
import com.speechify.client.api.util.Callback
import com.speechify.client.api.util.Result
import kotlin.coroutines.resume
import kotlin.coroutines.suspendCoroutine
import kotlin.js.JsExport

/**
 * The Utterance encapsulates a piece of content (the [ContentText]) and how you can listen to it (the [Player]).
 *
 * Construction by SDK-consumers can be done via [com.speechify.client.api.audio.UtteranceFactory].
 * The constructor is `internal`, so subclasses can only be declared inside the SDK module itself.
 */
@JsExport
abstract class Utterance internal constructor() {
    /**
     * Provides the [Speech] from which this Utterance was synthesized.
     */
    abstract val speech: Speech

    /**
     * Provides the content from which this Utterance was synthesized. This is typically a rendering of the [speech] into plaintext or SSML.
     */
    abstract val text: ContentText

    /**
     * Provides the [VoiceMetadata] associated with this Utterance.
     *
     * NOTE(review): presumably this describes the voice that was used for the synthesis - confirm with implementors.
     */
    abstract val voiceMetadata: VoiceMetadata

    /**
     * Provides context about where the synthesis occurred. Used for telemetry purposes.
     */
    abstract val synthesisLocation: SynthesisLocation

    /**
     * Creates a new Utterance by "slicing" both the content and audio contained in this Utterance.
     *
     * @param start the cursor at which the slice begins.
     * @param end the cursor at which the slice ends.
     * NOTE(review): whether the bounds are inclusive or exclusive is not specified here - confirm with implementors.
     * @return a new Utterance covering only the content and audio between [start] and [end].
     */
    abstract fun slice(start: ContentCursor, end: ContentCursor): Utterance

    /**
     * Provides a [Player] that you can use to play audio for the content contained in this Utterance.
     *
     * The result is delivered through [callback] rather than returned; coroutine code can use the suspending
     * wrapper [coGetPlayer] instead.
     *
     * @param initialOptions the [PlayerOptions] handed to the implementation when obtaining the [Player].
     * @param callback receives the outcome of obtaining the [Player].
     */
    internal abstract fun getPlayer(
        initialOptions: PlayerOptions,
        callback: Callback<Player>,
    )
}

/**
 * Slices this Utterance from [start] through to the end of its [Utterance.text].
 *
 * This is shorthand for calling [Utterance.slice] with `text.end` as the end cursor, intended for callers
 * that only have a non-boundary cursor for the start of the slice.
 *
 * @param start the cursor at which the slice begins.
 * @return the result of [Utterance.slice] for the range from [start] to the end of the text.
 */
internal fun Utterance.sliceFrom(start: ContentCursor): Utterance {
    val endOfText = text.end
    return slice(start, endOfText)
}

/**
 * Describes where the synthesis of an [Utterance] occurred, as exposed by [Utterance.synthesisLocation].
 */
@JsExport
enum class SynthesisLocation {

    /**
     * Special location used for empty utterances.
     */
    NONE,

    /**
     * Special "location" used for utterances constructed from static data, which may have been loaded from a local cache or a remote storage API.
     *
     * For example, pre-recorded blocks within onboarding documents, or chapters of immersive audiobooks.
     *
     * Even though this is not a "location" per se, this name was chosen because:
     * - it lets us avoid mixing analytics with [LOCAL] and [REMOTE] data, which is understood to be TTS-related.
     * - it lets us avoid data corruption in the above use-cases, where the bundle init code doesn't actually know whether the static data was loaded remotely or from a local cache.
     */
    STATIC,

    /**
     * The synthesis occurred entirely on a remote server, for example all audio from server-based HD voices.
     */
    REMOTE,

    /**
     * The synthesis occurred entirely on the local device, for example all audio generated by a local TTS engine.
     */
    LOCAL,

    /**
     * The audio was generated on the device, but the text was phonemized / normalized on a remote server.
     * For example, AI voices on iOS.
     */
    LOCAL_AUDIO_GENERATION_REMOTE_PHONEMIZATION,
}

/**
 * A debugging aid identifying the voice or audio source behind this Utterance:
 * - for a [LocalUtterance], the id of its local synthesis voice,
 * - for a [MediaUtterance], its media URL,
 * - for any other implementation, a fixed placeholder string.
 */
internal val Utterance.voiceInfoForDebug
    get() = when (val utterance = this) {
        is LocalUtterance -> utterance.localSynthesisVoice.id
        is MediaUtterance -> utterance.mediaUrl
        else -> "Unknown Utterance type"
    }

/**
 * Suspending wrapper around the callback-based [Utterance.getPlayer].
 *
 * Suspends the calling coroutine and resumes it with whatever [Result] the implementation passes to the callback.
 *
 * @param initialOptions forwarded unchanged to [Utterance.getPlayer].
 * @return the [Result] delivered to the callback.
 */
internal suspend fun Utterance.coGetPlayer(initialOptions: PlayerOptions): Result<Player> =
    suspendCoroutine { continuation ->
        getPlayer(initialOptions) { playerResult ->
            continuation.resume(playerResult)
        }
    }
