package com.speechify.client.bundlers.reading.classic

import com.speechify.client.api.audio.AudioController
import com.speechify.client.api.audio.AudioControllerOptions
import com.speechify.client.api.audio.MediaUtterance
import com.speechify.client.api.audio.PreSpeechTransformOptions
import com.speechify.client.api.audio.SpeechMarksChunk
import com.speechify.client.api.audio.SpeechMarksImpl
import com.speechify.client.api.audio.SynthesisLocation
import com.speechify.client.api.audio.Utterance
import com.speechify.client.api.audio.UtteranceBufferSizeOption
import com.speechify.client.api.audio.UtteranceFlowProvider
import com.speechify.client.api.audio.Voice
import com.speechify.client.api.audio.VoiceGender
import com.speechify.client.api.audio.VoiceOfPreferenceProviderWithCache
import com.speechify.client.api.audio.VoicePremiumAvailabilityProvider
import com.speechify.client.api.audio.VoiceSpec
import com.speechify.client.api.audio.VoicesOfPreferenceStateProviderFromConfig
import com.speechify.client.api.audio.toVoiceFactoryFromSpec
import com.speechify.client.api.audio.toVoiceMetadataAsMedia
import com.speechify.client.api.content.ContentCursor
import com.speechify.client.api.content.ContentText
import com.speechify.client.api.content.ContentTextUtils
import com.speechify.client.api.content.ObjectRef
import com.speechify.client.api.content.view.speech.SpeechFlow
import com.speechify.client.api.content.view.speech.SpeechUtils
import com.speechify.client.api.content.view.standard.getContentTexts
import com.speechify.client.api.services.audio.AudioServerResponse
import com.speechify.client.api.services.library.LibraryServiceDelegate
import com.speechify.client.api.util.Callback
import com.speechify.client.api.util.Result
import com.speechify.client.api.util.fromCoWithTelemetryLoggingErrors
import com.speechify.client.api.util.orThrow
import com.speechify.client.api.util.successfully
import com.speechify.client.bundlers.content.ContentBundlerOptions
import com.speechify.client.bundlers.listening.ListeningBundle
import com.speechify.client.bundlers.reading.ReadingBundle
import com.speechify.client.helpers.content.standard.ContentSequenceCharacteristics
import com.speechify.client.helpers.content.standard.ContentSequenceCharacteristicsOfImmutableAlwaysLiveNoUserEffectContent
import com.speechify.client.helpers.content.standard.StaticStandardView
import com.speechify.client.helpers.features.ListeningProgressTracker
import com.speechify.client.helpers.ui.controls.PlaybackControls
import com.speechify.client.helpers.ui.controls.PlaybackControls.Companion.createPlaybackControls
import com.speechify.client.internal.createTopLevelCoroutineScope
import com.speechify.client.internal.sync.AtomicRef
import com.speechify.client.internal.util.collections.flows.flowFromAsyncSeed
import com.speechify.client.internal.util.extensions.collections.flows.BufferObserver
import com.speechify.client.internal.util.extensions.collections.flows.BufferedFlowMeasurement
import com.speechify.client.internal.util.extensions.collections.flows.ProducerExceptionBehavior
import com.speechify.client.internal.util.extensions.collections.flows.bufferWithMeasurement
import com.speechify.client.internal.util.www.asDataUrl
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.asFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.emitAll
import kotlinx.coroutines.flow.flatMapConcat
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.flow.flowOf
import kotlin.js.JsExport

/**
 * One section of a "prepared onboarding document": the [text] shown to the user plus the [audio] that says how
 * that text gets voiced. Pick a subclass to decide how the section is rendered in the resulting standard view.
 *
 * Construction throws [IllegalStateException] when [audio] is neither [PreparedAudio.Data] nor [PreparedAudio.Url]
 * and [text] is 2000 characters or longer.
 */
@JsExport
sealed class PreparedBlock(
    val text: String,
    val audio: PreparedAudio,
) {
    init {
        // Blocks that ship their own pre-generated audio are exempt from the length limit. Everything else is
        // synthesized without the normal chunked UtteranceFlowProvider, so the text has to stay below the TTS
        // limit for a single block.
        val shipsPregeneratedAudio = audio is PreparedAudio.Data || audio is PreparedAudio.Url
        check(shipsPregeneratedAudio || text.length < 2000) {
            "PreparedText length must be < 2000. " +
                "This is because we don't use chunked synthesis for Prepared documents, " +
                "and exceeding 2000 chars will likely fail remote TTS."
        }
    }

    /**
     * A block rendered as a paragraph in the resulting standard view. [ref] is handed to the view builder together
     * with the text.
     */
    class Paragraph(text: String, audio: PreparedAudio, val ref: ObjectRef<Any?> = ObjectRef(null)) :
        PreparedBlock(text, audio)

    /**
     * A block rendered as a heading in the resulting standard view. [ref] is handed to the view builder together
     * with the text.
     */
    class Heading(text: String, audio: PreparedAudio, val ref: ObjectRef<Any?> = ObjectRef(null)) :
        PreparedBlock(text, audio)
}

/**
 * Specifies how the audio should be generated for a given [PreparedBlock].
 *
 * [Data] and [Url] carry pre-generated audio; [Default] defers to live synthesis. Only [Default] blocks are subject
 * to the text-length check in [PreparedBlock].
 */
@JsExport
sealed class PreparedAudio {
    /**
     * Specifies that a single pre-generated Audio Server Response, supplied inline as bytes, is used to synthesize
     * this block.
     *
     * The [data] must be a serialized protobuf conforming to the [Audio Server response schema](https://audio.docs.speechify.dev/synthesis/overview.html#response).
     *
     * Note: this is a plain class (not a `data class`), so instances compare by identity.
     */
    class Data(val data: ByteArray) : PreparedAudio()

    /**
     * Specifies that a single pre-generated Audio Server Response, fetched from [url], is used to synthesize this
     * block.
     *
     * This is an HTTP or [DATA URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs) resolving to a serialized protobuf conforming to the [Audio Server response schema](https://audio.docs.speechify.dev/synthesis/overview.html#response).
     */
    data class Url(val url: String) : PreparedAudio()

    /**
     * Specifies that the normal TTS Playback controls determine the [Voice] used to synthesize this block.
     */
    object Default : PreparedAudio()
}

/**
 * Use this bundle creation API when you need fine-grained control over the structure and audio of a small document for
 * onboarding flows.
 *
 * Do not use this for general-purpose listening experiences, because it skips the normal subscription checks to
 * validate the default voice!
 *
 * @param content the blocks making up the document; they are added to the resulting standard view in array order.
 * @param callback receives the outcome of the bundle creation. The work itself runs in
 *  `coCreateBundleForPreparedBlocks`, wrapped by [fromCoWithTelemetryLoggingErrors] under the telemetry event name
 *  `ClassicReadingBundler.createBundleForPreparedOnboardingDocument`.
 */
@JsExport
fun ClassicReadingBundler.createBundleForPreparedOnboardingDocument(
    content: Array<PreparedBlock>,
    callback: Callback<ReadingBundle>,
) =
    callback.fromCoWithTelemetryLoggingErrors(
        telemetryEventName = "ClassicReadingBundler.createBundleForPreparedOnboardingDocument",
    ) {
        coCreateBundleForPreparedBlocks(content)
    }

/**
 * Coroutine implementation behind [createBundleForPreparedOnboardingDocument].
 *
 * Steps, in order:
 * 1. Builds a [StaticStandardView] with one heading or paragraph per [PreparedBlock].
 * 2. Creates a content bundle for that view via `contentBundler.coCreateBundleForStandardView`.
 * 3. Pairs every [PreparedBlock] with a block of the standard view and picks a per-block utterance provider based
 *    on [PreparedBlock.audio]; the providers are combined by [UtteranceFlowProviderFromZippedContentAndProviders].
 * 4. Assembles a [ListeningBundle] and then a [ClassicReadingBundle] by hand, with premium voices reported as
 *    always available (see #SkipSubscriptionChecks).
 *
 * @param content the blocks of the document, added to the view in array order.
 * @return the new bundle wrapped as a successful [Result], or whatever `orReturn` hands back when
 *  `createPlaybackControls` does not succeed (presumably the failure result — confirm against `orReturn`).
 */
private suspend fun ClassicReadingBundler.coCreateBundleForPreparedBlocks(
    content: Array<PreparedBlock>,
): Result<ReadingBundle> {
    // NOTE(review): `!!` throws a NullPointerException if `build()` returns null — confirm when that can happen
    // (e.g. for an empty `content` array).
    val standardView = content.fold(StaticStandardView.Builder()) { builder, item ->
        when (item) {
            is PreparedBlock.Heading -> builder.addHeading(item.text, item.ref)
            is PreparedBlock.Paragraph -> builder.addParagraph(item.text, item.ref)
        }
    }.build()!!

    // This scope is handed to both the AudioController and the bundle's dependencies below.
    // NOTE(review): nothing in this function cancels it when `createPlaybackControls` fails and we return early —
    // confirm whether that path needs cleanup.
    val scope = createTopLevelCoroutineScope()
    val contentBundle = this.contentBundler.coCreateBundleForStandardView(
        standardView = standardView,
        bundleMetadata = null,
    )

    // Make a single utterance flow provider by "concatenating" all the providers for each individual block
    // `zip` pairs by position and stops at the shorter collection, so this assumes the view holds exactly one block
    // per PreparedBlock, in the same order — TODO confirm against StaticStandardView.Builder.
    val utteranceFlowProvider = UtteranceFlowProviderFromZippedContentAndProviders(
        content.zip(standardView.blocks).map { (preparedBlock, standardBlock) ->
            val text = ContentTextUtils.concat(standardBlock.getContentTexts())
            text to when (preparedBlock.audio) {
                // Live synthesis with whichever voice is currently preferred.
                PreparedAudio.Default -> SimpleUtteranceFlowProviderFromContentText(
                    contentText = text,
                )
                // Pre-generated audio: the utterances are built lazily, on first request of the flow.
                is PreparedAudio.Data -> UtteranceFlowProviderFromLazyStaticUtterances {
                    this.createUtterancesFromStaticData(text, preparedBlock.audio)
                }

                is PreparedAudio.Url -> UtteranceFlowProviderFromLazyStaticUtterances {
                    this.createUtterancesFromStaticUrl(text, preparedBlock.audio)
                }
            }
        },
    )

    // Copy-paste the ListeningBundler initialization code, skipping subscription checks at #SkipSubscriptionChecks,
    // since nobody has a subscription during onboarding.
    val voiceSpecAvailabilityProvider = speechifyClient.adaptersProvider.voiceSpecAvailabilityProvider

    // Keep only the configured voices for which the availability provider yields a spec.
    val voices =
        listeningBundler.config.allVoices.mapNotNull { spec ->
            voiceSpecAvailabilityProvider
                .getSpecOfAvailableVoiceOrNull(
                    specToTry = spec,
                )
        }

    val listeningBundle = ListeningBundle(
        config = this.listeningBundler.config,
        audioController = AudioController(
            utteranceFlowProvider = utteranceFlowProvider,
            speechView = contentBundle.speechView,
            initialOptions = AudioControllerOptions(
                speedInWordsPerMinute = this.listeningBundler.config.defaultSpeedWPM,
                bufferOnInit = true,
                startingCursor = null,
                contentTransformOptions = ContentBundlerOptions(),
                utteranceBufferSizeOption = this.listeningBundler.config.options,
            ),
            voicesOfPreferenceStateProvider = VoicesOfPreferenceStateProviderFromConfig(
                voicePreferences = listeningBundler.config.voicePreferences,
                allVoicesForNoPreference = voices,
                voicePremiumAvailabilityProvider = object : VoicePremiumAvailabilityProvider {
                    /**
                     * As per #SkipSubscriptionChecks - nobody has a subscription during onboarding.
                     */
                    override val isPremiumVoiceAvailableFlow: Flow<Boolean> =
                        flowOf(true)
                },
                voiceSpecAvailabilityProvider = voiceSpecAvailabilityProvider,
            ),
            voiceFactoryFromSpec = listeningBundler.bundlerPlugins.voiceFactory.toVoiceFactoryFromSpec(
                speechSynthesisConfig = listeningBundler.config,
            ),
            scope = scope,
        ),
        contentBundle = contentBundle,
        voicesAvailableFromConfig = voices,
        voicesAudioDownloads = emptyArray(),
        hasDetectedGapsInDownloadedAudioThisListeningSessionFlow = MutableStateFlow(false).asStateFlow(),
    )
    // Playback controls are created at the start of the document.
    val startingCursor = standardView.start

    // Copy-paste the ReadingBundler initialization code
    val playbackControls = listeningBundle.createPlaybackControls(startingCursor).orReturn { return it }
    val tracker = ListeningProgressTracker(
        audiobookLibraryService = speechifyClient.audiobookLibraryService,
        libraryService = speechifyClient.libraryService.delegate,
        playbackControls = playbackControls,
        contentImporter = contentBundle.importer,
        importService = speechifyClient.importService,
    )
    val bundle = ClassicReadingBundle(
        dependencies = object : ReadingBundle.Dependencies {
            override val playbackControls: PlaybackControls = playbackControls
            override val listeningProgressTracker: ListeningProgressTracker = tracker
            override val libraryService: LibraryServiceDelegate = speechifyClient.libraryService.delegate
            override val scope: CoroutineScope = scope
        },
        bundlingSourceTelemetryEventBuilder = null,
        bundleMetadata = null,
    )
    return bundle.successfully()
}

/**
 * Builds the utterance for a block whose audio was supplied inline as [PreparedAudio.Data].
 *
 * @param contentText the text of the block the audio belongs to.
 * @param audio the serialized Audio Server response to play for [contentText].
 * @return a single-element array holding the [MediaUtterance] that covers the whole block.
 */
private fun ClassicReadingBundler.createUtterancesFromStaticData(
    contentText: ContentText,
    audio: PreparedAudio.Data,
): Array<Utterance> =
    createUtterancesFromAudioServerResponseBytes(
        contentText = contentText,
        responseBytes = audio.data,
        voiceId = "preparedAudio.binary",
    )

/**
 * Builds the utterance for a block whose audio is referenced by a [PreparedAudio.Url]: fetches the URL with the
 * client's HTTP adapter and then treats the response body exactly like inline [PreparedAudio.Data].
 *
 * @param contentText the text of the block the audio belongs to.
 * @param audio the location of the serialized Audio Server response to play for [contentText].
 * @return a single-element array holding the [MediaUtterance] that covers the whole block.
 * @throws IllegalStateException when the request succeeds but the response carries no body. A failed request is
 *  surfaced by `orThrow()`.
 */
private suspend fun ClassicReadingBundler.createUtterancesFromStaticUrl(
    contentText: ContentText,
    audio: PreparedAudio.Url,
): Array<Utterance> {
    val response = this.speechifyClient.adaptersProvider.httpClient.get(audio.url).orThrow()
    // The URL is deliberately left out of the message: it may be a data URL embedding the entire payload.
    val responseBytes = checkNotNull(response.body) {
        "Prepared audio request succeeded but the response has no body"
    }
    return createUtterancesFromAudioServerResponseBytes(
        contentText = contentText,
        responseBytes = responseBytes,
        voiceId = "preparedAudio.url",
    )
}

/**
 * Shared implementation of [createUtterancesFromStaticData] and [createUtterancesFromStaticUrl]: parses a serialized
 * Audio Server response and wraps its audio and speech marks in one [MediaUtterance] for [contentText].
 *
 * @param contentText the text spoken by the audio.
 * @param responseBytes the serialized Audio Server response.
 * @param voiceId the id given to the placeholder voice metadata, identifying where the audio came from.
 */
private fun ClassicReadingBundler.createUtterancesFromAudioServerResponseBytes(
    contentText: ContentText,
    responseBytes: ByteArray,
    voiceId: String,
): Array<Utterance> {
    val parsed = AudioServerResponse.fromBytes(responseBytes)
    val speechMarks = SpeechMarksImpl(
        parsed.speechMarks.chunks.map { chunk ->
            SpeechMarksChunk(
                startCharacterIndex = chunk.start,
                endCharacterIndex = chunk.end,

                // the audio server gives us doubles but the unit is milliseconds
                startTimeInMilliseconds = chunk.startTime.toInt(),
                endTimeInMilliseconds = chunk.endTime.toInt(),
            )
        },
    )
    // The audio is embedded as a data URL so it can be played by the local media player.
    val mediaUrl = parsed.audioData.asDataUrl("audio/${parsed.audioFormat}")
    return arrayOf<Utterance>(
        MediaUtterance(
            mediaUrl = mediaUrl,
            speechMarks = speechMarks,
            speech = SpeechUtils.fromText(contentText),
            text = contentText,
            mediaPlayerFactory = speechifyClient.adaptersProvider.localMediaPlayer,
            synthesisLocation = SynthesisLocation.STATIC,
            voiceMetadata = createPreparedVoiceMetadata(id = voiceId),
        ),
    )
}

/**
 * Creates the placeholder voice metadata attached to utterances that play pre-generated audio.
 *
 * The metadata is derived from a [VoiceSpec.Static] with fixed attributes; only the [id] (also echoed in the display
 * name) varies between callers.
 */
private fun createPreparedVoiceMetadata(id: String) =
    run {
        val preparedVoiceSpec = VoiceSpec.Static(
            /** NOTE: [VoiceSpec.Static.idQualified] is not used as per #QuirkVoiceSpecStaticIdUnused
             * TODO: Should there be [VoiceSpec.Static.idQualified] at all? It's read to translate to an AudioServer
             *  request, but [VoiceSpec.Static] doesn't go to AudioServer #TODOConsiderRemovingIdFromStaticVoiceSpec
             */
            id = id,
            displayName = "Prepared Audio $id",
            /* TODO Consider making `languageCode` `null`able (should work, just need to test it). */
            languageCode = "en-US",
            gender = VoiceGender.MALE,
            isPremium = false,
            avatarUrl = null,
        )
        preparedVoiceSpec.toVoiceMetadataAsMedia()
    }

/**
 * A [SimpleUtteranceFlowProvider] over a fixed set of utterances that is only produced when a flow is first
 * collected. The producer's result is reused through [memoize] on later collections.
 */
private class UtteranceFlowProviderFromLazyStaticUtterances(
    getUtterances: suspend () -> Array<Utterance>,
) : SimpleUtteranceFlowProvider {
    val memoized = memoize(getUtterances)

    /**
     * Emits, in array order, every utterance whose text ends at or after [startingCursor] (all of them when the
     * cursor is `null`). [speechFlow] and [voiceOfPreferenceProvider] are not consulted.
     */
    override fun getUtterancesFlow(
        speechFlow: SpeechFlow,
        startingCursor: ContentCursor?,
        voiceOfPreferenceProvider: VoiceOfPreferenceProviderWithCache,
    ): Flow<Utterance> = flow {
        for (utterance in memoized()) {
            val endsBeforeCursor = startingCursor != null && !utterance.text.end.isAfterOrAt(startingCursor)
            if (!endsBeforeCursor) {
                emit(utterance)
            }
        }
    }
}

/**
 * A [SimpleUtteranceFlowProvider] that synthesizes [contentText] sentence by sentence with whichever voice
 * [VoiceOfPreferenceProviderWithCache.getPreferredVoiceWithCache] returns at collection time.
 */
private class SimpleUtteranceFlowProviderFromContentText(
    val contentText: ContentText,
) : SimpleUtteranceFlowProvider {
    /**
     * Splits [contentText] into sentences, drops those ending before [startingCursor] (none when the cursor is
     * `null`) and feeds the rest to the preferred voice. [speechFlow] is not consulted.
     */
    override fun getUtterancesFlow(
        speechFlow: SpeechFlow,
        startingCursor: ContentCursor?,
        voiceOfPreferenceProvider: VoiceOfPreferenceProviderWithCache,
    ): Flow<Utterance> = flowFromAsyncSeed(
        getSeed = { voiceOfPreferenceProvider.getPreferredVoiceWithCache() },
    ) { preferredVoice ->
        val sentencesFromCursor = SpeechUtils.textToSentences(contentText)
            .filter { sentence -> startingCursor == null || sentence.text.end.isAfterOrAt(startingCursor) }
        val immutableContentCharacteristics = object :
            ContentSequenceCharacteristicsOfImmutableAlwaysLiveNoUserEffectContent { }

        preferredVoice.synthesizingColdFlow(
            sentencesFromCursor.asFlow(),
            /**
             * `shouldOptimizeForQuickStart = true` to preserve existing behavior (its usefulness is somewhat
             * questionable - only chance at a beneficial effect is if [SimpleUtteranceFlowProviderFromContentText]
             * is used as the first item).
             * TODO - make [com.speechify.client.api.audio.UtteranceFlowProviderFromSpeechFlow] handle prepared audio
             *  provided in the [speechFlow], and remove the whole approach with
             *  [SimpleUtteranceFlowProviderFromContentText] and [UtteranceFlowProviderFromZippedContentAndProviders].
             */
            shouldOptimizeForQuickStart = true,
            contentSequenceCharacteristics = immutableContentCharacteristics,
        )
    }
}

/**
 * An [UtteranceFlowProvider] that plays a list of per-block providers back to back. Each entry of [providers] pairs
 * the text of a block with the provider responsible for voicing it.
 */
private class UtteranceFlowProviderFromZippedContentAndProviders(
    val providers: List<Pair<ContentText, SimpleUtteranceFlowProvider>>,
) : UtteranceFlowProvider() {
    /**
     * Skips the blocks whose text ends before [startingCursor] (none when the cursor is `null`), concatenates the
     * flows of the remaining providers in list order and passes the result through `bufferWithMeasurement` with
     * a rendezvous buffer.
     *
     * Only [voiceOfPreferenceProvider], [startingCursor], [reportBufferMeasurement] and [bufferObserver] are used;
     * the other parameters are ignored by this implementation.
     */
    @OptIn(ExperimentalCoroutinesApi::class)
    override fun getUtterancesFlow(
        speechFlow: SpeechFlow,
        voiceOfPreferenceProvider: VoiceOfPreferenceProviderWithCache,
        startingCursor: ContentCursor?,
        preSpeechTransform: PreSpeechTransformOptions,
        shouldOptimizeForQuickStart: Boolean,
        reportBufferMeasurement: (measurement: BufferedFlowMeasurement<Utterance>) -> Unit,
        contentSequenceCharacteristics: ContentSequenceCharacteristics,
        utteranceBufferSizeOption: UtteranceBufferSizeOption,
        bufferObserver: BufferObserver<Utterance>,
    ): Flow<Utterance> {
        val providersFromCursor = providers
            .filter { entry -> startingCursor == null || entry.first.end.isAfterOrAt(startingCursor) }
            .map { entry -> entry.second }

        // NOTE(anson): this probably won't work on Android without a delay(200), since we've previously found a
        // race condition appears when media utterances are returned too quickly from the flow
        val concatenatedUtterances = providersFromCursor.asFlow().flatMapConcat { blockProvider ->
            blockProvider.getUtterancesFlow(
                speechFlow = flow {
                    /**
                     * TODO - add support for progressing [speechFlow], so that
                     *  [SimpleUtteranceFlowProviderFromContentText] can be replaced with
                     *  [com.speechify.client.api.audio.UtteranceFlowProviderFromSpeechFlow].
                     *  For now marking this flow as unusable (it's not used by any of the providers here, but
                     *  if one was added that tries to read it, we'd need to implement progressing the [speechFlow]).
                     *  This is also a case of #SpeechFlowNotAlwaysPulled, which leaves a complexity in the playback
                     *  stack that it needs to destroy the `speechFlow`-producing coroutine manually.
                     */
                    TODO("Add support for progressing speechFlow")
                },
                voiceOfPreferenceProvider = voiceOfPreferenceProvider,
                startingCursor = startingCursor,
            )
        }

        return concatenatedUtterances.bufferWithMeasurement(
            bufferObserver = bufferObserver,
            bufferSize = Channel.RENDEZVOUS,
            // keeping default behavior here, consistent with the use of RENDEZVOUS buffer above
            producerExceptionBehavior = ProducerExceptionBehavior.FailAndCancelConsumerImmediately,
            reportMeasurement = reportBufferMeasurement,
        )
    }
}

/**
 * A simpler [UtteranceFlowProvider] interface for implementing use cases that don't require parameters like
 * measurement reporting, [BufferObserver]s, etc.
 */
internal interface SimpleUtteranceFlowProvider {
    /**
     * Returns the utterances this provider contributes.
     *
     * @param speechFlow NOTE: within this file, [UtteranceFlowProviderFromZippedContentAndProviders] passes a flow
     *  that throws [NotImplementedError] as soon as it is collected, and neither implementation in this file reads
     *  it. Do not collect it without first adding support there.
     * @param startingCursor when non-null, the implementations in this file skip content that ends before it;
     *  `null` means no content is skipped.
     * @param voiceOfPreferenceProvider source of the preferred voice, for implementations that synthesize audio
     *  themselves.
     */
    fun getUtterancesFlow(
        speechFlow: SpeechFlow,
        startingCursor: ContentCursor?,
        voiceOfPreferenceProvider: VoiceOfPreferenceProviderWithCache,
    ): Flow<Utterance>
}

/**
 * Wraps [fn] so that its first non-null result is stored and handed back on every later call.
 *
 * Contract:
 * - Nothing is stored when [fn] throws, so the next call runs [fn] again.
 * - `null` doubles as the "empty" marker, so a `null` result is never stored and [fn] is re-run on the next call.
 * - Calls that overlap before the first result is stored may each run [fn]. They all return the value that ended up
 *   in the cache, so callers never observe two different results (assuming `compareAndSet` is atomic, as its name
 *   suggests).
 *
 * @param fn the suspending computation to run at most until it first succeeds.
 * @return a suspending function with the caching behavior described above.
 */
private fun <R> memoize(fn: suspend () -> R): suspend () -> R {
    val cache: AtomicRef<R?> = AtomicRef(null)
    return {
        cache.value ?: run {
            val computed = fn()
            cache.compareAndSet(null, computed)
            // Re-read instead of returning `computed` directly: if another caller stored its result first, the
            // compareAndSet above was a no-op and that caller's value is the one to share.
            cache.value ?: computed
        }
    }
}
