package com.speechify.client.api.services.audio

import com.speechify.client.api.ClientConfig
import com.speechify.client.api.audio.AudioMediaFormat
import com.speechify.client.api.audio.SpeechMarks
import com.speechify.client.api.audio.SpeechMarksChunk
import com.speechify.client.api.audio.SpeechMarksImpl
import com.speechify.client.api.audio.SynthesizeOptions
import com.speechify.client.api.audio.SynthesizeResponse
import com.speechify.client.api.telemetry.withTelemetry
import com.speechify.client.api.util.Result
import com.speechify.client.api.util.successfully
import com.speechify.client.internal.http.HttpClient
import com.speechify.client.internal.http.parse
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

/**
 * [AudioService] implementation that provides media-backed voices through network requests to the legacy
 * `/generateAudioFiles` endpoint of Speechify's [Audio Server](https://github.com/SpeechifyInc/audio-server).
 *
 * Every [synthesize] call sends the whole text as a single paragraph chunk and hands the audio back as a
 * `data:audio/...;base64,` URL together with the flattened speech marks.
 */
internal class LegacyAudioService internal constructor(
    private val httpClient: HttpClient,
    private val clientConfig: ClientConfig,
) : AudioService {

    /*
     Limits as per Liam Dyer's chat:
     https://speechifyworkspace.slack.com/archives/C03JLSQMBEJ/p1677612298351319?thread_ts=1677608320.875599&cid=C03JLSQMBEJ
     Legacy `/generateAudioFiles`: Text (Excluding SSML): 3000 characters SSML: 6000 characters
     (also to be added to [docs](https://audio.docs.speechify.dev/synthesis/overview.html))
     */
    override val maxTextCharactersCount = 3000
    private val maxTotalSsmlCharactersCount = 6000

    /**
     * Synthesizes [text] with a single POST to `/generateAudioFiles`, recording the voice, format, text length and
     * HTTP status on the telemetry event along the way.
     *
     * [precedingContext] is not used by this implementation.
     *
     * An [IllegalArgumentException] is raised inside the telemetry block when the converted options carry no audio
     * format.
     *
     * @return the synthesized audio and speech marks wrapped as a success, or the failure produced by the HTTP call
     * or by parsing its response.
     */
    override suspend fun synthesize(
        text: String,
        precedingContext: String?,
        options: SynthesizeOptions,
    ): Result<SynthesizeResponse> = withTelemetry("AudioServer.synthesize") { telemetryEvent ->
        /* TODO - flatten `options` out: log and request from `SynthesizeOptions` directly */
        val audioServiceOptions = options.toAudioServiceSynthesizeOptions()
        val voice = audioServiceOptions.voiceParams

        telemetryEvent.addProperty(TELEMETRY_PROP__AUDIO_SERVER_VERSION, "legacy")
        telemetryEvent.addProperty("name", voice.name)
        telemetryEvent.addProperty("lang", voice.languageCode)
        telemetryEvent.addProperty("engine", voice.engine)
        telemetryEvent.addProperty("fmt", audioServiceOptions.audioMediaFormat)
        telemetryEvent.addProperty("len", text.length)

        // Checked only after the telemetry properties above have been recorded.
        val audioFormat = audioServiceOptions.audioMediaFormat ?: throw IllegalArgumentException(
            "Audio format must be specified when not using `ClientOptions.enableAudioServerV1()`",
        )

        /* NOTE: Not using [maxTotalSsmlCharactersCount] since we don't add any SSML (so not clear where it even
         applies) */
        val requestBody = GenerateAudioFilesRequestBody(
            audioFormat = audioFormat,
            paragraphChunks = listOf(text),
            voiceParams = VoiceParams(voice.name, voice.engine, voice.languageCode),
        )

        val httpResult = httpClient.post(
            "${clientConfig.platformAudioServiceUrl}/generateAudioFiles",
            /* This is a POST request which normally is interpreted as non-idempotent
             ([standard](https://developer.mozilla.org/en-US/docs/Glossary/Idempotent#see_also)), so we very much need
             to add this hint. */
            canRetryOnResponseNotReceived = true,
            isErrorResponseAllowingRetry = { response -> isRetryableStatus(response.status.toInt()) },
            shouldAbortOnCoroutineCancellation = true,
            expectedResponseBodyByteCountBelow = defaultAudioServerExpectedResponseBodyByteCountBelow,
            telemetryEventBuilder = telemetryEvent,
        ) {
            bodyJson(requestBody)
            header("Accept-Base64", "true")
        }

        // Record the status code, because a non-200 response will probably surface as a parsing failure below.
        httpResult.ifSuccessful { httpResponse ->
            telemetryEvent.addProperty("status", httpResponse.status.toString())
        }

        val parsedResponse = httpResult
            .parse<GenerateAudioFilesResponse>()
            .orReturn { failure -> return@withTelemetry failure }

        return@withTelemetry SynthesizeResponse(
            parsedResponse.format,
            getMediaUrl(parsedResponse.audioStream, parsedResponse.format),
            speechMarks = parsedResponse.getSpeechMarks(),
        ).successfully()
    }

    /**
     * Tells whether an error response with [statusCode] may be retried.
     *
     * The list is as prescribed by the Audio Server team:
     * https://speechifyworkspace.slack.com/archives/C03JLSQMBEJ/p1677604524061729?thread_ts=1676466650.908299&cid=C03JLSQMBEJ
     */
    private fun isRetryableStatus(statusCode: Int): Boolean = when (statusCode) {
        429, 500, 503 -> true
        else -> false
    }

    /**
     * Embeds [base64AudioData] into a `data:` URL whose MIME subtype is the lowercased name of [format].
     */
    private fun getMediaUrl(base64AudioData: String, format: AudioMediaFormat): String {
        val mimeSubtype = format.name.lowercase()
        return "data:audio/$mimeSubtype;base64,$base64AudioData"
    }

    /**
     * Flattens the response's speech marks by one level: each top-level chunk contributes its nested chunks, or
     * itself when it has none. Start and end times are truncated to [Int].
     */
    private fun GenerateAudioFilesResponse.getSpeechMarks(): SpeechMarks {
        val flattenedChunks = speechMarks.chunks.asSequence().flatMap { sentenceChunk ->
            /* If for any reason there are no more details than the sentence's chunk itself, return at least the
             info for the sentence. */
            sentenceChunk.chunks.asSequence().ifEmpty { sequenceOf(sentenceChunk) }
        }

        return SpeechMarksImpl(
            flattenedChunks.map { chunk ->
                SpeechMarksChunk(
                    chunk.start,
                    chunk.end,
                    chunk.startTime.toInt(),
                    chunk.endTime.toInt(),
                )
            }.toList(),
        )
    }
}

/**
 * Telemetry property key under which the audio server version is recorded;
 * [LegacyAudioService.synthesize] sets it to `"legacy"`.
 */
internal const val TELEMETRY_PROP__AUDIO_SERVER_VERSION: String = "sv"

/**
 * JSON request body that [LegacyAudioService.synthesize] posts to the `/generateAudioFiles` endpoint.
 *
 * @property audioFormat audio media format sent with the request.
 * @property paragraphChunks texts to synthesize; [LegacyAudioService.synthesize] always sends a single-element list
 * holding the whole text.
 * @property voiceParams voice selection; [LegacyAudioService.synthesize] builds it from the name, engine and
 * language code of the synthesize options.
 */
@Serializable
internal data class GenerateAudioFilesRequestBody(
    val audioFormat: AudioMediaFormat,
    val paragraphChunks: List<String>,
    val voiceParams: VoiceParams,
)

/**
 * Response body of the `/generateAudioFiles` endpoint, as parsed by [LegacyAudioService.synthesize].
 *
 * @property format audio media format of the response; it is passed on to the `SynthesizeResponse` and its
 * lowercased name becomes the MIME subtype of the media URL.
 * @property audioStream audio payload; the service embeds it as-is into a `data:audio/...;base64,` URL (the request
 * is sent with the `Accept-Base64: true` header).
 * @property speechMarks speech marks for the audio, flattened by the service before being returned to the caller.
 */
@Serializable
internal data class GenerateAudioFilesResponse(
    val format: AudioMediaFormat,
    val audioStream: String,
    val speechMarks: SpeechMarksData,
)

/**
 * Container for the top-level speech-mark chunks of a [GenerateAudioFilesResponse].
 *
 * The consuming code in [LegacyAudioService] treats each top-level chunk as a sentence and reads the finer-grained
 * marks from that chunk's own nested `chunks`.
 */
@Serializable
internal data class SpeechMarksData(val chunks: List<SpeechMarksChunkData>)

/**
 * One speech-mark entry of the `/generateAudioFiles` response. Entries can nest through [chunks].
 *
 * @property kind granularity of the entry; read from the JSON key `type`.
 * @property value presumably the text covered by this entry - TODO confirm against the Audio Server docs.
 * @property startTime start time of the entry; [LegacyAudioService] truncates it to `Int`. The unit is not visible
 * here (presumably milliseconds - TODO confirm).
 * @property endTime end time of the entry; handled the same way as [startTime].
 * @property start start position of the entry (presumably a character offset into the text - TODO confirm).
 * @property end end position of the entry (same caveat as [start]).
 * @property chunks nested entries; has a default of an empty list, so the key may be absent from the JSON.
 */
@Serializable
internal data class SpeechMarksChunkData(
    @SerialName("type") val kind: Type,
    val value: String,
    val startTime: Double,
    val endTime: Double,
    val start: Int,
    val end: Int,
    val chunks: List<SpeechMarksChunkData> = emptyList(),
) {
    /** Granularity of a speech-mark entry, with the serialized names used on the wire. */
    @Serializable
    internal enum class Type {
        /** Serialized as `"sentence"`. */
        @SerialName("sentence")
        Sentence,

        /** Serialized as `"word"`. */
        @SerialName("word")
        Word,
    }
}
