import assert from 'assert';

import { Voice, VoiceList as VoiceListResponse } from '@speechifyinc/centralized-voice-list';
import { type VoiceSpecOfAvailableVoice } from '@speechifyinc/multiplatform-sdk';
import { capitalize, get } from 'lodash';

import { LocalStorageCache, LocalStorageCacheKey } from 'lib/cache/localStorage';
import { logError } from 'lib/observability';
import { getLocalVoicePromise } from 'modules/voices/utils/localVoices';
import { getAudio } from 'utils/audio';
import { ErrorWithContext } from 'utils/error';
import { Nullable } from 'utils/types';

import { MultiplatformSDKInstance } from '../sdk';
import { SDKFacade } from './_base';

// Prefixes of `SpeechSynthesisVoice.voiceURI`; local voices whose URI starts with any of them are left out of the voice list.
const ignoredVoiceUris = ['Google', 'com.apple.speech', 'com.apple.eloquence', 'Grandma', 'Grandpa', 'Bells', 'Trinoids'];
// Endpoint the centralized voice list (CVL) is requested from.
const defaultVoiceListUrl = `${process.env.NEXT_PUBLIC_AUDIO_SERVER_URL}/v1/synthesis/client-voices`;

// `engine` value of CVL voices that are resolved against the browser's speech-synthesis voices instead of a remote engine.
const LOCAL_SYNTH_ENGINE = 'speechSynth';

/** Gender values understood by `getVoiceGender` and the voice preview templates. */
type Gender = 'male' | 'female' | 'notSpecified';

/** Commands that can be posted to the `#speechSynthesisFrame` iframe. */
type SpeechActionType = 'speak' | 'pause' | 'cancel';

/** Message describing a speech command; `text` and `voice` are only required for `speak`. */
interface SpeechAction {
  type: SpeechActionType;
  text?: string;
  voice?: string;
}

/** Plain-object description of a voice, produced from an SDK voice spec by `SDKVoiceFacade.mapToVoiceInfo`. */
export type VoiceInfo = {
  /** Qualified id of the underlying voice spec (`idQualified`). */
  id: string;
  /** CVL name of the voice; only set for specs created from a remote CVL voice. */
  name?: string;
  displayName: string;
  /** Capitalized name of the SDK gender value. */
  gender: string;
  /** Avatar URL of the voice, or a flag image path derived from its language code. */
  thumbnail: string;
  /** Second `-`-separated segment of the language code. */
  country: Nullable<string>;
  labels: Nullable<string[]>;
  /** First `-`-separated segment of the language code. */
  language: string;
  /** Full language code of the voice, `en-US` when the spec has none. */
  languageCodeWithLocale: string;
  isLocalVoice?: boolean;
  isPremium: boolean;
  // TODO(albertusdev): Declare below fields as non-nullable and update Storybook snapshot
  engine?: string;
  isCustomVoice?: boolean;
  isSpeechifyVoice?: boolean;
  /** Raw `name` property of the voice spec (empty string when absent); personal voices carry a `PVL:` prefix here. */
  rawName?: string;
};

// Time-to-live passed to the local-storage cache of the centralized voice list: 1 hour.
const CVL_LOCAL_CACHE_TTL_IN_MS = 1000 * 60 * 60;

// Display-name fragments of local English voices, in order of preference, used when picking a free fallback voice.
const ENGLISH_FREE_FALLBACK_VOICE_PREFERENCES = ['Samantha', 'David', 'Alex', 'Daniel', 'Karen', 'Moira'];

/**
 * Facade over the multiplatform SDK for everything voice related: it loads the centralized voice list (CVL), the
 * browser's local speech-synthesis voices and the user's personal voices, converts them to SDK voice specs and
 * `VoiceInfo` objects, and plays voice previews.
 */
export class SDKVoiceFacade extends SDKFacade {
  // Audio element used to play preview clips.
  private audioPlayer: HTMLAudioElement;
  // Last CVL payload that was loaded; `null` until a fetch succeeds.
  private cvlVoiceResponse: VoiceListResponse | null = null;

  // Browser voices that passed the `ignoredVoiceUris` filter, and the specs generated from them.
  private speechSynthesisVoices: SpeechSynthesisVoice[] = [];
  private localVoiceSpecs: VoiceSpecOfAvailableVoice[] = [];

  private personalVoiceSpecs: VoiceSpecOfAvailableVoice[] = [];

  // Specs generated from the "Recommended" and "All" tabs of the CVL.
  private recommendedVoiceSpecs: VoiceSpecOfAvailableVoice[] = [];
  private allTabsVoiceSpecs: VoiceSpecOfAvailableVoice[] = [];

  // Specs generated from `config.defaultVoice` / `config.fallbackVoice` of the CVL.
  private defaultVoiceSpec: VoiceSpecOfAvailableVoice | undefined = undefined;
  private fallbackVoiceSpec: VoiceSpecOfAvailableVoice | undefined = undefined;

  // Voice specs keyed by `idQualified`, filled by `refetchAllVoices`.
  private voiceSpecsCache: Map<string, VoiceSpecOfAvailableVoice> = new Map();

  // Maps the `idQualified` of a remote CVL spec to the CVL voice name it was created from.
  private _mapIdQualifiedToCVLName: Map<string, string> = new Map();
  private static _singleton: SDKVoiceFacade;

  // Promise of the most recent `refetchAllVoices` run.
  private _allVoicesPromise: Promise<VoiceSpecOfAvailableVoice[]>;

  private _cvlLocalCache = new LocalStorageCache(LocalStorageCacheKey.cvl, CVL_LOCAL_CACHE_TTL_IN_MS);

  /**
   * Creates the facade, starts loading all voices and registers the new instance as the singleton.
   *
   * @param sdk SDK instance handed to the base facade.
   */
  constructor(sdk: MultiplatformSDKInstance) {
    super(sdk);
    this.audioPlayer = new Audio();
    // Start the initial voice load immediately; the resulting promise is what `getAllVoices` returns.
    this._allVoicesPromise = this.refetchAllVoices();
    SDKVoiceFacade._singleton = this;
  }

  /** Most recently constructed `SDKVoiceFacade`; unset until the constructor has run at least once. */
  static override get singleton(): SDKVoiceFacade {
    return SDKVoiceFacade._singleton;
  }

  /**
   * Loads the centralized voice list (CVL) and derives the "All" / "Recommended" voice specs plus the default and
   * fallback voice specs from it.
   *
   * A non-stale entry of the local-storage cache is used when present. Otherwise the list is requested from
   * `defaultVoiceListUrl`; when that request fails or answers with a non-2xx status, the static
   * `/centralized-voice-list.json` is requested instead. The list that was obtained is written back to the cache.
   *
   * When no list can be obtained the error is logged and the derived state is left untouched.
   *
   * @throws AssertionError when the list has no "All" or "Recommended" tab.
   */
  private fetchRemoteCVL = async () => {
    // Rejects on HTTP error statuses so that an error payload is never parsed (and cached) as a voice list.
    const parseVoiceList = async (res: Response): Promise<VoiceListResponse> => {
      if (!res.ok) {
        throw new Error(`Voice list request failed with status ${res.status}`);
      }
      return (await res.json()) as VoiceListResponse;
    };

    const getVoiceListResponse = async () => {
      const cache = await this._cvlLocalCache.retrieve();
      if (cache && !cache.isStale) {
        return cache.data;
      }

      try {
        const { firebaseAuth } = await import('lib/firebase/firebase.client');
        const idToken = await firebaseAuth.currentUser?.getIdToken();

        // NOTE(review): `ContentType` is not a standard header name (`Content-Type` is); kept as-is because the
        // server's CORS allow-list is not visible from here.
        const headers: Record<string, string> = {
          ContentType: 'application/json',
          'X-Speechify-Client': 'WebApp',
          'X-Speechify-Client-Version': process.env.version || '0.0.0'
        };
        // Without a signed-in user there is no token; do not send a literal "Bearer undefined".
        if (idToken) {
          headers.Authorization = `Bearer ${idToken}`;
        }

        const response = await fetch(defaultVoiceListUrl, { headers })
          .then(parseVoiceList)
          .catch(() => fetch('/centralized-voice-list.json').then(parseVoiceList));

        await this._cvlLocalCache.store(response);

        return response;
      } catch (error) {
        logError(new ErrorWithContext('Error while fetching voice list', error instanceof Error ? { cause: `${error.message}` } : {}));
        return null;
      }
    };

    this.cvlVoiceResponse = await getVoiceListResponse();
    const voiceList = this.cvlVoiceResponse;
    if (!voiceList) {
      return;
    }

    // Collects the voices of every category of the named tab and converts them to specs, dropping the voices for
    // which no spec could be generated.
    const specsForTab = (tabName: string): VoiceSpecOfAvailableVoice[] => {
      const tab = voiceList.tabs.find(candidate => candidate.displayName === tabName);
      assert(tab, `${tabName} tab not found in voice list`);
      return tab.categories
        .flatMap(category => category.voices)
        .map(voice => this.generateVoiceSpec(voice))
        .filter((spec: VoiceSpecOfAvailableVoice | undefined): spec is VoiceSpecOfAvailableVoice => spec !== undefined);
    };

    this.allTabsVoiceSpecs = specsForTab('All');
    this.recommendedVoiceSpecs = specsForTab('Recommended');

    this.defaultVoiceSpec = this.generateVoiceSpec(voiceList.config.defaultVoice);
    this.fallbackVoiceSpec = this.generateVoiceSpec(voiceList.config.fallbackVoice);
  };

  /**
   * Loads the browser's speech-synthesis voices, drops every voice whose `voiceURI` starts with one of the
   * `ignoredVoiceUris` prefixes, and stores both the remaining voices and the specs generated from them.
   */
  private fetchLocalVoices = async () => {
    const usableVoices = (await getLocalVoicePromise()).filter(
      candidate => !ignoredVoiceUris.some(prefix => candidate.voiceURI.startsWith(prefix))
    );

    this.speechSynthesisVoices = usableVoices;
    this.localVoiceSpecs = usableVoices.map(candidate => this.generateLocalVoiceSpec(candidate));
  };

  /** Loads the user's personal voice specs through the SDK's personal voice service and stores them. */
  private fetchPersonalVoices = async () => {
    const service = this.sdk.client.personalVoiceService;
    const { promisify } = this.sdk;
    // The service method is bound before being promisified so it keeps its receiver.
    const getPersonalVoiceSpecs = promisify(service.getPersonalVoiceSpecs.bind(service));
    this.personalVoiceSpecs = await getPersonalVoiceSpecs();
  };

  /**
   * Returns the path of the flag image for a language tag.
   *
   * The region is looked up among the subtags that follow the language, so script subtags (`zh-Hans-CN`) and
   * underscore separated tags (`de_DE`) resolve to the right country, and the region is upper-cased.
   *
   * - A tag without any subtag after the language (`en`) maps to the `US` flag.
   * - A numeric region (`es-419`), or a tag whose subtags contain no region at all, maps to the `XX` flag.
   *
   * @param languageCode Language tag such as `en-US`.
   * @returns Path of the form `/flags/<REGION>.svg`.
   */
  private getFlagFromLang = (languageCode: string) => {
    const [, ...subtags] = languageCode.split(/[-_]/).filter(Boolean);
    if (subtags.length === 0) {
      return '/flags/US.svg';
    }

    // Everything from the first single-character subtag on belongs to an extension, not to the locale itself.
    const extensionStart = subtags.findIndex(subtag => subtag.length === 1);
    const localeSubtags = extensionStart === -1 ? subtags : subtags.slice(0, extensionStart);

    // A region subtag is either two letters (country) or three digits (numeric area code).
    const region = localeSubtags.find(subtag => /^(?:[a-z]{2}|\d{3})$/i.test(subtag));
    if (region === undefined || /^\d+$/.test(region)) {
      return '/flags/XX.svg';
    }

    return `/flags/${region.toUpperCase()}.svg`;
  };

  /**
   * Translates a `Gender` string into the SDK's `VoiceGender` value.
   *
   * @param gender Gender to translate; anything other than `male` / `female` yields `UNSPECIFIED`.
   */
  private getVoiceGender(gender: Gender) {
    const { VoiceGender } = this.sdk.sdkModule;
    switch (gender) {
      case 'male':
        return VoiceGender.MALE;
      case 'female':
        return VoiceGender.FEMALE;
      default:
        return VoiceGender.UNSPECIFIED;
    }
  }

  /**
   * Builds the SDK voice spec for a CVL voice.
   *
   * Voices of the local synthesis engine are matched by name and language against the loaded browser voices;
   * every other voice becomes a remote CVL spec.
   *
   * @param voice Voice entry of the centralized voice list.
   * @returns The spec, or `undefined` when a local-engine voice has no matching browser voice.
   */
  public generateVoiceSpec = (voice: Voice): VoiceSpecOfAvailableVoice | undefined => {
    if (voice.engine !== LOCAL_SYNTH_ENGINE) {
      return this.generateRemoteVoiceSpec(voice);
    }

    const matchingLocalVoice = this.speechSynthesisVoices.find(
      candidate => candidate.name === voice.name && candidate.lang === voice.language
    );
    return matchingLocalVoice ? this.generateLocalVoiceSpec(matchingLocalVoice) : undefined;
  };

  /**
   * Builds a `CVLVoiceSpec` for a remote CVL voice and remembers which CVL name the spec's qualified id belongs to.
   *
   * @param voice Voice entry of the centralized voice list.
   * @throws Error when the voice list has not been fetched yet (raised while resolving the preview text).
   */
  private generateRemoteVoiceSpec = (voice: Voice): VoiceSpecOfAvailableVoice => {
    const { WebBoundaryMap } = this.sdk;
    const { VoiceSpec } = this.sdk.sdkModule;

    const sdkGender = this.getVoiceGender(voice.gender);
    // Voices without labels are labelled as premium.
    const labels = voice.labels ?? ['premium'];
    const localizedNames = new WebBoundaryMap(voice.localizedDisplayName);
    const previewText = this.getVoicePreviewForLanguage(voice.displayName, voice.language, voice.gender);

    const remoteSpec = new VoiceSpec.CVLVoiceSpec(
      voice.displayName,
      true,
      voice.engine,
      voice.language,
      sdkGender,
      voice.avatarImage,
      labels,
      localizedNames,
      voice.previewAudio,
      previewText,
      voice.name
    );

    this._mapIdQualifiedToCVLName.set(remoteSpec.idQualified, voice.name);
    return remoteSpec;
  };

  /**
   * Wraps a browser speech-synthesis voice in a `VoiceSpec.LocalAvailable`. The gender is always reported as not
   * specified and a flag image derived from the voice's language is passed along.
   *
   * @param voice Browser voice to wrap.
   * @throws Error when the voice list has not been fetched yet (raised while resolving the preview text).
   */
  private generateLocalVoiceSpec = (voice: SpeechSynthesisVoice): VoiceSpecOfAvailableVoice => {
    const { VoiceSpec, LocalSynthesisVoice, VoiceRef } = this.sdk.sdkModule;

    const synthesisVoice = new LocalSynthesisVoice(
      voice.voiceURI,
      voice.name,
      voice.lang,
      this.getVoiceGender('notSpecified'),
      new VoiceRef(voice)
    );
    const flagPath = this.getFlagFromLang(voice.lang);
    const previewText = this.getVoicePreviewForLanguage(voice.name, voice.lang, 'notSpecified');

    return new VoiceSpec.LocalAvailable(synthesisVoice, voice.name, voice.voiceURI, flagPath, previewText);
  };

  /**
   * Produces the preview sentence for a voice from the preview templates of the voice list.
   *
   * The template of the exact language code is used, falling back to the `*` template. Within a template the
   * gender-specific text is preferred and the `notSpecified` text is used when it is missing; any gender other
   * than `male` / `female` uses the `notSpecified` text directly.
   *
   * @param name Voice name inserted for every `{{ name }}` placeholder.
   * @param languageCode Language code used to pick the template.
   * @param gender Gender used to pick the text variant.
   * @returns The preview sentence, or `null` when no template applies.
   * @throws Error when the voice list has not been fetched yet.
   */
  private getVoicePreviewForLanguage = (name: string, languageCode: string, gender: Gender) => {
    if (!this.cvlVoiceResponse) {
      throw new Error('Tried to access voice list before fetching');
    }

    const { voicePreviewTemplates } = this.cvlVoiceResponse;
    const template = voicePreviewTemplates[languageCode] ?? voicePreviewTemplates['*'];

    if (!template) {
      return null;
    }

    let text: string;
    if (gender === 'female') {
      text = template.female ?? template.notSpecified;
    } else if (gender === 'male') {
      text = template.male ?? template.notSpecified;
    } else {
      // An unknown gender must not be given the male wording.
      text = template.notSpecified;
    }

    // split/join inserts the name literally: `String.replace` would interpret `$&`, `$1`, ... inside a voice name
    // as replacement patterns and would only fill in the first placeholder.
    return text.split('{{ name }}').join(name);
  };

  /**
   * Reloads the remote, local and personal voices and adds every resulting spec to the id-keyed spec cache.
   *
   * The three sources are loaded independently; a source that fails is logged and does not prevent the others
   * from being used. The returned promise also becomes the one handed out by `getAllVoices`.
   *
   * @returns Promise of all voice specs known after the reload. It rejects with an `Error` (original error in
   *   `cause`) when the final local-voice reload or the cache update fails.
   */
  refetchAllVoices = () => {
    const promise = (async (): Promise<VoiceSpecOfAvailableVoice[]> => {
      try {
        const outcomes = await Promise.allSettled([this.fetchRemoteCVL(), this.fetchLocalVoices(), this.fetchPersonalVoices()]);
        for (const outcome of outcomes) {
          if (outcome.status === 'rejected') {
            // Surface the failure instead of dropping it; the remaining sources are still usable.
            logError(
              new ErrorWithContext('Error while fetching voices', outcome.reason instanceof Error ? { cause: `${outcome.reason.message}` } : {})
            );
          }
        }

        // TODO(harshit): sometimes local voices don't resolve on first run
        // NOTE(review): CVL voices of the local engine are matched against the local voices while the three
        // fetches above run concurrently, and are not regenerated after this second load — confirm that is fine.
        await this.fetchLocalVoices();

        const voiceSpecs = this.allVoiceSpecs;
        for (const voiceSpec of voiceSpecs) {
          this.voiceSpecsCache.set(voiceSpec.idQualified, voiceSpec);
        }
        return voiceSpecs;
      } catch (e) {
        throw new Error('Error while resolving voices', { cause: e });
      }
    })();

    this._allVoicesPromise = promise;
    return promise;
  };

  /**
   * Returns the default voice spec of the voice list, or its fallback voice spec when no default is set.
   *
   * @throws AssertionError when neither of the two is available.
   */
  private getDefaultVoiceSpec = (): VoiceSpecOfAvailableVoice => {
    const { defaultVoiceSpec, fallbackVoiceSpec } = this;
    const resolved = defaultVoiceSpec ? defaultVoiceSpec : fallbackVoiceSpec;
    assert(resolved, 'Both defaultVoiceSpec and fallbackVoiceSpec is undefined');
    return resolved;
  };

  /**
   * Returns the default voice as a `VoiceInfo`.
   *
   * @throws AssertionError when neither a default nor a fallback voice is available.
   */
  public getDefaultVoiceInfo = () => this.mapToVoiceInfo(this.getDefaultVoiceSpec());

  /** Returns the voices of the "Recommended" tab as `VoiceInfo` objects. */
  public getRecommendedVoiceInfos = () => {
    const infos = this.recommendedVoiceSpecs.map(spec => this.mapToVoiceInfo(spec));
    return infos;
  };

  /** Returns the user's personal voices as `VoiceInfo` objects, each flagged as a custom voice. */
  public getPersonalVoiceInfos = () => {
    const isCustomVoice = true;
    return this.personalVoiceSpecs.map(spec => this.mapToVoiceInfo(spec, isCustomVoice));
  };

  /**
   * Deletes a personal voice.
   *
   * The voice is removed from the in-memory list of personal voices right away, before the service call has
   * completed. The id sent to the service is the voice's raw name without its `PVL:` prefix.
   *
   * @param voiceInfo Personal voice to delete.
   * @returns Promise of the service's `deleteVoice` call.
   */
  public deletePersonalVoice = async (voiceInfo: VoiceInfo) => {
    const service = this.sdk.client.personalVoiceService;
    const { promisify } = this.sdk;

    this.personalVoiceSpecs = this.personalVoiceSpecs.filter(spec => spec.idQualified !== voiceInfo.id);

    const rawName = voiceInfo.rawName || '';
    const deleteVoice = promisify(service.deleteVoice.bind(service));
    return deleteVoice(rawName.replace('PVL:', ''));
  };

  /** Returns every known voice (all tabs, recommended, local and personal) as `VoiceInfo` objects. */
  public getAllVoiceInfos = () => {
    const specs = this.allVoiceSpecs;
    return specs.map(spec => this.mapToVoiceInfo(spec));
  };

  /**
   * Resolves the SDK voice spec for a `VoiceInfo`.
   *
   * The spec cache is consulted first; the default voice is only resolved when no voice was given or the voice
   * is not cached, so a cached voice can be returned even while no default voice is available.
   *
   * @param voiceInfo Voice to resolve, or a nullish value to get the default voice.
   * @throws AssertionError when the default voice is needed but neither a default nor a fallback voice exists.
   */
  public getVoiceSpec = (voiceInfo: Nullable<VoiceInfo>): VoiceSpecOfAvailableVoice => {
    const cached = voiceInfo ? this.voiceSpecsCache.get(voiceInfo.id) : undefined;
    return cached ?? this.getDefaultVoiceSpec();
  };

  /** All voice specs in the order: "All" tab, "Recommended" tab, local voices, personal voices. Falsy entries are dropped. */
  private get allVoiceSpecs() {
    const combined = this.allTabsVoiceSpecs.concat(this.recommendedVoiceSpecs, this.localVoiceSpecs, this.personalVoiceSpecs);
    return combined.filter((spec): spec is VoiceSpecOfAvailableVoice => Boolean(spec));
  }

  // TODO(overhaul): Refactor the speech synthesis controller to be a separate class
  /**
   * Posts a speech command to the `#speechSynthesisFrame` iframe.
   *
   * - `speak` requires both `text` and `voice`; the voice is sent lower-cased and `onEnd` is invoked once the
   *   iframe reports `previewEnded`.
   * - `pause` and `cancel` are forwarded without further data.
   * - Anything else (including a `speak` without text or voice) is logged as an error and not sent.
   *
   * @param action Command to send, optionally with an `onEnd` callback for `speak`.
   * @throws AssertionError when the iframe or its `contentWindow` is not available.
   */
  private controlSpeechInIframe(action: SpeechAction & { onEnd?: () => void }): void {
    const iframe = document.querySelector<HTMLIFrameElement>('#speechSynthesisFrame');

    assert(iframe?.contentWindow, `iframe.contentWindow for "#speechSynthesisFrame" should be defined`);

    if (action.type === 'speak' && action.text && action.voice) {
      iframe.contentWindow.postMessage(
        {
          type: action.type,
          text: action.text,
          voice: action.voice.toLowerCase()
        },
        '*'
      );

      // Set up a listener for the 'previewEnded' message
      this.setupIframeMessageListener(action.onEnd);
    } else if (action.type === 'pause' || action.type === 'cancel') {
      iframe.contentWindow.postMessage({ type: action.type }, '*');
    } else {
      // Serialize the action explicitly: interpolating the object itself would only log "[object Object]".
      logError(new Error(`Invalid action or missing parameters for speak action: ${JSON.stringify(action)}`));
    }
  }

  /**
   * Registers a one-shot `message` listener on the window that calls `onEnd` when a `previewEnded` message
   * arrives and then removes itself. Messages of any other shape are ignored.
   *
   * @param onEnd Callback to run when the preview has ended.
   */
  private setupIframeMessageListener(onEnd?: () => void): void {
    const messageHandler = (event: MessageEvent) => {
      // Window messages can come from any sender and may carry `null` / `undefined` data.
      if (event.data?.type === 'previewEnded') {
        onEnd?.();
        // Remove the event listener after it's been called
        window.removeEventListener('message', messageHandler);
      }
    };

    window.addEventListener('message', messageHandler);
  }

  /**
   * Speaks a preview text through the speech-synthesis iframe.
   *
   * Any ongoing speech is cancelled first, the `speak` command is sent 250 ms later, and the state callback is
   * told right away that the preview is playing; it is told that playback stopped once the iframe reports the end.
   *
   * @param text Text to speak.
   * @param voice Name of the voice to speak with.
   * @param onPreviewStateChange Optional callback receiving the playing state.
   */
  private speakUsingSpeechSynthesis(text: string, voice: string, onPreviewStateChange?: (isPlaying: boolean) => void) {
    const notifyEnded = () => onPreviewStateChange?.(false);
    const sendSpeak = () => {
      this.controlSpeechInIframe({ type: 'speak', text, voice, onEnd: notifyEnded });
    };

    this.controlSpeechInIframe({ type: 'cancel' });

    // TODO(overhaul)
    // The speak command is delayed to ensure the iframe is ready after the cancel.
    setTimeout(sendSpeak, 250);

    // Notify that preview has started
    onPreviewStateChange?.(true);
  }

  /**
   * Cancels any speech in the speech-synthesis iframe.
   *
   * @throws AssertionError when the iframe or its `contentWindow` is not available.
   */
  private stopSpeechSynthesis() {
    this.controlSpeechInIframe({ type: 'cancel' });
  }

  /**
   * Plays an audio URL on the shared audio element and reports the playing state.
   *
   * The callback receives `true` immediately and `false` when playback ends, or when playback could not be
   * started. A rejected `play()` is handled here so it never surfaces as an unhandled promise rejection.
   *
   * @param url URL of the audio to play.
   * @param onPreviewStateChange Optional callback receiving the playing state.
   */
  private play = (url: string, onPreviewStateChange?: (isPlaying: boolean) => void) => {
    this.audioPlayer.src = url;
    this.audioPlayer.play().catch((error: unknown) => {
      // `play()` is rejected with an AbortError when it is interrupted by `pause()` or a new source; whoever
      // interrupted the playback is responsible for reporting the state in that case.
      if (error instanceof Error && error.name === 'AbortError') {
        return;
      }
      logError(new ErrorWithContext('Error while playing voice preview', error instanceof Error ? { cause: `${error.message}` } : {}));
      onPreviewStateChange?.(false);
    });
    // Notify that preview has started
    onPreviewStateChange?.(true);

    this.audioPlayer.onended = () => {
      onPreviewStateChange?.(false);
    };
  };

  /**
   * Converts an SDK voice spec into a plain `VoiceInfo`.
   *
   * A spec without a language code is treated as `en-US` for the language, the country, and the flag thumbnail
   * used when the spec has no avatar.
   *
   * @param voiceSpec Spec to convert.
   * @param isCustomVoice Marks the voice as custom, which also prepends the `custom` label.
   */
  public mapToVoiceInfo = (voiceSpec: VoiceSpecOfAvailableVoice, isCustomVoice: boolean = false): VoiceInfo => {
    const { VoiceSpec } = this.sdk.sdkModule;
    const voiceMeta = voiceSpec.toVoiceMetadata();
    const remoteLabels = voiceMeta.labels;
    const labels = isCustomVoice ? ['custom', ...(remoteLabels ? remoteLabels : [])] : remoteLabels;
    // Only specs created from a remote CVL voice have an entry here; `get` yields `undefined` for all others.
    const name = this._mapIdQualifiedToCVLName.get(voiceSpec.idQualified);
    const engine = voiceMeta.engine;
    // One fallback for every language-derived field, so a missing language code cannot crash the flag lookup.
    const languageCode = voiceSpec.languageCode || 'en-US';

    return {
      id: voiceSpec.idQualified,
      name,
      displayName: voiceSpec.displayName,
      isLocalVoice: voiceSpec instanceof VoiceSpec.LocalAvailable,
      thumbnail: voiceSpec.avatarUrl ?? this.getFlagFromLang(languageCode),
      labels,
      language: languageCode.split('-')[0],
      country: languageCode.split('-')[1],
      languageCodeWithLocale: languageCode,
      gender: capitalize(voiceSpec.gender.name),
      isPremium: voiceSpec.isPremium,
      isCustomVoice,
      isSpeechifyVoice: voiceSpec instanceof VoiceSpec.Speechify,
      engine,
      rawName: get(voiceSpec, 'name', '')
    };
  };

  /** Object URL of the last synthesized preview clip, kept so it can be revoked once it is no longer played. */
  private previewObjectUrl: string | null = null;

  /** Revokes the object URL of the previous synthesized preview clip, if any, so its blob can be freed. */
  private revokePreviewObjectUrl() {
    if (this.previewObjectUrl) {
      URL.revokeObjectURL(this.previewObjectUrl);
      this.previewObjectUrl = null;
    }
  }

  /**
   * Plays a preview of a voice, stopping any preview that is currently playing.
   *
   * - A voice with a preview audio URL plays that URL.
   * - Otherwise a preview sentence is built from the voice list templates; local voices speak it through the
   *   speech-synthesis iframe, all other voices have it synthesized via `getAudio` and played as a blob.
   * - Nothing is played when no voice is given, no preview sentence is available, or synthesis returns no audio.
   *
   * @param voiceInfo Voice to preview.
   * @param onPreviewStateChange Optional callback receiving the playing state.
   */
  public playPreview = async (voiceInfo: Nullable<VoiceInfo>, onPreviewStateChange?: (isPlaying: boolean) => void) => {
    if (!voiceInfo) return;

    const voiceSpec = SDKVoiceFacade.singleton.getVoiceSpec(voiceInfo);
    const voiceMeta = voiceSpec.toVoiceMetadata();
    const isPersonalVoice = get(voiceSpec, 'name', '').startsWith('PVL:');
    const voiceName = isPersonalVoice ? get(voiceSpec, 'name', '') : voiceInfo.name;

    this.stopPreview(onPreviewStateChange);
    // Playback is paused now, so the blob of the previous synthesized preview can be released.
    this.revokePreviewObjectUrl();

    if (voiceMeta?.previewAudioUrl) {
      this.play(voiceMeta.previewAudioUrl, onPreviewStateChange);
      return;
    }

    const preview = this.getVoicePreviewForLanguage(voiceInfo.displayName, voiceInfo.languageCodeWithLocale, voiceInfo.gender.toLowerCase() as Gender);
    if (!preview) return;

    if (voiceInfo.isLocalVoice) {
      this.speakUsingSpeechSynthesis(preview, voiceInfo.displayName.toLocaleLowerCase(), onPreviewStateChange);
      return;
    }

    const previewAudio = await getAudio(`<speak>${preview}</speak>`, {
      name: voiceName,
      engine: voiceMeta.engine,
      languageCode: voiceInfo.languageCodeWithLocale
    });

    const audioData = get(previewAudio, 'audioData');
    if (previewAudio && audioData) {
      // Another preview may have created a clip while the audio was being synthesized.
      this.revokePreviewObjectUrl();
      const previewUrl = URL.createObjectURL(new Blob([audioData], { type: 'audio/ogg' }));
      this.previewObjectUrl = previewUrl;
      this.play(previewUrl, onPreviewStateChange);
    }
  };

  /**
   * Stops any preview: pauses the audio element, cancels speech in the speech-synthesis iframe and reports the
   * preview as not playing.
   *
   * @param onPreviewStateChange Optional callback receiving the playing state.
   * @throws AssertionError when the speech-synthesis iframe or its `contentWindow` is not available.
   */
  public stopPreview = (onPreviewStateChange?: (isPlaying: boolean) => void) => {
    this.audioPlayer.pause();
    this.stopSpeechSynthesis();

    // Notify that preview has stopped
    onPreviewStateChange?.(false);
  };

  /** Resolves with the voice specs of the most recent `refetchAllVoices` run (the first run starts in the constructor). */
  public getAllVoices = async () => {
    return this._allVoicesPromise;
  };

  /**
   * Picks a free (local) voice to fall back to for a language.
   *
   * Order of preference:
   * 1. for a non-English language, the first local voice whose language code starts with `languageCode`;
   * 2. the first local voice whose display name contains one of `ENGLISH_FREE_FALLBACK_VOICE_PREFERENCES`
   *    (checked in that list's order);
   * 3. the first local voice whose language code starts with `en`;
   * 4. the first local voice of any language.
   *
   * @param languageCode Language to find a local voice for; defaults to `en`.
   * @returns The chosen voice as a `VoiceInfo`.
   * @throws AssertionError when no local voice is available at all.
   */
  public getFallbackFreeVoiceInfo = (languageCode: string = 'en') => {
    const findByLanguage = (code: string) => this.localVoiceSpecs.find(voice => voice.languageCode?.startsWith(code));

    const findEnglishVoice = () => {
      for (const preferredVoiceName of ENGLISH_FREE_FALLBACK_VOICE_PREFERENCES) {
        const needle = preferredVoiceName.toLowerCase();
        const preferred = this.localVoiceSpecs.find(voice => voice.displayName && voice.displayName.toLowerCase().includes(needle));
        if (preferred) {
          return preferred;
        }
      }
      return findByLanguage('en');
    };

    // Fall back to English if the selected language has no local counterpart, and to any local voice after that.
    const requestedLanguageVoice = languageCode === 'en' ? undefined : findByLanguage(languageCode);
    const voiceSpec = requestedLanguageVoice ?? findEnglishVoice() ?? this.localVoiceSpecs[0];

    assert(voiceSpec, 'No local voice is available to use as a free fallback voice');
    return this.mapToVoiceInfo(voiceSpec);
  };
}
