// Upper and lower bounds applied to the AnalyserNode's
// maxDecibels / minDecibels properties in startProcessingStream().
const MAX_DECIBELS = -10;
const MIN_DECIBELS = -45;
// mp4 appears to be the most widely supported audio format
// and seems to work on Chrome and Safari
// Used both as the MediaRecorder mimeType and as the Blob type of the
// finished recording.
const AUDIO_FILE_TYPE = "audio/mp4";

// Module-level counter of TranscriptionRecorder instances constructed so far;
// each new instance takes the current value as its `id` and then increments it.
let count = 0;

/**
 * Helper class to interact with browser recording APIs to record
 * longer voice inputs for transcription/dictation that are manually
 * started and stopped by the user. (Versus VoiceInputRecorder which
 * is automatically started and stopped by the user's speaking.)
 *
 * Unlike VoiceInputRecorder, this class also does not do any silence
 * trimming because that would be fairly expensive to do for the
 * longer recordings we're expecting here.
 *
 * This class uses the WebAudio API configured in the following way,
 * using the style of the WebAudio API routing diagrams
 * https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API
 *
 *  AudioContext
 * ┌───────────────────────────────────────────┐
 * │  ┌───────────┐  ┌───────────┐  ┌────────┐ │
 * │  │Mic Source ├─►│Voice Band ├─►│Analyser│ │
 * │  │Stream Node│  │Filter Node│  │Node    │ │
 * │  └───────────┘  └───────────┘  └───┬────┘ │
 * │       ┌────────────────────────────┘      │
 * │  ┌────▼──────┐                            │
 * │  │Destination│                            │
 * │  │Stream Node│                            │
 * │  └────┬──────┘                            │
 * └───────┼───────────────────────────────────┘
 *   ┌─────▼─────────┐   ┌─────────┐
 *   │               │   │Untrimmed│
 *   │ MediaRecorder ┼──►│   mp4   │
 *   │               │   │Recording│
 *   └───────────────┘   └─────────┘
 *
 * In words, this class sets up a MediaStream from the user's microphone
 * and routes it through a bandpass filter to isolate the human voice vocal
 * range so we can reject background noise.
 *
 * The audio is also passed to an AnalyserNode so that it can be
 * inspected, but this class never stops recording on its own. Recording
 * ends only when stop() is called, at which point whatever the
 * MediaRecorder has captured is wrapped in a new File object.
 *
 */
export default class TranscriptionRecorder {
  /**
   * Private constructor to create a new TranscriptionRecorder instance.
   * Use TranscriptionRecorder.createRecorder() instead: the constructor
   * alone neither opens the microphone nor starts recording.
   */
  constructor() {
    // Initialised to false; nothing in this class updates it afterwards.
    this.recordingActive = false;
    // Each instance takes the next value of the module-level counter.
    this.id = count;
    count += 1;
  }

  /**
   * Async factory method to create a new TranscriptionRecorder instance.
   * Await this method instead of calling the constructor directly.
   * When the promise resolves, the MediaRecorder has been started.
   *
   * The promise rejects if getUserMedia() rejects (for example when the
   * user denies microphone access) or if the audio graph / MediaRecorder
   * cannot be set up; in the latter case the microphone is released again.
   *
   * @returns {Promise<TranscriptionRecorder>}
   */
  static createRecorder = () => new TranscriptionRecorder().initialize();

  /**
   * Whether the environment exposes navigator.mediaDevices.getUserMedia.
   * Safe to call where `navigator` is not declared at all (optional
   * chaining alone does not protect against an undeclared identifier).
   *
   * @returns {boolean}
   */
  static supportsAudioRecording = () =>
    typeof navigator !== "undefined" &&
    !!navigator?.mediaDevices?.getUserMedia;

  /**
   * Manually stop recording audio entirely.
   *
   * Calling stop() more than once returns the same promise, so every
   * caller receives the recording. If the MediaRecorder is already
   * inactive when stop() is called, the promise resolves right away
   * with whatever was captured (possibly undefined).
   *
   * @returns {Promise<File>} A promise that resolves to the final audio file.
   */
  stop = () => {
    // A second call must not replace the resolver of the first promise,
    // otherwise the first caller would wait forever.
    if (this.stoppingPromise) {
      return this.stoppingPromise;
    }

    // animationFrame is never assigned inside this class; the check is kept
    // in case code outside this class sets it.
    if (this.animationFrame) {
      window.cancelAnimationFrame(this.animationFrame);
    }

    this.stoppingPromise = new Promise(resolve => {
      this.stoppingResolve = resolve;
    });

    // Release the microphone immediately.
    this.microphoneMediaStream.getTracks().forEach(track => track.stop());

    if (this.mediaRecorder.state === "inactive") {
      // The recorder already stopped on its own, so no further "stop"
      // event will arrive to resolve the promise. Resolve it here.
      this.closeAudioContext();
      this.stoppingResolve(this.recordingFile);
    } else {
      // Resolution happens in the "stop" listener registered in
      // createMediaRecorder(), after "dataavailable" has delivered the data.
      this.mediaRecorder.stop();
    }

    return this.stoppingPromise;
  };

  /** Pause the underlying MediaRecorder. */
  pause = () => this.mediaRecorder.pause();

  /** Resume the underlying MediaRecorder after pause(). */
  resume = () => this.mediaRecorder.resume();

  //
  // private methods
  //

  /**
   * Request the microphone, build the WebAudio graph and start recording.
   *
   * @returns {Promise<TranscriptionRecorder>} this instance, once recording
   */
  initialize = async () => {
    // start listening. this also triggers the user to accept the browser
    // permissions if necessary.
    this.microphoneMediaStream = await navigator.mediaDevices.getUserMedia({
      audio: true,
      video: false,
    });

    try {
      this.startProcessingStream();
      this.createMediaRecorder();
    } catch (error) {
      // Setup failed after the microphone was opened (for example the
      // MediaRecorder constructor rejected the mimeType). Release the
      // microphone and the AudioContext before reporting the failure.
      this.microphoneMediaStream.getTracks().forEach(track => track.stop());
      this.closeAudioContext();
      throw error;
    }

    return this;
  };

  /**
   * Close the AudioContext created by startProcessingStream(), if there is
   * one and it is not closed already.
   */
  closeAudioContext = () => {
    if (!this.audioContext || this.audioContext.state === "closed") {
      return;
    }
    // Best-effort cleanup: a failure to close does not affect the recording
    // that was already captured, so there is nothing to report to the caller.
    this.audioContext.close().catch(() => {});
  };

  /**
   * Create a new MediaRecorder instance with the appropriate
   * event handlers and start recording audio.
   */
  createMediaRecorder = () => {
    this.mediaRecorder = new MediaRecorder(this.analyzedStream, {
      mimeType: AUDIO_FILE_TYPE,
    });

    // start() is called without a timeslice, so the data only ever arrives
    // in one chunk. Capture it and wrap it in a File object.
    this.mediaRecorder.addEventListener("dataavailable", e => {
      const blob = new Blob([e.data], { type: AUDIO_FILE_TYPE });
      this.recordingFile = new File([blob], `file${Date.now()}.mp4`);
    });

    // The stop event normally follows a manual stop() call. stoppingResolve
    // is only set by stop(), hence the optional call for the case where the
    // recorder stops before stop() was invoked.
    this.mediaRecorder.addEventListener("stop", () => {
      // The audio graph is no longer needed once the recorder has stopped.
      this.closeAudioContext();
      this.stoppingResolve?.(this.recordingFile);
    });

    // start recording upon creation
    // (property name spelling kept as-is; code outside this class may read it)
    this.mediaRecoderStartTime = performance.now();
    this.mediaRecorder.start();
  };

  /**
   * Set up the WebAudio API to process the microphone stream:
   * microphone source -> voice bandpass filter -> analyser -> destination.
   * The destination's stream (this.analyzedStream) is what gets recorded.
   */
  startProcessingStream = () => {
    this.audioContext = new AudioContext();
    const audioStreamSource = this.audioContext.createMediaStreamSource(
      this.microphoneMediaStream
    );

    const voicePassFilter = this.audioContext.createBiquadFilter();
    voicePassFilter.type = "bandpass";
    // Calculated here for the 300-3400 Hz range, which wikipedia says is the
    // "narrow" band for human speech.
    // https://www.learningaboutelectronics.com/Articles/Quality-factor-calculator.php#answer
    // NOTE(review): this Q equals sqrt(300 * 3400) / (3400 - 300), i.e. it
    // assumes the geometric centre (~1010 Hz) rather than the 1550 Hz
    // (arithmetic midpoint) set here. Confirm which is intended.
    voicePassFilter.frequency.value = 1550;
    voicePassFilter.Q.value = 0.325790481883;

    // the analyser allows the audio stream to be inspected
    this.analyser = this.audioContext.createAnalyser();
    this.analyser.maxDecibels = MAX_DECIBELS;
    this.analyser.minDecibels = MIN_DECIBELS;

    // Buffer sized to the analyser's bin count. It is not read inside this
    // class; it is kept on the instance alongside the analyser.
    const bufferLength = this.analyser.frequencyBinCount;
    this.domainData = new Uint8Array(bufferLength);

    const destination = this.audioContext.createMediaStreamDestination();

    // Wire the nodes as one chain so that the recorded stream actually
    // passes through the filter and the analyser, rather than connecting
    // every node directly to the raw microphone source.
    audioStreamSource.connect(voicePassFilter);
    voicePassFilter.connect(this.analyser);
    this.analyser.connect(destination);

    this.analyzedStream = destination.stream;
  };
}
