onAudioEnd not work #699

Open · jackiealex opened this issue Jul 25, 2023 · 3 comments
Labels: in review (Acknowledged and being looked at now)

jackiealex commented Jul 25, 2023

Base information:
SDK version: 1.30.1
How the SDK is used: via script loader

Background:
How can I tell when playback of the TTS voice has ended? I tried the player.onAudioEnd hook, but it never fires. Alternative approaches would also be acceptable, but I cannot find any that work. See the code comments marked NOT WORK.

My Code:

/* eslint-disable */

const load = require('load-script')
const EventEmitter = require('eventemitter3')


const SPEECH_KEY = 'my key XXX'
const SPEECH_REGION = 'my region XXX'


// load promise
function loadPromise(url) {
  return new Promise((resolve, reject) => {
    load(url, function (err, script) {
      if (err) {
        reject(err)
      }
      else {
        resolve(script)
      }
    })
  })
}

export const StatusTypes = {
  CONNECTING: 'CONNECTING',
  OPEN: 'OPEN',
  SPEAKING: 'SPEAKING',
  CLOSING: 'CLOSING',
  CLOSED: 'CLOSED',
}

export default class TextToSpeech {
  isSDKReady = false
  SpeechSDK = null
  speaker = null
  status = StatusTypes.CLOSED
  constructor() {
    this.initEventEmitter()
    this.initSDK()
  }
  initEventEmitter() {
    this.ee = new EventEmitter();
  }
  on(name, fn) {
    this.ee.on(name, fn, {})
  }
  initSDK() {
    if (this.isSDKReady) {
      return;
    }
    loadPromise('/azure-1.30.1/microsoft.cognitiveservices.speech.sdk.bundle-min.js').then((script) => {
      this.isSDKReady = true
      this.SpeechSDK = window.SpeechSDK
    }).catch((err) => {
      console.error('Failed to load the Speech SDK:', err)
    })
  }
  createSpeaker() {
    if (!this.isSDKReady) {
      return;
    }
    const speechConfig = this.SpeechSDK.SpeechConfig.fromSubscription(SPEECH_KEY, SPEECH_REGION);

    // The language of the voice that speaks.
    speechConfig.speechSynthesisVoiceName = "zh-CN-XiaoxiaoNeural";

    const player = new this.SpeechSDK.SpeakerAudioDestination();
    player.onAudioStart = function (_) {
      debugger
    }
    player.onAudioEnd = function (_) {
      debugger // NOT WORK
    };

    const audioConfig = this.SpeechSDK.AudioConfig.fromSpeakerOutput(player);

    const synthesizer = new this.SpeechSDK.SpeechSynthesizer(speechConfig, audioConfig);
    return synthesizer
  }
  speak(text, cb) {
    if (!this.speaker) {
      this.speaker = this.createSpeaker()

      this.speaker.bookmarkReached = function (s, e) {
        var str = `BookmarkReached event: \
            \r\n\tAudioOffset: ${(e.audioOffset + 5000) / 10000}ms \
            \r\n\tText: \"${e.text}\".`;
        console.log(str);
      };

      this.speaker.synthesisCanceled = function (s, e) {
        console.log("SynthesisCanceled event");
      };

// NOT WORK
      this.speaker.synthesisCompleted = function (s, e) {
        var str = `SynthesisCompleted event: \
                    \r\n\tAudioData: ${e.result.audioData.byteLength} bytes \
                    \r\n\tAudioDuration: ${e.result.audioDuration}`;
        console.log(str);
      };

      this.speaker.synthesisStarted = function (s, e) {
        console.log("SynthesisStarted event");
      };

      this.speaker.synthesizing = function (s, e) {
        var str = `Synthesizing event: \
            \r\n\tAudioData: ${e.result.audioData.byteLength} bytes`;
        console.log(str);
      };

      this.speaker.visemeReceived = function (s, e) {
        var str = `VisemeReceived event: \
            \r\n\tAudioOffset: ${(e.audioOffset + 5000) / 10000}ms \
            \r\n\tVisemeId: ${e.visemeId}`;
        console.log(str);
      };

      this.speaker.wordBoundary = function (s, e) {
        // Word, Punctuation, or Sentence
        var str = `WordBoundary event: \
            \r\n\tBoundaryType: ${e.boundaryType} \
            \r\n\tAudioOffset: ${(e.audioOffset + 5000) / 10000}ms \
            \r\n\tDuration: ${e.duration} \
            \r\n\tText: \"${e.text}\" \
            \r\n\tTextOffset: ${e.textOffset} \
            \r\n\tWordLength: ${e.wordLength}`;
        console.log(str);
      };

    }
    if (this.status !== StatusTypes.SPEAKING) {
      this.setStatus(StatusTypes.SPEAKING)
      this.ee.emit('status-change', this.status)
    }

    this.speaker.speakTextAsync(text, (result) => {
      if (result.reason === this.SpeechSDK.ResultReason.SynthesizingAudioCompleted) {
        console.log("Speech synthesis completed");
        debugger;
      } else {
        console.log("Speech synthesis was cancelled. Error: " + result.errorDetails);
      }
      cb && cb(this.speaker)
    }, (err) => {
      console.trace("err - " + err);
      cb && cb(this.speaker)
    });
  }
  stop() {
    if (this.speaker) {
      this.setStatus(StatusTypes.CLOSED)
      this.ee.emit('status-change', this.status)
      this.speaker.close()
    }
  }
  setStatus(status) {
    this.status = status
  }
}
jackiealex (Author) commented Aug 16, 2023

My workaround: it lets developers know when voice playback has ended.

// audioData: Float32Array of PCM samples decoded from the synthesis result
const audioBuffer = this.audioContext.createBuffer(1, audioData.length, 22050)
audioBuffer.copyToChannel(audioData, 0)
const bufferSource = this.bufferSource = this.audioContext.createBufferSource()
bufferSource.buffer = audioBuffer
bufferSource.connect(this.audioContext.destination)
bufferSource.onended = event => {
  emit('status', 'end') // any way of emitting the event is fine
}
bufferSource.start()

Code like this helps a lot: a developer can synthesize many statements, and once they know when one statement has finished playing, they can synthesize the next one with a producer-consumer model (a sketch follows below).
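
A minimal sketch of that producer-consumer idea, assuming a hypothetical async speakOne(text) helper that resolves once playback of that text has ended:

const queue = []
let draining = false

function enqueue(text) {
  queue.push(text)        // producer: add a statement to the queue
  if (!draining) drain()  // start the consumer if it is idle
}

async function drain() {
  draining = true
  while (queue.length > 0) {
    const next = queue.shift()
    await speakOne(next)  // consumer: wait until this statement's playback ends
  }
  draining = false
}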

jackiealex (Author) commented

@yulin-li


granik commented Apr 30, 2024

@jackiealex, you have to call the .close() method inside the callback of speakTextAsync to make the onAudioEnd event work.

See this Stack Overflow question: https://stackoverflow.com/questions/62564402/microsoft-cognitive-tts-onaudioend-event-not-working

P.S. I had the same problem and this solved it for me.
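
A minimal sketch of that suggestion, adapted to the createSpeaker() code above (speechConfig and text are assumed to be set up as in the original example):

const player = new this.SpeechSDK.SpeakerAudioDestination();
player.onAudioEnd = () => {
  console.log("Playback finished"); // fires once the synthesizer has been closed
};

const audioConfig = this.SpeechSDK.AudioConfig.fromSpeakerOutput(player);
const synthesizer = new this.SpeechSDK.SpeechSynthesizer(speechConfig, audioConfig);

synthesizer.speakTextAsync(
  text,
  () => synthesizer.close(), // closing here is what lets onAudioEnd fire
  (err) => {
    console.trace(err);
    synthesizer.close();
  }
);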
