Skip to content

Commit a009df5

Browse files
committed
feat(SpeechToText): Add support for EndOfPhraseSilenceTime and SplitTranscriptAtPhraseEnd to STT web
1 parent 0cf594a commit a009df5

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

Examples/ExampleStreaming.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* (C) Copyright IBM Corp. 2015, 2019.
2+
* (C) Copyright IBM Corp. 2015, 2020.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -109,6 +109,7 @@ public bool Active
109109
_service.SmartFormatting = true;
110110
_service.SpeakerLabels = false;
111111
_service.WordAlternativesThreshold = null;
112+
_service.EndOfPhraseSilenceTime = null;
112113
_service.StartListening(OnRecognize, OnRecognizeSpeaker);
113114
}
114115
else if (!value && _service.IsListening)

Scripts/Services/SpeechToText/V1/SpeechToTextServiceExtension.cs

+33
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,35 @@ public float SilenceThreshold
257257
/// (optional)
258258
/// </summary>
259259
public float? ProcessingMetricsInterval { get; set; }
260+
/// <summary>
261+
/// Specifies the duration, in seconds, of the pause interval at which the service splits a transcript into multiple final results.
262+
/// If the service detects pauses or extended silence
263+
/// before it reaches the end of the audio stream, its response can include multiple final results. Silence
264+
/// indicates a point at which the speaker pauses between spoken words or phrases.
265+
///
266+
/// Specify a value for the pause interval in the range of 0.0 to 120.0.
267+
/// * A value greater than 0 specifies the interval that the service is to use for speech recognition.
268+
/// * A value of 0 indicates that the service is to use the default interval. It is equivalent to omitting the
269+
/// parameter.
270+
///
271+
/// The default pause interval for most languages is 0.8 seconds; the default for Chinese is 0.6 seconds.
272+
///
273+
/// See [End of phrase silence
274+
/// time](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#silence_time).
275+
/// </summary>
276+
public double? EndOfPhraseSilenceTime { get; set; }
277+
/// <summary>
278+
/// If `true`, directs the service to split the transcript into
279+
/// multiple final results based on semantic features of the input, for example, at the conclusion of meaningful
280+
/// phrases such as sentences. The service bases its understanding of semantic features on the base language
281+
/// model that you use with a request. Custom language models and grammars can also influence how and where the
282+
/// service splits a transcript. By default, the service splits transcripts based solely on the pause interval.
283+
///
284+
/// See [Split transcript at phrase
285+
/// end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
286+
/// (optional, default to false)
287+
/// </summary>
288+
public bool? SplitTranscriptAtPhraseEnd { get; set; }
260289
#endregion
261290

262291
#region Sessionless - Streaming
@@ -498,6 +527,10 @@ private void SendStart()
498527
start["grammar_name"] = GrammarName;
499528
if (Redaction != null)
500529
start["redaction"] = Redaction;
530+
if (EndOfPhraseSilenceTime != null)
531+
start["end_of_phrase_silence_time"] = EndOfPhraseSilenceTime;
532+
if (SplitTranscriptAtPhraseEnd != null)
533+
start["split_transcript_at_phrase_end"] = SplitTranscriptAtPhraseEnd;
501534
start["processing_metrics"] = ProcessingMetrics;
502535
start["processing_metrics_interval"] = ProcessingMetricsInterval;
503536

0 commit comments

Comments
 (0)