Use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.
The method recognizeUsingWebSocket in the class SpeechToText.
/**
* Sends audio and returns transcription results for recognition requests over a WebSocket connection. Requests and
* responses flow over a single TCP connection, which abstracts much of the complexity of the request and offers an
* efficient implementation with low latency, high throughput, and asynchronous responses. By default, only final
* results are returned for any request; to enable interim results, set the interimResults parameter to true.
*
* The service imposes a data size limit of 100 MB per utterance (per recognition request). You can send multiple
* utterances over a single WebSocket connection. The service automatically detects the endianness of the incoming
* audio and, for audio that includes multiple channels, downmixes the audio to one-channel mono during transcoding.
* (For the audio/l16 format, you can specify the endianness.)
*
* @param recognizeOptions the recognize options
* @param callback the {@link RecognizeCallback} instance where results will be sent
* @return the {@link WebSocket}
*/
public WebSocket recognizeUsingWebSocket(RecognizeOptions recognizeOptions, RecognizeCallback callback) {
  Validator.notNull(recognizeOptions, "recognizeOptions cannot be null");
  Validator.notNull(recognizeOptions.audio(), "audio cannot be null");
  Validator.notNull(callback, "callback cannot be null");

  // Build the recognize URL, appending only the query parameters that were set.
  HttpUrl.Builder urlBuilder = HttpUrl.parse(getEndPoint() + "/v1/recognize").newBuilder();
  if (recognizeOptions.model() != null) {
    urlBuilder.addQueryParameter("model", recognizeOptions.model());
  }
  if (recognizeOptions.customizationId() != null) {
    urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId());
  }
  if (recognizeOptions.acousticCustomizationId() != null) {
    urlBuilder.addQueryParameter("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
  }
  if (recognizeOptions.version() != null) {
    urlBuilder.addQueryParameter("version", recognizeOptions.version());
  }
  if (recognizeOptions.customizationWeight() != null) {
    urlBuilder.addQueryParameter("customization_weight", String.valueOf(recognizeOptions.customizationWeight()));
  }

  // Switch the scheme from HTTPS to WSS for the WebSocket handshake.
  String url = urlBuilder.toString().replace("https://", "wss://");
  Request.Builder builder = new Request.Builder().url(url);
  setAuthentication(builder);
  setDefaultHeaders(builder);
  OkHttpClient client = configureHttpClient();
  return client.newWebSocket(builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback));
}
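For context, a minimal calling sketch (the file name is a placeholder and the credentials are elided, as in the examples below): open an audio stream, opt in to interim results as the Javadoc describes, and print each transcription event as it arrives.

// Minimal sketch; "audio-sample.wav" is a placeholder, not part of the SDK.
SpeechToText service = new SpeechToText();
service.setUsernameAndPassword("<username>", "<password>");
InputStream audio = new FileInputStream("audio-sample.wav");
RecognizeOptions options = new RecognizeOptions.Builder()
    .audio(audio)
    .contentType(HttpMediaType.AUDIO_WAV)
    .interimResults(true) // interim results are off by default, per the Javadoc above
    .build();
WebSocket socket = service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
  @Override
  public void onTranscription(SpeechRecognitionResults speechResults) {
    System.out.println(speechResults);
  }
});

The returned WebSocket handle can be used to close the connection early; otherwise the connection ends when the audio stream is exhausted.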
Use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.
The method main in the class CustomizationExample.
/**
* The main method.
*
* @param args the arguments
* @throws InterruptedException the interrupted exception
*/
public static void main(String[] args) throws InterruptedException {
  SpeechToText service = new SpeechToText();
  service.setUsernameAndPassword("<username>", "<password>");

  // Create language model
  CreateLanguageModelOptions createOptions = new CreateLanguageModelOptions.Builder()
      .name("IEEE-permanent")
      .baseModelName("en-US_BroadbandModel")
      .description("My customization")
      .build();
  LanguageModel myModel = service.createLanguageModel(createOptions).execute();
  String id = myModel.getCustomizationId();

  try {
    // Add a corpus file to the model
    AddCorpusOptions addOptions = new AddCorpusOptions.Builder()
        .customizationId(id)
        .corpusName("corpus-1")
        .corpusFile(new File(CORPUS_FILE))
        .corpusFileContentType(HttpMediaType.TEXT_PLAIN)
        .allowOverwrite(false)
        .build();
    service.addCorpus(addOptions).execute();

    // Poll the corpus status until it has been analyzed (up to 30 tries, 5 s apart)
    GetCorpusOptions getOptions = new GetCorpusOptions.Builder()
        .customizationId(id)
        .corpusName("corpus-1")
        .build();
    for (int x = 0; x < 30 && service.getCorpus(getOptions).execute().getStatus() != Status.ANALYZED; x++) {
      Thread.sleep(5000);
    }

    // Get all corpora
    ListCorporaOptions listCorporaOptions = new ListCorporaOptions.Builder().customizationId(id).build();
    Corpora corpora = service.listCorpora(listCorporaOptions).execute();
    System.out.println(corpora);

    // Get the specific corpus
    Corpus corpus = service.getCorpus(getOptions).execute();
    System.out.println(corpus);

    // Now add some user words to the custom model
    service.addWord(new AddWordOptions.Builder()
        .customizationId(id)
        .wordName("IEEE")
        .word("IEEE")
        .displayAs("IEEE")
        .addSoundsLike("I. triple E.")
        .build()).execute();
    service.addWord(new AddWordOptions.Builder()
        .customizationId(id)
        .wordName("hhonors")
        .word("hhonors")
        .displayAs("HHonors") // was "IEEE" in the original snippet, a copy-paste slip
        .addSoundsLike("H. honors")
        .addSoundsLike("Hilton honors")
        .build()).execute();

    // Display all words in the words resource (OOVs from the corpus and
    // new words just added) in ascending alphabetical order
    ListWordsOptions listWordsAlphabeticalOptions = new ListWordsOptions.Builder()
        .customizationId(id)
        .wordType(ListWordsOptions.WordType.ALL)
        .build();
    Words words = service.listWords(listWordsAlphabeticalOptions).execute();
    System.out.println("\nASCENDING ALPHABETICAL ORDER:");
    System.out.println(words);

    // Then display all words in the words resource in descending order by count
    ListWordsOptions listWordsCountOptions = new ListWordsOptions.Builder()
        .customizationId(id)
        .wordType(ListWordsOptions.WordType.ALL)
        .sort("-" + ListWordsOptions.Sort.COUNT)
        .build();
    words = service.listWords(listWordsCountOptions).execute();
    System.out.println("\nDESCENDING ORDER BY COUNT:");
    System.out.println(words);

    // Now start training of the model and poll until it is available (up to 30 tries, 10 s apart).
    // The options variable is hoisted out of the loop and renamed: the original snippet redeclared
    // a local named getOptions inside the same scope, which does not compile.
    TrainLanguageModelOptions trainOptions = new TrainLanguageModelOptions.Builder()
        .customizationId(id)
        .wordTypeToAdd(TrainLanguageModelOptions.WordTypeToAdd.ALL)
        .build();
    service.trainLanguageModel(trainOptions).execute();
    GetLanguageModelOptions getModelOptions = new GetLanguageModelOptions.Builder().customizationId(id).build();
    for (int x = 0; x < 30 && myModel.getStatus() != LanguageModel.Status.AVAILABLE; x++) {
      myModel = service.getLanguageModel(getModelOptions).execute();
      Thread.sleep(10000);
    }

    File audio = new File(AUDIO_FILE);
    RecognizeOptions recognizeOptionsWithModel = new RecognizeOptions.Builder()
        .model(RecognizeOptions.EN_US_BROADBANDMODEL)
        .customizationId(id)
        .audio(audio)
        .contentType(HttpMediaType.AUDIO_WAV)
        .build();
    RecognizeOptions recognizeOptionsWithoutModel = new RecognizeOptions.Builder()
        .model(RecognizeOptions.EN_US_BROADBANDMODEL)
        .audio(audio)
        .contentType(HttpMediaType.AUDIO_WAV)
        .build();

    // First decode WITHOUT the custom model
    SpeechRecognitionResults transcript = service.recognize(recognizeOptionsWithoutModel).execute();
    System.out.println(transcript);

    // Now decode WITH the custom model
    transcript = service.recognize(recognizeOptionsWithModel).execute();
    System.out.println(transcript);
  } finally {
    // Always clean up the custom model
    DeleteLanguageModelOptions deleteOptions = new DeleteLanguageModelOptions.Builder().customizationId(id).build();
    service.deleteLanguageModel(deleteOptions).execute();
  }
}
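The two polling loops above (corpus analysis and model training) have the same shape, so a small helper could factor them out. A sketch; the name awaitStatus and its parameters are illustrative, not part of the SDK:

// Hypothetical helper, not part of the SDK: poll until a condition holds or attempts run out.
private static void awaitStatus(java.util.function.BooleanSupplier isReady, int maxAttempts, long sleepMillis)
    throws InterruptedException {
  for (int attempt = 0; attempt < maxAttempts && !isReady.getAsBoolean(); attempt++) {
    Thread.sleep(sleepMillis);
  }
}

The corpus loop above would then read: awaitStatus(() -> service.getCorpus(getOptions).execute().getStatus() == Status.ANALYZED, 30, 5000);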
Use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.
The method main in the class MicrophoneWithWebSocketsExample.
/**
* The main method.
*
* @param args the arguments
* @throws Exception the exception
*/
public static void main(final String[] args) throws Exception {
  SpeechToText service = new SpeechToText();
  service.setUsernameAndPassword("<username>", "<password>");

  // Signed PCM AudioFormat with a 16 kHz sample rate, 16-bit sample size, mono
  int sampleRate = 16000;
  AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
  DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
  if (!AudioSystem.isLineSupported(info)) {
    System.out.println("Line not supported");
    System.exit(0);
  }

  // Open the microphone line and wrap it in an InputStream for the service
  TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
  line.open(format);
  line.start();
  AudioInputStream audio = new AudioInputStream(line);

  RecognizeOptions options = new RecognizeOptions.Builder()
      .audio(audio)
      .interimResults(true)
      .timestamps(true)
      .wordConfidence(true)
      .contentType(HttpMediaType.AUDIO_RAW + ";rate=" + sampleRate)
      .build();
  service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
    @Override
    public void onTranscription(SpeechRecognitionResults speechResults) {
      System.out.println(speechResults);
    }
  });

  System.out.println("Listening to your voice for the next 30s...");
  Thread.sleep(30 * 1000);

  // Closing the WebSocket's underlying InputStream will close the WebSocket itself.
  line.stop();
  line.close();
  System.out.println("Fin.");
}
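The fixed 30-second sleep above is fine for a demo, but the callback can also drive shutdown. A sketch, assuming an onError(Exception) hook on BaseRecognizeCallback (onDisconnected is overridden the same way in the examples below): stop the microphone line when the connection fails or drops.

// Sketch: close the microphone from the callback instead of sleeping a fixed interval.
// onError is assumed to be overridable on BaseRecognizeCallback, alongside onDisconnected.
service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
  @Override
  public void onTranscription(SpeechRecognitionResults speechResults) {
    System.out.println(speechResults);
  }

  @Override
  public void onError(Exception e) {
    e.printStackTrace();
    line.stop();
    line.close();
  }

  @Override
  public void onDisconnected() {
    line.stop();
    line.close();
  }
});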
Use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.
The method main in the class RecognizeUsingWebSocketsExample.
// Class field the snippet relies on: released by onDisconnected so main can wait for the socket to close.
private static final CountDownLatch lock = new CountDownLatch(1);

public static void main(String[] args) throws FileNotFoundException, InterruptedException {
  SpeechToText service = new SpeechToText();
  service.setUsernameAndPassword("<username>", "<password>");

  FileInputStream audio = new FileInputStream("src/test/resources/speech_to_text/sample1.wav");
  RecognizeOptions options = new RecognizeOptions.Builder()
      .audio(audio)
      .interimResults(true)
      .contentType(HttpMediaType.AUDIO_WAV)
      .build();
  service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
    @Override
    public void onTranscription(SpeechRecognitionResults speechResults) {
      System.out.println(speechResults);
    }

    @Override
    public void onDisconnected() {
      lock.countDown();
    }
  });
  lock.await(1, TimeUnit.MINUTES);
}
Use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.
The method main in the class RecognizeUsingWebSocketsWithSpeakerLabelsExample.
// Class field the snippet relies on: released by onDisconnected so main can wait for the socket to close.
private static final CountDownLatch lock = new CountDownLatch(1);

/**
 * The main method.
 *
 * @param args the arguments
 * @throws FileNotFoundException the file not found exception
 * @throws InterruptedException the interrupted exception
 */
public static void main(String[] args) throws FileNotFoundException, InterruptedException {
  FileInputStream audio = new FileInputStream("src/test/resources/speech_to_text/twospeakers.wav");
  SpeechToText service = new SpeechToText();
  service.setUsernameAndPassword("<username>", "<password>");

  RecognizeOptions options = new RecognizeOptions.Builder()
      .audio(audio)
      .interimResults(true)
      .speakerLabels(true)
      .model(RecognizeOptions.EN_US_NARROWBANDMODEL)
      .contentType(HttpMediaType.AUDIO_WAV)
      .build();

  // RecoTokens (defined elsewhere in this example class) accumulates results and
  // pairs word timestamps with their speaker labels.
  RecoTokens recoTokens = new RecoTokens();
  service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
    @Override
    public void onTranscription(SpeechRecognitionResults speechResults) {
      recoTokens.add(speechResults);
    }

    @Override
    public void onDisconnected() {
      lock.countDown();
    }
  });
  lock.await(1, TimeUnit.MINUTES);
}
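For reference, a minimal alternative to RecoTokens that just prints each labeled interval. This is a sketch: the getters getSpeakerLabels(), getSpeaker(), getFrom(), and getTo() are assumed from the v1 model classes and should be checked against your SDK version.

// Minimal speaker-label consumer; getter names assumed from the v1 models.
service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
  @Override
  public void onTranscription(SpeechRecognitionResults speechResults) {
    if (speechResults.getSpeakerLabels() != null) {
      for (SpeakerLabelsResult label : speechResults.getSpeakerLabels()) {
        System.out.printf("speaker %d: %.2fs - %.2fs%n",
            label.getSpeaker(), label.getFrom(), label.getTo());
      }
    }
  }
});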