Example usage of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in the watson-developer-cloud/java-sdk project: class SpeechToTextTest, method testRecognize.
/**
 * Tests the basic {@code recognize} call: enqueues a canned JSON response on the mock
 * server, issues a WAV recognition request, and verifies both the parsed result and
 * the outgoing HTTP request (method, path, Content-Type).
 *
 * @throws URISyntaxException the URI syntax exception
 * @throws InterruptedException the interrupted exception
 * @throws FileNotFoundException the file not found exception
 */
@Test
public void testRecognize() throws URISyntaxException, InterruptedException, FileNotFoundException {
server.enqueue(new MockResponse().addHeader(CONTENT_TYPE, HttpMediaType.APPLICATION_JSON).setBody(GSON.toJson(recognitionResults)));
RecognizeOptions recognizeOptions = new RecognizeOptions.Builder().audio(SAMPLE_WAV).contentType(RecognizeOptions.ContentType.AUDIO_WAV).build();
final SpeechRecognitionResults result = service.recognize(recognizeOptions).execute();
final RecordedRequest request = server.takeRequest();
assertNotNull(result);
// JUnit's assertEquals contract is (expected, actual); the canned fixture is the
// expected value, so it goes first — otherwise failure messages are inverted.
assertEquals(recognitionResults, result);
assertEquals("POST", request.getMethod());
assertEquals(PATH_RECOGNIZE, request.getPath());
assertEquals(HttpMediaType.AUDIO_WAV, request.getHeader(CONTENT_TYPE));
}
Example usage of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in the watson-developer-cloud/java-sdk project: class SpeechToTextTest, method testRecognizeWithSpeakerLabels.
/**
 * Tests diarization (speaker labels): replays a recorded diarization JSON payload from
 * the mock server and checks that the service sends {@code speaker_labels=true} and
 * round-trips the payload unchanged.
 *
 * @throws URISyntaxException the URI syntax exception
 * @throws InterruptedException the interrupted exception
 * @throws FileNotFoundException the file not found exception
 */
@Test
public void testRecognizeWithSpeakerLabels() throws URISyntaxException, InterruptedException, FileNotFoundException {
// NOTE(review): the stream is handed to getStringFromInputStream — presumably that
// helper closes it; confirm, otherwise this leaks a file handle per test run.
FileInputStream diarizationFile = new FileInputStream("src/test/resources/speech_to_text/diarization.json");
String expectedBody = getStringFromInputStream(diarizationFile);
JsonObject expectedJson = new JsonParser().parse(expectedBody).getAsJsonObject();
server.enqueue(new MockResponse().addHeader(CONTENT_TYPE, HttpMediaType.APPLICATION_JSON).setBody(expectedBody));
RecognizeOptions options = new RecognizeOptions.Builder()
    .audio(SAMPLE_WAV)
    .contentType(RecognizeOptions.ContentType.AUDIO_WAV)
    .speakerLabels(true)
    .build();
SpeechRecognitionResults result = service.recognize(options).execute();
final RecordedRequest recorded = server.takeRequest();
assertEquals("POST", recorded.getMethod());
assertEquals(PATH_RECOGNIZE + "?speaker_labels=true", recorded.getPath());
// Compare serialized forms so field-by-field equality is implied by JSON equality.
assertEquals(expectedJson.toString(), GSON.toJsonTree(result).toString());
}
Example usage of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in the watson-developer-cloud/java-sdk project: class SpeechToTextTest, method testRecognizeWithCustomization.
/**
 * Tests recognition against a custom language model: verifies that the
 * {@code customization_id} and {@code version} query parameters are sent and that the
 * mock response is parsed back into an equivalent JSON tree.
 *
 * @throws FileNotFoundException the file not found exception
 * @throws InterruptedException the interrupted exception
 */
@Test
public void testRecognizeWithCustomization() throws FileNotFoundException, InterruptedException {
String customizationId = "foo";
String modelVersion = "version";
// NOTE(review): stream closing is delegated to getStringFromInputStream — confirm
// that helper closes its argument.
String expectedBody = getStringFromInputStream(new FileInputStream("src/test/resources/speech_to_text/recognition.json"));
JsonObject expectedJson = new JsonParser().parse(expectedBody).getAsJsonObject();
server.enqueue(new MockResponse().addHeader(CONTENT_TYPE, HttpMediaType.APPLICATION_JSON).setBody(expectedBody));
RecognizeOptions options = new RecognizeOptions.Builder()
    .audio(SAMPLE_WAV)
    .contentType(RecognizeOptions.ContentType.AUDIO_WAV)
    .customizationId(customizationId)
    .version(modelVersion)
    .build();
SpeechRecognitionResults result = service.recognize(options).execute();
final RecordedRequest recorded = server.takeRequest();
assertEquals("POST", recorded.getMethod());
assertEquals(PATH_RECOGNIZE + "?customization_id=" + customizationId + "&version=" + modelVersion, recorded.getPath());
assertEquals(expectedJson, GSON.toJsonTree(result));
}
Example usage of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in the watson-developer-cloud/java-sdk project: class SpeechToText, method recognize.
/**
 * Recognizes an audio stream and returns {@link SpeechRecognitionResults}.<br>
 * <br>
 * Here is an example of how to recognize an audio file:
 *
 * <pre>
 * SpeechToText service = new SpeechToText();
 * service.setUsernameAndPassword("USERNAME", "PASSWORD");
 * service.setEndPoint("SERVICE_URL");
 *
 * RecognizeOptions options = new RecognizeOptions.Builder()
 *     .audio(new FileInputStream("sample1.wav"))
 *     .contentType(RecognizeOptions.ContentType.AUDIO_WAV)
 *     .maxAlternatives(3)
 *     .build();
 *
 * SpeechRecognitionResults results = service.recognize(options).execute();
 * System.out.println(results);
 * </pre>
 *
 * @param recognizeOptions the recognize options; may be null, in which case a bare
 *        POST with no query parameters, Content-Type header, or body is sent
 * @return the {@link SpeechRecognitionResults}
 */
public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recognizeOptions) {
String[] pathSegments = { "v1/recognize" };
RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getEndPoint(), pathSegments));
// Every option is optional: each non-null field maps 1:1 onto either a header or a
// query parameter of the /v1/recognize endpoint.
if (recognizeOptions != null) {
if (recognizeOptions.contentType() != null) {
builder.header("Content-Type", recognizeOptions.contentType());
}
if (recognizeOptions.model() != null) {
builder.query("model", recognizeOptions.model());
}
if (recognizeOptions.customizationId() != null) {
builder.query("customization_id", recognizeOptions.customizationId());
}
if (recognizeOptions.acousticCustomizationId() != null) {
builder.query("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
}
if (recognizeOptions.customizationWeight() != null) {
builder.query("customization_weight", String.valueOf(recognizeOptions.customizationWeight()));
}
if (recognizeOptions.version() != null) {
builder.query("version", recognizeOptions.version());
}
if (recognizeOptions.inactivityTimeout() != null) {
builder.query("inactivity_timeout", String.valueOf(recognizeOptions.inactivityTimeout()));
}
if (recognizeOptions.keywords() != null) {
// Keywords are sent as a single comma-joined query value.
builder.query("keywords", RequestUtils.join(recognizeOptions.keywords(), ","));
}
if (recognizeOptions.keywordsThreshold() != null) {
builder.query("keywords_threshold", String.valueOf(recognizeOptions.keywordsThreshold()));
}
if (recognizeOptions.maxAlternatives() != null) {
builder.query("max_alternatives", String.valueOf(recognizeOptions.maxAlternatives()));
}
if (recognizeOptions.wordAlternativesThreshold() != null) {
builder.query("word_alternatives_threshold", String.valueOf(recognizeOptions.wordAlternativesThreshold()));
}
if (recognizeOptions.wordConfidence() != null) {
builder.query("word_confidence", String.valueOf(recognizeOptions.wordConfidence()));
}
if (recognizeOptions.timestamps() != null) {
builder.query("timestamps", String.valueOf(recognizeOptions.timestamps()));
}
if (recognizeOptions.profanityFilter() != null) {
builder.query("profanity_filter", String.valueOf(recognizeOptions.profanityFilter()));
}
if (recognizeOptions.smartFormatting() != null) {
builder.query("smart_formatting", String.valueOf(recognizeOptions.smartFormatting()));
}
if (recognizeOptions.speakerLabels() != null) {
builder.query("speaker_labels", String.valueOf(recognizeOptions.speakerLabels()));
}
if (recognizeOptions.audio() != null) {
// The audio stream becomes the raw request body. NOTE(review): this assumes
// contentType() is non-null whenever audio() is — MediaType.parse(null) would
// NPE here; presumably the RecognizeOptions.Builder enforces that pairing.
builder.body(InputStreamRequestBody.create(MediaType.parse(recognizeOptions.contentType()), recognizeOptions.audio()));
}
}
return createServiceCall(builder.build(), ResponseConverterUtils.getObject(SpeechRecognitionResults.class));
}
Example usage of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in the watson-developer-cloud/java-sdk project: class RecognizeUsingWebSocketsWithSpeakerLabelsExample, method main.
/**
 * The main method: streams a two-speaker WAV file to the Speech to Text service over a
 * WebSocket, collecting interim transcription results with speaker labels, and waits
 * up to one minute for the connection to close.
 *
 * @param args the arguments (unused)
 * @throws FileNotFoundException the file not found exception
 * @throws InterruptedException the interrupted exception
 */
public static void main(String[] args) throws FileNotFoundException, InterruptedException {
// The stream must stay open for the duration of the WebSocket session — the service
// reads from it asynchronously, so it is deliberately not wrapped in try-with-resources.
FileInputStream audio = new FileInputStream("src/test/resources/speech_to_text/twospeakers.wav");
Authenticator authenticator = new IamAuthenticator("<iam_api_key>");
SpeechToText service = new SpeechToText(authenticator);
RecognizeWithWebsocketsOptions options = new RecognizeWithWebsocketsOptions.Builder().audio(audio).interimResults(true).speakerLabels(true).model(RecognizeOptions.Model.EN_US_NARROWBANDMODEL).contentType(HttpMediaType.AUDIO_WAV).build();
RecoTokens recoTokens = new RecoTokens();
service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
@Override
public void onTranscription(SpeechRecognitionResults speechResults) {
// Accumulate each interim/final result as it arrives.
recoTokens.add(speechResults);
}
@Override
public void onDisconnected() {
// Release main() once the socket closes. NOTE(review): `lock` is presumably a
// class-level CountDownLatch(1) declared outside this snippet — confirm.
lock.countDown();
}
});
// Block until disconnect, but give up after one minute so the example cannot hang.
lock.await(1, TimeUnit.MINUTES);
}
Aggregations