Search in sources :

Example 21 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class SpeechToTextTest method testRecognize.

/**
 * Test recognize.
 *
 * <p>Enqueues a mocked JSON response built from {@code recognitionResults}, issues a recognize call
 * for a WAV sample, and verifies both the deserialized result and the recorded HTTP request
 * (method, path and Content-Type header).
 *
 * @throws URISyntaxException the URI syntax exception
 * @throws InterruptedException the interrupted exception
 * @throws FileNotFoundException the file not found exception
 */
@Test
public void testRecognize() throws URISyntaxException, InterruptedException, FileNotFoundException {
    server.enqueue(new MockResponse().addHeader(CONTENT_TYPE, HttpMediaType.APPLICATION_JSON).setBody(GSON.toJson(recognitionResults)));
    RecognizeOptions recognizeOptions = new RecognizeOptions.Builder().audio(SAMPLE_WAV).contentType(RecognizeOptions.ContentType.AUDIO_WAV).build();
    final SpeechRecognitionResults result = service.recognize(recognizeOptions).execute();
    final RecordedRequest request = server.takeRequest();
    assertNotNull(result);
    // JUnit's contract is assertEquals(expected, actual); this order keeps failure messages accurate.
    assertEquals(recognitionResults, result);
    assertEquals("POST", request.getMethod());
    assertEquals(PATH_RECOGNIZE, request.getPath());
    assertEquals(HttpMediaType.AUDIO_WAV, request.getHeader(CONTENT_TYPE));
}
Also used : RecordedRequest(okhttp3.mockwebserver.RecordedRequest) MockResponse(okhttp3.mockwebserver.MockResponse) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions) WatsonServiceUnitTest(com.ibm.watson.developer_cloud.WatsonServiceUnitTest) Test(org.junit.Test)

Example 22 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class SpeechToTextTest method testRecognizeWithSpeakerLabels.

/**
 * Test recognize with speaker labels (diarization) enabled.
 *
 * <p>The diarization fixture is served back as the mocked response body; the test then checks the
 * recorded request (method and path including the {@code speaker_labels} query parameter) and that
 * the deserialized result serializes back to the same JSON text as the parsed fixture.
 *
 * @throws URISyntaxException the URI syntax exception
 * @throws InterruptedException the interrupted exception
 * @throws FileNotFoundException the file not found exception
 */
@Test
public void testRecognizeWithSpeakerLabels() throws URISyntaxException, InterruptedException, FileNotFoundException {
    // Load the fixture once: the raw text feeds the mock server, the parsed tree is the expectation.
    final FileInputStream fixtureStream = new FileInputStream("src/test/resources/speech_to_text/diarization.json");
    final String fixtureText = getStringFromInputStream(fixtureStream);
    final JsonObject expectedJson = new JsonParser().parse(fixtureText).getAsJsonObject();

    final MockResponse cannedResponse = new MockResponse()
            .addHeader(CONTENT_TYPE, HttpMediaType.APPLICATION_JSON)
            .setBody(fixtureText);
    server.enqueue(cannedResponse);

    final RecognizeOptions options = new RecognizeOptions.Builder()
            .audio(SAMPLE_WAV)
            .contentType(RecognizeOptions.ContentType.AUDIO_WAV)
            .speakerLabels(true)
            .build();
    final SpeechRecognitionResults actual = service.recognize(options).execute();
    final RecordedRequest recorded = server.takeRequest();

    assertEquals("POST", recorded.getMethod());
    assertEquals(PATH_RECOGNIZE + "?speaker_labels=true", recorded.getPath());
    assertEquals(expectedJson.toString(), GSON.toJsonTree(actual).toString());
}
Also used : RecordedRequest(okhttp3.mockwebserver.RecordedRequest) MockResponse(okhttp3.mockwebserver.MockResponse) JsonObject(com.google.gson.JsonObject) ByteString(okio.ByteString) FileInputStream(java.io.FileInputStream) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) JsonParser(com.google.gson.JsonParser) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions) WatsonServiceUnitTest(com.ibm.watson.developer_cloud.WatsonServiceUnitTest) Test(org.junit.Test)

Example 23 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class SpeechToTextTest method testRecognizeWithCustomization.

/**
 * Test recognize with a customization id and version.
 *
 * <p>The recognition fixture is served back as the mocked response body; the test then checks the
 * recorded request (method and path including the {@code customization_id} and {@code version}
 * query parameters) and that the deserialized result matches the parsed fixture as a JSON tree.
 *
 * @throws FileNotFoundException the file not found exception
 * @throws InterruptedException the interrupted exception
 */
@Test
public void testRecognizeWithCustomization() throws FileNotFoundException, InterruptedException {
    final String customizationId = "foo";
    final String modelVersion = "version";

    // Load the fixture once: the raw text feeds the mock server, the parsed tree is the expectation.
    final FileInputStream fixtureStream = new FileInputStream("src/test/resources/speech_to_text/recognition.json");
    final String fixtureText = getStringFromInputStream(fixtureStream);
    final JsonObject expectedJson = new JsonParser().parse(fixtureText).getAsJsonObject();

    final MockResponse cannedResponse = new MockResponse()
            .addHeader(CONTENT_TYPE, HttpMediaType.APPLICATION_JSON)
            .setBody(fixtureText);
    server.enqueue(cannedResponse);

    final RecognizeOptions options = new RecognizeOptions.Builder()
            .audio(SAMPLE_WAV)
            .contentType(RecognizeOptions.ContentType.AUDIO_WAV)
            .customizationId(customizationId)
            .version(modelVersion)
            .build();
    final SpeechRecognitionResults actual = service.recognize(options).execute();
    final RecordedRequest recorded = server.takeRequest();

    assertEquals("POST", recorded.getMethod());
    assertEquals(PATH_RECOGNIZE + "?customization_id=" + customizationId + "&version=" + modelVersion, recorded.getPath());
    assertEquals(expectedJson, GSON.toJsonTree(actual));
}
Also used : RecordedRequest(okhttp3.mockwebserver.RecordedRequest) MockResponse(okhttp3.mockwebserver.MockResponse) JsonObject(com.google.gson.JsonObject) ByteString(okio.ByteString) FileInputStream(java.io.FileInputStream) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) JsonParser(com.google.gson.JsonParser) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions) WatsonServiceUnitTest(com.ibm.watson.developer_cloud.WatsonServiceUnitTest) Test(org.junit.Test)

Example 24 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class SpeechToText method recognize.

/**
 * Builds a POST request to the {@code v1/recognize} path from the given options and returns a
 * {@link ServiceCall} that yields {@link SpeechRecognitionResults}.<br>
 * <br>
 * Every option that is non-null is forwarded: the content type as the {@code Content-Type} header,
 * the audio as the request body, and each remaining option as a query parameter. When
 * {@code recognizeOptions} is {@code null}, none of these are set by this method.<br>
 * <br>
 * Here is an example of how to recognize an audio file:
 *
 * <pre>
 * SpeechToText service = new SpeechToText();
 * service.setUsernameAndPassword(&quot;USERNAME&quot;, &quot;PASSWORD&quot;);
 * service.setEndPoint(&quot;SERVICE_URL&quot;);
 *
 * // NOTE(review): assumes Builder.audio accepts an InputStream - confirm against RecognizeOptions.
 * RecognizeOptions options = new RecognizeOptions.Builder()
 *     .audio(new FileInputStream(&quot;sample1.wav&quot;))
 *     .contentType(RecognizeOptions.ContentType.AUDIO_WAV)
 *     .build();
 *
 * SpeechRecognitionResults results = service.recognize(options).execute();
 * System.out.println(results);
 * </pre>
 *
 * @param recognizeOptions the recognize options; a {@code null} value skips all option handling
 * @return a {@link ServiceCall} whose result is the {@link SpeechRecognitionResults}
 */
public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recognizeOptions) {
    String[] pathSegments = { "v1/recognize" };
    RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getEndPoint(), pathSegments));
    if (recognizeOptions != null) {
        if (recognizeOptions.contentType() != null) {
            builder.header("Content-Type", recognizeOptions.contentType());
        }
        // Query parameters are added in a fixed order; only non-null options are sent.
        if (recognizeOptions.model() != null) {
            builder.query("model", recognizeOptions.model());
        }
        if (recognizeOptions.customizationId() != null) {
            builder.query("customization_id", recognizeOptions.customizationId());
        }
        if (recognizeOptions.acousticCustomizationId() != null) {
            builder.query("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
        }
        if (recognizeOptions.customizationWeight() != null) {
            builder.query("customization_weight", String.valueOf(recognizeOptions.customizationWeight()));
        }
        if (recognizeOptions.version() != null) {
            builder.query("version", recognizeOptions.version());
        }
        if (recognizeOptions.inactivityTimeout() != null) {
            builder.query("inactivity_timeout", String.valueOf(recognizeOptions.inactivityTimeout()));
        }
        if (recognizeOptions.keywords() != null) {
            // Keywords are sent as a single comma-separated value.
            builder.query("keywords", RequestUtils.join(recognizeOptions.keywords(), ","));
        }
        if (recognizeOptions.keywordsThreshold() != null) {
            builder.query("keywords_threshold", String.valueOf(recognizeOptions.keywordsThreshold()));
        }
        if (recognizeOptions.maxAlternatives() != null) {
            builder.query("max_alternatives", String.valueOf(recognizeOptions.maxAlternatives()));
        }
        if (recognizeOptions.wordAlternativesThreshold() != null) {
            builder.query("word_alternatives_threshold", String.valueOf(recognizeOptions.wordAlternativesThreshold()));
        }
        if (recognizeOptions.wordConfidence() != null) {
            builder.query("word_confidence", String.valueOf(recognizeOptions.wordConfidence()));
        }
        if (recognizeOptions.timestamps() != null) {
            builder.query("timestamps", String.valueOf(recognizeOptions.timestamps()));
        }
        if (recognizeOptions.profanityFilter() != null) {
            builder.query("profanity_filter", String.valueOf(recognizeOptions.profanityFilter()));
        }
        if (recognizeOptions.smartFormatting() != null) {
            builder.query("smart_formatting", String.valueOf(recognizeOptions.smartFormatting()));
        }
        if (recognizeOptions.speakerLabels() != null) {
            builder.query("speaker_labels", String.valueOf(recognizeOptions.speakerLabels()));
        }
        // NOTE(review): contentType() is handed to MediaType.parse without the null check used for
        // the header above - confirm a null content type cannot reach this point when audio is set.
        if (recognizeOptions.audio() != null) {
            builder.body(InputStreamRequestBody.create(MediaType.parse(recognizeOptions.contentType()), recognizeOptions.audio()));
        }
    }
    return createServiceCall(builder.build(), ResponseConverterUtils.getObject(SpeechRecognitionResults.class));
}
Also used : RequestBuilder(com.ibm.watson.developer_cloud.http.RequestBuilder) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults)

Example 25 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class RecognizeUsingWebSocketsWithSpeakerLabelsExample method main.

/**
 * The main method.
 *
 * <p>Opens the two-speaker sample WAV, starts a WebSocket recognition with interim results and
 * speaker labels enabled, feeds every transcription into a {@code RecoTokens} collector, and then
 * waits up to one minute on {@code lock} (declared elsewhere in this class), which the callback
 * counts down on disconnect.
 *
 * @param args the arguments
 * @throws FileNotFoundException the file not found exception
 * @throws InterruptedException the interrupted exception
 */
public static void main(String[] args) throws FileNotFoundException, InterruptedException {
    final FileInputStream audioStream = new FileInputStream("src/test/resources/speech_to_text/twospeakers.wav");

    final Authenticator iamAuth = new IamAuthenticator("<iam_api_key>");
    final SpeechToText speechToText = new SpeechToText(iamAuth);

    final RecognizeWithWebsocketsOptions wsOptions = new RecognizeWithWebsocketsOptions.Builder()
            .audio(audioStream)
            .interimResults(true)
            .speakerLabels(true)
            .model(RecognizeOptions.Model.EN_US_NARROWBANDMODEL)
            .contentType(HttpMediaType.AUDIO_WAV)
            .build();

    final RecoTokens tokens = new RecoTokens();
    final BaseRecognizeCallback callback = new BaseRecognizeCallback() {

        @Override
        public void onTranscription(SpeechRecognitionResults speechResults) {
            // Hand each result to the collector.
            tokens.add(speechResults);
        }

        @Override
        public void onDisconnected() {
            // Release the waiting main thread once the socket closes.
            lock.countDown();
        }
    };

    speechToText.recognizeUsingWebSocket(wsOptions, callback);
    lock.await(1, TimeUnit.MINUTES);
}
Also used : IamAuthenticator(com.ibm.cloud.sdk.core.security.IamAuthenticator) BaseRecognizeCallback(com.ibm.watson.speech_to_text.v1.websocket.BaseRecognizeCallback) RecognizeWithWebsocketsOptions(com.ibm.watson.speech_to_text.v1.model.RecognizeWithWebsocketsOptions) FileInputStream(java.io.FileInputStream) IamAuthenticator(com.ibm.cloud.sdk.core.security.IamAuthenticator) Authenticator(com.ibm.cloud.sdk.core.security.Authenticator) SpeechRecognitionResults(com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults)

Aggregations

Test (org.junit.Test)18 SpeechRecognitionResults (com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults)17 RecognizeOptions (com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions)16 SpeechRecognitionResults (com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults)14 File (java.io.File)13 FileInputStream (java.io.FileInputStream)11 RecognizeOptions (com.ibm.watson.speech_to_text.v1.model.RecognizeOptions)8 WatsonServiceTest (com.ibm.watson.common.WatsonServiceTest)7 MockResponse (okhttp3.mockwebserver.MockResponse)7 RecordedRequest (okhttp3.mockwebserver.RecordedRequest)7 WatsonServiceUnitTest (com.ibm.watson.developer_cloud.WatsonServiceUnitTest)6 Authenticator (com.ibm.cloud.sdk.core.security.Authenticator)5 IamAuthenticator (com.ibm.cloud.sdk.core.security.IamAuthenticator)5 WatsonServiceTest (com.ibm.watson.developer_cloud.WatsonServiceTest)5 RecognizeWithWebsocketsOptions (com.ibm.watson.speech_to_text.v1.model.RecognizeWithWebsocketsOptions)5 BaseRecognizeCallback (com.ibm.watson.speech_to_text.v1.websocket.BaseRecognizeCallback)5 JsonObject (com.google.gson.JsonObject)4 JsonParser (com.google.gson.JsonParser)4 BaseRecognizeCallback (com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeCallback)4 ByteString (okio.ByteString)4