Search in sources:

Example 26 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

From the class SpeechToTextExample, method main.

/**
 * Entry point: transcribes a bundled sample WAV file with the Watson
 * Speech to Text service and prints the transcription to stdout.
 *
 * @param args unused command-line arguments
 * @throws FileNotFoundException if the sample audio file cannot be found
 */
public static void main(String[] args) throws FileNotFoundException {
    // IAM authentication; replace the placeholder with a real API key.
    Authenticator authenticator = new IamAuthenticator("<iam_api_key>");
    SpeechToText service = new SpeechToText(authenticator);

    // Build the recognition request for the sample recording.
    File sampleAudio = new File("src/test/resources/speech_to_text/sample1.wav");
    RecognizeOptions recognizeOptions =
        new RecognizeOptions.Builder()
            .audio(sampleAudio)
            .contentType(HttpMediaType.AUDIO_WAV)
            .build();

    // Execute the request synchronously and print the result.
    SpeechRecognitionResults results = service.recognize(recognizeOptions).execute().getResult();
    System.out.println(results);
}
Also used : IamAuthenticator(com.ibm.cloud.sdk.core.security.IamAuthenticator) File(java.io.File) Authenticator(com.ibm.cloud.sdk.core.security.Authenticator) IamAuthenticator(com.ibm.cloud.sdk.core.security.IamAuthenticator) SpeechRecognitionResults(com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.speech_to_text.v1.model.RecognizeOptions)

Example 27 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

From the class SpeechToTextIT, method testEndOfPhraseSilenceTimeWebSocket.

/**
 * Test end of phrase silence time over the WebSocket API.
 *
 * <p>Streams a WAV sample that contains a pause, with a short (0.2 s)
 * end-of-phrase silence time, and expects the pause to split the
 * transcription into more than one result.
 *
 * @throws FileNotFoundException the file not found exception
 * @throws InterruptedException the interrupted exception
 */
@Test
public void testEndOfPhraseSilenceTimeWebSocket() throws FileNotFoundException, InterruptedException {
    FileInputStream audio = new FileInputStream(SAMPLE_WAV_WITH_PAUSE);
    // endOfPhraseSilenceTime(0.2) tells the service to end a phrase after 0.2 s of silence.
    RecognizeWithWebsocketsOptions options = new RecognizeWithWebsocketsOptions.Builder().audio(audio).contentType(HttpMediaType.AUDIO_WAV).endOfPhraseSilenceTime(0.2).build();
    service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {

        @Override
        public void onConnected() {
            LOG.info("onConnected()");
        }

        @Override
        public void onDisconnected() {
            LOG.info("onDisconnected()");
        }

        @Override
        public void onTranscriptionComplete() {
            LOG.info("onTranscriptionComplete()");
            // Release the waiting test thread once transcription finished.
            lock.countDown();
        }

        @Override
        public void onError(Exception e) {
            // Errors are not asserted directly; the assertions below fail
            // if no transcription was captured before the latch released.
            // e.printStackTrace();
            lock.countDown();
        }

        @Override
        public void onTranscription(SpeechRecognitionResults speechResults) {
            // Keep the latest callback payload that actually carries results.
            if (speechResults != null && speechResults.getResults() != null) {
                asyncTranscriptionResults = speechResults;
            }
        }
    });
    // Bounded wait for onTranscriptionComplete()/onError() to count down the latch.
    lock.await(1, TimeUnit.MINUTES);
    assertNotNull(asyncTranscriptionResults);
    // The embedded pause plus the short silence time should produce >1 result.
    assertTrue(asyncTranscriptionResults.getResults().size() > 1);
    // Clear for later.
    asyncTranscriptionResults = null;
}
Also used : BaseRecognizeCallback(com.ibm.watson.speech_to_text.v1.websocket.BaseRecognizeCallback) RecognizeWithWebsocketsOptions(com.ibm.watson.speech_to_text.v1.model.RecognizeWithWebsocketsOptions) FileInputStream(java.io.FileInputStream) FileNotFoundException(java.io.FileNotFoundException) ExpectedException(org.junit.rules.ExpectedException) NotFoundException(com.ibm.cloud.sdk.core.service.exception.NotFoundException) SpeechRecognitionResults(com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults) WatsonServiceTest(com.ibm.watson.common.WatsonServiceTest) Test(org.junit.Test)

Example 28 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

From the class SpeechToTextIT, method testRecognizeFileStringRecognizeOptions.

/**
 * Verifies that a synchronous recognize call requesting timestamps, word
 * confidence, and audio metrics (profanity filter disabled) returns every
 * requested field in the results.
 *
 * @throws FileNotFoundException if the sample audio file cannot be found
 */
@Test
public void testRecognizeFileStringRecognizeOptions() throws FileNotFoundException {
    File sampleFile = new File(SAMPLE_WAV);
    // Request timestamps, word confidence and audio metrics for the sample.
    RecognizeOptions recognizeOptions =
        new RecognizeOptions.Builder()
            .audio(sampleFile)
            .timestamps(true)
            .wordConfidence(true)
            .model(EN_BROADBAND16K)
            .contentType(HttpMediaType.AUDIO_WAV)
            .profanityFilter(false)
            .audioMetrics(true)
            .build();
    SpeechRecognitionResults recognitionResults = service.recognize(recognizeOptions).execute().getResult();
    // Every requested field must be present on the first alternative.
    assertNotNull(recognitionResults.getResults().get(0).getAlternatives().get(0).getTranscript());
    assertNotNull(recognitionResults.getResults().get(0).getAlternatives().get(0).getTimestamps());
    assertNotNull(recognitionResults.getResults().get(0).getAlternatives().get(0).getWordConfidence());
    assertNotNull(recognitionResults.getAudioMetrics());
}
Also used : File(java.io.File) SpeechRecognitionResults(com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.speech_to_text.v1.model.RecognizeOptions) WatsonServiceTest(com.ibm.watson.common.WatsonServiceTest) Test(org.junit.Test)

Example 29 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

From the class SpeechToTextIT, method testRecognizeWebSocket.

/**
 * Test recognize over the WebSocket API.
 *
 * <p>Streams a WAV sample requesting interim results, word alternatives,
 * processing metrics and audio metrics, and checks that both final
 * transcription results and audio metrics were delivered.
 *
 * @throws FileNotFoundException the file not found exception
 * @throws InterruptedException the interrupted exception
 */
@Test
public void testRecognizeWebSocket() throws FileNotFoundException, InterruptedException {
    FileInputStream audio = new FileInputStream(SAMPLE_WAV);
    RecognizeWithWebsocketsOptions options = new RecognizeWithWebsocketsOptions.Builder().audio(audio).inactivityTimeout(40).timestamps(true).maxAlternatives(2).wordAlternativesThreshold(0.5f).model(EN_BROADBAND16K).contentType(HttpMediaType.AUDIO_WAV).interimResults(true).processingMetrics(true).processingMetricsInterval(0.2f).audioMetrics(true).build();
    service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {

        @Override
        public void onConnected() {
            LOG.info("onConnected()");
        }

        @Override
        public void onDisconnected() {
            LOG.info("onDisconnected()");
        }

        @Override
        public void onTranscriptionComplete() {
            LOG.info("onTranscriptionComplete()");
            // Release the waiting test thread once transcription finished.
            lock.countDown();
        }

        @Override
        public void onError(Exception e) {
            // Errors are not asserted directly; the assertions below fail
            // if no results were captured before the latch released.
            // e.printStackTrace();
            lock.countDown();
        }

        @Override
        public void onTranscription(SpeechRecognitionResults speechResults) {
            if (speechResults != null) {
                // Capture the payload carrying the final transcription.
                if (speechResults.getResults() != null && speechResults.getResults().get(0).isXFinal()) {
                    asyncTranscriptionResults = speechResults;
                }
                // Audio metrics may arrive in a separate payload; capture it too.
                if (speechResults.getAudioMetrics() != null) {
                    asyncAudioMetricsResults = speechResults;
                }
            // System.out.println(speechResults);
            }
        }
    });
    // Bounded wait for onTranscriptionComplete()/onError() to count down the latch.
    lock.await(3, TimeUnit.MINUTES);
    assertNotNull(asyncTranscriptionResults);
    assertNotNull(asyncAudioMetricsResults);
    // Word alternatives were requested (threshold 0.5) and must be non-empty.
    List<WordAlternativeResults> wordAlternatives = asyncTranscriptionResults.getResults().get(asyncTranscriptionResults.getResultIndex().intValue()).getWordAlternatives();
    assertTrue(wordAlternatives != null && !wordAlternatives.isEmpty());
    assertNotNull(wordAlternatives.get(0).getAlternatives());
    assertNotNull(asyncAudioMetricsResults.getAudioMetrics());
    // Clear for later tests.
    asyncTranscriptionResults = null;
    asyncAudioMetricsResults = null;
}
Also used : BaseRecognizeCallback(com.ibm.watson.speech_to_text.v1.websocket.BaseRecognizeCallback) WordAlternativeResults(com.ibm.watson.speech_to_text.v1.model.WordAlternativeResults) RecognizeWithWebsocketsOptions(com.ibm.watson.speech_to_text.v1.model.RecognizeWithWebsocketsOptions) FileInputStream(java.io.FileInputStream) FileNotFoundException(java.io.FileNotFoundException) ExpectedException(org.junit.rules.ExpectedException) NotFoundException(com.ibm.cloud.sdk.core.service.exception.NotFoundException) SpeechRecognitionResults(com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults) WatsonServiceTest(com.ibm.watson.common.WatsonServiceTest) Test(org.junit.Test)

Example 30 with SpeechRecognitionResults

use of com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

From the class SpeechToTextTest, method testRecognizeWOptions.

// Unit test: recognize() with a fully-populated options model against a mock
// server; verifies the HTTP method, path, and every query parameter sent.
@Test
public void testRecognizeWOptions() throws Throwable {
    // Canned JSON payload the mock server returns for /v1/recognize.
    String mockResponseBody = "{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [[\"timestamps\"]], \"word_confidence\": [[\"wordConfidence\"]]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}";
    String expectedPath = "/v1/recognize";
    server.enqueue(new MockResponse().setHeader("Content-type", "application/json").setResponseCode(200).setBody(mockResponseBody));

    // Same keyword list is passed to the builder and used to verify the query.
    java.util.List<String> keywords = new java.util.ArrayList<String>(java.util.Arrays.asList("testString"));

    // Exercise every builder option so each query parameter can be checked below.
    RecognizeOptions options =
        new RecognizeOptions.Builder()
            .audio(TestUtilities.createMockStream("This is a mock file."))
            .contentType("application/octet-stream")
            .model("en-US_BroadbandModel")
            .languageCustomizationId("testString")
            .acousticCustomizationId("testString")
            .baseModelVersion("testString")
            .customizationWeight(Double.valueOf("72.5"))
            .inactivityTimeout(Long.valueOf("26"))
            .keywords(keywords)
            .keywordsThreshold(Float.valueOf("36.0"))
            .maxAlternatives(Long.valueOf("26"))
            .wordAlternativesThreshold(Float.valueOf("36.0"))
            .wordConfidence(false)
            .timestamps(false)
            .profanityFilter(true)
            .smartFormatting(false)
            .speakerLabels(false)
            .customizationId("testString")
            .grammarName("testString")
            .redaction(false)
            .audioMetrics(false)
            .endOfPhraseSilenceTime(Double.valueOf("72.5"))
            .splitTranscriptAtPhraseEnd(false)
            .speechDetectorSensitivity(Float.valueOf("36.0"))
            .backgroundAudioSuppression(Float.valueOf("36.0"))
            .lowLatency(false)
            .build();

    // Invoke recognize() and make sure a non-null result is deserialized.
    Response<SpeechRecognitionResults> recognizeResponse = speechToTextService.recognize(options).execute();
    assertNotNull(recognizeResponse);
    SpeechRecognitionResults result = recognizeResponse.getResult();
    assertNotNull(result);

    // Inspect the request the service actually sent to the mock server.
    RecordedRequest recordedRequest = server.takeRequest();
    assertNotNull(recordedRequest);
    assertEquals(recordedRequest.getMethod(), "POST");
    // Verify request path.
    String requestPath = TestUtilities.parseReqPath(recordedRequest);
    assertEquals(requestPath, expectedPath);
    // Verify each query parameter matches the builder value.
    Map<String, String> queryParams = TestUtilities.parseQueryString(recordedRequest);
    assertNotNull(queryParams);
    assertEquals(queryParams.get("model"), "en-US_BroadbandModel");
    assertEquals(queryParams.get("language_customization_id"), "testString");
    assertEquals(queryParams.get("acoustic_customization_id"), "testString");
    assertEquals(queryParams.get("base_model_version"), "testString");
    assertEquals(Double.valueOf(queryParams.get("customization_weight")), Double.valueOf("72.5"));
    assertEquals(Long.valueOf(queryParams.get("inactivity_timeout")), Long.valueOf("26"));
    assertEquals(queryParams.get("keywords"), RequestUtils.join(keywords, ","));
    assertEquals(Float.valueOf(queryParams.get("keywords_threshold")), Float.valueOf("36.0"));
    assertEquals(Long.valueOf(queryParams.get("max_alternatives")), Long.valueOf("26"));
    assertEquals(Float.valueOf(queryParams.get("word_alternatives_threshold")), Float.valueOf("36.0"));
    assertEquals(Boolean.valueOf(queryParams.get("word_confidence")), Boolean.valueOf(false));
    assertEquals(Boolean.valueOf(queryParams.get("timestamps")), Boolean.valueOf(false));
    assertEquals(Boolean.valueOf(queryParams.get("profanity_filter")), Boolean.valueOf(true));
    assertEquals(Boolean.valueOf(queryParams.get("smart_formatting")), Boolean.valueOf(false));
    assertEquals(Boolean.valueOf(queryParams.get("speaker_labels")), Boolean.valueOf(false));
    assertEquals(queryParams.get("customization_id"), "testString");
    assertEquals(queryParams.get("grammar_name"), "testString");
    assertEquals(Boolean.valueOf(queryParams.get("redaction")), Boolean.valueOf(false));
    assertEquals(Boolean.valueOf(queryParams.get("audio_metrics")), Boolean.valueOf(false));
    assertEquals(Double.valueOf(queryParams.get("end_of_phrase_silence_time")), Double.valueOf("72.5"));
    assertEquals(Boolean.valueOf(queryParams.get("split_transcript_at_phrase_end")), Boolean.valueOf(false));
    assertEquals(Float.valueOf(queryParams.get("speech_detector_sensitivity")), Float.valueOf("36.0"));
    assertEquals(Float.valueOf(queryParams.get("background_audio_suppression")), Float.valueOf("36.0"));
    assertEquals(Boolean.valueOf(queryParams.get("low_latency")), Boolean.valueOf(false));
}
Also used : RecordedRequest(okhttp3.mockwebserver.RecordedRequest) MockResponse(okhttp3.mockwebserver.MockResponse) SpeechRecognitionResults(com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.speech_to_text.v1.model.RecognizeOptions) Test(org.testng.annotations.Test)

Aggregations

Test (org.junit.Test)18 SpeechRecognitionResults (com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults)17 RecognizeOptions (com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions)16 SpeechRecognitionResults (com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults)14 File (java.io.File)13 FileInputStream (java.io.FileInputStream)11 RecognizeOptions (com.ibm.watson.speech_to_text.v1.model.RecognizeOptions)8 WatsonServiceTest (com.ibm.watson.common.WatsonServiceTest)7 MockResponse (okhttp3.mockwebserver.MockResponse)7 RecordedRequest (okhttp3.mockwebserver.RecordedRequest)7 WatsonServiceUnitTest (com.ibm.watson.developer_cloud.WatsonServiceUnitTest)6 Authenticator (com.ibm.cloud.sdk.core.security.Authenticator)5 IamAuthenticator (com.ibm.cloud.sdk.core.security.IamAuthenticator)5 WatsonServiceTest (com.ibm.watson.developer_cloud.WatsonServiceTest)5 RecognizeWithWebsocketsOptions (com.ibm.watson.speech_to_text.v1.model.RecognizeWithWebsocketsOptions)5 BaseRecognizeCallback (com.ibm.watson.speech_to_text.v1.websocket.BaseRecognizeCallback)5 JsonObject (com.google.gson.JsonObject)4 JsonParser (com.google.gson.JsonParser)4 BaseRecognizeCallback (com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeCallback)4 ByteString (okio.ByteString)4