Search in sources :

Example 1 with SpeechRecognitionResults

use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class SpeechToTextIT method testRecognizeMultipleSpeakers.

/**
 * Recognizes the two-speaker recording with speaker labels switched on and
 * checks that the service response contains at least one speaker label.
 *
 * @throws FileNotFoundException declared for the audio file handed to the options builder
 */
@Test
public void testRecognizeMultipleSpeakers() throws FileNotFoundException {
    File twoSpeakerRecording = new File(TWO_SPEAKERS_WAV);
    RecognizeOptions recognizeOptions = new RecognizeOptions.Builder()
            .audio(twoSpeakerRecording)
            .speakerLabels(true)
            .model(RecognizeOptions.Model.EN_US_NARROWBANDMODEL)
            .contentType(HttpMediaType.AUDIO_WAV)
            .build();

    SpeechRecognitionResults response = service.recognize(recognizeOptions).execute();

    // The label list must be present and non-empty.
    assertNotNull(response.getSpeakerLabels());
    assertTrue(!response.getSpeakerLabels().isEmpty());
}
Also used : File(java.io.File) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions) WatsonServiceTest(com.ibm.watson.developer_cloud.WatsonServiceTest) Test(org.junit.Test)

Example 2 with SpeechRecognitionResults

use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class SpeechToTextIT method testRecognizeWebSocket.

/**
 * Streams the sample WAV file through the WebSocket recognize call and checks
 * that the last final result captured by the callback carries word alternatives.
 *
 * <p>The callback counts down {@code lock} when the transcription completes or an
 * error is reported; the test thread waits on {@code lock} for at most two minutes
 * before running its assertions.
 *
 * @throws FileNotFoundException the file not found exception
 * @throws InterruptedException the interrupted exception
 */
@Test
public void testRecognizeWebSocket() throws FileNotFoundException, InterruptedException {
    FileInputStream audio = new FileInputStream(SAMPLE_WAV);
    RecognizeOptions options = new RecognizeOptions.Builder().audio(audio).interimResults(true).inactivityTimeout(40).timestamps(true).maxAlternatives(2).wordAlternativesThreshold(0.5f).model(EN_BROADBAND16K).contentType(HttpMediaType.AUDIO_WAV).build();
    service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {

        @Override
        public void onConnected() {
            LOG.info("onConnected()");
        }

        @Override
        public void onDisconnected() {
            LOG.info("onDisconnected()");
        }

        @Override
        public void onTranscriptionComplete() {
            LOG.info("onTranscriptionComplete()");
            lock.countDown();
        }

        @Override
        public void onError(Exception e) {
            e.printStackTrace();
            // Release the waiting test thread so a failure does not block for the full timeout.
            lock.countDown();
        }

        @Override
        public void onTranscription(SpeechRecognitionResults speechResults) {
            // Guard BEFORE dereferencing: previously getResultIndex() was read ahead of
            // the null check, so a null payload raised an NPE on the callback thread.
            if (speechResults == null || speechResults.getResultIndex() == null) {
                return;
            }
            int resultIndex = speechResults.getResultIndex().intValue();
            // Only final results are kept; interim ones are ignored.
            if (speechResults.getResults().get(resultIndex).isFinalResults()) {
                asyncResults = speechResults;
            }
        }
    });
    lock.await(2, TimeUnit.MINUTES);
    assertNotNull(asyncResults);
    List<WordAlternativeResults> wordAlternatives = asyncResults.getResults().get(asyncResults.getResultIndex().intValue()).getWordAlternatives();
    assertTrue(wordAlternatives != null && !wordAlternatives.isEmpty());
    assertNotNull(wordAlternatives.get(0).getAlternatives());
}
Also used : BaseRecognizeCallback(com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeCallback) WordAlternativeResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.WordAlternativeResults) FileInputStream(java.io.FileInputStream) FileNotFoundException(java.io.FileNotFoundException) ExpectedException(org.junit.rules.ExpectedException) NotFoundException(com.ibm.watson.developer_cloud.service.exception.NotFoundException) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions) WatsonServiceTest(com.ibm.watson.developer_cloud.WatsonServiceTest) Test(org.junit.Test)

Example 3 with SpeechRecognitionResults

use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class SpeechToTextIT method testRecognizeFileStringRecognizeOptions.

/**
 * Recognizes the sample WAV file with timestamps and word confidence requested,
 * then checks that the first alternative of the first result exposes a
 * transcript, timestamps and word-confidence data.
 *
 * @throws FileNotFoundException declared for the audio file handed to the options builder
 */
@Test
public void testRecognizeFileStringRecognizeOptions() throws FileNotFoundException {
    File sampleRecording = new File(SAMPLE_WAV);
    RecognizeOptions recognizeOptions = new RecognizeOptions.Builder()
            .audio(sampleRecording)
            .timestamps(true)
            .wordConfidence(true)
            .model(EN_BROADBAND16K)
            .contentType(HttpMediaType.AUDIO_WAV)
            .profanityFilter(false)
            .build();

    SpeechRecognitionResults response = service.recognize(recognizeOptions).execute();

    // Every requested detail must be present on the top alternative.
    assertNotNull(response.getResults().get(0).getAlternatives().get(0).getTranscript());
    assertNotNull(response.getResults().get(0).getAlternatives().get(0).getTimestamps());
    assertNotNull(response.getResults().get(0).getAlternatives().get(0).getWordConfidence());
}
Also used : File(java.io.File) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions) WatsonServiceTest(com.ibm.watson.developer_cloud.WatsonServiceTest) Test(org.junit.Test)

Example 4 with SpeechRecognitionResults

use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class SpeechToTextIT method testRecognizeKeywords.

/**
 * Recognizes the sample WAV file while spotting two keywords and checks that
 * both are reported with the expected normalized text, confidence and timing
 * (within the tolerances given in the assertions).
 *
 * @throws FileNotFoundException declared for the audio file handed to the options builder
 */
@Test
public void testRecognizeKeywords() throws FileNotFoundException {
    String firstKeyword = "rain";
    String secondKeyword = "tornadoes";
    File sampleRecording = new File(SAMPLE_WAV);
    RecognizeOptions recognizeOptions = new RecognizeOptions.Builder()
            .audio(sampleRecording)
            .contentType(HttpMediaType.AUDIO_WAV)
            .model(RecognizeOptions.Model.EN_US_NARROWBANDMODEL)
            .inactivityTimeout(500)
            .keywords(Arrays.asList(firstKeyword, secondKeyword))
            .keywordsThreshold(0.5f)
            .build();

    SpeechRecognitionResults response = service.recognize(recognizeOptions).execute();
    SpeechRecognitionResult firstResult = response.getResults().get(0);

    // Exactly the two requested keywords are reported.
    assertEquals(2, firstResult.getKeywordsResult().size());
    assertTrue(firstResult.getKeywordsResult().containsKey(firstKeyword));
    assertTrue(firstResult.getKeywordsResult().containsKey(secondKeyword));

    // First occurrence of the first keyword.
    KeywordResult firstHit = firstResult.getKeywordsResult().get(firstKeyword).get(0);
    assertEquals(firstKeyword, firstHit.getNormalizedText());
    assertEquals(0.9, firstHit.getConfidence(), 0.1);
    assertEquals(5.58, firstHit.getStartTime(), 1.0);
    assertEquals(6.14, firstHit.getEndTime(), 1.0);

    // First occurrence of the second keyword.
    KeywordResult secondHit = firstResult.getKeywordsResult().get(secondKeyword).get(0);
    assertEquals(secondKeyword, secondHit.getNormalizedText());
    assertEquals(0.9, secondHit.getConfidence(), 0.1);
    assertEquals(4.42, secondHit.getStartTime(), 1.0);
    assertEquals(5.04, secondHit.getEndTime(), 1.0);
}
Also used : File(java.io.File) SpeechRecognitionResult(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResult) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions) KeywordResult(com.ibm.watson.developer_cloud.speech_to_text.v1.model.KeywordResult) WatsonServiceTest(com.ibm.watson.developer_cloud.WatsonServiceTest) Test(org.junit.Test)

Example 5 with SpeechRecognitionResults

use of com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults in project java-sdk by watson-developer-cloud.

the class CustomizationExample method main.

/**
 * Walks through the language-model customization workflow: creates a custom
 * language model, adds a corpus and two user words, lists the words, trains the
 * model, and finally transcribes the same audio without and with the custom
 * model. The custom model is deleted in a {@code finally} block, so it is removed
 * even when an earlier step fails.
 *
 * @param args the arguments (not used)
 * @throws InterruptedException if the thread is interrupted while sleeping between status polls
 */
public static void main(String[] args) throws InterruptedException {
    SpeechToText service = new SpeechToText();
    service.setUsernameAndPassword("<username>", "<password>");
    // Create language model
    CreateLanguageModelOptions createOptions = new CreateLanguageModelOptions.Builder().name("IEEE-permanent").baseModelName("en-US_BroadbandModel").description("My customization").build();
    LanguageModel myModel = service.createLanguageModel(createOptions).execute();
    String id = myModel.getCustomizationId();
    try {
        // Add a corpus file to the model
        AddCorpusOptions addOptions = new AddCorpusOptions.Builder().customizationId(id).corpusName("corpus-1").corpusFile(new File(CORPUS_FILE)).corpusFileContentType(HttpMediaType.TEXT_PLAIN).allowOverwrite(false).build();
        service.addCorpus(addOptions).execute();
        // Poll the corpus status: at most 30 checks, 5 seconds apart.
        // equals() instead of != so the comparison is by value whether the status
        // constants are enums or Strings.
        GetCorpusOptions getOptions = new GetCorpusOptions.Builder().customizationId(id).corpusName("corpus-1").build();
        for (int x = 0; x < 30 && !Status.ANALYZED.equals(service.getCorpus(getOptions).execute().getStatus()); x++) {
            Thread.sleep(5000);
        }
        // Get all corpora
        ListCorporaOptions listCorporaOptions = new ListCorporaOptions.Builder().customizationId(id).build();
        Corpora corpora = service.listCorpora(listCorporaOptions).execute();
        System.out.println(corpora);
        // Get specific corpus
        Corpus corpus = service.getCorpus(getOptions).execute();
        System.out.println(corpus);
        // Now add some user words to the custom model
        service.addWord(new AddWordOptions.Builder().customizationId(id).wordName("IEEE").word("IEEE").displayAs("IEEE").addSoundsLike("I. triple E.").build()).execute();
        // NOTE(review): displayAs("IEEE") for the word "hhonors" looks like a copy-paste
        // slip from the line above - confirm the intended display form.
        service.addWord(new AddWordOptions.Builder().customizationId(id).wordName("hhonors").word("hhonors").displayAs("IEEE").addSoundsLike("H. honors").addSoundsLike("Hilton honors").build()).execute();
        // Display all words in the words resource (OOVs from the corpus and
        // new words just added) in ascending alphabetical order
        ListWordsOptions listWordsAlphabeticalOptions = new ListWordsOptions.Builder().customizationId(id).wordType(ListWordsOptions.WordType.ALL).build();
        Words words = service.listWords(listWordsAlphabeticalOptions).execute();
        System.out.println("\nASCENDING ALPHABETICAL ORDER:");
        System.out.println(words);
        // Then display all words in the words resource in descending order
        // by count
        ListWordsOptions listWordsCountOptions = new ListWordsOptions.Builder().customizationId(id).wordType(ListWordsOptions.WordType.ALL).sort("-" + ListWordsOptions.Sort.COUNT).build();
        words = service.listWords(listWordsCountOptions).execute();
        System.out.println("\nDESCENDING ORDER BY COUNT:");
        System.out.println(words);
        // Now start training of the model
        TrainLanguageModelOptions trainOptions = new TrainLanguageModelOptions.Builder().customizationId(id).wordTypeToAdd(TrainLanguageModelOptions.WordTypeToAdd.ALL).build();
        service.trainLanguageModel(trainOptions).execute();
        // Poll the model status: at most 30 checks, 10 seconds apart. The request
        // options are loop-invariant, and get their own name because getOptions
        // (the corpus request above) is still in scope here.
        GetLanguageModelOptions getModelOptions = new GetLanguageModelOptions.Builder().customizationId(id).build();
        for (int x = 0; x < 30 && !LanguageModel.Status.AVAILABLE.equals(myModel.getStatus()); x++) {
            // Sleep first, then refresh, so the loop exits as soon as the model is available.
            Thread.sleep(10000);
            myModel = service.getLanguageModel(getModelOptions).execute();
        }
        File audio = new File(AUDIO_FILE);
        RecognizeOptions recognizeOptionsWithModel = new RecognizeOptions.Builder().model(RecognizeOptions.Model.EN_US_BROADBANDMODEL).customizationId(id).audio(audio).contentType(HttpMediaType.AUDIO_WAV).build();
        RecognizeOptions recognizeOptionsWithoutModel = new RecognizeOptions.Builder().model(RecognizeOptions.Model.EN_US_BROADBANDMODEL).audio(audio).contentType(HttpMediaType.AUDIO_WAV).build();
        // First decode WITHOUT the custom model
        SpeechRecognitionResults transcript = service.recognize(recognizeOptionsWithoutModel).execute();
        System.out.println(transcript);
        // Now decode with the custom model
        transcript = service.recognize(recognizeOptionsWithModel).execute();
        System.out.println(transcript);
    } finally {
        // Always clean up the custom model created above.
        DeleteLanguageModelOptions deleteOptions = new DeleteLanguageModelOptions.Builder().customizationId(id).build();
        service.deleteLanguageModel(deleteOptions).execute();
    }
}
Also used : CreateLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.CreateLanguageModelOptions) Corpus(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpus) DeleteLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.DeleteLanguageModelOptions) AddWordOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddWordOptions) LanguageModel(com.ibm.watson.developer_cloud.speech_to_text.v1.model.LanguageModel) GetCorpusOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.GetCorpusOptions) AddCorpusOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddCorpusOptions) Corpora(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpora) GetLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.GetLanguageModelOptions) Words(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Words) ListWordsOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListWordsOptions) TrainLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.TrainLanguageModelOptions) File(java.io.File) ListCorporaOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListCorporaOptions) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions)

Aggregations

SpeechRecognitionResults (com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults): 17; RecognizeOptions (com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions): 16; Test (org.junit.Test): 11; FileInputStream (java.io.FileInputStream): 7; WatsonServiceUnitTest (com.ibm.watson.developer_cloud.WatsonServiceUnitTest): 6; File (java.io.File): 6; MockResponse (okhttp3.mockwebserver.MockResponse): 6; RecordedRequest (okhttp3.mockwebserver.RecordedRequest): 6; WatsonServiceTest (com.ibm.watson.developer_cloud.WatsonServiceTest): 5; JsonObject (com.google.gson.JsonObject): 4; JsonParser (com.google.gson.JsonParser): 4; BaseRecognizeCallback (com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeCallback): 4; ByteString (okio.ByteString): 4; WordAlternativeResults (com.ibm.watson.developer_cloud.speech_to_text.v1.model.WordAlternativeResults): 2; RequestBuilder (com.ibm.watson.developer_cloud.http.RequestBuilder): 1; NotFoundException (com.ibm.watson.developer_cloud.service.exception.NotFoundException): 1; AddCorpusOptions (com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddCorpusOptions): 1; AddWordOptions (com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddWordOptions): 1; Corpora (com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpora): 1; Corpus (com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpus): 1