Search in sources :

Example 6 with RecognizeOptions

use of com.ibm.watson.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.

In the class CustomizationExample, the method main:

/**
 * Demonstrates Speech to Text language-model customization: creates a custom
 * language model, adds a corpus and user words, trains the model, then
 * compares recognition results with and without the custom model. The model
 * is deleted when the run finishes, even on failure.
 *
 * @param args the arguments (unused)
 * @throws InterruptedException if interrupted while polling for service status
 */
public static void main(String[] args) throws InterruptedException {
    SpeechToText service = new SpeechToText();
    service.setUsernameAndPassword("<username>", "<password>");

    // Create language model
    CreateLanguageModelOptions createOptions =
        new CreateLanguageModelOptions.Builder()
            .name("IEEE-permanent")
            .baseModelName("en-US_BroadbandModel")
            .description("My customization")
            .build();
    LanguageModel myModel = service.createLanguageModel(createOptions).execute();
    String id = myModel.getCustomizationId();
    try {
        // Add a corpus file to the model
        AddCorpusOptions addOptions =
            new AddCorpusOptions.Builder()
                .customizationId(id)
                .corpusName("corpus-1")
                .corpusFile(new File(CORPUS_FILE))
                .corpusFileContentType(HttpMediaType.TEXT_PLAIN)
                .allowOverwrite(false)
                .build();
        service.addCorpus(addOptions).execute();

        // Poll (up to 30 tries, 5s apart) until the corpus has been analyzed
        GetCorpusOptions getOptions =
            new GetCorpusOptions.Builder().customizationId(id).corpusName("corpus-1").build();
        for (int x = 0; x < 30 && (service.getCorpus(getOptions).execute()).getStatus() != Status.ANALYZED; x++) {
            Thread.sleep(5000);
        }

        // Get all corpora
        ListCorporaOptions listCorporaOptions = new ListCorporaOptions.Builder().customizationId(id).build();
        Corpora corpora = service.listCorpora(listCorporaOptions).execute();
        System.out.println(corpora);

        // Get specific corpus
        Corpus corpus = service.getCorpus(getOptions).execute();
        System.out.println(corpus);

        // Now add some user words to the custom model
        service.addWord(
            new AddWordOptions.Builder()
                .customizationId(id)
                .wordName("IEEE")
                .word("IEEE")
                .displayAs("IEEE")
                .addSoundsLike("I. triple E.")
                .build())
            .execute();
        // BUGFIX: displayAs was "IEEE" here (copy-paste error from the call above);
        // the word "hhonors" should display as "HHonors".
        service.addWord(
            new AddWordOptions.Builder()
                .customizationId(id)
                .wordName("hhonors")
                .word("hhonors")
                .displayAs("HHonors")
                .addSoundsLike("H. honors")
                .addSoundsLike("Hilton honors")
                .build())
            .execute();

        // Display all words in the words resource (OOVs from the corpus and
        // new words just added) in ascending alphabetical order
        ListWordsOptions listWordsAlphabeticalOptions =
            new ListWordsOptions.Builder()
                .customizationId(id)
                .wordType(ListWordsOptions.WordType.ALL)
                .build();
        Words words = service.listWords(listWordsAlphabeticalOptions).execute();
        System.out.println("\nASCENDING ALPHABETICAL ORDER:");
        System.out.println(words);

        // Then display all words in the words resource in descending order
        // by count
        ListWordsOptions listWordsCountOptions =
            new ListWordsOptions.Builder()
                .customizationId(id)
                .wordType(ListWordsOptions.WordType.ALL)
                .sort("-" + ListWordsOptions.Sort.COUNT)
                .build();
        words = service.listWords(listWordsCountOptions).execute();
        System.out.println("\nDESCENDING ORDER BY COUNT:");
        System.out.println(words);

        // Now start training of the model
        TrainLanguageModelOptions trainOptions =
            new TrainLanguageModelOptions.Builder()
                .customizationId(id)
                .wordTypeToAdd(TrainLanguageModelOptions.WordTypeToAdd.ALL)
                .build();
        service.trainLanguageModel(trainOptions).execute();

        // Poll (up to 30 tries, 10s apart) until training completes
        for (int x = 0; x < 30 && myModel.getStatus() != LanguageModel.Status.AVAILABLE; x++) {
            GetLanguageModelOptions getLanguageModelOptions =
                new GetLanguageModelOptions.Builder().customizationId(id).build();
            myModel = service.getLanguageModel(getLanguageModelOptions).execute();
            Thread.sleep(10000);
        }

        File audio = new File(AUDIO_FILE);
        RecognizeOptions recognizeOptionsWithModel =
            new RecognizeOptions.Builder()
                .model(RecognizeOptions.EN_US_BROADBANDMODEL)
                .customizationId(id)
                .audio(audio)
                .contentType(HttpMediaType.AUDIO_WAV)
                .build();
        RecognizeOptions recognizeOptionsWithoutModel =
            new RecognizeOptions.Builder()
                .model(RecognizeOptions.EN_US_BROADBANDMODEL)
                .audio(audio)
                .contentType(HttpMediaType.AUDIO_WAV)
                .build();

        // First decode WITHOUT the custom model
        SpeechRecognitionResults transcript = service.recognize(recognizeOptionsWithoutModel).execute();
        System.out.println(transcript);

        // Now decode with the custom model
        transcript = service.recognize(recognizeOptionsWithModel).execute();
        System.out.println(transcript);
    } finally {
        // Always clean up the custom model, even if an earlier step failed
        DeleteLanguageModelOptions deleteOptions =
            new DeleteLanguageModelOptions.Builder().customizationId(id).build();
        service.deleteLanguageModel(deleteOptions).execute();
    }
}
Also used : CreateLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.CreateLanguageModelOptions) Corpus(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpus) DeleteLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.DeleteLanguageModelOptions) AddWordOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddWordOptions) LanguageModel(com.ibm.watson.developer_cloud.speech_to_text.v1.model.LanguageModel) GetCorpusOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.GetCorpusOptions) AddCorpusOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddCorpusOptions) Corpora(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpora) GetLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.GetLanguageModelOptions) Words(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Words) ListWordsOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListWordsOptions) TrainLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.TrainLanguageModelOptions) File(java.io.File) ListCorporaOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListCorporaOptions) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions)

Example 7 with RecognizeOptions

use of com.ibm.watson.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.

In the class MicrophoneWithWebSocketsExample, the method main:

/**
 * Streams live microphone audio (16 kHz, 16-bit, signed PCM, mono) to the
 * Speech to Text service over a WebSocket for 30 seconds, printing interim
 * transcription results as they arrive.
 *
 * @param args the arguments (unused)
 * @throws Exception if the audio line cannot be opened or the wait is interrupted
 */
public static void main(final String[] args) throws Exception {
    SpeechToText service = new SpeechToText();
    service.setUsernameAndPassword("<username>", "<password>");

    // Signed PCM AudioFormat with 16kHz, 16 bit sample size, mono
    int sampleRate = 16000;
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    if (!AudioSystem.isLineSupported(info)) {
        // BUGFIX: report the failure on stderr and exit with a nonzero status
        // (previously this printed to stdout and exited with 0, signaling success).
        System.err.println("Line not supported");
        System.exit(1);
    }
    TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
    line.open(format);
    line.start();
    AudioInputStream audio = new AudioInputStream(line);

    RecognizeOptions options =
        new RecognizeOptions.Builder()
            .audio(audio)
            .interimResults(true)
            .timestamps(true)
            .wordConfidence(true)
            // AUDIO_RAW requires the sample rate to be given explicitly
            .contentType(HttpMediaType.AUDIO_RAW + ";rate=" + sampleRate)
            .build();
    service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {

        @Override
        public void onTranscription(SpeechRecognitionResults speechResults) {
            System.out.println(speechResults);
        }
    });

    System.out.println("Listening to your voice for the next 30s...");
    Thread.sleep(30 * 1000);

    // closing the WebSockets underlying InputStream will close the WebSocket itself.
    line.stop();
    line.close();
    System.out.println("Fin.");
}
Also used : BaseRecognizeCallback(com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeCallback) TargetDataLine(javax.sound.sampled.TargetDataLine) DataLine(javax.sound.sampled.DataLine) TargetDataLine(javax.sound.sampled.TargetDataLine) AudioInputStream(javax.sound.sampled.AudioInputStream) AudioFormat(javax.sound.sampled.AudioFormat) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions)

Example 8 with RecognizeOptions

use of com.ibm.watson.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.

In the class SpeechToText, the method recognizeUsingWebSocket:

/**
 * Sends audio and returns transcription results for recognition requests over a WebSocket
 * connection. Requests and responses are enabled over a single TCP connection that abstracts much
 * of the complexity of the request to offer efficient implementation, low latency, high
 * throughput, and an asynchronous response. By default, only final results are returned for any
 * request; to enable interim results, set the interimResults parameter to true.
 *
 * <p>The service imposes a data size limit of 100 MB per utterance (per recognition request). You
 * can send multiple utterances over a single WebSocket connection. The service automatically
 * detects the endianness of the incoming audio and, for audio that includes multiple channels,
 * downmixes the audio to one-channel mono during transcoding. (For the audio/l16 format, you can
 * specify the endianness.)
 *
 * @param recognizeOptions the recognize options; must be non-null and carry non-null audio
 * @param callback the {@link RecognizeCallback} instance where results will be sent
 * @return the {@link WebSocket}
 */
public WebSocket recognizeUsingWebSocket(RecognizeWithWebsocketsOptions recognizeOptions, RecognizeCallback callback) {
    com.ibm.cloud.sdk.core.util.Validator.notNull(recognizeOptions, "recognizeOptions cannot be null");
    com.ibm.cloud.sdk.core.util.Validator.notNull(recognizeOptions.audio(), "audio cannot be null");
    com.ibm.cloud.sdk.core.util.Validator.notNull(callback, "callback cannot be null");

    // Build the /v1/recognize URL, adding only the query parameters that were set.
    HttpUrl.Builder urlBuilder = HttpUrl.parse(getServiceUrl() + "/v1/recognize").newBuilder();
    if (recognizeOptions.model() != null) {
        urlBuilder.addQueryParameter("model", recognizeOptions.model());
    }
    if (recognizeOptions.customizationId() != null) {
        urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId());
    }
    if (recognizeOptions.languageCustomizationId() != null) {
        urlBuilder.addQueryParameter("language_customization_id", recognizeOptions.languageCustomizationId());
    }
    if (recognizeOptions.acousticCustomizationId() != null) {
        urlBuilder.addQueryParameter("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
    }
    if (recognizeOptions.baseModelVersion() != null) {
        urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion());
    }

    // Rewrite the scheme to its WebSocket equivalent. FIX: previously only
    // "https://" was mapped to "wss://"; a plain-http service URL was left
    // unrewritten, so "http://" is now mapped to "ws://" for symmetry.
    String url = urlBuilder.toString().replace("https://", "wss://").replace("http://", "ws://");

    Request.Builder builder = new Request.Builder().url(url);
    setAuthentication(builder);
    setDefaultHeaders(builder);
    OkHttpClient client = configureHttpClient();
    return client.newWebSocket(builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback));
}
Also used : OkHttpClient(okhttp3.OkHttpClient) SpeechToTextWebSocketListener(com.ibm.watson.speech_to_text.v1.websocket.SpeechToTextWebSocketListener) Request(okhttp3.Request) HttpUrl(okhttp3.HttpUrl)

Example 9 with RecognizeOptions

use of com.ibm.watson.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.

In the class CustomizationExample, the method main:

/**
 * Demonstrates Speech to Text language-model customization with IAM
 * authentication: creates a custom language model, adds a corpus and user
 * words, trains the model, then compares recognition results with and without
 * the custom model. The model is deleted when the run finishes, even on failure.
 *
 * @param args the arguments (unused)
 * @throws InterruptedException if interrupted while polling for service status
 * @throws FileNotFoundException if the corpus or audio file cannot be opened
 */
public static void main(String[] args) throws InterruptedException, FileNotFoundException {
    Authenticator authenticator = new IamAuthenticator("<iam_api_key>");
    SpeechToText service = new SpeechToText(authenticator);

    // Create language model
    CreateLanguageModelOptions createOptions =
        new CreateLanguageModelOptions.Builder()
            .name("IEEE-permanent")
            .baseModelName("en-US_BroadbandModel")
            .description("My customization")
            .build();
    LanguageModel myModel = service.createLanguageModel(createOptions).execute().getResult();
    String id = myModel.getCustomizationId();
    try {
        // Add a corpus file to the model
        AddCorpusOptions addOptions =
            new AddCorpusOptions.Builder()
                .customizationId(id)
                .corpusName("corpus-1")
                .corpusFile(new File(CORPUS_FILE))
                .allowOverwrite(false)
                .build();
        service.addCorpus(addOptions).execute().getResult();

        // Poll (up to 30 tries, 5s apart) until the corpus has been analyzed
        GetCorpusOptions getOptions =
            new GetCorpusOptions.Builder().customizationId(id).corpusName("corpus-1").build();
        for (int x = 0;
            x < 30
                && !service.getCorpus(getOptions).execute().getResult().getStatus()
                    .equals(Corpus.Status.ANALYZED);
            x++) {
            Thread.sleep(5000);
        }

        // Get all corpora
        ListCorporaOptions listCorporaOptions = new ListCorporaOptions.Builder().customizationId(id).build();
        Corpora corpora = service.listCorpora(listCorporaOptions).execute().getResult();
        System.out.println(corpora);

        // Get specific corpus
        Corpus corpus = service.getCorpus(getOptions).execute().getResult();
        System.out.println(corpus);

        // Now add some user words to the custom model
        service.addWord(
            new AddWordOptions.Builder()
                .customizationId(id)
                .wordName("IEEE")
                .word("IEEE")
                .displayAs("IEEE")
                .addSoundsLike("I. triple E.")
                .build())
            .execute();
        // BUGFIX: displayAs was "IEEE" here (copy-paste error from the call above);
        // the word "hhonors" should display as "HHonors".
        service.addWord(
            new AddWordOptions.Builder()
                .customizationId(id)
                .wordName("hhonors")
                .word("hhonors")
                .displayAs("HHonors")
                .addSoundsLike("H. honors")
                .addSoundsLike("Hilton honors")
                .build())
            .execute();

        // Display all words in the words resource (OOVs from the corpus and
        // new words just added) in ascending alphabetical order
        ListWordsOptions listWordsAlphabeticalOptions =
            new ListWordsOptions.Builder()
                .customizationId(id)
                .wordType(ListWordsOptions.WordType.ALL)
                .build();
        Words words = service.listWords(listWordsAlphabeticalOptions).execute().getResult();
        System.out.println("\nASCENDING ALPHABETICAL ORDER:");
        System.out.println(words);

        // Then display all words in the words resource in descending order
        // by count
        ListWordsOptions listWordsCountOptions =
            new ListWordsOptions.Builder()
                .customizationId(id)
                .wordType(ListWordsOptions.WordType.ALL)
                .sort("-" + ListWordsOptions.Sort.COUNT)
                .build();
        words = service.listWords(listWordsCountOptions).execute().getResult();
        System.out.println("\nDESCENDING ORDER BY COUNT:");
        System.out.println(words);

        // Now start training of the model
        TrainLanguageModelOptions trainOptions =
            new TrainLanguageModelOptions.Builder()
                .customizationId(id)
                .wordTypeToAdd(TrainLanguageModelOptions.WordTypeToAdd.ALL)
                .build();
        service.trainLanguageModel(trainOptions).execute();

        // Poll (up to 30 tries, 10s apart) until training completes
        for (int x = 0; x < 30 && !myModel.getStatus().equals(LanguageModel.Status.AVAILABLE); x++) {
            GetLanguageModelOptions getLanguageModelOptions =
                new GetLanguageModelOptions.Builder().customizationId(id).build();
            myModel = service.getLanguageModel(getLanguageModelOptions).execute().getResult();
            Thread.sleep(10000);
        }

        File audio = new File(AUDIO_FILE);
        RecognizeOptions recognizeOptionsWithModel =
            new RecognizeOptions.Builder()
                .model(RecognizeOptions.Model.EN_US_BROADBANDMODEL)
                .customizationId(id)
                .audio(audio)
                .contentType(HttpMediaType.AUDIO_WAV)
                .build();
        RecognizeOptions recognizeOptionsWithoutModel =
            new RecognizeOptions.Builder()
                .model(RecognizeOptions.Model.EN_US_BROADBANDMODEL)
                .audio(audio)
                .contentType(HttpMediaType.AUDIO_WAV)
                .build();

        // First decode WITHOUT the custom model
        SpeechRecognitionResults transcript = service.recognize(recognizeOptionsWithoutModel).execute().getResult();
        System.out.println(transcript);

        // Now decode with the custom model
        transcript = service.recognize(recognizeOptionsWithModel).execute().getResult();
        System.out.println(transcript);
    } finally {
        // Always clean up the custom model, even if an earlier step failed
        DeleteLanguageModelOptions deleteOptions =
            new DeleteLanguageModelOptions.Builder().customizationId(id).build();
        service.deleteLanguageModel(deleteOptions).execute();
    }
}
Also used : IamAuthenticator(com.ibm.cloud.sdk.core.security.IamAuthenticator) CreateLanguageModelOptions(com.ibm.watson.speech_to_text.v1.model.CreateLanguageModelOptions) Corpus(com.ibm.watson.speech_to_text.v1.model.Corpus) DeleteLanguageModelOptions(com.ibm.watson.speech_to_text.v1.model.DeleteLanguageModelOptions) AddWordOptions(com.ibm.watson.speech_to_text.v1.model.AddWordOptions) LanguageModel(com.ibm.watson.speech_to_text.v1.model.LanguageModel) GetCorpusOptions(com.ibm.watson.speech_to_text.v1.model.GetCorpusOptions) IamAuthenticator(com.ibm.cloud.sdk.core.security.IamAuthenticator) Authenticator(com.ibm.cloud.sdk.core.security.Authenticator) AddCorpusOptions(com.ibm.watson.speech_to_text.v1.model.AddCorpusOptions) Corpora(com.ibm.watson.speech_to_text.v1.model.Corpora) GetLanguageModelOptions(com.ibm.watson.speech_to_text.v1.model.GetLanguageModelOptions) Words(com.ibm.watson.speech_to_text.v1.model.Words) ListWordsOptions(com.ibm.watson.speech_to_text.v1.model.ListWordsOptions) TrainLanguageModelOptions(com.ibm.watson.speech_to_text.v1.model.TrainLanguageModelOptions) File(java.io.File) ListCorporaOptions(com.ibm.watson.speech_to_text.v1.model.ListCorporaOptions) SpeechRecognitionResults(com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.speech_to_text.v1.model.RecognizeOptions)

Example 10 with RecognizeOptions

use of com.ibm.watson.speech_to_text.v1.model.RecognizeOptions in project java-sdk by watson-developer-cloud.

In the class SpeechToTextIT, the method testRecognizeFileString:

/**
 * Recognizes a sample WAV file and verifies the response honors the request
 * options: a non-empty transcript, at most {@code maxAlternatives} alternatives,
 * and word alternatives at or above the confidence threshold.
 *
 * @throws FileNotFoundException if the sample audio file cannot be opened
 */
@Test
public void testRecognizeFileString() throws FileNotFoundException {
    Long maxAlternatives = 3L;
    Float wordAlternativesThreshold = 0.8f;
    RecognizeOptions recognizeOptions =
        new RecognizeOptions.Builder()
            .audio(new File(SAMPLE_WAV))
            .contentType(HttpMediaType.AUDIO_WAV)
            .maxAlternatives(maxAlternatives)
            .wordAlternativesThreshold(wordAlternativesThreshold)
            .smartFormatting(true)
            .build();
    SpeechRecognitionResults results = service.recognize(recognizeOptions).execute().getResult();

    // The first result must carry a transcript and respect the alternatives cap.
    assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTranscript());
    assertTrue(results.getResults().get(0).getAlternatives().size() <= maxAlternatives);

    // Every reported word alternative must meet the requested confidence threshold.
    for (WordAlternativeResults wordAlternative : results.getResults().get(0).getWordAlternatives()) {
        assertTrue(wordAlternative.getAlternatives().get(0).getConfidence() >= wordAlternativesThreshold);
    }
}
Also used : WordAlternativeResults(com.ibm.watson.speech_to_text.v1.model.WordAlternativeResults) File(java.io.File) SpeechRecognitionResults(com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.speech_to_text.v1.model.RecognizeOptions) WatsonServiceTest(com.ibm.watson.common.WatsonServiceTest) Test(org.junit.Test)

Aggregations

RecognizeOptions (com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions)18 Test (org.junit.Test)18 SpeechRecognitionResults (com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults)16 File (java.io.File)13 SpeechRecognitionResults (com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults)9 FileInputStream (java.io.FileInputStream)9 RecognizeOptions (com.ibm.watson.speech_to_text.v1.model.RecognizeOptions)8 MockResponse (okhttp3.mockwebserver.MockResponse)8 WatsonServiceUnitTest (com.ibm.watson.developer_cloud.WatsonServiceUnitTest)7 RecordedRequest (okhttp3.mockwebserver.RecordedRequest)7 WatsonServiceTest (com.ibm.watson.developer_cloud.WatsonServiceTest)6 WatsonServiceTest (com.ibm.watson.common.WatsonServiceTest)5 BaseRecognizeCallback (com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeCallback)5 ByteString (okio.ByteString)5 JsonObject (com.google.gson.JsonObject)4 JsonParser (com.google.gson.JsonParser)4 Authenticator (com.ibm.cloud.sdk.core.security.Authenticator)2 IamAuthenticator (com.ibm.cloud.sdk.core.security.IamAuthenticator)2 NotFoundException (com.ibm.watson.developer_cloud.service.exception.NotFoundException)2 WordAlternativeResults (com.ibm.watson.developer_cloud.speech_to_text.v1.model.WordAlternativeResults)2