Examples with Corpora - com.ibm.watson.speech_to_text.v1.model.Corpora

Example 1 with Corpora

use of com.ibm.watson.speech_to_text.v1.model.Corpora in project java-sdk by watson-developer-cloud.

In the class SpeechToTextIT, method testCreateLanguageModel:

/**
 * Walks a temporary custom language model through its lifecycle against the service: create the
 * model, upload a corpus (first without and then with overwrite), wait for each upload to be
 * analyzed, list the corpora, add custom words, list the words, and always delete the model.
 *
 * @throws InterruptedException if the thread is interrupted while sleeping between status polls
 * @throws FileNotFoundException presumably raised when the corpus resource file cannot be found
 */
@Test
public void testCreateLanguageModel() throws InterruptedException, FileNotFoundException {
    CreateLanguageModelOptions modelOptions = new CreateLanguageModelOptions.Builder()
        .name("java-sdk-temporary")
        .baseModelName(EN_BROADBAND16K)
        .description("Temporary custom model for testing the Java SDK")
        .build();
    LanguageModel createdModel = service.createLanguageModel(modelOptions).execute();
    String modelId = createdModel.getCustomizationId();
    try {
        File corpusFile = new File(String.format(SPEECH_RESOURCE, "corpus1.txt"));

        // First upload: the corpus does not exist yet, so overwriting is not allowed.
        AddCorpusOptions firstUpload = new AddCorpusOptions.Builder()
            .customizationId(modelId)
            .corpusName("corpus-1")
            .corpusFile(corpusFile)
            .corpusFileContentType(HttpMediaType.TEXT_PLAIN)
            .allowOverwrite(false)
            .build();
        service.addCorpus(firstUpload).execute();

        // Poll the corpus status: at most 30 checks, 5 seconds apart.
        GetCorpusOptions corpusQuery = new GetCorpusOptions.Builder()
            .customizationId(modelId)
            .corpusName("corpus-1")
            .build();
        int attempt = 0;
        while (attempt < 30 && !service.getCorpus(corpusQuery).execute().getStatus().equals(Status.ANALYZED)) {
            Thread.sleep(5000);
            attempt++;
        }
        assertTrue(service.getCorpus(corpusQuery).execute().getStatus().equals(Status.ANALYZED));

        // Second upload of the same corpus name, this time permitting overwrite.
        AddCorpusOptions overwritingUpload = new AddCorpusOptions.Builder()
            .customizationId(modelId)
            .corpusName("corpus-1")
            .corpusFile(corpusFile)
            .corpusFileContentType(HttpMediaType.TEXT_PLAIN)
            .allowOverwrite(true)
            .build();
        service.addCorpus(overwritingUpload).execute();

        // Same bounded polling as after the first upload.
        attempt = 0;
        while (attempt < 30 && !service.getCorpus(corpusQuery).execute().getStatus().equals(Status.ANALYZED)) {
            Thread.sleep(5000);
            attempt++;
        }
        assertTrue(service.getCorpus(corpusQuery).execute().getStatus().equals(Status.ANALYZED));

        // The model should now hold exactly the one corpus that was uploaded twice.
        ListCorporaOptions corporaQuery = new ListCorporaOptions.Builder()
            .customizationId(modelId)
            .build();
        Corpora corpora = service.listCorpora(corporaQuery).execute();
        assertNotNull(corpora);
        assertTrue(corpora.getCorpora().size() == 1);

        // Add user words covering the different combinations of display-as and sounds-like values.
        service.addWord(new AddWordOptions.Builder()
            .customizationId(modelId)
            .wordName("IEEE")
            .word("IEEE")
            .displayAs("IEEE")
            .addSoundsLike("I. triple E.")
            .build()).execute();
        service.addWord(new AddWordOptions.Builder()
            .customizationId(modelId)
            .wordName("hhonors")
            .word("hhonors")
            .displayAs("IEEE")
            .addSoundsLike("H. honors")
            .addSoundsLike("Hilton honors")
            .build()).execute();
        service.addWord(new AddWordOptions.Builder()
            .customizationId(modelId)
            .wordName("aaa")
            .word("aaa")
            .displayAs("aaa")
            .addSoundsLike("aaa")
            .addSoundsLike("bbb")
            .build()).execute();
        service.addWord(new AddWordOptions.Builder()
            .customizationId(modelId)
            .wordName("bbb")
            .word("bbb")
            .addSoundsLike("aaa")
            .addSoundsLike("bbb")
            .build()).execute();
        service.addWord(new AddWordOptions.Builder()
            .customizationId(modelId)
            .wordName("ccc")
            .word("ccc")
            .displayAs("ccc")
            .build()).execute();
        service.addWord(new AddWordOptions.Builder()
            .customizationId(modelId)
            .wordName("ddd")
            .word("ddd")
            .build()).execute();
        service.addWord(new AddWordOptions.Builder()
            .customizationId(modelId)
            .wordName("eee")
            .word("eee")
            .build()).execute();

        // List every word in the words resource (OOVs from the corpus plus the words added above).
        ListWordsOptions wordsQuery = new ListWordsOptions.Builder()
            .customizationId(modelId)
            .wordType(ListWordsOptions.WordType.ALL)
            .build();
        Words words = service.listWords(wordsQuery).execute();
        assertNotNull(words);
    } finally {
        // Always remove the temporary model, even when an assertion above failed.
        DeleteLanguageModelOptions cleanup = new DeleteLanguageModelOptions.Builder()
            .customizationId(modelId)
            .build();
        service.deleteLanguageModel(cleanup).execute();
    }
}

Also used : CreateLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.CreateLanguageModelOptions) AddCorpusOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddCorpusOptions) DeleteLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.DeleteLanguageModelOptions) AddWordOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddWordOptions) Corpora(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpora) Words(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Words) ListWordsOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListWordsOptions) LanguageModel(com.ibm.watson.developer_cloud.speech_to_text.v1.model.LanguageModel) GetCorpusOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.GetCorpusOptions) File(java.io.File) ListCorporaOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListCorporaOptions) WatsonServiceTest(com.ibm.watson.developer_cloud.WatsonServiceTest) Test(org.junit.Test)

Example 2 with Corpora

use of com.ibm.watson.speech_to_text.v1.model.Corpora in project java-sdk by watson-developer-cloud.

In the class SpeechToTextTest, method testListCorpora:

/**
 * Verifies that listing corpora issues a GET to the corpora path of the given customization and
 * that the parsed result matches the "corpora" member of the canned JSON fixture.
 *
 * @throws InterruptedException if interrupted while waiting for the mock server's recorded request
 * @throws FileNotFoundException if the JSON fixture file cannot be opened
 */
@Test
public void testListCorpora() throws InterruptedException, FileNotFoundException {
    String customizationId = "foo";

    // Load the fixture once: it is both the mocked response body and the expected value.
    FileInputStream fixtureStream = new FileInputStream("src/test/resources/speech_to_text/corpora.json");
    String fixtureJson = getStringFromInputStream(fixtureStream);
    JsonObject expectedJson = new JsonParser().parse(fixtureJson).getAsJsonObject();

    MockResponse cannedResponse = new MockResponse()
        .addHeader(CONTENT_TYPE, HttpMediaType.APPLICATION_JSON)
        .setBody(fixtureJson);
    server.enqueue(cannedResponse);

    ListCorporaOptions options = new ListCorporaOptions.Builder()
        .customizationId(customizationId)
        .build();
    Corpora actual = service.listCorpora(options).execute();

    // Inspect what the client actually sent to the mock server.
    final RecordedRequest sent = server.takeRequest();
    assertEquals("GET", sent.getMethod());
    assertEquals(String.format(PATH_CORPORA, customizationId), sent.getPath());
    assertEquals(expectedJson.get("corpora"), GSON.toJsonTree(actual.getCorpora()));
}

Also used : RecordedRequest(okhttp3.mockwebserver.RecordedRequest) MockResponse(okhttp3.mockwebserver.MockResponse) Corpora(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpora) JsonObject(com.google.gson.JsonObject) ByteString(okio.ByteString) FileInputStream(java.io.FileInputStream) ListCorporaOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListCorporaOptions) JsonParser(com.google.gson.JsonParser) WatsonServiceUnitTest(com.ibm.watson.developer_cloud.WatsonServiceUnitTest) Test(org.junit.Test)

Example 3 with Corpora

use of com.ibm.watson.speech_to_text.v1.model.Corpora in project java-sdk by watson-developer-cloud.

In the class SpeechToText, method listCorpora:

/**
 * Lists information about all corpora for a custom language model.
 *
 * The information returned by the service includes the total number of words and
 * out-of-vocabulary (OOV) words, the name, and the status of each corpus. You must use credentials
 * for the instance of the service that owns a model to list its corpora.
 *
 * The request is a GET built from the service endpoint, the path segments
 * {@code v1/customizations} and {@code corpora}, and the customization ID as the path parameter.
 *
 * @param listCorporaOptions the {@link ListCorporaOptions} containing the options for the call;
 *     must not be null
 * @return a {@link ServiceCall} with a response type of {@link Corpora}
 */
public ServiceCall<Corpora> listCorpora(ListCorporaOptions listCorporaOptions) {
    Validator.notNull(listCorporaOptions, "listCorporaOptions cannot be null");

    String[] segments = new String[] { "v1/customizations", "corpora" };
    String[] parameters = new String[] { listCorporaOptions.customizationId() };

    RequestBuilder request = RequestBuilder.get(
        RequestBuilder.constructHttpUrl(getEndPoint(), segments, parameters));

    return createServiceCall(
        request.build(),
        ResponseConverterUtils.getObject(Corpora.class));
}

Also used : Corpora(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpora) RequestBuilder(com.ibm.watson.developer_cloud.http.RequestBuilder)

Example 4 with Corpora

use of com.ibm.watson.speech_to_text.v1.model.Corpora in project java-sdk by watson-developer-cloud.

In the class CustomizationExample, method main:

/**
 * Demonstrates language model customization end to end: creates a custom model, adds a corpus and
 * waits for it to be analyzed, lists corpora and words, trains the model, transcribes the same
 * audio file without and with the custom model, and finally deletes the model.
 *
 * @param args the command-line arguments (not used)
 * @throws InterruptedException if the thread is interrupted while sleeping between status polls
 */
public static void main(String[] args) throws InterruptedException {
    SpeechToText service = new SpeechToText();
    service.setUsernameAndPassword("<username>", "<password>");
    // Create language model
    CreateLanguageModelOptions createOptions = new CreateLanguageModelOptions.Builder().name("IEEE-permanent").baseModelName("en-US_BroadbandModel").description("My customization").build();
    LanguageModel myModel = service.createLanguageModel(createOptions).execute();
    String id = myModel.getCustomizationId();
    try {
        // Add a corpus file to the model
        AddCorpusOptions addOptions = new AddCorpusOptions.Builder().customizationId(id).corpusName("corpus-1").corpusFile(new File(CORPUS_FILE)).corpusFileContentType(HttpMediaType.TEXT_PLAIN).allowOverwrite(false).build();
        service.addCorpus(addOptions).execute();
        // Get corpus status: poll at most 30 times, 5 seconds apart. The status is compared with
        // equals() rather than != so that a value-equal status ends the wait; a reference
        // comparison on a deserialized status would keep the loop running for all 30 iterations.
        GetCorpusOptions getOptions = new GetCorpusOptions.Builder().customizationId(id).corpusName("corpus-1").build();
        for (int x = 0; x < 30 && !Status.ANALYZED.equals(service.getCorpus(getOptions).execute().getStatus()); x++) {
            Thread.sleep(5000);
        }
        // Get all corpora
        ListCorporaOptions listCorporaOptions = new ListCorporaOptions.Builder().customizationId(id).build();
        Corpora corpora = service.listCorpora(listCorporaOptions).execute();
        System.out.println(corpora);
        // Get specific corpus
        Corpus corpus = service.getCorpus(getOptions).execute();
        System.out.println(corpus);
        // Now add some user words to the custom model
        service.addWord(new AddWordOptions.Builder().customizationId(id).wordName("IEEE").word("IEEE").displayAs("IEEE").addSoundsLike("I. triple E.").build()).execute();
        // "hhonors" gets its own display form; displaying it as "IEEE" was a copy-paste slip that
        // would make the custom model transcribe "Hilton honors" as "IEEE".
        service.addWord(new AddWordOptions.Builder().customizationId(id).wordName("hhonors").word("hhonors").displayAs("HHonors").addSoundsLike("H. honors").addSoundsLike("Hilton honors").build()).execute();
        // Display all words in the words resource (OOVs from the corpus and
        // new words just added); no sort is requested, so the service's default order applies
        ListWordsOptions listWordsAlphabeticalOptions = new ListWordsOptions.Builder().customizationId(id).wordType(ListWordsOptions.WordType.ALL).build();
        Words words = service.listWords(listWordsAlphabeticalOptions).execute();
        System.out.println("\nASCENDING ALPHABETICAL ORDER:");
        System.out.println(words);
        // Then display all words in the words resource in descending order
        // by count
        ListWordsOptions listWordsCountOptions = new ListWordsOptions.Builder().customizationId(id).wordType(ListWordsOptions.WordType.ALL).sort("-" + ListWordsOptions.Sort.COUNT).build();
        words = service.listWords(listWordsCountOptions).execute();
        System.out.println("\nDESCENDING ORDER BY COUNT:");
        System.out.println(words);
        // Now start training of the model
        TrainLanguageModelOptions trainOptions = new TrainLanguageModelOptions.Builder().customizationId(id).wordTypeToAdd(TrainLanguageModelOptions.WordTypeToAdd.ALL).build();
        service.trainLanguageModel(trainOptions).execute();
        // Wait for training to finish: at most 30 polls, 10 seconds apart. The options object is
        // loop-invariant, and it needs a name distinct from the corpus "getOptions" above because
        // Java does not allow a nested local to redeclare an enclosing local.
        GetLanguageModelOptions getModelOptions = new GetLanguageModelOptions.Builder().customizationId(id).build();
        for (int x = 0; x < 30 && !LanguageModel.Status.AVAILABLE.equals(myModel.getStatus()); x++) {
            // Sleep first, then refresh, so the loop exits as soon as a refresh reports "available".
            Thread.sleep(10000);
            myModel = service.getLanguageModel(getModelOptions).execute();
        }
        File audio = new File(AUDIO_FILE);
        RecognizeOptions recognizeOptionsWithModel = new RecognizeOptions.Builder().model(RecognizeOptions.EN_US_BROADBANDMODEL).customizationId(id).audio(audio).contentType(HttpMediaType.AUDIO_WAV).build();
        RecognizeOptions recognizeOptionsWithoutModel = new RecognizeOptions.Builder().model(RecognizeOptions.EN_US_BROADBANDMODEL).audio(audio).contentType(HttpMediaType.AUDIO_WAV).build();
        // First decode WITHOUT the custom model
        SpeechRecognitionResults transcript = service.recognize(recognizeOptionsWithoutModel).execute();
        System.out.println(transcript);
        // Now decode with the custom model
        transcript = service.recognize(recognizeOptionsWithModel).execute();
        System.out.println(transcript);
    } finally {
        // Always delete the custom model so the example leaves nothing behind on the service.
        DeleteLanguageModelOptions deleteOptions = new DeleteLanguageModelOptions.Builder().customizationId(id).build();
        service.deleteLanguageModel(deleteOptions).execute();
    }
}

Also used : CreateLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.CreateLanguageModelOptions) Corpus(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpus) DeleteLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.DeleteLanguageModelOptions) AddWordOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddWordOptions) LanguageModel(com.ibm.watson.developer_cloud.speech_to_text.v1.model.LanguageModel) GetCorpusOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.GetCorpusOptions) AddCorpusOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddCorpusOptions) Corpora(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpora) GetLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.GetLanguageModelOptions) Words(com.ibm.watson.developer_cloud.speech_to_text.v1.model.Words) ListWordsOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListWordsOptions) TrainLanguageModelOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.TrainLanguageModelOptions) File(java.io.File) ListCorporaOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListCorporaOptions) SpeechRecognitionResults(com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechRecognitionResults) RecognizeOptions(com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions)

Example 5 with Corpora

use of com.ibm.watson.speech_to_text.v1.model.Corpora in project java-sdk by watson-developer-cloud.

In the class SpeechToText, method listWords:

/**
 * List custom words.
 *
 * <p>Lists information about custom words from a custom language model. You can list all words
 * from the custom model's words resource, only custom words that were added or modified by the
 * user, or, _for a custom model that is based on a previous-generation model_, only
 * out-of-vocabulary (OOV) words that were extracted from corpora or are recognized by grammars.
 * You can also indicate the order in which the service is to return words; by default, the
 * service lists words in ascending alphabetical order. You must use credentials for the instance
 * of the service that owns a model to list information about its words.
 *
 * <p>The request is a GET on {@code /v1/customizations/{customization_id}/words}; the
 * {@code word_type} and {@code sort} query parameters are sent only when set on the options.
 *
 * <p>**See also:** [Listing words from a custom language
 * model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageWords#listWords).
 *
 * @param listWordsOptions the {@link ListWordsOptions} containing the options for the call; must
 *     not be null
 * @return a {@link ServiceCall} with a result of type {@link Words}
 */
public ServiceCall<Words> listWords(ListWordsOptions listWordsOptions) {
    com.ibm.cloud.sdk.core.util.Validator.notNull(listWordsOptions, "listWordsOptions cannot be null");

    // Resolve the URL template against the customization ID.
    Map<String, String> pathParams = new HashMap<>();
    pathParams.put("customization_id", listWordsOptions.customizationId());
    RequestBuilder request = RequestBuilder.get(
        RequestBuilder.resolveRequestUrl(
            getServiceUrl(), "/v1/customizations/{customization_id}/words", pathParams));

    // SDK headers first, then the Accept header.
    Map<String, String> commonHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "listWords");
    for (Entry<String, String> entry : commonHeaders.entrySet()) {
        request.header(entry.getKey(), entry.getValue());
    }
    request.header("Accept", "application/json");

    // Optional query parameters are only added when the caller supplied them.
    if (listWordsOptions.wordType() != null) {
        request.query("word_type", String.valueOf(listWordsOptions.wordType()));
    }
    if (listWordsOptions.sort() != null) {
        request.query("sort", String.valueOf(listWordsOptions.sort()));
    }

    ResponseConverter<Words> wordsConverter =
        ResponseConverterUtils.getValue(new com.google.gson.reflect.TypeToken<Words>() { }.getType());
    return createServiceCall(request.build(), wordsConverter);
}

Also used : RequestBuilder(com.ibm.cloud.sdk.core.http.RequestBuilder) HashMap(java.util.HashMap) Words(com.ibm.watson.speech_to_text.v1.model.Words)

Aggregations

RequestBuilder (com.ibm.cloud.sdk.core.http.RequestBuilder)5 Corpora (com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpora)5 Corpora (com.ibm.watson.speech_to_text.v1.model.Corpora)5 HashMap (java.util.HashMap)5 MockResponse (okhttp3.mockwebserver.MockResponse)5 RecordedRequest (okhttp3.mockwebserver.RecordedRequest)5 Test (org.junit.Test)5 ListCorporaOptions (com.ibm.watson.developer_cloud.speech_to_text.v1.model.ListCorporaOptions)4 ListCorporaOptions (com.ibm.watson.speech_to_text.v1.model.ListCorporaOptions)4 File (java.io.File)4 Test (org.testng.annotations.Test)4 AddCorpusOptions (com.ibm.watson.speech_to_text.v1.model.AddCorpusOptions)3 Corpus (com.ibm.watson.speech_to_text.v1.model.Corpus)3 GetCorpusOptions (com.ibm.watson.speech_to_text.v1.model.GetCorpusOptions)3 Words (com.ibm.watson.speech_to_text.v1.model.Words)3 Ignore (org.junit.Ignore)3 WatsonServiceTest (com.ibm.watson.common.WatsonServiceTest)2 WatsonServiceTest (com.ibm.watson.developer_cloud.WatsonServiceTest)2 AddCorpusOptions (com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddCorpusOptions)2 AddWordOptions (com.ibm.watson.developer_cloud.speech_to_text.v1.model.AddWordOptions)2