use of com.ibm.watson.speech_to_text.v1.model.RecognitionJob in project java-sdk by watson-developer-cloud.
the class SpeechToTextTest method testCheckJob.
/**
* Test check job.
*
* @throws InterruptedException the interrupted exception
* @throws FileNotFoundException the file not found exception
*/
@Test
public void testCheckJob() throws InterruptedException, FileNotFoundException {
String id = "foo";
RecognitionJob job = loadFixture("src/test/resources/speech_to_text/job.json", RecognitionJob.class);
server.enqueue(new MockResponse().addHeader(CONTENT_TYPE, HttpMediaType.APPLICATION_JSON).setBody(GSON.toJson(job)));
CheckJobOptions checkOptions = new CheckJobOptions.Builder().id(id).build();
RecognitionJob result = service.checkJob(checkOptions).execute();
final RecordedRequest request = server.takeRequest();
assertEquals("GET", request.getMethod());
assertEquals(String.format(PATH_RECOGNITION, id), request.getPath());
assertEquals(result.toString(), job.toString());
}
use of com.ibm.watson.speech_to_text.v1.model.RecognitionJob in project java-sdk by watson-developer-cloud.
the class SpeechToTextTest method testCreateJob.
@Test
public void testCreateJob() throws InterruptedException, FileNotFoundException {
String callbackUrl = "callback";
String events = CreateJobOptions.Events.RECOGNITIONS_STARTED;
String userToken = "token";
Long resultsTtl = 5L;
File audio = SAMPLE_WAV;
String contentType = CreateJobOptions.ContentType.AUDIO_WAV;
String model = CreateJobOptions.Model.EN_US_BROADBANDMODEL;
String customizationId = "customizationId";
Double customizationWeight = 5d;
String version = "version";
Long inactivityTimeout = 20L;
List<String> keywords = Arrays.asList("keyword1", "keyword2");
Float keywordsThreshold = 5f;
Boolean wordConfidence = true;
Boolean timestamps = true;
Boolean profanityFilter = true;
Boolean smartFormatting = true;
Boolean speakerLabels = true;
RecognitionJob job = loadFixture("src/test/resources/speech_to_text/job.json", RecognitionJob.class);
server.enqueue(new MockResponse().addHeader(CONTENT_TYPE, HttpMediaType.APPLICATION_JSON).setBody(GSON.toJson(job)));
CreateJobOptions createOptions = new CreateJobOptions.Builder().callbackUrl(callbackUrl).events(events).userToken(userToken).resultsTtl(resultsTtl).audio(audio).contentType(contentType).model(model).customizationId(customizationId).customizationWeight(customizationWeight).version(version).inactivityTimeout(inactivityTimeout).keywords(keywords).keywordsThreshold(keywordsThreshold).wordConfidence(wordConfidence).timestamps(timestamps).profanityFilter(profanityFilter).smartFormatting(smartFormatting).speakerLabels(speakerLabels).build();
service.createJob(createOptions).execute();
final RecordedRequest request = server.takeRequest();
assertEquals("POST", request.getMethod());
assertEquals(PATH_RECOGNITIONS + "?model=" + model + "&callback_url=" + callbackUrl + "&events=" + events + "&user_token=" + userToken + "&results_ttl=" + resultsTtl + "&customization_id=" + customizationId + "&customization_weight=" + customizationWeight + "&version=" + version + "&inactivity_timeout=" + inactivityTimeout + "&keywords=" + StringUtils.join(keywords, ',') + "&keywords_threshold=" + keywordsThreshold + "&word_confidence=" + wordConfidence + "×tamps=" + timestamps + "&profanity_filter=" + profanityFilter + "&smart_formatting=" + smartFormatting + "&speaker_labels=" + speakerLabels, request.getPath());
}
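The test above only verifies how the builder's values end up in the request query string. For context, here is a hedged sketch of the same builder driving a callback-based request against a live service, as the createJob documentation below describes. The client, audio path, and callback URL are placeholders, imports are omitted like the excerpts on this page, and the callback URL is assumed to have been registered with the service beforehand.

// Hedged sketch: submit an asynchronous job with callback notification.
// `service`, the audio path, and the callback URL are placeholders; the URL
// must already be allowlisted with the service for notifications to arrive.
void submitJobWithCallback(SpeechToText service) throws FileNotFoundException {
  File audio = new File("path/to/audio.wav"); // placeholder path
  CreateJobOptions options = new CreateJobOptions.Builder()
      .audio(audio)
      .contentType(CreateJobOptions.ContentType.AUDIO_WAV)
      .callbackUrl("https://example.com/stt-results") // placeholder URL
      .events(CreateJobOptions.Events.RECOGNITIONS_STARTED)
      .userToken("my-correlation-token") // echoed back with each notification
      .resultsTtl(60L) // keep results available after completion
      .build();
  RecognitionJob job = service.createJob(options).execute().getResult();
  System.out.println("Submitted job " + job.getId());
}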
use of com.ibm.watson.speech_to_text.v1.model.RecognitionJob in project java-sdk by watson-developer-cloud.
the class SpeechToText method createJob.
/**
* Create a job.
*
 * <p>Creates a job for a new asynchronous recognition request. The job is owned by the instance
 * of the service whose credentials are used to create it. How you learn the status and results of
 * a job depends on the parameters you include with the job creation request:
 *
 * <p>* By callback notification: Include the `callback_url` parameter to specify a URL to which
 * the service is to send callback notifications when the status of the job changes. Optionally,
 * you can also include the `events` and `user_token` parameters to subscribe to specific events
 * and to specify a string that is to be included with each notification for the job.
 *
 * <p>* By polling the service: Omit the `callback_url`, `events`, and `user_token` parameters.
 * You must then use the [Check jobs](#checkjobs) or [Check a job](#checkjob) methods to check the
 * status of the job, using the latter to retrieve the results when the job is complete.
*
* <p>The two approaches are not mutually exclusive. You can poll the service for job status or
* obtain results from the service manually even if you include a callback URL. In both cases, you
* can include the `results_ttl` parameter to specify how long the results are to remain available
* after the job is complete. Using the HTTPS [Check a job](#checkjob) method to retrieve results
* is more secure than receiving them via callback notification over HTTP because it provides
* confidentiality in addition to authentication and data integrity.
*
 * <p>The method supports the same basic parameters as other HTTP and WebSocket recognition
 * requests. It also supports the following parameters specific to the asynchronous interface:
 * `callback_url`, `events`, `user_token`, and `results_ttl`.
*
* <p>You can pass a maximum of 1 GB and a minimum of 100 bytes of audio with a request. The
* service automatically detects the endianness of the incoming audio and, for audio that includes
* multiple channels, downmixes the audio to one-channel mono during transcoding. The method
* returns only final results; to enable interim results, use the WebSocket API. (With the `curl`
* command, use the `--data-binary` option to upload the file for the request.)
*
* <p>**See also:** [Creating a
* job](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-async#create).
*
* <p>### Streaming mode
*
* <p>For requests to transcribe live audio as it becomes available, you must set the
* `Transfer-Encoding` header to `chunked` to use streaming mode. In streaming mode, the service
* closes the connection (status code 408) if it does not receive at least 15 seconds of audio
* (including silence) in any 30-second period. The service also closes the connection (status
* code 400) if it detects no speech for `inactivity_timeout` seconds of streaming audio; use the
* `inactivity_timeout` parameter to change the default of 30 seconds.
*
 * <p>**See also:**
 * * [Audio transmission](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#transmission)
 * * [Timeouts](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-input#timeouts)
*
* <p>### Audio formats (content types)
*
 * <p>The service accepts audio in the following formats (MIME types).
 * * For formats that are labeled **Required**, you must use the `Content-Type` header with the
 * request to specify the format of the audio.
 * * For all other formats, you can omit the `Content-Type` header or specify
 * `application/octet-stream` with the header to have the service automatically detect the format
 * of the audio. (With the `curl` command, you can specify either `"Content-Type:"` or
 * `"Content-Type: application/octet-stream"`.)
 *
 * <p>Where indicated, the format that you specify must include the sampling rate and can
 * optionally include the number of channels and the endianness of the audio.
 * * `audio/alaw` (**Required.** Specify the sampling rate (`rate`) of the audio.)
 * * `audio/basic` (**Required.** Use only with narrowband models.)
 * * `audio/flac`
 * * `audio/g729` (Use only with narrowband models.)
 * * `audio/l16` (**Required.** Specify the sampling rate (`rate`) and optionally the number of
 * channels (`channels`) and endianness (`endianness`) of the audio.)
 * * `audio/mp3`
 * * `audio/mpeg`
 * * `audio/mulaw` (**Required.** Specify the sampling rate (`rate`) of the audio.)
 * * `audio/ogg` (The service automatically detects the codec of the input audio.)
 * * `audio/ogg;codecs=opus`
 * * `audio/ogg;codecs=vorbis`
 * * `audio/wav` (Provide audio with a maximum of nine channels.)
 * * `audio/webm` (The service automatically detects the codec of the input audio.)
 * * `audio/webm;codecs=opus`
 * * `audio/webm;codecs=vorbis`
*
* <p>The sampling rate of the audio must match the sampling rate of the model for the recognition
* request: for broadband models, at least 16 kHz; for narrowband models, at least 8 kHz. If the
* sampling rate of the audio is higher than the minimum required rate, the service down-samples
* the audio to the appropriate rate. If the sampling rate of the audio is lower than the minimum
* required rate, the request fails.
*
* <p>**See also:** [Supported audio
* formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats).
*
* <p>### Next-generation models
*
* <p>The service supports next-generation `Multimedia` (16 kHz) and `Telephony` (8 kHz) models
* for many languages. Next-generation models have higher throughput than the service's previous
* generation of `Broadband` and `Narrowband` models. When you use next-generation models, the
* service can return transcriptions more quickly and also provide noticeably better transcription
* accuracy.
*
* <p>You specify a next-generation model by using the `model` query parameter, as you do a
* previous-generation model. Many next-generation models also support the `low_latency`
* parameter, which is not available with previous-generation models. Next-generation models do
* not support all of the parameters that are available for use with previous-generation models.
*
* <p>**Important:** Effective 15 March 2022, previous-generation models for all languages other
* than Arabic and Japanese are deprecated. The deprecated models remain available until 15
* September 2022, when they will be removed from the service and the documentation. You must
 * migrate to the equivalent next-generation model by the end-of-service date. For more
* information, see [Migrating to next-generation
* models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-migrate).
*
 * <p>**See also:**
 * * [Next-generation languages and models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng)
 * * [Supported features for next-generation models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-features).
*
* @param createJobOptions the {@link CreateJobOptions} containing the options for the call
* @return a {@link ServiceCall} with a result of type {@link RecognitionJob}
*/
public ServiceCall<RecognitionJob> createJob(CreateJobOptions createJobOptions) {
  com.ibm.cloud.sdk.core.util.Validator.notNull(createJobOptions, "createJobOptions cannot be null");
  RequestBuilder builder =
      RequestBuilder.post(RequestBuilder.resolveRequestUrl(getServiceUrl(), "/v1/recognitions"));
  Map<String, String> sdkHeaders = SdkCommon.getSdkHeaders("speech_to_text", "v1", "createJob");
  for (Entry<String, String> header : sdkHeaders.entrySet()) {
    builder.header(header.getKey(), header.getValue());
  }
  builder.header("Accept", "application/json");
  if (createJobOptions.contentType() != null) {
    builder.header("Content-Type", createJobOptions.contentType());
  }
  if (createJobOptions.model() != null) {
    builder.query("model", String.valueOf(createJobOptions.model()));
  }
  if (createJobOptions.callbackUrl() != null) {
    builder.query("callback_url", String.valueOf(createJobOptions.callbackUrl()));
  }
  if (createJobOptions.events() != null) {
    builder.query("events", String.valueOf(createJobOptions.events()));
  }
  if (createJobOptions.userToken() != null) {
    builder.query("user_token", String.valueOf(createJobOptions.userToken()));
  }
  if (createJobOptions.resultsTtl() != null) {
    builder.query("results_ttl", String.valueOf(createJobOptions.resultsTtl()));
  }
  if (createJobOptions.languageCustomizationId() != null) {
    builder.query("language_customization_id", String.valueOf(createJobOptions.languageCustomizationId()));
  }
  if (createJobOptions.acousticCustomizationId() != null) {
    builder.query("acoustic_customization_id", String.valueOf(createJobOptions.acousticCustomizationId()));
  }
  if (createJobOptions.baseModelVersion() != null) {
    builder.query("base_model_version", String.valueOf(createJobOptions.baseModelVersion()));
  }
  if (createJobOptions.customizationWeight() != null) {
    builder.query("customization_weight", String.valueOf(createJobOptions.customizationWeight()));
  }
  if (createJobOptions.inactivityTimeout() != null) {
    builder.query("inactivity_timeout", String.valueOf(createJobOptions.inactivityTimeout()));
  }
  if (createJobOptions.keywords() != null) {
    builder.query("keywords", RequestUtils.join(createJobOptions.keywords(), ","));
  }
  if (createJobOptions.keywordsThreshold() != null) {
    builder.query("keywords_threshold", String.valueOf(createJobOptions.keywordsThreshold()));
  }
  if (createJobOptions.maxAlternatives() != null) {
    builder.query("max_alternatives", String.valueOf(createJobOptions.maxAlternatives()));
  }
  if (createJobOptions.wordAlternativesThreshold() != null) {
    builder.query("word_alternatives_threshold", String.valueOf(createJobOptions.wordAlternativesThreshold()));
  }
  if (createJobOptions.wordConfidence() != null) {
    builder.query("word_confidence", String.valueOf(createJobOptions.wordConfidence()));
  }
  if (createJobOptions.timestamps() != null) {
    builder.query("timestamps", String.valueOf(createJobOptions.timestamps()));
  }
  if (createJobOptions.profanityFilter() != null) {
    builder.query("profanity_filter", String.valueOf(createJobOptions.profanityFilter()));
  }
  if (createJobOptions.smartFormatting() != null) {
    builder.query("smart_formatting", String.valueOf(createJobOptions.smartFormatting()));
  }
  if (createJobOptions.speakerLabels() != null) {
    builder.query("speaker_labels", String.valueOf(createJobOptions.speakerLabels()));
  }
  if (createJobOptions.customizationId() != null) {
    builder.query("customization_id", String.valueOf(createJobOptions.customizationId()));
  }
  if (createJobOptions.grammarName() != null) {
    builder.query("grammar_name", String.valueOf(createJobOptions.grammarName()));
  }
  if (createJobOptions.redaction() != null) {
    builder.query("redaction", String.valueOf(createJobOptions.redaction()));
  }
  if (createJobOptions.processingMetrics() != null) {
    builder.query("processing_metrics", String.valueOf(createJobOptions.processingMetrics()));
  }
  if (createJobOptions.processingMetricsInterval() != null) {
    builder.query("processing_metrics_interval", String.valueOf(createJobOptions.processingMetricsInterval()));
  }
  if (createJobOptions.audioMetrics() != null) {
    builder.query("audio_metrics", String.valueOf(createJobOptions.audioMetrics()));
  }
  if (createJobOptions.endOfPhraseSilenceTime() != null) {
    builder.query("end_of_phrase_silence_time", String.valueOf(createJobOptions.endOfPhraseSilenceTime()));
  }
  if (createJobOptions.splitTranscriptAtPhraseEnd() != null) {
    builder.query("split_transcript_at_phrase_end", String.valueOf(createJobOptions.splitTranscriptAtPhraseEnd()));
  }
  if (createJobOptions.speechDetectorSensitivity() != null) {
    builder.query("speech_detector_sensitivity", String.valueOf(createJobOptions.speechDetectorSensitivity()));
  }
  if (createJobOptions.backgroundAudioSuppression() != null) {
    builder.query("background_audio_suppression", String.valueOf(createJobOptions.backgroundAudioSuppression()));
  }
  if (createJobOptions.lowLatency() != null) {
    builder.query("low_latency", String.valueOf(createJobOptions.lowLatency()));
  }
  builder.bodyContent(createJobOptions.contentType(), null, null, createJobOptions.audio());
  ResponseConverter<RecognitionJob> responseConverter =
      ResponseConverterUtils.getValue(new com.google.gson.reflect.TypeToken<RecognitionJob>() {}.getType());
  return createServiceCall(builder.build(), responseConverter);
}
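The Javadoc above describes polling as the alternative to callback notification. Here is a minimal sketch of that flow, assuming an authenticated SpeechToText client and omitting imports like the excerpts on this page; the five-second poll interval is an arbitrary choice, RecognitionJob.Status.COMPLETED appears in the integration test below, and Status.FAILED is assumed to exist for the failure case.

// Hedged sketch of the polling approach described in the Javadoc above:
// create the job without callback parameters, then poll checkJob until done.
RecognitionJob transcribeByPolling(SpeechToText service, File audio)
    throws InterruptedException, FileNotFoundException {
  CreateJobOptions createOptions = new CreateJobOptions.Builder()
      .audio(audio)
      .contentType(HttpMediaType.AUDIO_WAV) // adjust to the actual audio format
      .build();
  RecognitionJob job = service.createJob(createOptions).execute().getResult();
  CheckJobOptions checkOptions = new CheckJobOptions.Builder().id(job.getId()).build();
  // The job status stays "waiting"/"processing" until the service finishes.
  while (!RecognitionJob.Status.COMPLETED.equals(job.getStatus())
      && !RecognitionJob.Status.FAILED.equals(job.getStatus())) {
    Thread.sleep(5000); // pause between polls, as the integration test below does
    job = service.checkJob(checkOptions).execute().getResult();
  }
  return job; // job.getResults() holds the transcription once status is completed
}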
use of com.ibm.watson.speech_to_text.v1.model.RecognitionJob in project java-sdk by watson-developer-cloud.
the class SpeechToTextIT method testCreateJobWarning.
/**
* Test create job with a warning message.
*
* <p>This test is currently being ignored as it has a very long runtime and causes Travis to
 * time out. The ignore annotation can be removed to test this locally.
*
* @throws InterruptedException the interrupted exception
* @throws FileNotFoundException the file not found exception
*/
@Ignore
@Test
public void testCreateJobWarning() throws InterruptedException, FileNotFoundException {
  File audio = new File(SAMPLE_WAV);
  CreateJobOptions createOptions = new CreateJobOptions.Builder()
      .audio(audio)
      .contentType(HttpMediaType.AUDIO_WAV)
      .userToken("job")
      .build();
  RecognitionJob job = service.createJob(createOptions).execute().getResult();
  try {
    assertNotNull(job.getId());
    assertNotNull(job.getWarnings());
    CheckJobOptions checkOptions = new CheckJobOptions.Builder().id(job.getId()).build();
    for (int x = 0; x < 30 && !Objects.equals(job.getStatus(), RecognitionJob.Status.COMPLETED); x++) {
      Thread.sleep(3000);
      job = service.checkJob(checkOptions).execute().getResult();
    }
    job = service.checkJob(checkOptions).execute().getResult();
    assertEquals(RecognitionJob.Status.COMPLETED, job.getStatus());
    assertNotNull(job.getResults());
  } finally {
    DeleteJobOptions deleteOptions = new DeleteJobOptions.Builder().id(job.getId()).build();
    service.deleteJob(deleteOptions).execute();
  }
}
use of com.ibm.watson.speech_to_text.v1.model.RecognitionJob in project java-sdk by watson-developer-cloud.
the class SpeechToTextTest method testCheckJobWOptions.
// Test the checkJob operation with a valid options model parameter
@Test
public void testCheckJobWOptions() throws Throwable {
  // Register a mock response
  String mockResponseBody = "{\"id\": \"id\", \"status\": \"waiting\", \"created\": \"created\", \"updated\": \"updated\", \"url\": \"url\", \"user_token\": \"userToken\", \"results\": [{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [[\"timestamps\"]], \"word_confidence\": [[\"wordConfidence\"]]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}], \"warnings\": [\"warnings\"]}";
  String checkJobPath = "/v1/recognitions/testString";
  server.enqueue(
      new MockResponse()
          .setHeader("Content-type", "application/json")
          .setResponseCode(200)
          .setBody(mockResponseBody));
  // Construct an instance of the CheckJobOptions model
  CheckJobOptions checkJobOptionsModel = new CheckJobOptions.Builder().id("testString").build();
  // Invoke checkJob() with a valid options model and verify the result
  Response<RecognitionJob> response = speechToTextService.checkJob(checkJobOptionsModel).execute();
  assertNotNull(response);
  RecognitionJob responseObj = response.getResult();
  assertNotNull(responseObj);
  // Verify the contents of the request sent to the mock server
  RecordedRequest request = server.takeRequest();
  assertNotNull(request);
  assertEquals(request.getMethod(), "GET");
  // Verify request path
  String parsedPath = TestUtilities.parseReqPath(request);
  assertEquals(parsedPath, checkJobPath);
  // Verify that there is no query string
  Map<String, String> query = TestUtilities.parseQueryString(request);
  assertNull(query);
}
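Beyond the non-null checks, the deserialized RecognitionJob exposes the mock payload's fields through getters. A hedged extension of the test above could pin a few of them down; the getters used here (getId, getStatus, getResults, getWarnings) all appear elsewhere on this page, and the expected values come straight from mockResponseBody.

// Optional follow-up assertions against the mock payload defined above.
assertEquals("id", responseObj.getId());
assertEquals("waiting", responseObj.getStatus());
assertNotNull(responseObj.getResults());
assertNotNull(responseObj.getWarnings());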