use of com.google.cloud.speech.v1.LongRunningRecognizeMetadata in project java-speech by googleapis.
the class Recognize method asyncRecognizeGcs.
// [END speech_transcribe_async_word_time_offsets_gcs]
// [START speech_transcribe_async_gcs]
/**
 * Transcribes a remote FLAC audio file with a long-running (non-blocking) recognition request
 * and prints the top transcript of every returned result.
 *
 * @param gcsUri the URI of the remote FLAC audio file to transcribe.
 * @throws Exception if creating the client, issuing the request, or waiting for the operation
 *     fails.
 */
public static void asyncRecognizeGcs(String gcsUri) throws Exception {
  // Polling schedule for the long-running operation: start at 500 ms, back off by 1.5x up to
  // 5 s between polls, and stop polling after 24 hours in total.
  RetrySettings pollingSettings =
      RetrySettings.newBuilder()
          .setInitialRetryDelay(Duration.ofMillis(500L))
          .setRetryDelayMultiplier(1.5)
          .setMaxRetryDelay(Duration.ofMillis(5000L))
          .setInitialRpcTimeout(Duration.ZERO) // ignored
          .setRpcTimeoutMultiplier(1.0) // ignored
          .setMaxRpcTimeout(Duration.ZERO) // ignored
          .setTotalTimeout(Duration.ofHours(24L)) // polling timeout
          .build();
  TimedRetryAlgorithm pollingAlgorithm = OperationTimedPollAlgorithm.create(pollingSettings);

  SpeechSettings.Builder settingsBuilder = SpeechSettings.newBuilder();
  settingsBuilder.longRunningRecognizeOperationSettings().setPollingAlgorithm(pollingAlgorithm);

  // The client is created from the customized settings and closed automatically.
  try (SpeechClient client = SpeechClient.create(settingsBuilder.build())) {
    // Describe the remote audio: FLAC, US English, 16 kHz.
    RecognitionConfig recognitionConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.FLAC)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .build();
    RecognitionAudio remoteAudio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Start the operation without blocking, then report progress every 10 seconds.
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation =
        client.longRunningRecognizeAsync(recognitionConfig, remoteAudio);
    while (!operation.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    LongRunningRecognizeResponse recognizeResponse = operation.get();
    List<SpeechRecognitionResult> recognitionResults = recognizeResponse.getResultsList();
    for (SpeechRecognitionResult recognitionResult : recognitionResults) {
      // Each result may carry several alternatives; only the first (most likely) is printed.
      List<SpeechRecognitionAlternative> alternatives = recognitionResult.getAlternativesList();
      SpeechRecognitionAlternative best = alternatives.get(0);
      System.out.printf("Transcription: %s\n", best.getTranscript());
    }
  }
}
use of com.google.cloud.speech.v1.LongRunningRecognizeMetadata in project java-speech by googleapis.
the class Recognize method transcribeModelSelectionGcs.
// [END speech_transcribe_model_selection]
// [START speech_transcribe_model_selection_gcs]
/**
 * Transcribes a remote audio file asynchronously using the "video" recognition model and prints
 * the top transcript of the first result.
 *
 * <p>If the service returns no results (or the first result has no alternatives), a short notice
 * is printed instead of failing with an {@link IndexOutOfBoundsException}.
 *
 * @param gcsUri the path to the remote audio file to transcribe.
 * @throws Exception if creating the client, issuing the request, or waiting for the operation
 *     fails.
 */
public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
  try (SpeechClient speech = SpeechClient.create()) {
    // Configure the request: LINEAR16, US English, 16 kHz, using the "video" model.
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .setModel("video")
            .build();
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speech.longRunningRecognizeAsync(config, audio);
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    List<SpeechRecognitionResult> results = response.get().getResultsList();
    // The result list can be empty; guard before indexing into it.
    if (results.isEmpty() || results.get(0).getAlternativesList().isEmpty()) {
      System.out.println("No transcription results returned.");
      return;
    }

    // Just print the first result here. There can be several alternative transcripts for a
    // given chunk of speech; use the first (most likely) one.
    SpeechRecognitionAlternative alternative = results.get(0).getAlternativesList().get(0);
    System.out.printf("Transcript : %s\n", alternative.getTranscript());
  }
}
use of com.google.cloud.speech.v1.LongRunningRecognizeMetadata in project java-speech by googleapis.
the class RecognizeBeta method transcribeMultiChannelGcs.
// [END speech_transcribe_multichannel_beta]
// [START speech_transcribe_multichannel_gcs_beta]
/**
 * Runs multi-channel recognition on a remote audio file and prints, for every result, its top
 * transcript together with the channel tag it belongs to.
 *
 * @param gcsUri the path to the audio file
 * @throws Exception if creating the client, issuing the request, or waiting for the operation
 *     fails.
 */
public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {
  try (SpeechClient client = SpeechClient.create()) {
    // Two-channel LINEAR16 audio at 44.1 kHz, with each channel recognized separately.
    RecognitionConfig recognitionConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(44100)
            .setAudioChannelCount(2)
            .setEnableSeparateRecognitionPerChannel(true)
            .build();

    // Point the request at the remote audio file.
    RecognitionAudio remoteAudio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Start the operation without blocking, then report progress every 10 seconds.
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation =
        client.longRunningRecognizeAsync(recognitionConfig, remoteAudio);
    while (!operation.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    // Print every result, not just the first one.
    LongRunningRecognizeResponse recognizeResponse = operation.get();
    for (SpeechRecognitionResult recognitionResult : recognizeResponse.getResultsList()) {
      // Several alternatives may exist per result; only the first (most likely) is used.
      SpeechRecognitionAlternative best = recognitionResult.getAlternatives(0);
      System.out.printf("Transcript : %s\n", best.getTranscript());
      System.out.printf("Channel Tag : %s\n\n", recognitionResult.getChannelTag());
    }
  }
}
use of com.google.cloud.speech.v1.LongRunningRecognizeMetadata in project java-speech by googleapis.
the class RecognizeBeta method transcribeDiarizationGcs.
// [END speech_transcribe_diarization_beta]
// [START speech_transcribe_diarization_gcs_beta]
/**
 * Transcribes a remote audio file using speaker diarization and prints the words grouped by
 * speaker, starting a new "Speaker N:" line each time the speaker tag changes.
 *
 * <p>If the response contains no results, or the last result carries no alternative or no words,
 * a short notice is printed instead of failing with an {@link IndexOutOfBoundsException}.
 *
 * @param gcsUri the path to an audio file.
 * @throws Exception if creating the client, issuing the request, or waiting for the operation
 *     fails.
 */
public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Diarization is enabled with exactly two expected speakers (min == max == 2).
    SpeakerDiarizationConfig speakerDiarizationConfig =
        SpeakerDiarizationConfig.newBuilder()
            .setEnableSpeakerDiarization(true)
            .setMinSpeakerCount(2)
            .setMaxSpeakerCount(2)
            .build();

    // Configure request to enable Speaker diarization
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(8000)
            .setDiarizationConfig(speakerDiarizationConfig)
            .build();

    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    LongRunningRecognizeResponse longRunningRecognizeResponse = response.get();
    int resultsCount = longRunningRecognizeResponse.getResultsCount();
    // Guard against an empty response before indexing the last result.
    if (resultsCount == 0) {
      System.out.println("No diarization results returned.");
      return;
    }

    // Speaker Tags are only included in the last result object, which has only one alternative.
    SpeechRecognitionResult lastResult =
        longRunningRecognizeResponse.getResults(resultsCount - 1);
    if (lastResult.getAlternativesList().isEmpty()) {
      System.out.println("No diarization results returned.");
      return;
    }
    SpeechRecognitionAlternative alternative = lastResult.getAlternatives(0);
    if (alternative.getWordsCount() == 0) {
      System.out.println("No diarization results returned.");
      return;
    }

    // The alternative is made up of WordInfo objects that contain the speaker_tag.
    WordInfo wordInfo = alternative.getWords(0);
    int currentSpeakerTag = wordInfo.getSpeakerTag();

    // For each word, get all the words associated with one speaker, once the speaker changes,
    // add a new line with the new speaker and their spoken words.
    StringBuilder speakerWords =
        new StringBuilder(
            String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
    for (int i = 1; i < alternative.getWordsCount(); i++) {
      wordInfo = alternative.getWords(i);
      if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
        speakerWords.append(" ");
        speakerWords.append(wordInfo.getWord());
      } else {
        speakerWords.append(
            String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
        currentSpeakerTag = wordInfo.getSpeakerTag();
      }
    }
    System.out.println(speakerWords.toString());
  }
}
use of com.google.cloud.speech.v1.LongRunningRecognizeMetadata in project java-speech by googleapis.
the class RecognizeBeta method transcribeMultiLanguageGcs.
// [END speech_transcribe_multilanguage_beta]
// [START speech_transcribe_multilanguage_gcs_beta]
/**
 * Runs multi-language recognition on a remote audio file and prints the top transcript of every
 * returned result.
 *
 * @param gcsUri the path to the remote audio file
 * @throws Exception if creating the client, issuing the request, or waiting for the operation
 *     fails.
 */
public static void transcribeMultiLanguageGcs(String gcsUri) throws Exception {
  try (SpeechClient client = SpeechClient.create()) {
    // Alternative languages offered to the recognizer in addition to the primary one.
    ArrayList<String> alternativeLanguages = new ArrayList<>();
    alternativeLanguages.add("es-ES");
    alternativeLanguages.add("en-US");

    // LINEAR16 audio at 16 kHz; primary language ja-JP plus the alternatives above.
    RecognitionConfig recognitionConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setSampleRateHertz(16000)
            .setLanguageCode("ja-JP")
            .addAllAlternativeLanguageCodes(alternativeLanguages)
            .build();

    // Point the request at the remote audio file.
    RecognitionAudio remoteAudio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Start the operation without blocking, then report progress every 10 seconds.
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation =
        client.longRunningRecognizeAsync(recognitionConfig, remoteAudio);
    while (!operation.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    LongRunningRecognizeResponse recognizeResponse = operation.get();
    for (SpeechRecognitionResult recognitionResult : recognizeResponse.getResultsList()) {
      // Several alternatives may exist per result; only the first (most likely) is printed.
      SpeechRecognitionAlternative best = recognitionResult.getAlternatives(0);
      System.out.printf("Transcript : %s\n\n", best.getTranscript());
    }
  }
}
Aggregations