use of com.google.cloud.videointelligence.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
the class Recognize method syncRecognizeFile.
// [START speech_transcribe_sync]
/**
 * Transcribes a local raw PCM audio file with a single blocking request and prints the top
 * transcript of every recognition result.
 *
 * <p>The request is configured as LINEAR16, 16000 Hz, en-US.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 */
public static void syncRecognizeFile(String fileName) throws Exception {
  try (SpeechClient client = SpeechClient.create()) {
    // Load the whole file into memory and wrap it for the request.
    ByteString content = ByteString.copyFrom(Files.readAllBytes(Paths.get(fileName)));

    // Describe the audio payload and how it is encoded.
    RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder().setContent(content).build();
    RecognitionConfig recognitionConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .build();

    // recognize() is the blocking variant: the transcript is available once it returns.
    RecognizeResponse recognized = client.recognize(recognitionConfig, recognitionAudio);
    for (SpeechRecognitionResult chunk : recognized.getResultsList()) {
      // Each chunk of speech may have several candidate transcripts; the first one is the
      // most likely, so only that one is printed.
      String transcript = chunk.getAlternativesList().get(0).getTranscript();
      System.out.printf("Transcription: %s%n", transcript);
    }
  }
}
use of com.google.cloud.videointelligence.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
the class Recognize method streamingTranscribeWithAutomaticPunctuation.
// [END speech_transcribe_auto_punctuation]
// [START speech_stream_recognize_punctuation]
/**
 * Performs streaming speech recognition with automatic punctuation on raw PCM audio data and
 * prints the first alternative of the first result of every response that carries one.
 *
 * <p>The request is configured as LINEAR16, 16000 Hz, en-US. The whole file is read into memory
 * and sent as a single audio message after the initial configuration message.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 */
public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] data = Files.readAllBytes(path);

  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
  try (SpeechClient speech = SpeechClient.create()) {
    // Configure request with local raw PCM audio
    RecognitionConfig recConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .setEnableAutomaticPunctuation(true)
            .build();

    // Build the streaming config with the audio config
    StreamingRecognitionConfig config =
        StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();

    // Collects every streamed message and exposes them (or the failure) through a future that
    // completes when the stream ends.
    class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
      private final SettableFuture<List<T>> future = SettableFuture.create();
      private final List<T> messages = new java.util.ArrayList<>();

      @Override
      public void onNext(T message) {
        messages.add(message);
      }

      @Override
      public void onError(Throwable t) {
        future.setException(t);
      }

      @Override
      public void onCompleted() {
        future.set(messages);
      }

      // Returns the SettableFuture object to get received messages / exceptions.
      public SettableFuture<List<T>> future() {
        return future;
      }
    }

    ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
        new ResponseApiStreamingObserver<>();

    BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
        speech.streamingRecognizeCallable();

    ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
        callable.bidiStreamingCall(responseObserver);

    // The first request must **only** contain the audio configuration:
    requestObserver.onNext(
        StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());

    // Subsequent requests must **only** contain the audio data.
    requestObserver.onNext(
        StreamingRecognizeRequest.newBuilder().setAudioContent(ByteString.copyFrom(data)).build());

    // Mark transmission as completed after sending the data.
    requestObserver.onCompleted();

    // Blocks until the response stream completes; a stream error surfaces here as an exception.
    List<StreamingRecognizeResponse> responses = responseObserver.future().get();

    for (StreamingRecognizeResponse response : responses) {
      // A streaming response may carry no results at all; indexing into an empty list would
      // throw, so such responses are skipped.
      if (response.getResultsCount() == 0) {
        continue;
      }
      // For streaming recognize, the results list has one is_final result (if available) followed
      // by a number of in-progress results (if interim_results is true) for subsequent
      // utterances. Just print the first result here.
      StreamingRecognitionResult result = response.getResultsList().get(0);
      if (result.getAlternativesCount() == 0) {
        continue;
      }
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcript : %s\n", alternative.getTranscript());
    }
  }
}
use of com.google.cloud.videointelligence.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
the class Recognize method syncRecognizeGcs.
// [START speech_transcribe_sync_gcs]
/**
 * Transcribes a remote FLAC audio file, referenced by URI, with a single blocking request and
 * prints the top transcript of every recognition result.
 *
 * <p>The request is configured as FLAC, 16000 Hz, en-US.
 *
 * @param gcsUri the path to the remote FLAC audio file to transcribe.
 */
public static void syncRecognizeGcs(String gcsUri) throws Exception {
  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS; try-with-resources closes it.
  try (SpeechClient client = SpeechClient.create()) {
    // Point the request at the remote file instead of uploading bytes.
    RecognitionAudio remoteAudio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
    RecognitionConfig flacConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.FLAC)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .build();

    // Blocking call: returns once the transcript is available.
    RecognizeResponse recognized = client.recognize(flacConfig, remoteAudio);

    for (int i = 0; i < recognized.getResultsCount(); i++) {
      // Several alternative transcripts may exist for a chunk of speech; print only the first
      // (most likely) one.
      String transcript = recognized.getResults(i).getAlternatives(0).getTranscript();
      System.out.printf("Transcription: %s%n", transcript);
    }
  }
}
use of com.google.cloud.videointelligence.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
the class RecognizeBeta method transcribeDiarization.
// [END speech_transcribe_recognition_metadata_beta]
// [START speech_transcribe_diarization_beta]
/**
 * Transcribe the given audio file using speaker diarization and print the words grouped by
 * speaker, one line per uninterrupted run of words from the same speaker.
 *
 * <p>The request is configured as LINEAR16, 8000 Hz, en-US with exactly two speakers. If the
 * response contains no results, or the selected alternative has no word information, a short
 * message is printed instead of the transcript.
 *
 * @param fileName the path to an audio file.
 */
public static void transcribeDiarization(String fileName) throws Exception {
  // Get the contents of the local audio file
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);

  try (SpeechClient speechClient = SpeechClient.create()) {
    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();

    SpeakerDiarizationConfig speakerDiarizationConfig =
        SpeakerDiarizationConfig.newBuilder()
            .setEnableSpeakerDiarization(true)
            .setMinSpeakerCount(2)
            .setMaxSpeakerCount(2)
            .build();

    // Configure request to enable Speaker diarization
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(8000)
            .setDiarizationConfig(speakerDiarizationConfig)
            .build();

    // Perform the transcription request
    RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);

    // With no results the "last result" index would be -1 and the lookup below would throw.
    int lastIndex = recognizeResponse.getResultsCount() - 1;
    if (lastIndex < 0 || recognizeResponse.getResults(lastIndex).getAlternativesCount() == 0) {
      System.out.println("No transcription results returned.");
      return;
    }

    // Speaker Tags are only included in the last result object, which has only one alternative.
    SpeechRecognitionAlternative alternative =
        recognizeResponse.getResults(lastIndex).getAlternatives(0);

    // Without word information there is nothing to group by speaker.
    if (alternative.getWordsCount() == 0) {
      System.out.println("No word-level speaker information returned.");
      return;
    }

    // The alternative is made up of WordInfo objects that contain the speaker_tag.
    WordInfo wordInfo = alternative.getWords(0);
    int currentSpeakerTag = wordInfo.getSpeakerTag();

    // For each word, get all the words associated with one speaker, once the speaker changes,
    // add a new line with the new speaker and their spoken words.
    StringBuilder speakerWords =
        new StringBuilder(
            String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
    for (int i = 1; i < alternative.getWordsCount(); i++) {
      wordInfo = alternative.getWords(i);
      if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
        speakerWords.append(" ");
        speakerWords.append(wordInfo.getWord());
      } else {
        speakerWords.append(
            String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
        currentSpeakerTag = wordInfo.getSpeakerTag();
      }
    }

    System.out.println(speakerWords.toString());
  }
}
use of com.google.cloud.videointelligence.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
the class RecognizeBeta method transcribeWordLevelConfidenceGcs.
// [END speech_transcribe_word_level_confidence_beta]
// [START speech_transcribe_word_level_confidence_gcs_beta]
/**
 * Transcribe a remote audio file with word level confidence and print the top transcript of the
 * first result, followed by its first word and that word's confidence.
 *
 * <p>The request is configured as FLAC, 44100 Hz, en-US and is submitted as a long-running
 * operation that is polled every 10 seconds. If the operation returns no results or no
 * alternatives, a short message is printed instead; if the alternative has no word information,
 * only the transcript is printed.
 *
 * @param gcsUri path to the remote audio file
 */
public static void transcribeWordLevelConfidenceGcs(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Configure request to enable word level confidence
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.FLAC)
            .setSampleRateHertz(44100)
            .setLanguageCode("en-US")
            .setEnableWordConfidence(true)
            .build();

    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);

    // Poll so that progress is visible on the console while the operation runs.
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    LongRunningRecognizeResponse recognizeResponse = response.get();

    // The operation can finish without any results; indexing into an empty list would throw.
    if (recognizeResponse.getResultsCount() == 0) {
      System.out.println("No transcription results returned.");
      return;
    }

    // Just print the first result here.
    SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
    if (result.getAlternativesCount() == 0) {
      System.out.println("No transcription results returned.");
      return;
    }

    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);

    // Print out the result
    System.out.printf("Transcript : %s\n", alternative.getTranscript());

    // Word details are only printed when the alternative actually carries word information.
    if (alternative.getWordsCount() > 0) {
      System.out.format(
          "First Word and Confidence : %s %s \n",
          alternative.getWords(0).getWord(), alternative.getWords(0).getConfidence());
    }
  }
}
Aggregations