Use of com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative in project java-speech by googleapis.
From the class RecognizeBeta, method transcribeMultiChannel.
// [END speech_transcribe_diarization_gcs_beta]
// [START speech_transcribe_multichannel_beta]
/**
 * Transcribe a local audio file with multi-channel recognition
 *
 * @param fileName the path to local audio file
 */
public static void transcribeMultiChannel(String fileName) throws Exception {
  byte[] audioBytes = Files.readAllBytes(Paths.get(fileName));
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Wrap the raw file bytes for the recognition request
    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(audioBytes)).build();

    // Recognize each of the two audio channels separately
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(44100)
            .setAudioChannelCount(2)
            .setEnableSeparateRecognitionPerChannel(true)
            .build();

    // Perform the synchronous transcription request
    RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);

    // Print each transcript together with the channel it came from
    for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
      // Several alternatives may be returned; the first is the most likely.
      SpeechRecognitionAlternative alternative = result.getAlternatives(0);
      System.out.format("Transcript : %s\n", alternative.getTranscript());
      System.out.printf("Channel Tag : %s\n\n", result.getChannelTag());
    }
  }
}
Use of com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative in project java-speech by googleapis.
From the class RecognizeBeta, method transcribeFileWithMetadata.
// [START speech_transcribe_recognition_metadata_beta]
/**
 * Transcribe the given audio file and include recognition metadata in the request.
 *
 * @param fileName the path to an audio file.
 */
public static void transcribeFileWithMetadata(String fileName) throws Exception {
  byte[] audioBytes = Files.readAllBytes(Paths.get(fileName));
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Wrap the raw file bytes for the recognition request
    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(audioBytes)).build();

    // Describe how the audio was captured. Most fields are enums found in
    // speech.enums.RecognitionMetadata; a few are free-form strings.
    RecognitionMetadata metadata =
        RecognitionMetadata.newBuilder()
            .setInteractionType(InteractionType.DISCUSSION)
            .setMicrophoneDistance(MicrophoneDistance.NEARFIELD)
            .setRecordingDeviceType(RecordingDeviceType.SMARTPHONE)
            .setRecordingDeviceName("Pixel 2 XL") // free-form device name
            .setIndustryNaicsCodeOfAudio(519190)
            .build();

    // Attach the metadata to the recognition config
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(8000)
            .setMetadata(metadata)
            .build();

    // Perform the synchronous transcription request
    RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);

    for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
      // Several alternatives may be returned; the first is the most likely.
      SpeechRecognitionAlternative alternative = result.getAlternatives(0);
      System.out.format("Transcript: %s\n\n", alternative.getTranscript());
    }
  }
}
Use of com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative in project java-speech by googleapis.
From the class RecognizeBeta, method transcribeMultiChannelGcs.
// [END speech_transcribe_multichannel_beta]
// [START speech_transcribe_multichannel_gcs_beta]
/**
 * Transcribe a remote audio file with multi-channel recognition
 *
 * @param gcsUri the path to the audio file
 */
public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Recognize each of the two audio channels separately
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(44100)
            .setAudioChannelCount(2)
            .setEnableSeparateRecognitionPerChannel(true)
            .build();

    // Point the request at the file stored in Cloud Storage
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Kick off the long-running transcription and poll until it completes
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    // Print every transcript together with the channel it was recognized on
    for (SpeechRecognitionResult result : response.get().getResultsList()) {
      // Several alternatives may be returned; the first is the most likely.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcript : %s\n", alternative.getTranscript());
      System.out.printf("Channel Tag : %s\n\n", result.getChannelTag());
    }
  }
}
Use of com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative in project java-speech by googleapis.
From the class RecognizeBeta, method transcribeDiarizationGcs.
// [END speech_transcribe_diarization_beta]
// [START speech_transcribe_diarization_gcs_beta]
/**
 * Transcribe a remote audio file using speaker diarization.
 *
 * <p>Prints each run of consecutive words spoken by the same speaker on its own line, e.g.
 * {@code Speaker 1: hello there}. Speaker tags are only populated on the last result of the
 * long-running response, so only that result is inspected.
 *
 * @param gcsUri the path to an audio file.
 * @throws Exception if the client cannot be created or the operation fails.
 */
public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Ask the service to distinguish exactly two speakers.
    SpeakerDiarizationConfig speakerDiarizationConfig =
        SpeakerDiarizationConfig.newBuilder()
            .setEnableSpeakerDiarization(true)
            .setMinSpeakerCount(2)
            .setMaxSpeakerCount(2)
            .build();

    // Configure request to enable Speaker diarization
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(8000)
            .setDiarizationConfig(speakerDiarizationConfig)
            .build();

    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription; poll until done.
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    LongRunningRecognizeResponse longRunningRecognizeResponse = response.get();
    // Guard: silent or unrecognizable audio yields no results; the original code would
    // throw IndexOutOfBoundsException on getResults(-1) here.
    if (longRunningRecognizeResponse.getResultsCount() == 0) {
      System.out.println("No transcription results returned.");
      return;
    }

    // Speaker tags are only included in the last result object, which has only one alternative.
    SpeechRecognitionAlternative alternative =
        longRunningRecognizeResponse
            .getResults(longRunningRecognizeResponse.getResultsCount() - 1)
            .getAlternatives(0);
    // Guard: an alternative without word-level info would crash on getWords(0).
    if (alternative.getWordsCount() == 0) {
      System.out.println("No word-level results returned.");
      return;
    }

    // The alternative is made up of WordInfo objects that contain the speaker_tag.
    WordInfo wordInfo = alternative.getWords(0);
    int currentSpeakerTag = wordInfo.getSpeakerTag();

    // For each word, collect all the words associated with one speaker; once the speaker
    // changes, start a new line with the new speaker and their spoken words.
    StringBuilder speakerWords =
        new StringBuilder(
            String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
    for (int i = 1; i < alternative.getWordsCount(); i++) {
      wordInfo = alternative.getWords(i);
      if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
        speakerWords.append(" ");
        speakerWords.append(wordInfo.getWord());
      } else {
        speakerWords.append(
            String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
        currentSpeakerTag = wordInfo.getSpeakerTag();
      }
    }
    System.out.println(speakerWords.toString());
  }
}
Use of com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative in project java-speech by googleapis.
From the class RecognizeBeta, method transcribeMultiLanguageGcs.
// [END speech_transcribe_multilanguage_beta]
// [START speech_transcribe_multilanguage_gcs_beta]
/**
 * Transcribe a remote audio file with multi-language recognition
 *
 * @param gcsUri the path to the remote audio file
 */
public static void transcribeMultiLanguageGcs(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Additional languages the service may detect besides the primary one
    ArrayList<String> languageList = new ArrayList<>();
    languageList.add("es-ES");
    languageList.add("en-US");

    // Primary language is Japanese, with Spanish and US English as alternatives
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setSampleRateHertz(16000)
            .setLanguageCode("ja-JP")
            .addAllAlternativeLanguageCodes(languageList)
            .build();

    // Point the request at the file stored in Cloud Storage
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Kick off the long-running transcription and poll until it completes
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    for (SpeechRecognitionResult result : response.get().getResultsList()) {
      // Several alternatives may be returned; the first is the most likely.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcript : %s\n\n", alternative.getTranscript());
    }
  }
}
Aggregations