Use of com.google.cloud.speech.v1.SpeakerDiarizationConfig in project java-speech by googleapis: the class RecognizeBeta, method transcribeDiarizationGcs.
// [END speech_transcribe_diarization_beta]

// [START speech_transcribe_diarization_gcs_beta]
/**
 * Transcribe a remote audio file using speaker diarization.
 *
 * @param gcsUri the path to an audio file.
 */
public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    SpeakerDiarizationConfig speakerDiarizationConfig =
        SpeakerDiarizationConfig.newBuilder()
            .setEnableSpeakerDiarization(true)
            .setMinSpeakerCount(2)
            .setMaxSpeakerCount(2)
            .build();
    // Configure request to enable Speaker diarization
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(8000)
            .setDiarizationConfig(speakerDiarizationConfig)
            .build();
    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }
    // Speaker Tags are only included in the last result object, which has only one alternative.
    LongRunningRecognizeResponse longRunningRecognizeResponse = response.get();
    SpeechRecognitionAlternative alternative =
        longRunningRecognizeResponse
            .getResults(longRunningRecognizeResponse.getResultsCount() - 1)
            .getAlternatives(0);
    // The alternative is made up of WordInfo objects that contain the speaker_tag.
    WordInfo wordInfo = alternative.getWords(0);
    int currentSpeakerTag = wordInfo.getSpeakerTag();
    // For each word, get all the words associated with one speaker, once the speaker changes,
    // add a new line with the new speaker and their spoken words.
    StringBuilder speakerWords =
        new StringBuilder(
            String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
    for (int i = 1; i < alternative.getWordsCount(); i++) {
      wordInfo = alternative.getWords(i);
      if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
        speakerWords.append(" ");
        speakerWords.append(wordInfo.getWord());
      } else {
        speakerWords.append(
            String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
        currentSpeakerTag = wordInfo.getSpeakerTag();
      }
    }
    System.out.println(speakerWords.toString());
  }
}
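A minimal invocation sketch for the method above; the demo class name and the gs:// bucket and object are placeholders rather than paths from the source, and the class is assumed to sit alongside RecognizeBeta.

public class TranscribeDiarizationGcsDemo {
  public static void main(String[] args) throws Exception {
    // Placeholder URI; point this at a LINEAR16, 8000 Hz recording in Cloud Storage,
    // since those values are hard-coded in the RecognitionConfig above.
    String gcsUri = "gs://your-bucket/your-audio.wav";
    RecognizeBeta.transcribeDiarizationGcs(gcsUri);
  }
}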
Use of com.google.cloud.speech.v1.SpeakerDiarizationConfig in project java-speech by googleapis: the class TranscribeDiarization, method transcribeDiarization.
// Transcribe the given audio file using speaker diarization.
static void transcribeDiarization(String fileName) throws IOException {
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);
  // Initialize a client that will be used to send requests. Calling the "close" method
  // on the client safely cleans up any remaining background resources.
  try (SpeechClient client = SpeechClient.create()) {
    // Get the contents of the local audio file
    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
    SpeakerDiarizationConfig speakerDiarizationConfig =
        SpeakerDiarizationConfig.newBuilder()
            .setEnableSpeakerDiarization(true)
            .setMinSpeakerCount(2)
            .setMaxSpeakerCount(2)
            .build();
    // Configure request to enable Speaker diarization
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(8000)
            .setDiarizationConfig(speakerDiarizationConfig)
            .build();
    // Perform the transcription request
    RecognizeResponse recognizeResponse = client.recognize(config, recognitionAudio);
    // Speaker Tags are only included in the last result object, which has only one alternative.
    SpeechRecognitionAlternative alternative =
        recognizeResponse.getResults(recognizeResponse.getResultsCount() - 1).getAlternatives(0);
    // The alternative is made up of WordInfo objects that contain the speaker_tag.
    WordInfo wordInfo = alternative.getWords(0);
    int currentSpeakerTag = wordInfo.getSpeakerTag();
    // For each word, get all the words associated with one speaker, once the speaker changes,
    // add a new line with the new speaker and their spoken words.
    StringBuilder speakerWords =
        new StringBuilder(
            String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
    for (int i = 1; i < alternative.getWordsCount(); i++) {
      wordInfo = alternative.getWords(i);
      if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
        speakerWords.append(" ");
        speakerWords.append(wordInfo.getWord());
      } else {
        speakerWords.append(
            String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
        currentSpeakerTag = wordInfo.getSpeakerTag();
      }
    }
    System.out.println(speakerWords.toString());
  }
}
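A comparable calling sketch for the local-file variant; the demo class name and the file path are hypothetical, and the class is assumed to share a package with TranscribeDiarization because the method is declared package-private.

import java.io.IOException;

public class TranscribeDiarizationDemo {
  public static void main(String[] args) throws IOException {
    // Placeholder path; the config above expects LINEAR16 audio sampled at 8000 Hz.
    TranscribeDiarization.transcribeDiarization("commercial_mono.wav");
  }
}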
Use of com.google.cloud.speech.v1.SpeakerDiarizationConfig in project java-speech by googleapis: the class TranscribeDiarizationGcs, method transcribeDiarizationGcs.
// Transcribe the given GCS file using speaker diarization.
public static void transcribeDiarizationGcs(String gcsUri)
    throws IOException, ExecutionException, InterruptedException {
  // Initialize a client that will be used to send requests. Calling the "close" method
  // on the client safely cleans up any remaining background resources.
  try (SpeechClient speechClient = SpeechClient.create()) {
    SpeakerDiarizationConfig speakerDiarizationConfig =
        SpeakerDiarizationConfig.newBuilder()
            .setEnableSpeakerDiarization(true)
            .setMinSpeakerCount(2)
            .setMaxSpeakerCount(2)
            .build();
    // Configure request to enable Speaker diarization
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(8000)
            .setDiarizationConfig(speakerDiarizationConfig)
            .build();
    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> future =
        speechClient.longRunningRecognizeAsync(config, audio);
    System.out.println("Waiting for response...");
    // Speaker Tags are only included in the last result object, which has only one alternative.
    LongRunningRecognizeResponse response = future.get();
    SpeechRecognitionAlternative alternative =
        response.getResults(response.getResultsCount() - 1).getAlternatives(0);
    // The alternative is made up of WordInfo objects that contain the speaker_tag.
    WordInfo wordInfo = alternative.getWords(0);
    int currentSpeakerTag = wordInfo.getSpeakerTag();
    // For each word, get all the words associated with one speaker, once the speaker changes,
    // add a new line with the new speaker and their spoken words.
    StringBuilder speakerWords =
        new StringBuilder(
            String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
    for (int i = 1; i < alternative.getWordsCount(); i++) {
      wordInfo = alternative.getWords(i);
      if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
        speakerWords.append(" ");
        speakerWords.append(wordInfo.getWord());
      } else {
        speakerWords.append(
            String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
        currentSpeakerTag = wordInfo.getSpeakerTag();
      }
    }
    System.out.println(speakerWords.toString());
  }
}
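All of the snippets on this page pin both the minimum and maximum speaker count to 2. When the number of speakers is not known in advance, the same builder can be given a wider range; a sketch, where the bounds 2 and 6 are arbitrary example values rather than anything from the source:

SpeakerDiarizationConfig diarizationConfig =
    SpeakerDiarizationConfig.newBuilder()
        .setEnableSpeakerDiarization(true)
        .setMinSpeakerCount(2) // lower-bound hint for the diarization model
        .setMaxSpeakerCount(6) // upper-bound hint; widen when the count is uncertain
        .build();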
Use of com.google.cloud.speech.v1.SpeakerDiarizationConfig in project java-speech by googleapis: the class RecognizeBeta, method transcribeDiarization.
// [END speech_transcribe_recognition_metadata_beta]

// [START speech_transcribe_diarization_beta]
/**
 * Transcribe the given audio file using speaker diarization.
 *
 * @param fileName the path to an audio file.
 */
public static void transcribeDiarization(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Get the contents of the local audio file
    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
    SpeakerDiarizationConfig speakerDiarizationConfig =
        SpeakerDiarizationConfig.newBuilder()
            .setEnableSpeakerDiarization(true)
            .setMinSpeakerCount(2)
            .setMaxSpeakerCount(2)
            .build();
    // Configure request to enable Speaker diarization
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(8000)
            .setDiarizationConfig(speakerDiarizationConfig)
            .build();
    // Perform the transcription request
    RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
    // Speaker Tags are only included in the last result object, which has only one alternative.
    SpeechRecognitionAlternative alternative =
        recognizeResponse.getResults(recognizeResponse.getResultsCount() - 1).getAlternatives(0);
    // The alternative is made up of WordInfo objects that contain the speaker_tag.
    WordInfo wordInfo = alternative.getWords(0);
    int currentSpeakerTag = wordInfo.getSpeakerTag();
    // For each word, get all the words associated with one speaker, once the speaker changes,
    // add a new line with the new speaker and their spoken words.
    StringBuilder speakerWords =
        new StringBuilder(
            String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
    for (int i = 1; i < alternative.getWordsCount(); i++) {
      wordInfo = alternative.getWords(i);
      if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
        speakerWords.append(" ");
        speakerWords.append(wordInfo.getWord());
      } else {
        speakerWords.append(
            String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
        currentSpeakerTag = wordInfo.getSpeakerTag();
      }
    }
    System.out.println(speakerWords.toString());
  }
}
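For reference, a sketch of the imports the v1 snippets above rely on; RecognizeBeta in the repository may pull the same types from the v1p1beta1 package instead, so treat the exact package names as an assumption.

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.WordInfo;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.ExecutionException;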