Use of com.google.cloud.speech.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
In the class InfiniteStreamRecognize, the method infiniteStreamingRecognize:
/**
 * Performs infinite streaming speech recognition.
 */
public static void infiniteStreamingRecognize(String languageCode) throws Exception {

  // Microphone input buffering
  class MicBuffer implements Runnable {

    @Override
    public void run() {
      System.out.println(YELLOW);
      System.out.println("Start speaking...Press Ctrl-C to stop");
      targetDataLine.start();
      byte[] data = new byte[BYTES_PER_BUFFER];
      while (targetDataLine.isOpen()) {
        try {
          int numBytesRead = targetDataLine.read(data, 0, data.length);
          if ((numBytesRead <= 0) && (targetDataLine.isOpen())) {
            continue;
          }
          sharedQueue.put(data.clone());
        } catch (InterruptedException e) {
          System.out.println("Microphone input buffering interrupted : " + e.getMessage());
        }
      }
    }
  }

  // Creating microphone input buffer thread
  MicBuffer micrunnable = new MicBuffer();
  Thread micThread = new Thread(micrunnable);
  ResponseObserver<StreamingRecognizeResponse> responseObserver = null;

  try (SpeechClient client = SpeechClient.create()) {
    ClientStream<StreamingRecognizeRequest> clientStream;
    responseObserver =
        new ResponseObserver<StreamingRecognizeResponse>() {

          ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

          public void onStart(StreamController controller) {
            referenceToStreamController = controller;
          }

          public void onResponse(StreamingRecognizeResponse response) {
            responses.add(response);
            StreamingRecognitionResult result = response.getResultsList().get(0);
            Duration resultEndTime = result.getResultEndTime();
            resultEndTimeInMS =
                (int) ((resultEndTime.getSeconds() * 1000) + (resultEndTime.getNanos() / 1000000));
            double correctedTime =
                resultEndTimeInMS - bridgingOffset + (STREAMING_LIMIT * restartCounter);
            SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
            if (result.getIsFinal()) {
              System.out.print(GREEN);
              System.out.print("\033[2K\r");
              System.out.printf(
                  "%s: %s [confidence: %.2f]\n",
                  convertMillisToDate(correctedTime),
                  alternative.getTranscript(),
                  alternative.getConfidence());
              isFinalEndTime = resultEndTimeInMS;
              lastTranscriptWasFinal = true;
            } else {
              System.out.print(RED);
              System.out.print("\033[2K\r");
              System.out.printf(
                  "%s: %s", convertMillisToDate(correctedTime), alternative.getTranscript());
              lastTranscriptWasFinal = false;
            }
          }

          public void onComplete() {}

          public void onError(Throwable t) {}
        };

    clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);

    RecognitionConfig recognitionConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
            .setLanguageCode(languageCode)
            .setSampleRateHertz(16000)
            .build();
    StreamingRecognitionConfig streamingRecognitionConfig =
        StreamingRecognitionConfig.newBuilder()
            .setConfig(recognitionConfig)
            .setInterimResults(true)
            .build();

    StreamingRecognizeRequest request =
        StreamingRecognizeRequest.newBuilder()
            .setStreamingConfig(streamingRecognitionConfig)
            .build();
    clientStream.send(request);

    try {
      // SampleRate: 16000 Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
      // bigEndian: false
      AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
      // Set the system information to read from the microphone audio stream
      DataLine.Info targetInfo = new Info(TargetDataLine.class, audioFormat);

      if (!AudioSystem.isLineSupported(targetInfo)) {
        System.out.println("Microphone not supported");
        System.exit(0);
      }
      // Target data line captures the audio stream the microphone produces.
      targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
      targetDataLine.open(audioFormat);
      micThread.start();

      long startTime = System.currentTimeMillis();

      while (true) {
        long estimatedTime = System.currentTimeMillis() - startTime;

        if (estimatedTime >= STREAMING_LIMIT) {
          clientStream.closeSend();
          // remove Observer
          referenceToStreamController.cancel();

          if (resultEndTimeInMS > 0) {
            finalRequestEndTime = isFinalEndTime;
          }
          resultEndTimeInMS = 0;

          lastAudioInput = null;
          lastAudioInput = audioInput;
          audioInput = new ArrayList<ByteString>();

          restartCounter++;

          if (!lastTranscriptWasFinal) {
            System.out.print('\n');
          }

          newStream = true;

          clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
          request =
              StreamingRecognizeRequest.newBuilder()
                  .setStreamingConfig(streamingRecognitionConfig)
                  .build();

          System.out.println(YELLOW);
          System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);

          startTime = System.currentTimeMillis();
        } else {
          if ((newStream) && (lastAudioInput.size() > 0)) {
            // if this is the first audio from a new request
            // calculate amount of unfinalized audio from last request
            // resend the audio to the speech client before incoming audio
            double chunkTime = STREAMING_LIMIT / lastAudioInput.size();
            // ms length of each chunk in previous request audio arrayList
            if (chunkTime != 0) {
              if (bridgingOffset < 0) {
                // bridging offset accounts for time of resent audio
                // calculated from last request
                bridgingOffset = 0;
              }
              if (bridgingOffset > finalRequestEndTime) {
                bridgingOffset = finalRequestEndTime;
              }
              int chunksFromMs =
                  (int) Math.floor((finalRequestEndTime - bridgingOffset) / chunkTime);
              // chunksFromMs is the number of chunks to resend
              bridgingOffset =
                  (int) Math.floor((lastAudioInput.size() - chunksFromMs) * chunkTime);
              // set bridging offset for next request

              for (int i = chunksFromMs; i < lastAudioInput.size(); i++) {
                request =
                    StreamingRecognizeRequest.newBuilder()
                        .setAudioContent(lastAudioInput.get(i))
                        .build();
                clientStream.send(request);
              }
            }
            newStream = false;
          }

          tempByteString = ByteString.copyFrom(sharedQueue.take());
          request =
              StreamingRecognizeRequest.newBuilder().setAudioContent(tempByteString).build();
          audioInput.add(tempByteString);
        }

        clientStream.send(request);
      }
    } catch (Exception e) {
      System.out.println(e);
    }
  }
}
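The method above relies on shared static state declared elsewhere in the InfiniteStreamRecognize class, plus the convertMillisToDate helper. A minimal sketch of those declarations, consistent with the googleapis sample (abbreviated here for context):

private static final int STREAMING_LIMIT = 290000; // ~5 minutes; restart before the API's stream limit

public static final String RED = "\033[0;31m";
public static final String GREEN = "\033[0;32m";
public static final String YELLOW = "\033[0;33m";

private static final int BYTES_PER_BUFFER = 6400; // buffer size in bytes

// Bookkeeping used to restart the stream without losing audio
private static int restartCounter = 0;
private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
private static int resultEndTimeInMS = 0;
private static int isFinalEndTime = 0;
private static int finalRequestEndTime = 0;
private static boolean newStream = true;
private static double bridgingOffset = 0;
private static boolean lastTranscriptWasFinal = false;
private static StreamController referenceToStreamController;
private static ByteString tempByteString;

// Audio flows from the microphone thread to the request loop through this queue
private static final BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue<byte[]>();
private static TargetDataLine targetDataLine;

// Formats elapsed milliseconds as "MM:SS /" for the transcript prefix
public static String convertMillisToDate(double milliSeconds) {
  long millis = (long) milliSeconds;
  DecimalFormat format = new DecimalFormat();
  format.setMinimumIntegerDigits(2);
  return String.format(
      "%s:%s /",
      format.format(TimeUnit.MILLISECONDS.toMinutes(millis)),
      format.format(
          TimeUnit.MILLISECONDS.toSeconds(millis)
              - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis))));
}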
Use of com.google.cloud.speech.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
In the class Recognize, the method syncRecognizeWords:
// [END speech_transcribe_sync]
/**
 * Performs sync recognize and prints word time offsets.
 *
 * @param fileName the path to a PCM audio file to transcribe and get word offsets for.
 */
public static void syncRecognizeWords(String fileName) throws Exception {
  try (SpeechClient speech = SpeechClient.create()) {
    Path path = Paths.get(fileName);
    byte[] data = Files.readAllBytes(path);
    ByteString audioBytes = ByteString.copyFrom(data);

    // Configure request with local raw PCM audio
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .setEnableWordTimeOffsets(true)
            .build();
    RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build();

    // Use blocking call to get audio transcript
    RecognizeResponse response = speech.recognize(config, audio);
    List<SpeechRecognitionResult> results = response.getResultsList();

    for (SpeechRecognitionResult result : results) {
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcription: %s%n", alternative.getTranscript());
      for (WordInfo wordInfo : alternative.getWordsList()) {
        System.out.println(wordInfo.getWord());
        System.out.printf(
            "\t%s.%s sec - %s.%s sec\n",
            wordInfo.getStartTime().getSeconds(),
            wordInfo.getStartTime().getNanos() / 100000000,
            wordInfo.getEndTime().getSeconds(),
            wordInfo.getEndTime().getNanos() / 100000000);
      }
    }
  }
}
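The printf above reduces each protobuf timestamp to tenths of a second (nanos / 100000000). If full millisecond precision is preferred, a hypothetical variant of the inner loop body:

  // Convert seconds + nanos to total milliseconds
  long startMs =
      wordInfo.getStartTime().getSeconds() * 1000 + wordInfo.getStartTime().getNanos() / 1_000_000;
  long endMs =
      wordInfo.getEndTime().getSeconds() * 1000 + wordInfo.getEndTime().getNanos() / 1_000_000;
  System.out.printf("\t%d ms - %d ms%n", startMs, endMs);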
Use of com.google.cloud.speech.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
In the class Recognize, the method asyncRecognizeGcs:
// [END speech_transcribe_async_word_time_offsets_gcs]
// [START speech_transcribe_async_gcs]
/**
 * Performs non-blocking speech recognition on a remote FLAC file and prints the transcription.
 *
 * @param gcsUri the path to the remote FLAC audio file to transcribe.
 */
public static void asyncRecognizeGcs(String gcsUri) throws Exception {
  // Configure polling algorithm
  SpeechSettings.Builder speechSettings = SpeechSettings.newBuilder();
  TimedRetryAlgorithm timedRetryAlgorithm =
      OperationTimedPollAlgorithm.create(
          RetrySettings.newBuilder()
              .setInitialRetryDelay(Duration.ofMillis(500L))
              .setRetryDelayMultiplier(1.5)
              .setMaxRetryDelay(Duration.ofMillis(5000L))
              .setInitialRpcTimeout(Duration.ZERO) // ignored
              .setRpcTimeoutMultiplier(1.0) // ignored
              .setMaxRpcTimeout(Duration.ZERO) // ignored
              .setTotalTimeout(Duration.ofHours(24L)) // set polling timeout to 24 hours
              .build());
  speechSettings.longRunningRecognizeOperationSettings().setPollingAlgorithm(timedRetryAlgorithm);

  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
  try (SpeechClient speech = SpeechClient.create(speechSettings.build())) {
    // Configure remote file request for FLAC
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.FLAC)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .build();
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speech.longRunningRecognizeAsync(config, audio);

    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    List<SpeechRecognitionResult> results = response.get().getResultsList();

    for (SpeechRecognitionResult result : results) {
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcription: %s\n", alternative.getTranscript());
    }
  }
}
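Since OperationFuture.get() itself blocks until the long-running operation completes, the polling loop above exists mainly to print progress while waiting. A hypothetical invocation (the bucket and object names are placeholders):

  asyncRecognizeGcs("gs://my-bucket/audio/recording.flac");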
Use of com.google.cloud.speech.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
In the class Recognize, the method transcribeFileWithAutomaticPunctuation:
// [END speech_transcribe_streaming]
// [START speech_sync_recognize_punctuation]
/**
 * Performs transcription with automatic punctuation on raw PCM audio data.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 */
public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);

  try (SpeechClient speechClient = SpeechClient.create()) {
    // Configure request with local raw PCM audio
    RecognitionConfig recConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .setEnableAutomaticPunctuation(true)
            .build();

    // Get the contents of the local audio file
    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();

    // Perform the transcription request
    RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio);

    // Just print the first result here.
    SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);

    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);

    // Print out the result
    System.out.printf("Transcript : %s\n", alternative.getTranscript());
  }
}
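A hypothetical call, assuming a 16 kHz mono LINEAR16 file on local disk (the path is a placeholder):

  transcribeFileWithAutomaticPunctuation("resources/audio.raw");

Note that the synchronous recognize call handles roughly one minute of audio at most; longer recordings should go through longRunningRecognizeAsync, as in the GCS examples here.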
Use of com.google.cloud.speech.v1.SpeechRecognitionAlternative in project java-speech by googleapis.
In the class Recognize, the method transcribeModelSelectionGcs:
// [END speech_transcribe_model_selection]
// [START speech_transcribe_model_selection_gcs]
/**
 * Performs transcription of the remote audio file asynchronously with the selected model.
 *
 * @param gcsUri the path to the remote audio file to transcribe.
 */
public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
  try (SpeechClient speech = SpeechClient.create()) {
    // Configure request with video media type
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .setModel("video")
            .build();
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speech.longRunningRecognizeAsync(config, audio);

    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    List<SpeechRecognitionResult> results = response.get().getResultsList();

    // Just print the first result here.
    SpeechRecognitionResult result = results.get(0);

    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
    System.out.printf("Transcript : %s\n", alternative.getTranscript());
  }
}
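setModel selects a recognition model tuned for the audio source. Besides "video", the v1 API documents models such as "phone_call", "command_and_search", and "default". A hypothetical variant of the config above for telephony audio, which is typically sampled at 8 kHz:

  // Hypothetical: model tuned for phone-call audio
  RecognitionConfig phoneConfig =
      RecognitionConfig.newBuilder()
          .setEncoding(AudioEncoding.LINEAR16)
          .setLanguageCode("en-US")
          .setSampleRateHertz(8000)
          .setModel("phone_call")
          .build();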