Use of com.google.cloud.speech.v1.StreamingRecognitionResult in project java-speech by googleapis.
Class InfiniteStreamRecognize, method infiniteStreamingRecognize.
/**
 * Performs infinite streaming speech recognition on microphone input.
 *
 * <p>A background thread reads the microphone and places audio buffers on {@code sharedQueue}; this
 * method takes them from the queue and sends them to the Speech API. Once a stream has been open
 * for {@code STREAMING_LIMIT} milliseconds it is closed and cancelled, a new stream is opened, and
 * the tail of the previous stream's audio (the part after the last final result) is re-sent before
 * any new audio.
 *
 * @param languageCode language code placed in the recognition config
 * @throws Exception if the client cannot be created or the initial request cannot be sent; errors
 *     raised once audio capture has started are caught and printed instead
 */
public static void infiniteStreamingRecognize(String languageCode) throws Exception {
  // Microphone Input buffering
  class MicBuffer implements Runnable {
    @Override
    public void run() {
      System.out.println(YELLOW);
      System.out.println("Start speaking...Press Ctrl-C to stop");
      targetDataLine.start();
      byte[] data = new byte[BYTES_PER_BUFFER];
      while (targetDataLine.isOpen()) {
        try {
          int numBytesRead = targetDataLine.read(data, 0, data.length);
          if ((numBytesRead <= 0) && (targetDataLine.isOpen())) {
            continue;
          }
          // Clone because the same array is reused for the next read.
          sharedQueue.put(data.clone());
        } catch (InterruptedException e) {
          System.out.println("Microphone input buffering interrupted : " + e.getMessage());
          // Restore the interrupt flag and stop capturing: retrying put() on an interrupted
          // thread would only throw again.
          Thread.currentThread().interrupt();
          return;
        }
      }
    }
  }
  // Creating microphone input buffer thread
  MicBuffer micrunnable = new MicBuffer();
  Thread micThread = new Thread(micrunnable);
  ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
  try (SpeechClient client = SpeechClient.create()) {
    ClientStream<StreamingRecognizeRequest> clientStream;
    // Responses are printed as they arrive and are not retained: this method never finishes on
    // its own, so keeping every response would grow memory without bound.
    responseObserver = new ResponseObserver<StreamingRecognizeResponse>() {
      @Override
      public void onStart(StreamController controller) {
        // Kept so the main loop can cancel this stream when it restarts.
        referenceToStreamController = controller;
      }

      @Override
      public void onResponse(StreamingRecognizeResponse response) {
        // A response is not guaranteed to carry a result (or a result an alternative);
        // skip it rather than fail on get(0).
        if (response.getResultsList().isEmpty()) {
          return;
        }
        StreamingRecognitionResult result = response.getResultsList().get(0);
        if (result.getAlternativesList().isEmpty()) {
          return;
        }
        Duration resultEndTime = result.getResultEndTime();
        resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000) + (resultEndTime.getNanos() / 1000000));
        // Shift the stream-relative end time by the re-sent audio and the completed streams.
        double correctedTime = resultEndTimeInMS - bridgingOffset + (STREAMING_LIMIT * restartCounter);
        SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
        if (result.getIsFinal()) {
          System.out.print(GREEN);
          System.out.print("\033[2K\r");
          System.out.printf("%s: %s [confidence: %.2f]\n", convertMillisToDate(correctedTime), alternative.getTranscript(), alternative.getConfidence());
          isFinalEndTime = resultEndTimeInMS;
          lastTranscriptWasFinal = true;
        } else {
          System.out.print(RED);
          System.out.print("\033[2K\r");
          System.out.printf("%s: %s", convertMillisToDate(correctedTime), alternative.getTranscript());
          lastTranscriptWasFinal = false;
        }
      }

      @Override
      public void onComplete() {
      }

      @Override
      public void onError(Throwable t) {
        // Intentionally empty. NOTE(review): cancelling the stream on every restart is
        // presumably reported here, so printing would be noisy - confirm before changing.
      }
    };
    clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
    RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder().setEncoding(RecognitionConfig.AudioEncoding.LINEAR16).setLanguageCode(languageCode).setSampleRateHertz(16000).build();
    StreamingRecognitionConfig streamingRecognitionConfig = StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).setInterimResults(true).build();
    // The first request on a stream carries only the configuration.
    StreamingRecognizeRequest request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamingRecognitionConfig).build();
    clientStream.send(request);
    try {
      // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
      // bigEndian: false
      AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
      // Set the system information to read from the microphone audio
      DataLine.Info targetInfo = new Info(TargetDataLine.class, audioFormat);
      if (!AudioSystem.isLineSupported(targetInfo)) {
        System.out.println("Microphone not supported");
        System.exit(0);
      }
      // Target data line captures the audio stream the microphone produces.
      targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
      targetDataLine.open(audioFormat);
      micThread.start();
      long startTime = System.currentTimeMillis();
      while (true) {
        long estimatedTime = System.currentTimeMillis() - startTime;
        if (estimatedTime >= STREAMING_LIMIT) {
          clientStream.closeSend();
          // remove Observer
          referenceToStreamController.cancel();
          if (resultEndTimeInMS > 0) {
            finalRequestEndTime = isFinalEndTime;
          }
          resultEndTimeInMS = 0;
          // Keep the audio of the stream that just ended so its tail can be re-sent.
          lastAudioInput = audioInput;
          audioInput = new ArrayList<ByteString>();
          restartCounter++;
          if (!lastTranscriptWasFinal) {
            System.out.print('\n');
          }
          newStream = true;
          clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
          // The config request built here is what gets sent at the bottom of this iteration.
          request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamingRecognitionConfig).build();
          System.out.println(YELLOW);
          System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);
          startTime = System.currentTimeMillis();
        } else {
          if ((newStream) && (lastAudioInput.size() > 0)) {
            // if this is the first audio from a new request
            // calculate amount of unfinalized audio from last request
            // resend the audio to the speech client before incoming audio
            // NOTE(review): if STREAMING_LIMIT is an integer type this division truncates
            // before widening to double - confirm whether that is intended.
            double chunkTime = STREAMING_LIMIT / lastAudioInput.size();
            // ms length of each chunk in previous request audio arrayList
            if (chunkTime != 0) {
              if (bridgingOffset < 0) {
                // bridging Offset accounts for time of resent audio
                // calculated from last request
                bridgingOffset = 0;
              }
              if (bridgingOffset > finalRequestEndTime) {
                bridgingOffset = finalRequestEndTime;
              }
              int chunksFromMs = (int) Math.floor((finalRequestEndTime - bridgingOffset) / chunkTime);
              // chunks from MS is number of chunks to resend
              bridgingOffset = (int) Math.floor((lastAudioInput.size() - chunksFromMs) * chunkTime);
              // set bridging offset for next request
              for (int i = chunksFromMs; i < lastAudioInput.size(); i++) {
                request = StreamingRecognizeRequest.newBuilder().setAudioContent(lastAudioInput.get(i)).build();
                clientStream.send(request);
              }
            }
            newStream = false;
          }
          // Blocks until the microphone thread has queued another buffer.
          tempByteString = ByteString.copyFrom(sharedQueue.take());
          request = StreamingRecognizeRequest.newBuilder().setAudioContent(tempByteString).build();
          audioInput.add(tempByteString);
        }
        clientStream.send(request);
      }
    } catch (Exception e) {
      System.out.println(e);
    } finally {
      // Release the microphone; the capture loop ends once the line is no longer open.
      if (targetDataLine != null) {
        targetDataLine.close();
      }
    }
  }
}
Use of com.google.cloud.speech.v1.StreamingRecognitionResult in project java-speech by googleapis.
Class Recognize, method streamingMicRecognize.
// [END speech_stream_recognize_punctuation]
// [START speech_transcribe_streaming_mic]
/**
 * Performs microphone streaming speech recognition with a duration of 1 minute.
 *
 * <p>Audio is read from the default microphone line in 6400-byte buffers and sent to the Speech
 * API for 60 seconds. Responses are collected by the observer and their first transcripts are
 * printed once capture has ended. Exceptions raised while the client is in use are caught and
 * printed rather than propagated.
 *
 * @throws Exception declared for callers; failures inside the client block are printed, not thrown
 */
public static void streamingMicRecognize() throws Exception {
  ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
  try (SpeechClient client = SpeechClient.create()) {
    responseObserver = new ResponseObserver<StreamingRecognizeResponse>() {
      ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

      @Override
      public void onStart(StreamController controller) {
      }

      @Override
      public void onResponse(StreamingRecognizeResponse response) {
        responses.add(response);
      }

      @Override
      public void onComplete() {
        for (StreamingRecognizeResponse response : responses) {
          // Skip responses without a result or alternative instead of failing on get(0).
          if (response.getResultsList().isEmpty()) {
            continue;
          }
          StreamingRecognitionResult result = response.getResultsList().get(0);
          if (result.getAlternativesList().isEmpty()) {
            continue;
          }
          SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
          System.out.printf("Transcript : %s\n", alternative.getTranscript());
        }
      }

      @Override
      public void onError(Throwable t) {
        System.out.println(t);
      }
    };
    ClientStream<StreamingRecognizeRequest> clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
    RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder().setEncoding(RecognitionConfig.AudioEncoding.LINEAR16).setLanguageCode("en-US").setSampleRateHertz(16000).build();
    StreamingRecognitionConfig streamingRecognitionConfig = StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
    // The first request on the stream carries only the configuration.
    StreamingRecognizeRequest request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamingRecognitionConfig).build();
    clientStream.send(request);
    // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
    // bigEndian: false
    AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
    // Set the system information to read from the microphone audio stream
    DataLine.Info targetInfo = new Info(TargetDataLine.class, audioFormat);
    if (!AudioSystem.isLineSupported(targetInfo)) {
      System.out.println("Microphone not supported");
      System.exit(0);
    }
    // Target data line captures the audio stream the microphone produces.
    TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
    targetDataLine.open(audioFormat);
    try {
      targetDataLine.start();
      System.out.println("Start speaking");
      long startTime = System.currentTimeMillis();
      // Audio Input Stream
      AudioInputStream audio = new AudioInputStream(targetDataLine);
      // Safe to reuse: ByteString.copyFrom copies the bytes it is given.
      byte[] data = new byte[6400];
      while (true) {
        long estimatedTime = System.currentTimeMillis() - startTime;
        int bytesRead = audio.read(data);
        if (estimatedTime > 60000) {
          // 60 seconds
          System.out.println("Stop speaking.");
          break;
        }
        if (bytesRead <= 0) {
          // Nothing was captured on this read; do not send stale buffer contents.
          continue;
        }
        // Send only the bytes that were actually read.
        request = StreamingRecognizeRequest.newBuilder().setAudioContent(ByteString.copyFrom(data, 0, bytesRead)).build();
        clientStream.send(request);
      }
    } finally {
      // Release the microphone on the normal path and when sending fails.
      targetDataLine.stop();
      targetDataLine.close();
    }
  } catch (Exception e) {
    System.out.println(e);
  }
  // The observer is still null when the client could not be created.
  if (responseObserver != null) {
    responseObserver.onComplete();
  }
}
Use of com.google.cloud.speech.v1.StreamingRecognitionResult in project java-speech by googleapis.
Class Recognize, method streamingRecognizeFile.
// [END speech_transcribe_async_gcs]
// [START speech_transcribe_streaming]
/**
 * Performs streaming speech recognition on raw PCM audio data.
 *
 * <p>The whole file is read into memory and sent as a single audio request after the
 * configuration request. The method then waits for the stream to complete and prints the first
 * alternative of the first result of every response that has one.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 * @throws IOException if the file cannot be read
 * @throws Exception if the client cannot be created or the response stream ends with an error
 */
public static void streamingRecognizeFile(String fileName) throws Exception, IOException {
  Path path = Paths.get(fileName);
  byte[] data = Files.readAllBytes(path);
  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
  try (SpeechClient speech = SpeechClient.create()) {
    // Configure request with local raw PCM audio
    RecognitionConfig recConfig = RecognitionConfig.newBuilder().setEncoding(AudioEncoding.LINEAR16).setLanguageCode("en-US").setSampleRateHertz(16000).setModel("default").build();
    StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();
    // Collects every message and exposes them through a future that completes with the stream.
    class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
      private final SettableFuture<List<T>> future = SettableFuture.create();
      private final List<T> messages = new java.util.ArrayList<T>();

      @Override
      public void onNext(T message) {
        messages.add(message);
      }

      @Override
      public void onError(Throwable t) {
        future.setException(t);
      }

      @Override
      public void onCompleted() {
        future.set(messages);
      }

      // Returns the SettableFuture object to get received messages / exceptions.
      public SettableFuture<List<T>> future() {
        return future;
      }
    }
    ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver = new ResponseApiStreamingObserver<>();
    BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable = speech.streamingRecognizeCallable();
    ApiStreamObserver<StreamingRecognizeRequest> requestObserver = callable.bidiStreamingCall(responseObserver);
    // The first request must **only** contain the audio configuration:
    requestObserver.onNext(StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());
    // Subsequent requests must **only** contain the audio data.
    requestObserver.onNext(StreamingRecognizeRequest.newBuilder().setAudioContent(ByteString.copyFrom(data)).build());
    // Mark transmission as completed after sending the data.
    requestObserver.onCompleted();
    // Blocks until the response stream completes or fails.
    List<StreamingRecognizeResponse> responses = responseObserver.future().get();
    for (StreamingRecognizeResponse response : responses) {
      // A response may carry no results at all; skip it instead of failing on get(0).
      if (response.getResultsList().isEmpty()) {
        continue;
      }
      // For streaming recognize, the results list has one is_final result (if available) followed
      // by a number of in-progress results (if interim_results is true) for subsequent utterances.
      // Just print the first result here.
      StreamingRecognitionResult result = response.getResultsList().get(0);
      if (result.getAlternativesList().isEmpty()) {
        continue;
      }
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcript : %s\n", alternative.getTranscript());
    }
  }
}
Use of com.google.cloud.speech.v1.StreamingRecognitionResult in project Saiy-PS by brandall76.
Class RecognitionGoogleCloud, method onNext.
/**
 * Handles one response received from the streaming recognition call.
 *
 * <p>The endpointer event of the response is processed first: a start-of-speech event notifies
 * the listener (guarded by {@code doBeginning}), and an end-of-speech, end-of-audio or
 * end-of-utterance event stops listening (guarded by {@code doEnd}). Then, while
 * {@code doResults} is set and the response carries results, the transcripts are gathered and
 * handed to the listener as final results (after which listening is stopped) or as partial
 * results.
 *
 * @param value the response passed to the stream
 */
@Override
public void onNext(final StreamingRecognizeResponse value) {
  if (DEBUG) {
    MyLog.i(CLS_NAME, "onNext: " + TextFormat.printToString(value));
  }

  // Step 1: react to the endpointer event.
  final StreamingRecognizeResponse.EndpointerType endpointerType = value.getEndpointerType();
  switch (endpointerType) {
    case START_OF_SPEECH:
      if (DEBUG) {
        MyLog.i(CLS_NAME, "onNext: START_OF_SPEECH");
      }
      if (doBeginning.get()) {
        doBeginning.set(false);
        listener.onBeginningOfSpeech();
      }
      break;
    case END_OF_SPEECH:
      if (DEBUG) {
        MyLog.i(CLS_NAME, "onNext: END_OF_SPEECH");
      }
      if (doEnd.get()) {
        doEnd.set(false);
        stopListening();
      }
      break;
    case END_OF_AUDIO:
      if (DEBUG) {
        MyLog.i(CLS_NAME, "onNext: END_OF_AUDIO");
      }
      if (doEnd.get()) {
        doEnd.set(false);
        stopListening();
      }
      break;
    case END_OF_UTTERANCE:
      if (DEBUG) {
        MyLog.i(CLS_NAME, "onNext: END_OF_UTTERANCE");
      }
      if (doEnd.get()) {
        doEnd.set(false);
        stopListening();
      }
      break;
    case UNRECOGNIZED:
      if (DEBUG) {
        MyLog.i(CLS_NAME, "onNext: UNRECOGNIZED");
      }
      break;
    case ENDPOINTER_EVENT_UNSPECIFIED:
    default:
      if (DEBUG) {
        MyLog.i(CLS_NAME, "onNext: ENDPOINTER_EVENT_UNSPECIFIED");
      }
      break;
  }

  // Step 2: bail out early when results are not wanted or not present.
  if (!doResults.get()) {
    if (DEBUG) {
      MyLog.i(CLS_NAME, "onNext: doResults false");
    }
    return;
  }
  if (!UtilsList.notNaked(value.getResultsList())) {
    if (DEBUG) {
      MyLog.i(CLS_NAME, "onNext: results list naked");
    }
    return;
  }

  // Step 3: rebuild the shared result containers from this response.
  partialArray.clear();
  resultsArray.clear();
  confidenceArray.clear();
  bundle.clear();

  // Holds the finality of the most recently visited result; after the loop that is the last one.
  boolean lastResultFinal = false;
  for (final StreamingRecognitionResult result : value.getResultsList()) {
    if (DEBUG) {
      MyLog.i(CLS_NAME, "recognitionResult stability: " + result.getStability());
    }
    lastResultFinal = result.getIsFinal();
    if (DEBUG) {
      MyLog.i(CLS_NAME, "isFinal: " + lastResultFinal);
    }
    for (final SpeechRecognitionAlternative candidate : result.getAlternativesList()) {
      final String transcript = candidate.getTranscript();
      if (DEBUG) {
        MyLog.i(CLS_NAME, "alternative: " + transcript);
      }
      if (lastResultFinal) {
        resultsArray.add(transcript);
        confidenceArray.add(candidate.getConfidence());
        continue;
      }
      // Later partial entries are the first partial followed by the new transcript.
      partialArray.add(partialArray.isEmpty() ? transcript : partialArray.get(0) + " " + transcript);
    }
  }

  // Step 4: deliver. Further results are only wanted while the outcome is not final.
  doResults.set(!lastResultFinal);
  if (!lastResultFinal) {
    bundle.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION, partialArray);
    listener.onPartialResults(bundle);
    return;
  }
  bundle.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION, resultsArray);
  bundle.putFloatArray(SpeechRecognizer.CONFIDENCE_SCORES, ArrayUtils.toPrimitive(confidenceArray.toArray(new Float[0]), 0.0F));
  listener.onResults(bundle);
  stopListening();
}
Use of com.google.cloud.speech.v1.StreamingRecognitionResult in project java-docs-samples by GoogleCloudPlatform.
Class Recognize, method streamingRecognizeFile.
/**
 * Performs streaming speech recognition on raw PCM audio data.
 *
 * <p>The whole file is read into memory and sent as a single audio request after the
 * configuration request. The method then waits for the stream to complete and prints the first
 * alternative of the first result of every response that has one.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 * @throws IOException if the file cannot be read
 * @throws Exception if the client cannot be created or the response stream ends with an error
 */
public static void streamingRecognizeFile(String fileName) throws Exception, IOException {
  Path path = Paths.get(fileName);
  byte[] data = Files.readAllBytes(path);
  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
  try (SpeechClient speech = SpeechClient.create()) {
    // Configure request with local raw PCM audio
    RecognitionConfig recConfig = RecognitionConfig.newBuilder().setEncoding(AudioEncoding.LINEAR16).setLanguageCode("en-US").setSampleRateHertz(16000).setModel("default").build();
    StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();
    // Collects every message and exposes them through a future that completes with the stream.
    class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
      private final SettableFuture<List<T>> future = SettableFuture.create();
      private final List<T> messages = new java.util.ArrayList<T>();

      @Override
      public void onNext(T message) {
        messages.add(message);
      }

      @Override
      public void onError(Throwable t) {
        future.setException(t);
      }

      @Override
      public void onCompleted() {
        future.set(messages);
      }

      // Returns the SettableFuture object to get received messages / exceptions.
      public SettableFuture<List<T>> future() {
        return future;
      }
    }
    ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver = new ResponseApiStreamingObserver<>();
    BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable = speech.streamingRecognizeCallable();
    ApiStreamObserver<StreamingRecognizeRequest> requestObserver = callable.bidiStreamingCall(responseObserver);
    // The first request must **only** contain the audio configuration:
    requestObserver.onNext(StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());
    // Subsequent requests must **only** contain the audio data.
    requestObserver.onNext(StreamingRecognizeRequest.newBuilder().setAudioContent(ByteString.copyFrom(data)).build());
    // Mark transmission as completed after sending the data.
    requestObserver.onCompleted();
    // Blocks until the response stream completes or fails.
    List<StreamingRecognizeResponse> responses = responseObserver.future().get();
    for (StreamingRecognizeResponse response : responses) {
      // A response may carry no results at all; skip it instead of failing on get(0).
      if (response.getResultsList().isEmpty()) {
        continue;
      }
      // For streaming recognize, the results list has one is_final result (if available) followed
      // by a number of in-progress results (if interim_results is true) for subsequent utterances.
      // Just print the first result here.
      StreamingRecognitionResult result = response.getResultsList().get(0);
      if (result.getAlternativesList().isEmpty()) {
        continue;
      }
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcript : %s\n", alternative.getTranscript());
    }
  }
}
Aggregations