Search in sources :

Example 1 with StreamingRecognitionConfig

Use of com.google.cloud.speech.v1.StreamingRecognitionConfig in the java-speech project by googleapis.

From the class InfiniteStreamRecognize, method infiniteStreamingRecognize.

/**
 * Performs infinite streaming speech recognition from the microphone.
 *
 * <p>The Speech API caps the duration of a single streaming request, so this method closes and
 * reopens the stream every STREAMING_LIMIT milliseconds, re-sending any audio captured after the
 * last finalized result so no speech is lost across the restart.
 *
 * @param languageCode BCP-47 language code of the spoken audio, e.g. "en-US"
 * @throws Exception if the speech client cannot be created
 */
public static void infiniteStreamingRecognize(String languageCode) throws Exception {
    // Microphone input buffering: copies each captured buffer onto sharedQueue so the
    // main sender loop can forward it to the API.
    class MicBuffer implements Runnable {

        @Override
        public void run() {
            System.out.println(YELLOW);
            System.out.println("Start speaking...Press Ctrl-C to stop");
            targetDataLine.start();
            byte[] data = new byte[BYTES_PER_BUFFER];
            while (targetDataLine.isOpen()) {
                try {
                    int numBytesRead = targetDataLine.read(data, 0, data.length);
                    if ((numBytesRead <= 0) && (targetDataLine.isOpen())) {
                        continue;
                    }
                    // clone(): the same byte[] is reused for the next read.
                    sharedQueue.put(data.clone());
                } catch (InterruptedException e) {
                    // Restore the interrupt status and stop buffering.
                    Thread.currentThread().interrupt();
                    System.out.println("Microphone input buffering interrupted : " + e.getMessage());
                    return;
                }
            }
        }
    }
    // Creating microphone input buffer thread
    MicBuffer micrunnable = new MicBuffer();
    Thread micThread = new Thread(micrunnable);
    ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
    try (SpeechClient client = SpeechClient.create()) {
        ClientStream<StreamingRecognizeRequest> clientStream;
        responseObserver = new ResponseObserver<StreamingRecognizeResponse>() {

            ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

            public void onStart(StreamController controller) {
                // Keep the controller so the main loop can cancel the stream on restart.
                referenceToStreamController = controller;
            }

            public void onResponse(StreamingRecognizeResponse response) {
                responses.add(response);
                // Guard: skip responses that carry no recognition results.
                if (response.getResultsCount() == 0) {
                    return;
                }
                StreamingRecognitionResult result = response.getResultsList().get(0);
                Duration resultEndTime = result.getResultEndTime();
                resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000) + (resultEndTime.getNanos() / 1000000));
                // Offset the in-stream timestamp by completed restarts and bridged audio.
                double correctedTime = resultEndTimeInMS - bridgingOffset + (STREAMING_LIMIT * restartCounter);
                SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
                if (result.getIsFinal()) {
                    System.out.print(GREEN);
                    System.out.print("\033[2K\r");
                    System.out.printf("%s: %s [confidence: %.2f]\n", convertMillisToDate(correctedTime), alternative.getTranscript(), alternative.getConfidence());
                    isFinalEndTime = resultEndTimeInMS;
                    lastTranscriptWasFinal = true;
                } else {
                    System.out.print(RED);
                    System.out.print("\033[2K\r");
                    System.out.printf("%s: %s", convertMillisToDate(correctedTime), alternative.getTranscript());
                    lastTranscriptWasFinal = false;
                }
            }

            public void onComplete() {
            }

            public void onError(Throwable t) {
                // Surface stream errors instead of swallowing them silently.
                System.out.println("Streaming recognize error : " + t);
            }
        };
        clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
        RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder().setEncoding(RecognitionConfig.AudioEncoding.LINEAR16).setLanguageCode(languageCode).setSampleRateHertz(16000).build();
        StreamingRecognitionConfig streamingRecognitionConfig = StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).setInterimResults(true).build();
        // The first request on each stream carries only the configuration.
        StreamingRecognizeRequest request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamingRecognitionConfig).build();
        clientStream.send(request);
        try {
            // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
            // bigEndian: false
            AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
            DataLine.Info targetInfo = new Info(TargetDataLine.class, // Set the system information to read from the microphone audio
            audioFormat);
            if (!AudioSystem.isLineSupported(targetInfo)) {
                System.out.println("Microphone not supported");
                System.exit(0);
            }
            // Target data line captures the audio stream the microphone produces.
            targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
            targetDataLine.open(audioFormat);
            micThread.start();
            long startTime = System.currentTimeMillis();
            while (true) {
                long estimatedTime = System.currentTimeMillis() - startTime;
                if (estimatedTime >= STREAMING_LIMIT) {
                    // Time limit reached: tear down the current stream and start a new one.
                    clientStream.closeSend();
                    // remove Observer
                    referenceToStreamController.cancel();
                    if (resultEndTimeInMS > 0) {
                        finalRequestEndTime = isFinalEndTime;
                    }
                    resultEndTimeInMS = 0;
                    // Keep the previous request's audio so unfinalized speech can be re-sent.
                    lastAudioInput = null;
                    lastAudioInput = audioInput;
                    audioInput = new ArrayList<ByteString>();
                    restartCounter++;
                    if (!lastTranscriptWasFinal) {
                        System.out.print('\n');
                    }
                    newStream = true;
                    clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
                    request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamingRecognitionConfig).build();
                    System.out.println(YELLOW);
                    System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);
                    startTime = System.currentTimeMillis();
                } else {
                    if ((newStream) && (lastAudioInput.size() > 0)) {
                        // if this is the first audio from a new request
                        // calculate amount of unfinalized audio from last request
                        // resend the audio to the speech client before incoming audio
                        // Cast to double: plain integer division would truncate the chunk length.
                        double chunkTime = (double) STREAMING_LIMIT / lastAudioInput.size();
                        // ms length of each chunk in previous request audio arrayList
                        if (chunkTime != 0) {
                            if (bridgingOffset < 0) {
                                // bridging Offset accounts for time of resent audio
                                // calculated from last request
                                bridgingOffset = 0;
                            }
                            if (bridgingOffset > finalRequestEndTime) {
                                bridgingOffset = finalRequestEndTime;
                            }
                            int chunksFromMs = (int) Math.floor((finalRequestEndTime - bridgingOffset) / chunkTime);
                            // chunks from MS is number of chunks to resend
                            bridgingOffset = (int) Math.floor((lastAudioInput.size() - chunksFromMs) * chunkTime);
                            // set bridging offset for next request
                            for (int i = chunksFromMs; i < lastAudioInput.size(); i++) {
                                request = StreamingRecognizeRequest.newBuilder().setAudioContent(lastAudioInput.get(i)).build();
                                clientStream.send(request);
                            }
                        }
                        newStream = false;
                    }
                    // Forward the next microphone buffer and remember it for possible re-send.
                    tempByteString = ByteString.copyFrom(sharedQueue.take());
                    request = StreamingRecognizeRequest.newBuilder().setAudioContent(tempByteString).build();
                    audioInput.add(tempByteString);
                }
                clientStream.send(request);
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}
Also used : ByteString(com.google.protobuf.ByteString) ArrayList(java.util.ArrayList) StreamingRecognitionConfig(com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig) RecognitionConfig(com.google.cloud.speech.v1p1beta1.RecognitionConfig) SpeechClient(com.google.cloud.speech.v1p1beta1.SpeechClient) StreamingRecognizeResponse(com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse) AudioFormat(javax.sound.sampled.AudioFormat) StreamingRecognitionConfig(com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig) TargetDataLine(javax.sound.sampled.TargetDataLine) DataLine(javax.sound.sampled.DataLine) Duration(com.google.protobuf.Duration) Info(javax.sound.sampled.DataLine.Info) Info(javax.sound.sampled.DataLine.Info) StreamController(com.google.api.gax.rpc.StreamController) SpeechRecognitionAlternative(com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative) StreamingRecognitionResult(com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult) StreamingRecognizeRequest(com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest)

Example 2 with StreamingRecognitionConfig

Use of com.google.cloud.speech.v1.StreamingRecognitionConfig in the java-speech project by googleapis.

From the class Recognize, method streamingMicRecognize.

// [END speech_stream_recognize_punctuation]
// [START speech_transcribe_streaming_mic]
/**
 * Performs microphone streaming speech recognition with a duration of 1 minute.
 *
 * @throws Exception if the speech client cannot be created or the microphone fails
 */
public static void streamingMicRecognize() throws Exception {
    ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
    try (SpeechClient client = SpeechClient.create()) {
        responseObserver = new ResponseObserver<StreamingRecognizeResponse>() {

            // Responses are buffered and their transcripts printed in onComplete().
            ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

            public void onStart(StreamController controller) {
            }

            public void onResponse(StreamingRecognizeResponse response) {
                responses.add(response);
            }

            public void onComplete() {
                for (StreamingRecognizeResponse response : responses) {
                    // Guard: skip responses that carry no recognition results.
                    if (response.getResultsCount() == 0) {
                        continue;
                    }
                    StreamingRecognitionResult result = response.getResultsList().get(0);
                    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
                    System.out.printf("Transcript : %s\n", alternative.getTranscript());
                }
            }

            public void onError(Throwable t) {
                System.out.println(t);
            }
        };
        ClientStream<StreamingRecognizeRequest> clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
        RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder().setEncoding(RecognitionConfig.AudioEncoding.LINEAR16).setLanguageCode("en-US").setSampleRateHertz(16000).build();
        StreamingRecognitionConfig streamingRecognitionConfig = StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
        // The first request must contain only the configuration.
        StreamingRecognizeRequest request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamingRecognitionConfig).build();
        clientStream.send(request);
        // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
        // bigEndian: false
        AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
        DataLine.Info targetInfo = new Info(TargetDataLine.class, // Set the system information to read from the microphone audio stream
        audioFormat);
        if (!AudioSystem.isLineSupported(targetInfo)) {
            System.out.println("Microphone not supported");
            System.exit(0);
        }
        // Target data line captures the audio stream the microphone produces.
        TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
        targetDataLine.open(audioFormat);
        targetDataLine.start();
        System.out.println("Start speaking");
        long startTime = System.currentTimeMillis();
        // Audio Input Stream
        AudioInputStream audio = new AudioInputStream(targetDataLine);
        while (true) {
            long estimatedTime = System.currentTimeMillis() - startTime;
            byte[] data = new byte[6400];
            int bytesRead = audio.read(data);
            if (estimatedTime > 60000) {
                // 60 seconds
                System.out.println("Stop speaking.");
                targetDataLine.stop();
                targetDataLine.close();
                break;
            }
            if (bytesRead <= 0) {
                // End of stream or empty read: nothing to send this iteration.
                continue;
            }
            // Send only the bytes actually read, not the whole (possibly stale) buffer.
            request = StreamingRecognizeRequest.newBuilder().setAudioContent(ByteString.copyFrom(data, 0, bytesRead)).build();
            clientStream.send(request);
        }
        // Signal the server that no more audio will be sent.
        clientStream.closeSend();
    } catch (Exception e) {
        System.out.println(e);
    }
    // Guard: create() may have failed before the observer was assigned.
    if (responseObserver != null) {
        responseObserver.onComplete();
    }
}
Also used : StreamingRecognitionConfig(com.google.cloud.speech.v1.StreamingRecognitionConfig) TargetDataLine(javax.sound.sampled.TargetDataLine) DataLine(javax.sound.sampled.DataLine) ArrayList(java.util.ArrayList) WordInfo(com.google.cloud.speech.v1.WordInfo) Info(javax.sound.sampled.DataLine.Info) Info(javax.sound.sampled.DataLine.Info) IOException(java.io.IOException) TargetDataLine(javax.sound.sampled.TargetDataLine) StreamController(com.google.api.gax.rpc.StreamController) AudioInputStream(javax.sound.sampled.AudioInputStream) SpeechRecognitionAlternative(com.google.cloud.speech.v1.SpeechRecognitionAlternative) StreamingRecognitionResult(com.google.cloud.speech.v1.StreamingRecognitionResult) StreamingRecognizeRequest(com.google.cloud.speech.v1.StreamingRecognizeRequest) RecognitionConfig(com.google.cloud.speech.v1.RecognitionConfig) StreamingRecognitionConfig(com.google.cloud.speech.v1.StreamingRecognitionConfig) SpeechClient(com.google.cloud.speech.v1.SpeechClient) StreamingRecognizeResponse(com.google.cloud.speech.v1.StreamingRecognizeResponse) AudioFormat(javax.sound.sampled.AudioFormat)

Example 3 with StreamingRecognitionConfig

Use of com.google.cloud.speech.v1.StreamingRecognitionConfig in the java-speech project by googleapis.

From the class Recognize, method streamingRecognizeFile.

// [END speech_transcribe_async_gcs]
// [START speech_transcribe_streaming]
/**
 * Performs streaming speech recognition on raw PCM audio data.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 * @throws Exception if the file cannot be read or the streaming call fails
 */
public static void streamingRecognizeFile(String fileName) throws Exception {
    Path path = Paths.get(fileName);
    byte[] data = Files.readAllBytes(path);
    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
    try (SpeechClient speech = SpeechClient.create()) {
        // Configure request with local raw PCM audio
        RecognitionConfig recConfig = RecognitionConfig.newBuilder().setEncoding(AudioEncoding.LINEAR16).setLanguageCode("en-US").setSampleRateHertz(16000).setModel("default").build();
        StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();
        // Collects streamed responses and exposes them through a settable future.
        class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {

            private final SettableFuture<List<T>> future = SettableFuture.create();

            private final List<T> messages = new ArrayList<T>();

            @Override
            public void onNext(T message) {
                messages.add(message);
            }

            @Override
            public void onError(Throwable t) {
                future.setException(t);
            }

            @Override
            public void onCompleted() {
                future.set(messages);
            }

            // Returns the SettableFuture object to get received messages / exceptions.
            public SettableFuture<List<T>> future() {
                return future;
            }
        }
        ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver = new ResponseApiStreamingObserver<>();
        BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable = speech.streamingRecognizeCallable();
        ApiStreamObserver<StreamingRecognizeRequest> requestObserver = callable.bidiStreamingCall(responseObserver);
        // The first request must **only** contain the audio configuration:
        requestObserver.onNext(StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());
        // Subsequent requests must **only** contain the audio data.
        requestObserver.onNext(StreamingRecognizeRequest.newBuilder().setAudioContent(ByteString.copyFrom(data)).build());
        // Mark transmission as completed after sending the data.
        requestObserver.onCompleted();
        List<StreamingRecognizeResponse> responses = responseObserver.future().get();
        for (StreamingRecognizeResponse response : responses) {
            // Guard: skip responses that carry no results at all.
            if (response.getResultsCount() == 0) {
                continue;
            }
            // For streaming recognize, the results list has one is_final result (if available) followed
            // by a number of in-progress results (if interim_results is true) for subsequent utterances.
            // Just print the first result here.
            StreamingRecognitionResult result = response.getResultsList().get(0);
            // There can be several alternative transcripts for a given chunk of speech. Just use the
            // first (most likely) one here.
            SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
            System.out.printf("Transcript : %s\n", alternative.getTranscript());
        }
    }
}
Also used : Path(java.nio.file.Path) SettableFuture(com.google.common.util.concurrent.SettableFuture) StreamingRecognitionConfig(com.google.cloud.speech.v1.StreamingRecognitionConfig) SpeechRecognitionAlternative(com.google.cloud.speech.v1.SpeechRecognitionAlternative) StreamingRecognitionResult(com.google.cloud.speech.v1.StreamingRecognitionResult) StreamingRecognizeRequest(com.google.cloud.speech.v1.StreamingRecognizeRequest) ApiStreamObserver(com.google.api.gax.rpc.ApiStreamObserver) RecognitionConfig(com.google.cloud.speech.v1.RecognitionConfig) StreamingRecognitionConfig(com.google.cloud.speech.v1.StreamingRecognitionConfig) SpeechClient(com.google.cloud.speech.v1.SpeechClient) ArrayList(java.util.ArrayList) List(java.util.List) StreamingRecognizeResponse(com.google.cloud.speech.v1.StreamingRecognizeResponse)

Example 4 with StreamingRecognitionConfig

Use of com.google.cloud.speech.v1.StreamingRecognitionConfig in the java-docs-samples project by GoogleCloudPlatform.

From the class Recognize, method streamingRecognizeFile.

/**
 * Performs streaming speech recognition on raw PCM audio data.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 * @throws Exception if the file cannot be read or the streaming call fails
 */
public static void streamingRecognizeFile(String fileName) throws Exception {
    Path path = Paths.get(fileName);
    byte[] data = Files.readAllBytes(path);
    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
    try (SpeechClient speech = SpeechClient.create()) {
        // Configure request with local raw PCM audio
        RecognitionConfig recConfig = RecognitionConfig.newBuilder().setEncoding(AudioEncoding.LINEAR16).setLanguageCode("en-US").setSampleRateHertz(16000).setModel("default").build();
        StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();
        // Collects streamed responses and exposes them through a settable future.
        class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {

            private final SettableFuture<List<T>> future = SettableFuture.create();

            private final List<T> messages = new ArrayList<T>();

            @Override
            public void onNext(T message) {
                messages.add(message);
            }

            @Override
            public void onError(Throwable t) {
                future.setException(t);
            }

            @Override
            public void onCompleted() {
                future.set(messages);
            }

            // Returns the SettableFuture object to get received messages / exceptions.
            public SettableFuture<List<T>> future() {
                return future;
            }
        }
        ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver = new ResponseApiStreamingObserver<>();
        BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable = speech.streamingRecognizeCallable();
        ApiStreamObserver<StreamingRecognizeRequest> requestObserver = callable.bidiStreamingCall(responseObserver);
        // The first request must **only** contain the audio configuration:
        requestObserver.onNext(StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());
        // Subsequent requests must **only** contain the audio data.
        requestObserver.onNext(StreamingRecognizeRequest.newBuilder().setAudioContent(ByteString.copyFrom(data)).build());
        // Mark transmission as completed after sending the data.
        requestObserver.onCompleted();
        List<StreamingRecognizeResponse> responses = responseObserver.future().get();
        for (StreamingRecognizeResponse response : responses) {
            // Guard: skip responses that carry no results at all.
            if (response.getResultsCount() == 0) {
                continue;
            }
            // For streaming recognize, the results list has one is_final result (if available) followed
            // by a number of in-progress results (if interim_results is true) for subsequent utterances.
            // Just print the first result here.
            StreamingRecognitionResult result = response.getResultsList().get(0);
            // There can be several alternative transcripts for a given chunk of speech. Just use the
            // first (most likely) one here.
            SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
            System.out.printf("Transcript : %s\n", alternative.getTranscript());
        }
    }
}
Also used : Path(java.nio.file.Path) SettableFuture(com.google.common.util.concurrent.SettableFuture) StreamingRecognitionConfig(com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig) SpeechRecognitionAlternative(com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative) StreamingRecognitionResult(com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult) StreamingRecognizeRequest(com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest) ApiStreamObserver(com.google.api.gax.rpc.ApiStreamObserver) StreamingRecognitionConfig(com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig) RecognitionConfig(com.google.cloud.speech.v1p1beta1.RecognitionConfig) SpeechClient(com.google.cloud.speech.v1p1beta1.SpeechClient) List(java.util.List) StreamingRecognizeResponse(com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse)

Example 5 with StreamingRecognitionConfig

Use of com.google.cloud.speech.v1.StreamingRecognitionConfig in the java-speech project by googleapis.

From the class ITSpeechTest, method streamingRecognize.

@Test
public void streamingRecognize() throws Exception {
    // Load the "hello" test clip from the classpath into a ByteString.
    ByteString audioData =
        ByteString.readFrom(getClass().getClassLoader().getResourceAsStream("hello.flac"));
    // Streaming config wraps the single-channel recognition config.
    StreamingRecognitionConfig streamConfig =
        StreamingRecognitionConfig.newBuilder().setConfig(config(1)).build();
    ResponseApiStreamingObserver<StreamingRecognizeResponse> collector =
        new ResponseApiStreamingObserver<>();
    ApiStreamObserver<StreamingRecognizeRequest> sender =
        speechClient.streamingRecognizeCallable().bidiStreamingCall(collector);
    // First request: configuration only.
    sender.onNext(StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamConfig).build());
    // Second request: audio content only.
    sender.onNext(StreamingRecognizeRequest.newBuilder().setAudioContent(audioData).build());
    // No more audio to send.
    sender.onCompleted();
    List<StreamingRecognizeResponse> responses = collector.future().get();
    Truth.assertThat(responses.size()).isGreaterThan(0);
    StreamingRecognizeResponse first = responses.get(0);
    Truth.assertThat(first.getResultsCount()).isGreaterThan(0);
    Truth.assertThat(first.getResults(0).getAlternativesCount()).isGreaterThan(0);
    String transcript = first.getResults(0).getAlternatives(0).getTranscript();
    Truth.assertThat(transcript).isEqualTo("hello");
}
Also used : StreamingRecognitionConfig(com.google.cloud.speech.v1.StreamingRecognitionConfig) StreamingRecognizeRequest(com.google.cloud.speech.v1.StreamingRecognizeRequest) StreamingRecognizeResponse(com.google.cloud.speech.v1.StreamingRecognizeResponse) ByteString(com.google.protobuf.ByteString) Test(org.junit.Test)

Aggregations

StreamingRecognitionConfig (com.google.cloud.speech.v1.StreamingRecognitionConfig)5 RecognitionConfig (com.google.cloud.speech.v1.RecognitionConfig)4 StreamingRecognizeRequest (com.google.cloud.speech.v1.StreamingRecognizeRequest)4 StreamingRecognizeResponse (com.google.cloud.speech.v1.StreamingRecognizeResponse)4 ArrayList (java.util.ArrayList)4 ApiStreamObserver (com.google.api.gax.rpc.ApiStreamObserver)3 SpeechClient (com.google.cloud.speech.v1.SpeechClient)3 SpeechRecognitionAlternative (com.google.cloud.speech.v1.SpeechRecognitionAlternative)3 StreamingRecognitionResult (com.google.cloud.speech.v1.StreamingRecognitionResult)3 SettableFuture (com.google.common.util.concurrent.SettableFuture)3 Path (java.nio.file.Path)3 List (java.util.List)3 StreamController (com.google.api.gax.rpc.StreamController)2 RecognitionConfig (com.google.cloud.speech.v1p1beta1.RecognitionConfig)2 SpeechClient (com.google.cloud.speech.v1p1beta1.SpeechClient)2 SpeechRecognitionAlternative (com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative)2 StreamingRecognitionConfig (com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig)2 StreamingRecognitionResult (com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult)2 StreamingRecognizeRequest (com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest)2 StreamingRecognizeResponse (com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse)2