Search in sources :

Example 1 with SpeechRecognitionAlternative

use of com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative in project java-speech by googleapis.

the class InfiniteStreamRecognize method infiniteStreamingRecognize.

/**
 * Performs infinite streaming speech recognition by restarting the streaming request
 * each time the per-stream limit is reached, and re-sending the unfinalized tail of
 * the previous request's audio so no speech is lost across restarts.
 *
 * <p>Relies on class-level state: {@code targetDataLine}, {@code sharedQueue},
 * {@code audioInput}/{@code lastAudioInput}, timing counters, and ANSI color constants.
 * STREAMING_LIMIT is presumably the per-stream duration in milliseconds — confirm
 * against the declaring class.
 *
 * @param languageCode BCP-47 language code of the spoken audio (e.g. "en-US")
 * @throws Exception if the speech client or the microphone line cannot be opened
 */
public static void infiniteStreamingRecognize(String languageCode) throws Exception {
    // Microphone Input buffering: continuously copies mic audio into sharedQueue.
    class MicBuffer implements Runnable {

        @Override
        public void run() {
            System.out.println(YELLOW);
            System.out.println("Start speaking...Press Ctrl-C to stop");
            targetDataLine.start();
            byte[] data = new byte[BYTES_PER_BUFFER];
            while (targetDataLine.isOpen()) {
                try {
                    int numBytesRead = targetDataLine.read(data, 0, data.length);
                    if ((numBytesRead <= 0) && (targetDataLine.isOpen())) {
                        continue;
                    }
                    // clone(): the queue must own a snapshot, since `data` is reused
                    // by the next read.
                    sharedQueue.put(data.clone());
                } catch (InterruptedException e) {
                    System.out.println("Microphone input buffering interrupted : " + e.getMessage());
                    // Fix: restore the interrupt flag and stop buffering; the original
                    // swallowed the interrupt and kept looping on an interrupted thread.
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }
    }
    // Creating microphone input buffer thread
    MicBuffer micrunnable = new MicBuffer();
    Thread micThread = new Thread(micrunnable);
    ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
    try (SpeechClient client = SpeechClient.create()) {
        ClientStream<StreamingRecognizeRequest> clientStream;
        responseObserver = new ResponseObserver<StreamingRecognizeResponse>() {

            // NOTE(review): grows unboundedly over an "infinite" session; kept for
            // parity with the original sample, but consider dropping if unused.
            ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

            public void onStart(StreamController controller) {
                // Keep a handle so the restart logic below can cancel this stream.
                referenceToStreamController = controller;
            }

            public void onResponse(StreamingRecognizeResponse response) {
                responses.add(response);
                StreamingRecognitionResult result = response.getResultsList().get(0);
                Duration resultEndTime = result.getResultEndTime();
                resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000) + (resultEndTime.getNanos() / 1000000));
                // Correct the timestamp for audio replayed across restarts.
                double correctedTime = resultEndTimeInMS - bridgingOffset + (STREAMING_LIMIT * restartCounter);
                SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
                if (result.getIsFinal()) {
                    System.out.print(GREEN);
                    System.out.print("\033[2K\r");
                    System.out.printf("%s: %s [confidence: %.2f]\n", convertMillisToDate(correctedTime), alternative.getTranscript(), alternative.getConfidence());
                    isFinalEndTime = resultEndTimeInMS;
                    lastTranscriptWasFinal = true;
                } else {
                    // Interim hypothesis: overwrite the current console line in red.
                    System.out.print(RED);
                    System.out.print("\033[2K\r");
                    System.out.printf("%s: %s", convertMillisToDate(correctedTime), alternative.getTranscript());
                    lastTranscriptWasFinal = false;
                }
            }

            public void onComplete() {
            }

            public void onError(Throwable t) {
                // Fix: surface stream errors instead of silently swallowing them.
                System.out.println("Streaming recognize error: " + t);
            }
        };
        clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
        RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder().setEncoding(RecognitionConfig.AudioEncoding.LINEAR16).setLanguageCode(languageCode).setSampleRateHertz(16000).build();
        StreamingRecognitionConfig streamingRecognitionConfig = StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).setInterimResults(true).build();
        // The first request on each stream carries only the config, no audio.
        StreamingRecognizeRequest request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamingRecognitionConfig).build();
        clientStream.send(request);
        try {
            // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
            // bigEndian: false
            AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
            DataLine.Info targetInfo = new Info(TargetDataLine.class, // Set the system information to read from the microphone audio
            audioFormat);
            if (!AudioSystem.isLineSupported(targetInfo)) {
                System.out.println("Microphone not supported");
                System.exit(0);
            }
            // Target data line captures the audio stream the microphone produces.
            targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
            targetDataLine.open(audioFormat);
            micThread.start();
            long startTime = System.currentTimeMillis();
            while (true) {
                long estimatedTime = System.currentTimeMillis() - startTime;
                if (estimatedTime >= STREAMING_LIMIT) {
                    // Per-stream limit reached: tear down this stream and restart.
                    clientStream.closeSend();
                    // remove Observer
                    referenceToStreamController.cancel();
                    if (resultEndTimeInMS > 0) {
                        finalRequestEndTime = isFinalEndTime;
                    }
                    resultEndTimeInMS = 0;
                    // Keep the old request's audio so its unfinalized tail can be resent.
                    lastAudioInput = audioInput;
                    audioInput = new ArrayList<ByteString>();
                    restartCounter++;
                    if (!lastTranscriptWasFinal) {
                        System.out.print('\n');
                    }
                    newStream = true;
                    clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
                    request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamingRecognitionConfig).build();
                    System.out.println(YELLOW);
                    System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);
                    startTime = System.currentTimeMillis();
                } else {
                    if ((newStream) && (lastAudioInput.size() > 0)) {
                        // if this is the first audio from a new request
                        // calculate amount of unfinalized audio from last request
                        // resend the audio to the speech client before incoming audio
                        // Fix: cast before dividing — the original did integer division
                        // and truncated chunkTime before the double assignment.
                        double chunkTime = (double) STREAMING_LIMIT / lastAudioInput.size();
                        // ms length of each chunk in previous request audio arrayList
                        if (chunkTime != 0) {
                            if (bridgingOffset < 0) {
                                // bridging Offset accounts for time of resent audio
                                // calculated from last request
                                bridgingOffset = 0;
                            }
                            if (bridgingOffset > finalRequestEndTime) {
                                bridgingOffset = finalRequestEndTime;
                            }
                            int chunksFromMs = (int) Math.floor((finalRequestEndTime - bridgingOffset) / chunkTime);
                            // chunks from MS is number of chunks to resend
                            bridgingOffset = (int) Math.floor((lastAudioInput.size() - chunksFromMs) * chunkTime);
                            // set bridging offset for next request
                            for (int i = chunksFromMs; i < lastAudioInput.size(); i++) {
                                request = StreamingRecognizeRequest.newBuilder().setAudioContent(lastAudioInput.get(i)).build();
                                clientStream.send(request);
                            }
                        }
                        newStream = false;
                    }
                    // Block until the mic thread produces the next audio chunk.
                    tempByteString = ByteString.copyFrom(sharedQueue.take());
                    request = StreamingRecognizeRequest.newBuilder().setAudioContent(tempByteString).build();
                    audioInput.add(tempByteString);
                }
                clientStream.send(request);
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}
Also used : ByteString(com.google.protobuf.ByteString) ArrayList(java.util.ArrayList) StreamingRecognitionConfig(com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig) RecognitionConfig(com.google.cloud.speech.v1p1beta1.RecognitionConfig) SpeechClient(com.google.cloud.speech.v1p1beta1.SpeechClient) StreamingRecognizeResponse(com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse) AudioFormat(javax.sound.sampled.AudioFormat) StreamingRecognitionConfig(com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig) TargetDataLine(javax.sound.sampled.TargetDataLine) DataLine(javax.sound.sampled.DataLine) Duration(com.google.protobuf.Duration) Info(javax.sound.sampled.DataLine.Info) Info(javax.sound.sampled.DataLine.Info) StreamController(com.google.api.gax.rpc.StreamController) SpeechRecognitionAlternative(com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative) StreamingRecognitionResult(com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult) StreamingRecognizeRequest(com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest)

Example 2 with SpeechRecognitionAlternative

use of com.google.cloud.speech.v1.SpeechRecognitionAlternative in project java-speech by googleapis.

the class Recognize method syncRecognizeWords.

// [END speech_transcribe_sync]
/**
 * Transcribes a local raw PCM audio file with a blocking recognize call and
 * prints every word together with its start/end time offsets.
 *
 * @param fileName the path to a PCM audio file to transcribe get offsets on.
 */
public static void syncRecognizeWords(String fileName) throws Exception {
    try (SpeechClient speechClient = SpeechClient.create()) {
        // Load the raw PCM bytes from disk in one step.
        ByteString audioBytes = ByteString.copyFrom(Files.readAllBytes(Paths.get(fileName)));
        // Request word-level time offsets along with the transcript.
        RecognitionConfig config =
            RecognitionConfig.newBuilder()
                .setEncoding(AudioEncoding.LINEAR16)
                .setLanguageCode("en-US")
                .setSampleRateHertz(16000)
                .setEnableWordTimeOffsets(true)
                .build();
        RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build();
        // Blocking call: returns once the whole file is transcribed.
        RecognizeResponse response = speechClient.recognize(config, audio);
        for (SpeechRecognitionResult result : response.getResultsList()) {
            // Several alternative transcripts may exist; the first is the most likely.
            SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
            System.out.printf("Transcription: %s%n", alternative.getTranscript());
            for (WordInfo wordInfo : alternative.getWordsList()) {
                System.out.println(wordInfo.getWord());
                System.out.printf(
                    "\t%s.%s sec - %s.%s sec\n",
                    wordInfo.getStartTime().getSeconds(),
                    wordInfo.getStartTime().getNanos() / 100000000,
                    wordInfo.getEndTime().getSeconds(),
                    wordInfo.getEndTime().getNanos() / 100000000);
            }
        }
    }
}
Also used : Path(java.nio.file.Path) SpeechRecognitionAlternative(com.google.cloud.speech.v1.SpeechRecognitionAlternative) RecognitionAudio(com.google.cloud.speech.v1.RecognitionAudio) ByteString(com.google.protobuf.ByteString) RecognitionConfig(com.google.cloud.speech.v1.RecognitionConfig) StreamingRecognitionConfig(com.google.cloud.speech.v1.StreamingRecognitionConfig) SpeechClient(com.google.cloud.speech.v1.SpeechClient) LongRunningRecognizeResponse(com.google.cloud.speech.v1.LongRunningRecognizeResponse) StreamingRecognizeResponse(com.google.cloud.speech.v1.StreamingRecognizeResponse) RecognizeResponse(com.google.cloud.speech.v1.RecognizeResponse) SpeechRecognitionResult(com.google.cloud.speech.v1.SpeechRecognitionResult) WordInfo(com.google.cloud.speech.v1.WordInfo)

Example 3 with SpeechRecognitionAlternative

use of com.google.cloud.speech.v1.SpeechRecognitionAlternative in project java-speech by googleapis.

the class Recognize method asyncRecognizeGcs.

// [END speech_transcribe_async_word_time_offsets_gcs]
// [START speech_transcribe_async_gcs]
/**
 * Performs non-blocking speech recognition on remote FLAC file and prints the transcription.
 *
 * @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
 */
public static void asyncRecognizeGcs(String gcsUri) throws Exception {
    // Polling algorithm for the long-running operation: exponential backoff
    // starting at 500 ms (x1.5, capped at 5 s).
    RetrySettings pollingSettings =
        RetrySettings.newBuilder()
            .setInitialRetryDelay(Duration.ofMillis(500L))
            .setRetryDelayMultiplier(1.5)
            .setMaxRetryDelay(Duration.ofMillis(5000L))
            .setInitialRpcTimeout(Duration.ZERO) // ignored
            .setRpcTimeoutMultiplier(1.0) // ignored
            .setMaxRpcTimeout(Duration.ZERO) // ignored
            .setTotalTimeout(Duration.ofHours(24L)) // set polling timeout to 24 hours
            .build();
    TimedRetryAlgorithm timedRetryAlgorithm = OperationTimedPollAlgorithm.create(pollingSettings);
    SpeechSettings.Builder speechSettings = SpeechSettings.newBuilder();
    speechSettings.longRunningRecognizeOperationSettings().setPollingAlgorithm(timedRetryAlgorithm);
    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
    try (SpeechClient speech = SpeechClient.create(speechSettings.build())) {
        // Configure remote file request for FLAC
        RecognitionConfig config =
            RecognitionConfig.newBuilder()
                .setEncoding(AudioEncoding.FLAC)
                .setLanguageCode("en-US")
                .setSampleRateHertz(16000)
                .build();
        RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
        // Use non-blocking call for getting file transcription
        OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
            speech.longRunningRecognizeAsync(config, audio);
        // Report progress every 10 s until the operation completes.
        while (!response.isDone()) {
            System.out.println("Waiting for response...");
            Thread.sleep(10000);
        }
        for (SpeechRecognitionResult result : response.get().getResultsList()) {
            // Several alternative transcripts may exist; the first is the most likely.
            SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
            System.out.printf("Transcription: %s\n", alternative.getTranscript());
        }
    }
}
Also used : TimedRetryAlgorithm(com.google.api.gax.retrying.TimedRetryAlgorithm) LongRunningRecognizeResponse(com.google.cloud.speech.v1.LongRunningRecognizeResponse) SpeechRecognitionAlternative(com.google.cloud.speech.v1.SpeechRecognitionAlternative) RecognitionAudio(com.google.cloud.speech.v1.RecognitionAudio) RecognitionConfig(com.google.cloud.speech.v1.RecognitionConfig) StreamingRecognitionConfig(com.google.cloud.speech.v1.StreamingRecognitionConfig) SpeechSettings(com.google.cloud.speech.v1.SpeechSettings) SpeechClient(com.google.cloud.speech.v1.SpeechClient) SpeechRecognitionResult(com.google.cloud.speech.v1.SpeechRecognitionResult) LongRunningRecognizeMetadata(com.google.cloud.speech.v1.LongRunningRecognizeMetadata)

Example 4 with SpeechRecognitionAlternative

use of com.google.cloud.speech.v1.SpeechRecognitionAlternative in project java-speech by googleapis.

the class Recognize method transcribeFileWithAutomaticPunctuation.

// [END speech_transcribe_streaming]
// [START speech_sync_recognize_punctuation]
/**
 * Performs transcription with automatic punctuation on raw PCM audio data.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 */
public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception {
    Path path = Paths.get(fileName);
    byte[] content = Files.readAllBytes(path);
    try (SpeechClient speechClient = SpeechClient.create()) {
        // Configure request with local raw PCM audio
        RecognitionConfig recConfig = RecognitionConfig.newBuilder().setEncoding(AudioEncoding.LINEAR16).setLanguageCode("en-US").setSampleRateHertz(16000).setEnableAutomaticPunctuation(true).build();
        // Get the contents of the local audio file
        RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
        // Perform the transcription request
        RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio);
        // Fix: short or silent audio can return no results; the original
        // get(0) then threw IndexOutOfBoundsException.
        if (recognizeResponse.getResultsList().isEmpty()) {
            System.out.println("No transcription results returned.");
            return;
        }
        // Just print the first result here.
        SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
        // There can be several alternative transcripts for a given chunk of speech. Just use the
        // first (most likely) one here.
        SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
        // Print out the result
        System.out.printf("Transcript : %s\n", alternative.getTranscript());
    }
}
Also used : Path(java.nio.file.Path) SpeechRecognitionAlternative(com.google.cloud.speech.v1.SpeechRecognitionAlternative) RecognitionAudio(com.google.cloud.speech.v1.RecognitionAudio) RecognitionConfig(com.google.cloud.speech.v1.RecognitionConfig) StreamingRecognitionConfig(com.google.cloud.speech.v1.StreamingRecognitionConfig) SpeechClient(com.google.cloud.speech.v1.SpeechClient) LongRunningRecognizeResponse(com.google.cloud.speech.v1.LongRunningRecognizeResponse) StreamingRecognizeResponse(com.google.cloud.speech.v1.StreamingRecognizeResponse) RecognizeResponse(com.google.cloud.speech.v1.RecognizeResponse) SpeechRecognitionResult(com.google.cloud.speech.v1.SpeechRecognitionResult)

Example 5 with SpeechRecognitionAlternative

use of com.google.cloud.speech.v1.SpeechRecognitionAlternative in project java-speech by googleapis.

the class Recognize method transcribeModelSelectionGcs.

// [END speech_transcribe_model_selection]
// [START speech_transcribe_model_selection_gcs]
/**
 * Performs transcription of the remote audio file asynchronously with the selected model.
 *
 * @param gcsUri the path to the remote audio file to transcribe.
 */
public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
    try (SpeechClient speech = SpeechClient.create()) {
        // Configure request with video media type
        RecognitionConfig config = RecognitionConfig.newBuilder().setEncoding(AudioEncoding.LINEAR16).setLanguageCode("en-US").setSampleRateHertz(16000).setModel("video").build();
        RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
        // Use non-blocking call for getting file transcription
        OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response = speech.longRunningRecognizeAsync(config, audio);
        // Poll until the long-running operation completes.
        while (!response.isDone()) {
            System.out.println("Waiting for response...");
            Thread.sleep(10000);
        }
        List<SpeechRecognitionResult> results = response.get().getResultsList();
        // Fix: short or silent audio can return no results; the original
        // get(0) then threw IndexOutOfBoundsException.
        if (results.isEmpty()) {
            System.out.println("No transcription results returned.");
            return;
        }
        // Just print the first result here.
        SpeechRecognitionResult result = results.get(0);
        // There can be several alternative transcripts for a given chunk of speech. Just use the
        // first (most likely) one here.
        SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
        System.out.printf("Transcript : %s\n", alternative.getTranscript());
    }
}
Also used : LongRunningRecognizeResponse(com.google.cloud.speech.v1.LongRunningRecognizeResponse) SpeechRecognitionAlternative(com.google.cloud.speech.v1.SpeechRecognitionAlternative) RecognitionAudio(com.google.cloud.speech.v1.RecognitionAudio) RecognitionConfig(com.google.cloud.speech.v1.RecognitionConfig) StreamingRecognitionConfig(com.google.cloud.speech.v1.StreamingRecognitionConfig) SpeechClient(com.google.cloud.speech.v1.SpeechClient) SpeechRecognitionResult(com.google.cloud.speech.v1.SpeechRecognitionResult) LongRunningRecognizeMetadata(com.google.cloud.speech.v1.LongRunningRecognizeMetadata)

Aggregations

RecognitionConfig (com.google.cloud.speech.v1.RecognitionConfig)24 SpeechClient (com.google.cloud.speech.v1.SpeechClient)24 SpeechRecognitionAlternative (com.google.cloud.speech.v1.SpeechRecognitionAlternative)24 Path (java.nio.file.Path)23 RecognitionConfig (com.google.cloud.speech.v1p1beta1.RecognitionConfig)22 SpeechClient (com.google.cloud.speech.v1p1beta1.SpeechClient)22 SpeechRecognitionAlternative (com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative)22 RecognitionAudio (com.google.cloud.speech.v1.RecognitionAudio)21 RecognitionAudio (com.google.cloud.speech.v1p1beta1.RecognitionAudio)20 SpeechRecognitionResult (com.google.cloud.speech.v1.SpeechRecognitionResult)19 SpeechRecognitionResult (com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult)18 LongRunningRecognizeResponse (com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse)17 StreamingRecognitionConfig (com.google.cloud.speech.v1.StreamingRecognitionConfig)16 LongRunningRecognizeResponse (com.google.cloud.speech.v1.LongRunningRecognizeResponse)14 RecognizeResponse (com.google.cloud.speech.v1.RecognizeResponse)14 RecognizeResponse (com.google.cloud.speech.v1p1beta1.RecognizeResponse)12 ByteString (com.google.protobuf.ByteString)12 StreamingRecognizeResponse (com.google.cloud.speech.v1.StreamingRecognizeResponse)10 StreamingRecognitionConfig (com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig)10 LongRunningRecognizeMetadata (com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata)8