Use of org.openhab.core.voice.SpeechRecognitionErrorEvent in project openhab-addons by openhab.
The recognize method of the WatsonSTTService class.
@Override
public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale,
        Set<String> set) throws STTException {
    if (config.apiKey.isBlank() || config.instanceUrl.isBlank()) {
        throw new STTException("service is not correctly configured");
    }
    String contentType = getContentType(audioStream);
    if (contentType == null) {
        throw new STTException("Unsupported format, unable to resolve audio content type");
    }
    logger.debug("Content-Type: {}", contentType);
    var speechToText = new SpeechToText(new IamAuthenticator.Builder().apikey(config.apiKey).build());
    speechToText.setServiceUrl(config.instanceUrl);
    if (config.optOutLogging) {
        speechToText.setDefaultHeaders(Map.of("X-Watson-Learning-Opt-Out", "1"));
    }
    RecognizeWithWebsocketsOptions wsOptions = new RecognizeWithWebsocketsOptions.Builder() //
            .audio(audioStream) //
            .contentType(contentType) //
            .redaction(config.redaction) //
            .smartFormatting(config.smartFormatting) //
            .model(locale.toLanguageTag() + "_BroadbandModel") //
            .interimResults(true) //
            .backgroundAudioSuppression(config.backgroundAudioSuppression) //
            .speechDetectorSensitivity(config.speechDetectorSensitivity) //
            .inactivityTimeout(config.inactivityTimeout) //
            .build();
    final AtomicReference<@Nullable WebSocket> socketRef = new AtomicReference<>();
    final AtomicBoolean aborted = new AtomicBoolean(false);
    executor.submit(() -> {
        int retries = 2;
        while (retries > 0) {
            try {
                socketRef.set(speechToText.recognizeUsingWebSocket(wsOptions,
                        new TranscriptionListener(sttListener, config, aborted)));
                break;
            } catch (RuntimeException e) {
                var cause = e.getCause();
                if (cause instanceof SSLPeerUnverifiedException) {
                    // Transient TLS handshake failures get one retry.
                    logger.debug("Retrying on error: {}", cause.getMessage());
                    retries--;
                } else {
                    var errorMessage = e.getMessage();
                    logger.warn("Aborting on error: {}", errorMessage);
                    sttListener.sttEventReceived(
                            new SpeechRecognitionErrorEvent(errorMessage != null ? errorMessage : "Unknown error"));
                    break;
                }
            }
        }
    });
    return new STTServiceHandle() {
        @Override
        public void abort() {
            if (!aborted.getAndSet(true)) {
                var socket = socketRef.get();
                if (socket != null) {
                    // 1000 is the WebSocket "normal closure" status code.
                    socket.close(1000, null);
                    socket.cancel();
                    try {
                        Thread.sleep(100);
                    } catch (InterruptedException ignored) {
                    }
                }
            }
        }
    };
}
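For orientation, here is a minimal caller-side sketch (not from the repository; WatsonSTTUsageSketch and its arguments are hypothetical) showing how the returned handle is typically driven: the listener receives the transcription events, and abort() triggers the WebSocket close shown above.

import java.util.Locale;
import java.util.Set;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.voice.STTEvent;
import org.openhab.core.voice.STTException;
import org.openhab.core.voice.STTListener;
import org.openhab.core.voice.STTService;
import org.openhab.core.voice.STTServiceHandle;

class WatsonSTTUsageSketch {
    void transcribe(STTService sttService, AudioStream audioStream) throws STTException, InterruptedException {
        STTListener listener = new STTListener() {
            @Override
            public void sttEventReceived(STTEvent event) {
                // SpeechRecognitionEvent, SpeechRecognitionErrorEvent, etc. arrive here.
                System.out.println("STT event: " + event);
            }
        };
        STTServiceHandle handle = sttService.recognize(listener, audioStream, Locale.US, Set.of());
        Thread.sleep(10_000); // give the service up to 10 s
        handle.abort(); // triggers the close(1000, null) shown above
    }
}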
Use of org.openhab.core.voice.SpeechRecognitionErrorEvent in project openhab-addons by openhab.
The backgroundRecognize method of the GoogleSTTService class.
private Future<?> backgroundRecognize(STTListener sttListener, AudioStream audioStream, AtomicBoolean aborted,
        Locale locale, Set<String> set) {
    Credentials credentials = getCredentials();
    return executor.submit(() -> {
        logger.debug("Background recognize starting");
        ClientStream<StreamingRecognizeRequest> clientStream = null;
        try (SpeechClient client = SpeechClient
                .create(SpeechSettings.newBuilder().setCredentialsProvider(() -> credentials).build())) {
            TranscriptionListener responseObserver = new TranscriptionListener(sttListener, config, aborted);
            clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
            streamAudio(clientStream, audioStream, responseObserver, aborted, locale);
            clientStream.closeSend();
            logger.debug("Background recognize done");
        } catch (IOException e) {
            if (clientStream != null && clientStream.isSendReady()) {
                clientStream.closeSendWithError(e);
            } else if (!config.errorMessage.isBlank()) {
                logger.warn("Error running speech to text: {}", e.getMessage());
                sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
            }
        }
    });
}
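The streamAudio(...) helper is not included in the snippet. A rough sketch of what such a loop can look like, assuming LINEAR16 PCM at 16 kHz (the real addon derives encoding and sample rate from the AudioStream format and also checks the aborted flag):

import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
import com.google.api.gax.rpc.ClientStream;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
import com.google.protobuf.ByteString;

final class StreamAudioSketch {
    static void streamAudio(ClientStream<StreamingRecognizeRequest> clientStream, InputStream audioStream,
            Locale locale) throws IOException {
        // The first request on the stream must carry the recognition configuration.
        RecognitionConfig config = RecognitionConfig.newBuilder()
                .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
                .setSampleRateHertz(16000)
                .setLanguageCode(locale.toLanguageTag())
                .build();
        clientStream.send(StreamingRecognizeRequest.newBuilder()
                .setStreamingConfig(StreamingRecognitionConfig.newBuilder().setConfig(config).build())
                .build());
        // Every following request carries only a chunk of audio.
        byte[] buffer = new byte[6400]; // 200 ms of 16 kHz 16-bit mono PCM
        int n;
        while ((n = audioStream.read(buffer)) > 0) {
            clientStream.send(StreamingRecognizeRequest.newBuilder()
                    .setAudioContent(ByteString.copyFrom(buffer, 0, n))
                    .build());
        }
    }
}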
Use of org.openhab.core.voice.SpeechRecognitionErrorEvent in project openhab-addons by openhab.
The backgroundRecognize method of the VoskSTTService class.
private Future<?> backgroundRecognize(STTListener sttListener, InputStream audioStream, long frequency,
        AtomicBoolean aborted) {
    StringBuilder transcriptBuilder = new StringBuilder();
    long maxTranscriptionMillis = config.maxTranscriptionSeconds * 1000L;
    long maxSilenceMillis = config.maxSilenceSeconds * 1000L;
    long startTime = System.currentTimeMillis();
    return executor.submit(() -> {
        Recognizer recognizer = null;
        Model model = null;
        try {
            model = getModel();
            recognizer = new Recognizer(model, frequency);
            long lastInputTime = System.currentTimeMillis();
            int nbytes;
            byte[] b = new byte[4096];
            ObjectMapper mapper = new ObjectMapper();
            sttListener.sttEventReceived(new RecognitionStartEvent());
            while (!aborted.get()) {
                nbytes = audioStream.read(b);
                if (aborted.get()) {
                    break;
                }
                if (isExpiredInterval(maxTranscriptionMillis, startTime)) {
                    logger.debug("Stops listening, max transcription time reached");
                    break;
                }
                if (!config.singleUtteranceMode && isExpiredInterval(maxSilenceMillis, lastInputTime)) {
                    logger.debug("Stops listening, max silence time reached");
                    break;
                }
                if (nbytes == 0) {
                    trySleep(100);
                    continue;
                }
                if (recognizer.acceptWaveForm(b, nbytes)) {
                    lastInputTime = System.currentTimeMillis();
                    var result = recognizer.getResult();
                    logger.debug("Result: {}", result);
                    // Vosk returns results as JSON; the transcript is in the "text" field.
                    var json = mapper.readTree(result);
                    transcriptBuilder.append(json.get("text").asText()).append(" ");
                    if (config.singleUtteranceMode) {
                        break;
                    }
                } else {
                    logger.debug("Partial: {}", recognizer.getPartialResult());
                }
            }
            if (!aborted.get()) {
                sttListener.sttEventReceived(new RecognitionStopEvent());
                var transcript = transcriptBuilder.toString().trim();
                logger.debug("Final: {}", transcript);
                if (!transcript.isBlank()) {
                    sttListener.sttEventReceived(new SpeechRecognitionEvent(transcript, 1F));
                } else if (!config.noResultsMessage.isBlank()) {
                    sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.noResultsMessage));
                } else {
                    sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("No results"));
                }
            }
        } catch (IOException e) {
            logger.warn("Error running speech to text: {}", e.getMessage());
            if (config.errorMessage.isBlank()) {
                sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
            } else {
                sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
            }
        } finally {
            if (recognizer != null) {
                recognizer.close();
            }
            // A preloaded model is shared across invocations and must stay open.
            if (!config.preloadModel && model != null) {
                model.close();
            }
        }
        try {
            audioStream.close();
        } catch (IOException e) {
            logger.warn("IOException on close: {}", e.getMessage());
        }
    });
}
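The trySleep and isExpiredInterval helpers are referenced but not shown; plausible implementations, inferred from the call sites above:

// Sleep without propagating InterruptedException; called while waiting for audio.
private void trySleep(long millis) {
    try {
        Thread.sleep(millis);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
    }
}

// True once more than intervalMillis have elapsed since referenceTime.
private boolean isExpiredInterval(long intervalMillis, long referenceTime) {
    return System.currentTimeMillis() - referenceTime > intervalMillis;
}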
Use of org.openhab.core.voice.SpeechRecognitionErrorEvent in project openhab-core by openhab.
The sttEventReceived method of the DialogProcessor class.
@Override
public synchronized void sttEventReceived(STTEvent sttEvent) {
    if (sttEvent instanceof SpeechRecognitionEvent) {
        logger.debug("SpeechRecognitionEvent event received");
        if (!isSTTServerAborting) {
            SpeechRecognitionEvent sre = (SpeechRecognitionEvent) sttEvent;
            String question = sre.getTranscript();
            logger.debug("Text recognized: {}", question);
            toggleProcessing(false);
            String answer = "";
            String error = null;
            // Try each interpreter in order; the first successful answer wins.
            for (HumanLanguageInterpreter interpreter : hlis) {
                try {
                    answer = interpreter.interpret(locale, question);
                    logger.debug("Interpretation result: {}", answer);
                    error = null;
                    break;
                } catch (InterpretationException e) {
                    logger.debug("Interpretation exception: {}", e.getMessage());
                    error = Objects.requireNonNullElse(e.getMessage(), "Unexpected error");
                }
            }
            say(error != null ? error : answer);
            abortSTT();
        }
    } else if (sttEvent instanceof RecognitionStartEvent) {
        logger.debug("RecognitionStartEvent event received");
        toggleProcessing(true);
    } else if (sttEvent instanceof RecognitionStopEvent) {
        logger.debug("RecognitionStopEvent event received");
        toggleProcessing(false);
    } else if (sttEvent instanceof SpeechRecognitionErrorEvent) {
        logger.debug("SpeechRecognitionErrorEvent event received");
        if (!isSTTServerAborting) {
            abortSTT();
            toggleProcessing(false);
            SpeechRecognitionErrorEvent sre = (SpeechRecognitionErrorEvent) sttEvent;
            String text = i18nProvider.getText(bundle, "error.stt-error", null, locale);
            say(text == null ? sre.getMessage() : text.replace("{0}", sre.getMessage()));
        }
    }
}
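The interpreter loop above is a first-success fallback chain: each HumanLanguageInterpreter is tried in order, the first answer wins, and the last error message is spoken if all fail. Extracted as a standalone helper (an illustrative rewrite, not code from openhab-core):

import java.util.List;
import java.util.Locale;
import java.util.Objects;
import org.openhab.core.voice.text.HumanLanguageInterpreter;
import org.openhab.core.voice.text.InterpretationException;

final class InterpreterChainSketch {
    // Returns the first successful interpretation; if every interpreter
    // throws, returns the message of the last InterpretationException.
    static String interpretWithFallback(List<HumanLanguageInterpreter> hlis, Locale locale, String question) {
        String error = "No interpreter available";
        for (HumanLanguageInterpreter hli : hlis) {
            try {
                return hli.interpret(locale, question);
            } catch (InterpretationException e) {
                error = Objects.requireNonNullElse(e.getMessage(), "Unexpected error");
            }
        }
        return error;
    }
}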
Use of org.openhab.core.voice.SpeechRecognitionErrorEvent in project openhab-core by openhab.
The recognize method of the STTServiceStub class.
@Override
public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale,
        Set<String> grammars) throws STTException {
    if (exceptionExpected) {
        throw new STTException(EXCEPTION_MESSAGE);
    } else {
        if (errorExpected) {
            sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(ERROR_MESSAGE));
        } else {
            recognized = true;
            sttListener.sttEventReceived(new SpeechRecognitionEvent(RECOGNIZED_TEXT, 0.75f));
        }
        return new STTServiceHandle() {
            // this method will not be used in the tests
            @Override
            public void abort() {
            }
        };
    }
}
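A sketch of how such a stub can be exercised from a test (hypothetical test code; it assumes STTListener is a single-method interface so a lambda can stand in for it, and that the stub has been armed with errorExpected beforehand):

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.voice.STTEvent;
import org.openhab.core.voice.STTException;
import org.openhab.core.voice.SpeechRecognitionErrorEvent;

class STTServiceStubUsageSketch {
    void errorPathDeliversErrorEvent(STTServiceStub stub, AudioStream audioStream) throws STTException {
        List<STTEvent> received = new ArrayList<>();
        stub.recognize(event -> received.add(event), audioStream, Locale.US, Set.of());
        // With errorExpected set, the listener should see exactly one error event.
        assert received.size() == 1 && received.get(0) instanceof SpeechRecognitionErrorEvent;
    }
}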