use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project opencast by opencast.
the class SolrIndexManager method getMaxConfidence.
/**
 * Determines the highest confidence among the text annotations that carry the given keyword.
 *
 * @param keyword
 *          the keyword to look for; it is compared verbatim against the lower-cased keywords of each annotation
 * @param sortedAnnotations
 *          the text annotations to search
 * @return the highest confidence of a matching annotation, or <code>0.0</code> if nothing matches or no matching
 *         confidence exceeds zero
 */
static double getMaxConfidence(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
  double highest = 0.0;
  for (TextAnnotation candidate : sortedAnnotations) {
    Iterator<KeywordAnnotation> keywords = candidate.keywordAnnotations();
    while (keywords.hasNext()) {
      String lowered = keywords.next().getKeyword().toLowerCase();
      if (!keyword.equals(lowered)) {
        continue;
      }
      // The confidence belongs to the enclosing text annotation, not to the single keyword.
      if (highest < candidate.getConfidence()) {
        highest = candidate.getConfidence();
      }
    }
  }
  return highest;
}
use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project opencast by opencast.
the class SolrIndexManager method importantKeywordsString.
/**
 * Generates a string with the most important keywords from the text annotations.
 * <p>
 * Only keywords longer than <code>MAX_CHAR</code> characters are considered. The importance of a keyword is
 * <code>(RELEVANCE_BOOST * maximum relevance + maximum confidence) * number of occurrences</code>. At most
 * <code>MAX_IMPORTANT_COUNT</code> keywords are returned, ordered by descending importance and separated by a
 * single space.
 *
 * @param sortedAnnotations
 *          the text annotations to collect the keywords from
 * @return the keyword string; empty if no keyword qualifies
 */
static StringBuffer importantKeywordsString(SortedSet<TextAnnotation> sortedAnnotations) {
  // important keyword:
  // - high relevance
  // - high confidence
  // - occur often
  // - more than MAX_CHAR chars

  // calculate keyword occurrences (histogram) and importance
  HashMap<String, Integer> histogram = new HashMap<String, Integer>();
  HashMap<String, Double> importance = new HashMap<String, Double>();
  for (TextAnnotation textAnnotation : sortedAnnotations) {
    Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
    while (keywordAnnotations.hasNext()) {
      String keyword = keywordAnnotations.next().getKeyword().toLowerCase();
      if (keyword.length() > MAX_CHAR) {
        Integer previous = histogram.get(keyword);
        int occurrences = (previous == null) ? 1 : previous + 1;
        histogram.put(keyword, occurrences);
        // here the importance value is calculated
        // from relevance, confidence and frequency of occurrence.
        double imp = (RELEVANCE_BOOST * getMaxRelevance(keyword, sortedAnnotations)
                + getMaxConfidence(keyword, sortedAnnotations)) * occurrences;
        importance.put(keyword, imp);
      }
    }
  }

  // get the MAX_IMPORTANT_COUNT most important keywords
  StringBuffer buf = new StringBuffer();
  int selected = 0;
  while (selected < MAX_IMPORTANT_COUNT && !importance.isEmpty()) {
    // Start from "no candidate" instead of a 0.0 threshold: otherwise keywords whose importance is not
    // strictly positive are never picked, nothing is removed from the map and the loop never terminates.
    Entry<String, Double> best = null;
    for (Entry<String, Double> entry : importance.entrySet()) {
      if (best == null || best.getValue() < entry.getValue()) {
        best = entry;
      }
    }
    // pop maximum (read the key before removing the entry from the map)
    String bestKeyword = best.getKey();
    importance.remove(bestKeyword);
    // append keyword to string
    if (buf.length() > 0) {
      buf.append(" ");
    }
    buf.append(bestKeyword);
    selected++;
  }
  return buf;
}
use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project opencast by opencast.
the class Mpeg7CaptionConverter method exportCaption.
@Override
public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
  // Build an MPEG-7 catalog with one audio content element whose temporal decomposition holds the captions.
  Mpeg7Catalog catalog = Mpeg7CatalogImpl.newInstance();
  MediaTime contentTime = new MediaTimeImpl(0, 0);
  Audio audio = catalog.addAudioContent("captions", contentTime, null);
  @SuppressWarnings("unchecked")
  TemporalDecomposition<AudioSegment> decomposition =
          (TemporalDecomposition<AudioSegment>) audio.getTemporalDecomposition();

  int nextSegmentIndex = 0;
  for (Caption caption : captions) {
    String[] parts = caption.getCaption();
    // Captions without any text do not get a segment (and do not consume a segment index).
    if (parts.length == 0) {
      continue;
    }

    AudioSegment segment = decomposition.createSegment("segment-" + nextSegmentIndex);
    nextSegmentIndex++;

    // Position the segment: start offset plus duration, both in milliseconds.
    Time begin = caption.getStartTime();
    Time end = caption.getStopTime();
    long beginMillis = captionTimeToMillis(begin);
    long endMillis = captionTimeToMillis(end);
    segment.setMediaTime(new MediaTimeImpl(beginMillis, endMillis - beginMillis));

    TextAnnotation annotation = segment.createTextAnnotation(0, 0, language);

    // Join the caption parts with single blanks; no blank is emitted while the line is still empty.
    StringBuilder line = new StringBuilder();
    for (String part : parts) {
      if (line.length() > 0) {
        line.append(' ');
      }
      line.append(part);
    }
    annotation.addFreeTextAnnotation(new FreeTextAnnotationImpl(line.toString()));
  }

  // Serialize the catalog to the output stream; every failure is logged and rethrown as an IOException.
  try {
    Transformer transformer = XmlSafeParser.newTransformerFactory().newTransformer();
    DOMSource catalogXml = new DOMSource(catalog.toXml());
    transformer.transform(catalogXml, new StreamResult(outputStream));
  } catch (TransformerConfigurationException | TransformerFactoryConfigurationError
          | ParserConfigurationException e) {
    // TransformerConfigurationException extends TransformerException, so it cannot share a multi-catch with it.
    logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
    throw new IOException(e);
  } catch (TransformerException e) {
    logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
    throw new IOException(e);
  }
}

/**
 * Converts a caption time into milliseconds by adding its hour, minute, second and millisecond fields to a UTC
 * calendar that starts at time zero.
 *
 * @param time
 *          the caption time
 * @return the time in milliseconds
 */
private static long captionTimeToMillis(Time time) {
  Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
  calendar.setTimeInMillis(0);
  calendar.add(Calendar.HOUR_OF_DAY, time.getHours());
  calendar.add(Calendar.MINUTE, time.getMinutes());
  calendar.add(Calendar.SECOND, time.getSeconds());
  calendar.add(Calendar.MILLISECOND, time.getMilliseconds());
  return calendar.getTimeInMillis();
}
use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project spring-cloud-gcp by GoogleCloudPlatform.
the class DocumentOcrResultSet method getAllPages.
/**
 * Returns an {@link Iterator} over all the OCR pages of the document.
 *
 * <p>Page ranges are loaded on demand while iterating; page ranges that contain no pages are skipped.
 * Both {@code hasNext()} and {@code next()} may therefore load the next page range and throw a
 * {@link RuntimeException} if its JSON output cannot be parsed.
 *
 * @return iterator of {@link TextAnnotation} describing OCR content of each page in the document.
 */
public Iterator<TextAnnotation> getAllPages() {
  return new Iterator<TextAnnotation>() {

    private final Iterator<OcrPageRange> pageRangeIterator = ocrPageRanges.values().iterator();

    private int offset = 0;

    private List<TextAnnotation> currentPageRange = Collections.emptyList();

    @Override
    public boolean hasNext() {
      return advanceToNextPage();
    }

    @Override
    public TextAnnotation next() {
      if (!advanceToNextPage()) {
        throw new NoSuchElementException("No more pages left in DocumentOcrResultSet.");
      }
      TextAnnotation result = currentPageRange.get(offset);
      offset++;
      return result;
    }

    /**
     * Moves on to the next page range while the current one is exhausted, so that
     * {@code offset} points at a page that can be returned.
     *
     * @return true if a page is available at {@code offset}, false if all page ranges are exhausted
     */
    private boolean advanceToNextPage() {
      // A loop rather than a single step: a page range without pages must not be reported as
      // "has next", otherwise next() would fail on get(0) of an empty list.
      while (offset >= currentPageRange.size()) {
        if (!pageRangeIterator.hasNext()) {
          return false;
        }
        OcrPageRange pageRange = pageRangeIterator.next();
        List<TextAnnotation> pages;
        try {
          pages = pageRange.getPages();
        } catch (InvalidProtocolBufferException e) {
          throw new RuntimeException(
              "Failed to parse OCR output from JSON output file " + pageRange.getBlob().getName(), e);
        }
        // Only switch state once loading succeeded, so a failed range never causes pages of the
        // previous range to be returned again.
        currentPageRange = pages;
        offset = 0;
      }
      return true;
    }
  };
}
use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project java-video-intelligence by googleapis.
the class TextDetection method detectTextGcs.
// [END video_detect_text]
// [START video_detect_text_gcs]
/**
 * Runs text detection on a video and prints details of the first detected text annotation.
 *
 * @param gcsUri the path to the video file to analyze.
 * @return the annotation results of the (single) processed video.
 */
public static VideoAnnotationResults detectTextGcs(String gcsUri) throws Exception {
  try (VideoIntelligenceServiceClient videoClient = VideoIntelligenceServiceClient.create()) {
    // Build the request: one input video, text detection only.
    AnnotateVideoRequest annotateRequest =
        AnnotateVideoRequest.newBuilder()
            .setInputUri(gcsUri)
            .addFeatures(Feature.TEXT_DETECTION)
            .build();

    // Start the asynchronous text detection operation.
    OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> operation =
        videoClient.annotateVideoAsync(annotateRequest);

    System.out.println("Waiting for operation to complete...");
    // Only one video was submitted, so the first result is the one of interest.
    AnnotateVideoResponse annotateResponse = operation.get(300, TimeUnit.SECONDS);
    VideoAnnotationResults results = annotateResponse.getAnnotationResults(0);

    // For demo purposes only the first annotation is inspected.
    TextAnnotation firstAnnotation = results.getTextAnnotations(0);
    System.out.println("Text: " + firstAnnotation.getText());

    // First text segment of that annotation.
    TextSegment firstSegment = firstAnnotation.getSegments(0);
    System.out.println("Confidence: " + firstSegment.getConfidence());

    // Time span covered by the text segment.
    VideoSegment span = firstSegment.getSegment();
    Duration spanStart = span.getStartTimeOffset();
    Duration spanEnd = span.getEndTimeOffset();
    // Seconds plus nanos / 1e9 yields the offset in fractional seconds.
    double spanStartSeconds = spanStart.getSeconds() + spanStart.getNanos() / 1e9;
    double spanEndSeconds = spanEnd.getSeconds() + spanEnd.getNanos() / 1e9;
    String startLine = String.format("Start time: %.2f", spanStartSeconds);
    System.out.println(startLine);
    String endLine = String.format("End time: %.2f", spanEndSeconds);
    System.out.println(endLine);

    // First frame of the segment.
    TextFrame firstFrame = firstSegment.getFrames(0);
    Duration frameOffset = firstFrame.getTimeOffset();
    double frameOffsetSeconds = frameOffset.getSeconds() + frameOffset.getNanos() / 1e9;
    String frameLine = String.format("Time offset for the first frame: %.2f", frameOffsetSeconds);
    System.out.println(frameLine);

    // Rotated bounding box locating the text within the frame.
    System.out.println("Rotated Bounding Box Vertices:");
    List<NormalizedVertex> corners = firstFrame.getRotatedBoundingBox().getVerticesList();
    for (NormalizedVertex corner : corners) {
      String cornerLine =
          String.format("\tVertex.x: %.2f, Vertex.y: %.2f", corner.getX(), corner.getY());
      System.out.println(cornerLine);
    }
    return results;
  }
}
Aggregations