Search in sources :

Example 31 with TextAnnotation

use of org.opencastproject.metadata.mpeg7.TextAnnotation in project opencast by opencast.

the class SolrIndexManager method getMaxConfidence.

/**
 * Gets the maximum confidence for a given keyword over all text annotations.
 *
 * @param keyword
 *          the keyword to look for; expected in lower case, since annotation keywords
 *          are lowercased before comparison
 * @param sortedAnnotations
 *          the text annotations to search
 * @return the maximum confidence value, or 0.0 if the keyword does not occur
 */
static double getMaxConfidence(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
    double max = 0.0;
    Iterator<TextAnnotation> textAnnotations = sortedAnnotations.iterator();
    while (textAnnotations.hasNext()) {
        TextAnnotation textAnnotation = textAnnotations.next();
        Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
        while (keywordAnnotations.hasNext()) {
            KeywordAnnotation ann = keywordAnnotations.next();
            // annotation keywords are lowercased, so callers must pass a lower-case keyword
            if (keyword.equals(ann.getKeyword().toLowerCase())) {
                max = Math.max(max, textAnnotation.getConfidence());
            }
        }
    }
    return max;
}
Also used : KeywordAnnotation(org.opencastproject.metadata.mpeg7.KeywordAnnotation) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation)

Example 32 with TextAnnotation

use of org.opencastproject.metadata.mpeg7.TextAnnotation in project opencast by opencast.

the class SolrIndexManager method importantKeywordsString.

/**
 * Generates a string with the most important keywords from the text annotations.
 * A keyword is considered important when it has high relevance and confidence, occurs
 * often, and is longer than {@code MAX_CHAR} characters. At most
 * {@code MAX_IMPORTANT_COUNT} keywords are returned, most important first, separated
 * by single spaces.
 *
 * @param sortedAnnotations
 *          the text annotations to extract keywords from
 * @return buffer containing the most important keywords
 */
static StringBuffer importantKeywordsString(SortedSet<TextAnnotation> sortedAnnotations) {
    // important keyword:
    // - high relevance
    // - high confidence
    // - occur often
    // - more than MAX_CHAR chars
    // calculate keyword occurrences (histogram) and importance
    HashMap<String, Integer> histogram = new HashMap<String, Integer>();
    HashMap<String, Double> importance = new HashMap<String, Double>();
    Iterator<TextAnnotation> textAnnotations = sortedAnnotations.iterator();
    while (textAnnotations.hasNext()) {
        TextAnnotation textAnnotation = textAnnotations.next();
        Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
        while (keywordAnnotations.hasNext()) {
            KeywordAnnotation annotation = keywordAnnotations.next();
            String keyword = annotation.getKeyword().toLowerCase();
            if (keyword.length() > MAX_CHAR) {
                int occ = histogram.getOrDefault(keyword, 0);
                histogram.put(keyword, occ + 1);
                // the importance value is calculated from relevance, confidence
                // and frequency of occurrence
                double imp = (RELEVANCE_BOOST * getMaxRelevance(keyword, sortedAnnotations)
                        + getMaxConfidence(keyword, sortedAnnotations)) * (occ + 1);
                importance.put(keyword, imp);
            }
        }
    }
    // append the MAX_IMPORTANT_COUNT most important keywords.
    // NOTE: the previous implementation tested an always-empty list in the loop
    // condition, so the MAX_IMPORTANT_COUNT limit was never applied (and the loop
    // could spin forever when all importance values were 0); a bounded counter
    // is used instead.
    StringBuffer buf = new StringBuffer();
    for (int count = 0; count < MAX_IMPORTANT_COUNT && !importance.isEmpty(); count++) {
        double max = 0.0;
        String maxKeyword = null;
        // find the keyword with the highest remaining importance
        for (Entry<String, Double> entry : importance.entrySet()) {
            if (max < entry.getValue()) {
                max = entry.getValue();
                maxKeyword = entry.getKey();
            }
        }
        // pop the maximum so the next iteration finds the runner-up
        importance.remove(maxKeyword);
        // append keyword to string
        if (buf.length() > 0) {
            buf.append(" ");
        }
        buf.append(maxKeyword);
    }
    return buf;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) KeywordAnnotation(org.opencastproject.metadata.mpeg7.KeywordAnnotation) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation)

Example 33 with TextAnnotation

use of org.opencastproject.metadata.mpeg7.TextAnnotation in project opencast by opencast.

the class Mpeg7CaptionConverter method exportCaption.

/**
 * Serializes the given captions to the output stream as an MPEG-7 catalog, with one
 * audio segment per caption.
 *
 * @param outputStream
 *          stream to write the serialized catalog to
 * @param captions
 *          the captions to export
 * @param language
 *          language of the captions, attached to each text annotation
 * @throws IOException
 *          if the catalog cannot be serialized
 */
@Override
public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
    Mpeg7Catalog mpeg7 = Mpeg7CatalogImpl.newInstance();
    MediaTime mediaTime = new MediaTimeImpl(0, 0);
    Audio audioContent = mpeg7.addAudioContent("captions", mediaTime, null);
    @SuppressWarnings("unchecked") TemporalDecomposition<AudioSegment> captionDecomposition = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
    int segmentCount = 0;
    for (Caption caption : captions) {
        // Skip captions that carry no words at all
        String[] words = caption.getCaption();
        if (words.length == 0)
            continue;
        // Create a new segment covering the caption's time span
        AudioSegment segment = captionDecomposition.createSegment("segment-" + segmentCount++);
        long startTimeInMillis = toMillis(caption.getStartTime());
        long endTimeInMillis = toMillis(caption.getStopTime());
        segment.setMediaTime(new MediaTimeImpl(startTimeInMillis, endTimeInMillis - startTimeInMillis));
        // Append the caption text, words joined by single spaces, as a free text annotation
        TextAnnotation textAnnotation = segment.createTextAnnotation(0, 0, language);
        textAnnotation.addFreeTextAnnotation(new FreeTextAnnotationImpl(String.join(" ", words)));
    }
    try {
        Transformer tf = XmlSafeParser.newTransformerFactory().newTransformer();
        DOMSource xmlSource = new DOMSource(mpeg7.toXml());
        tf.transform(xmlSource, new StreamResult(outputStream));
    } catch (TransformerException | TransformerFactoryConfigurationError | ParserConfigurationException e) {
        // all serialization failures are reported to the caller the same way
        logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
        throw new IOException(e);
    }
}

/**
 * Converts a caption time to absolute milliseconds since the start of the stream.
 *
 * @param time
 *          the caption time to convert
 * @return the time in milliseconds
 */
private static long toMillis(Time time) {
    Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
    calendar.setTimeInMillis(0);
    calendar.add(Calendar.HOUR_OF_DAY, time.getHours());
    calendar.add(Calendar.MINUTE, time.getMinutes());
    calendar.add(Calendar.SECOND, time.getSeconds());
    calendar.add(Calendar.MILLISECOND, time.getMilliseconds());
    return calendar.getTimeInMillis();
}
Also used : DOMSource(javax.xml.transform.dom.DOMSource) Transformer(javax.xml.transform.Transformer) TransformerConfigurationException(javax.xml.transform.TransformerConfigurationException) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) Time(org.opencastproject.caption.api.Time) MediaTimeImpl(org.opencastproject.metadata.mpeg7.MediaTimeImpl) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) TransformerException(javax.xml.transform.TransformerException) TransformerFactoryConfigurationError(javax.xml.transform.TransformerFactoryConfigurationError) StreamResult(javax.xml.transform.stream.StreamResult) Calendar(java.util.Calendar) IOException(java.io.IOException) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) Caption(org.opencastproject.caption.api.Caption) Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) FreeTextAnnotationImpl(org.opencastproject.metadata.mpeg7.FreeTextAnnotationImpl) TemporalDecomposition(org.opencastproject.metadata.mpeg7.TemporalDecomposition) Audio(org.opencastproject.metadata.mpeg7.Audio) AudioSegment(org.opencastproject.metadata.mpeg7.AudioSegment)

Example 34 with TextAnnotation

use of com.google.cloud.vision.v1.TextAnnotation in project spring-cloud-gcp by GoogleCloudPlatform.

the class DocumentOcrResultSet method getAllPages.

/**
 * Returns an {@link Iterator} over all the OCR pages of the document.
 *
 * <p>Pages are loaded lazily: each {@code OcrPageRange} is fetched and parsed only when
 * the iteration first reaches it.
 *
 * @return iterator of {@link TextAnnotation} describing OCR content of each page in the document.
 */
public Iterator<TextAnnotation> getAllPages() {
    return new Iterator<TextAnnotation>() {

        // walks the page ranges stored for this document, in map-value order
        private final Iterator<OcrPageRange> pageRangeIterator = ocrPageRanges.values().iterator();

        // index of the next page to return within currentPageRange
        private int offset = 0;

        // pages of the range currently being iterated; empty until next() loads the first range
        private List<TextAnnotation> currentPageRange = Collections.emptyList();

        @Override
        public boolean hasNext() {
            // more pages exist while another range is pending or the current one is not exhausted
            return pageRangeIterator.hasNext() || offset < currentPageRange.size();
        }

        @Override
        public TextAnnotation next() {
            if (!hasNext()) {
                throw new NoSuchElementException("No more pages left in DocumentOcrResultSet.");
            }
            if (offset >= currentPageRange.size()) {
                // current range exhausted: advance to the next range and parse its pages.
                // NOTE(review): this assumes every page range contains at least one page;
                // an empty range would make the get(offset) below throw — confirm against
                // OcrPageRange.getPages().
                OcrPageRange pageRange = pageRangeIterator.next();
                offset = 0;
                try {
                    currentPageRange = pageRange.getPages();
                } catch (InvalidProtocolBufferException e) {
                    // surface parse failures as unchecked, naming the offending blob
                    throw new RuntimeException("Failed to parse OCR output from JSON output file " + pageRange.getBlob().getName(), e);
                }
            }
            TextAnnotation result = currentPageRange.get(offset);
            offset++;
            return result;
        }
    };
}
Also used : Iterator(java.util.Iterator) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) List(java.util.List) TextAnnotation(com.google.cloud.vision.v1.TextAnnotation) NoSuchElementException(java.util.NoSuchElementException)

Example 35 with TextAnnotation

use of com.google.cloud.videointelligence.v1.TextAnnotation in project java-video-intelligence by googleapis.

the class TextDetection method detectTextGcs.

// [END video_detect_text]
// [START video_detect_text_gcs]
/**
 * Detect Text in a video.
 *
 * @param gcsUri the path to the video file to analyze.
 */
public static VideoAnnotationResults detectTextGcs(String gcsUri) throws Exception {
    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
        // Build a text-detection request for the given Cloud Storage location
        AnnotateVideoRequest textDetectionRequest =
                AnnotateVideoRequest.newBuilder()
                        .setInputUri(gcsUri)
                        .addFeatures(Feature.TEXT_DETECTION)
                        .build();
        // Kick off the asynchronous annotation operation
        OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> operation =
                client.annotateVideoAsync(textDetectionRequest);
        System.out.println("Waiting for operation to complete...");
        AnnotateVideoResponse response = operation.get(300, TimeUnit.SECONDS);
        // A single video was processed, so the first result holds everything we need
        VideoAnnotationResults firstResult = response.getAnnotationResults(0);
        // Get only the first annotation for demo purposes.
        TextAnnotation firstAnnotation = firstResult.getTextAnnotations(0);
        System.out.println("Text: " + firstAnnotation.getText());
        // Inspect the first segment of that annotation
        TextSegment segment = firstAnnotation.getSegments(0);
        System.out.println("Confidence: " + segment.getConfidence());
        // Report the segment's time offsets; 1e9 converts nanos to seconds
        VideoSegment segmentSpan = segment.getSegment();
        Duration segmentStart = segmentSpan.getStartTimeOffset();
        Duration segmentEnd = segmentSpan.getEndTimeOffset();
        System.out.println(String.format("Start time: %.2f", segmentStart.getSeconds() + segmentStart.getNanos() / 1e9));
        System.out.println(String.format("End time: %.2f", segmentEnd.getSeconds() + segmentEnd.getNanos() / 1e9));
        // Show the first result for the first frame in the segment.
        TextFrame firstFrame = segment.getFrames(0);
        Duration frameOffset = firstFrame.getTimeOffset();
        System.out.println(String.format("Time offset for the first frame: %.2f", frameOffset.getSeconds() + frameOffset.getNanos() / 1e9));
        // Print every vertex of the rotated bounding box around the detected text
        System.out.println("Rotated Bounding Box Vertices:");
        for (NormalizedVertex vertex : firstFrame.getRotatedBoundingBox().getVerticesList()) {
            System.out.println(String.format("\tVertex.x: %.2f, Vertex.y: %.2f", vertex.getX(), vertex.getY()));
        }
        return firstResult;
    }
}
Also used : AnnotateVideoRequest(com.google.cloud.videointelligence.v1.AnnotateVideoRequest) Duration(com.google.protobuf.Duration) NormalizedVertex(com.google.cloud.videointelligence.v1.NormalizedVertex) VideoIntelligenceServiceClient(com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient) AnnotateVideoProgress(com.google.cloud.videointelligence.v1.AnnotateVideoProgress) VideoSegment(com.google.cloud.videointelligence.v1.VideoSegment) VideoAnnotationResults(com.google.cloud.videointelligence.v1.VideoAnnotationResults) TextFrame(com.google.cloud.videointelligence.v1.TextFrame) TextSegment(com.google.cloud.videointelligence.v1.TextSegment) TextAnnotation(com.google.cloud.videointelligence.v1.TextAnnotation) AnnotateVideoResponse(com.google.cloud.videointelligence.v1.AnnotateVideoResponse)

Aggregations

ArrayList (java.util.ArrayList)15 TextAnnotation (com.google.cloud.vision.v1.TextAnnotation)12 Test (org.junit.Test)11 ByteString (com.google.protobuf.ByteString)9 TextAnnotation (com.google.cloud.videointelligence.v1.TextAnnotation)8 VideoAnnotationResults (com.google.cloud.videointelligence.v1.VideoAnnotationResults)8 List (java.util.List)7 TextAnnotation (org.kie.workbench.common.dmn.api.definition.v1_1.TextAnnotation)7 FreeTextAnnotation (org.opencastproject.metadata.mpeg7.FreeTextAnnotation)7 TextAnnotation (org.opencastproject.metadata.mpeg7.TextAnnotation)7 AnnotateImageResponse (com.google.cloud.vision.v1.AnnotateImageResponse)6 Duration (com.google.protobuf.Duration)6 AnnotateVideoProgress (com.google.cloud.videointelligence.v1.AnnotateVideoProgress)4 AnnotateVideoRequest (com.google.cloud.videointelligence.v1.AnnotateVideoRequest)4 AnnotateVideoResponse (com.google.cloud.videointelligence.v1.AnnotateVideoResponse)4 NormalizedVertex (com.google.cloud.videointelligence.v1.NormalizedVertex)4 TextFrame (com.google.cloud.videointelligence.v1.TextFrame)4 TextSegment (com.google.cloud.videointelligence.v1.TextSegment)4 VideoIntelligenceServiceClient (com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient)4 VideoSegment (com.google.cloud.videointelligence.v1.VideoSegment)4