Search in sources:

Example 16 with TextAnnotation

use of org.opencastproject.metadata.mpeg7.TextAnnotation in project opencast by opencast.

The class SolrIndexManager, method addMpeg7Metadata.

/**
 * Add the mpeg 7 catalog data to the solr document.
 *
 * @param doc
 *          the input document to the solr index
 * @param mpeg7
 *          the mpeg7 catalog
 */
@SuppressWarnings("unchecked")
static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) {
    // Check for multimedia content
    if (!mpeg7.multimediaContent().hasNext()) {
        logger.warn("Mpeg-7 doesn't contain multimedia content");
        return;
    }
    // Get the content duration by looking at the first content track. This
    // of course assumes that all tracks are equally long.
    MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next();
    MultimediaContentType mct = mc.elements().next();
    MediaTime mediaTime = mct.getMediaTime();
    Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds());
    // Check if the keywords have been filled by (manually) added dublin
    // core data. If not, look for the most relevant fields in mpeg-7.
    SortedSet<TextAnnotation> sortedAnnotations = null;
    if (!"".equals(Schema.getOcKeywords(doc))) {
        sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() {

            @Override
            public int compare(TextAnnotation a1, TextAnnotation a2) {
                double v1 = RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence();
                double v2 = RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence();
                if (v1 > v2) {
                    return -1;
                } else if (v1 < v2) {
                    return 1;
                }
                return 0;
            }
        });
    }
    // Iterate over the tracks and extract keywords and hints
    Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent();
    int segmentCount = 0;
    while (mmIter.hasNext()) {
        MultimediaContent<?> multimediaContent = mmIter.next();
        // We need to process visual segments first, due to the way they are handled in the ui.
        for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext(); ) {
            MultimediaContentType type = (MultimediaContentType) iterator.next();
            if (!(type instanceof Video) && !(type instanceof AudioVisual)) {
                continue;
            }
            // for every segment in the current multimedia content track
            Video video = (Video) type;
            Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition().segments();
            while (vsegments.hasNext()) {
                VideoSegment segment = vsegments.next();
                StringBuffer segmentText = new StringBuffer();
                StringBuffer hintField = new StringBuffer();
                // Collect the video text elements to a segment text
                SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition();
                if (spt != null) {
                    for (VideoText videoText : spt.getVideoText()) {
                        if (segmentText.length() > 0) {
                            segmentText.append(" ");
                        }
                        segmentText.append(videoText.getText().getText());
                    // TODO: Add hint on bounding box
                    }
                }
                // Add keyword annotations
                Iterator<TextAnnotation> textAnnotations = segment.textAnnotations();
                while (textAnnotations.hasNext()) {
                    TextAnnotation textAnnotation = textAnnotations.next();
                    // Remember the annotation for the keyword ranking at the end
                    if (sortedAnnotations != null) {
                        sortedAnnotations.add(textAnnotation);
                    }
                    Iterator<?> kwIter = textAnnotation.keywordAnnotations();
                    while (kwIter.hasNext()) {
                        KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next();
                        if (segmentText.length() > 0) {
                            segmentText.append(" ");
                        }
                        segmentText.append(keywordAnnotation.getKeyword());
                    }
                }
                // Add free text annotations
                Iterator<TextAnnotation> freeIter = segment.textAnnotations();
                if (freeIter.hasNext()) {
                    Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations();
                    while (freeTextIter.hasNext()) {
                        FreeTextAnnotation freeTextAnnotation = freeTextIter.next();
                        if (segmentText.length() > 0) {
                            segmentText.append(" ");
                        }
                        segmentText.append(freeTextAnnotation.getText());
                    }
                }
                // add segment text to solr document
                Schema.setSegmentText(doc, new DField<String>(segmentText.toString(), Integer.toString(segmentCount)));
                // get the segments time properties
                MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint();
                MediaDuration duration = segment.getMediaTime().getMediaDuration();
                // TODO: define a class with hint field constants
                hintField.append("time=" + timepoint.getTimeInMilliseconds() + "\n");
                hintField.append("duration=" + duration.getDurationInMilliseconds() + "\n");
                // Look for preview images. Their characteristics are that they are
                // attached as attachments with a flavor of preview/<something>.
                String time = timepoint.toString();
                for (Attachment slide : mediaPackage.getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) {
                    MediaPackageReference ref = slide.getReference();
                    if (ref != null && time.equals(ref.getProperty("time"))) {
                        hintField.append("preview");
                        hintField.append(".");
                        hintField.append(ref.getIdentifier());
                        hintField.append("=");
                        hintField.append(slide.getURI().toString());
                        hintField.append("\n");
                    }
                }
                logger.trace("Adding segment: {}", timepoint);
                Schema.setSegmentHint(doc, new DField<>(hintField.toString(), Integer.toString(segmentCount)));
                // increase segment counter
                segmentCount++;
            }
        }
    }
    // Put the most important keywords into a special solr field
    if (sortedAnnotations != null) {
        Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString());
    }
}
Also used : Attachment(org.opencastproject.mediapackage.Attachment) VideoText(org.opencastproject.metadata.mpeg7.VideoText) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) Comparator(java.util.Comparator) Iterator(java.util.Iterator) KeywordAnnotation(org.opencastproject.metadata.mpeg7.KeywordAnnotation) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) AudioVisual(org.opencastproject.metadata.mpeg7.AudioVisual) MultimediaContentType(org.opencastproject.metadata.mpeg7.MultimediaContentType) SpatioTemporalDecomposition(org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) MediaPackageReference(org.opencastproject.mediapackage.MediaPackageReference) VideoSegment(org.opencastproject.metadata.mpeg7.VideoSegment) Video(org.opencastproject.metadata.mpeg7.Video) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) MediaDuration(org.opencastproject.metadata.mpeg7.MediaDuration) MultimediaContent(org.opencastproject.metadata.mpeg7.MultimediaContent)
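The comparator above ranks annotations by the combined score RELEVANCE_BOOST * relevance + confidence, in descending order, so the strongest annotations come first. The helper importantKeywordsString used in the final step is not part of this excerpt; the following is a minimal sketch of how it might join keywords from the highest-ranked annotations, assuming a keyword cap and space-separated output (both are assumptions, not the Opencast implementation):

// Hypothetical sketch of importantKeywordsString, not the Opencast code:
// joins keywords from the highest-ranked annotations, capped at an assumed maximum.
static StringBuffer importantKeywordsString(SortedSet<TextAnnotation> sortedAnnotations) {
    // assumed cap; the real implementation may differ
    final int maxKeywords = 10;
    StringBuffer buf = new StringBuffer();
    int count = 0;
    for (TextAnnotation annotation : sortedAnnotations) {
        Iterator<KeywordAnnotation> keywords = annotation.keywordAnnotations();
        while (keywords.hasNext() && count < maxKeywords) {
            if (buf.length() > 0) {
                buf.append(" ");
            }
            buf.append(keywords.next().getKeyword());
            count++;
        }
    }
    return buf;
}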

Example 17 with TextAnnotation

use of org.opencastproject.metadata.mpeg7.TextAnnotation in project opencast by opencast.

The class SolrIndexManager, method getMaxRelevance.

/**
 * Gets the maximum relevance for a given keyword in the text annotation.
 *
 * @param keyword
 *          the keyword to look up, expected in lower case
 * @param sortedAnnotations
 *          the text annotations to search
 * @return The maximum relevance value.
 */
static double getMaxRelevance(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
    double max = 0.0;
    String needle = null;
    TextAnnotation textAnnotation = null;
    Iterator<TextAnnotation> textAnnotations = sortedAnnotations.iterator();
    while (textAnnotations.hasNext()) {
        textAnnotation = textAnnotations.next();
        Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
        while (keywordAnnotations.hasNext()) {
            KeywordAnnotation ann = keywordAnnotations.next();
            needle = ann.getKeyword().toLowerCase();
            if (keyword.equals(needle)) {
                if (max < textAnnotation.getRelevance()) {
                    max = textAnnotation.getRelevance();
                }
            }
        }
    }
    return max;
}
Also used : KeywordAnnotation(org.opencastproject.metadata.mpeg7.KeywordAnnotation) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation)
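Note that getMaxRelevance lower-cases each keyword annotation but compares it against the query exactly as passed in, so callers must supply the keyword already in lower case. A one-line wrapper (the name is illustrative) removes the pitfall:

// Illustrative wrapper: normalizes the query so the comparison inside
// getMaxRelevance matches regardless of the caller's casing.
static double maxRelevanceIgnoreCase(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
    return getMaxRelevance(keyword.toLowerCase(), sortedAnnotations);
}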

Example 18 with TextAnnotation

use of org.opencastproject.metadata.mpeg7.TextAnnotation in project opencast by opencast.

The class Mpeg7CaptionConverter, method getLanguageList.

/**
 * @see org.opencastproject.caption.api.CaptionConverter#getLanguageList(java.io.InputStream)
 */
@SuppressWarnings("unchecked")
@Override
public String[] getLanguageList(InputStream inputStream) throws CaptionConverterException {
    Set<String> languages = new HashSet<String>();
    Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
    Iterator<Audio> audioContentIterator = catalog.audioContent();
    if (audioContentIterator == null)
        return languages.toArray(new String[languages.size()]);
    content: while (audioContentIterator.hasNext()) {
        Audio audioContent = audioContentIterator.next();
        TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
        Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
        if (audioSegmentIterator == null)
            continue content;
        while (audioSegmentIterator.hasNext()) {
            AudioSegment segment = audioSegmentIterator.next();
            Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
            if (annotationIterator == null)
                continue content;
            while (annotationIterator.hasNext()) {
                TextAnnotation annotation = annotationIterator.next();
                String language = annotation.getLanguage();
                if (language != null)
                    languages.add(language);
            }
        }
    }
    return languages.toArray(new String[languages.size()]);
}
Also used : Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) Iterator(java.util.Iterator) Mpeg7CatalogImpl(org.opencastproject.metadata.mpeg7.Mpeg7CatalogImpl) TemporalDecomposition(org.opencastproject.metadata.mpeg7.TemporalDecomposition) Audio(org.opencastproject.metadata.mpeg7.Audio) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) HashSet(java.util.HashSet) AudioSegment(org.opencastproject.metadata.mpeg7.AudioSegment)
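A short usage sketch, assuming the MPEG-7 catalog is available as a local file and that the converter's default constructor is used (the file name is an assumption; java.io.InputStream and java.io.FileInputStream are the extra imports):

// Illustrative usage; the file name is an assumption.
static void printCaptionLanguages(String path) throws Exception {
    try (InputStream in = new FileInputStream(path)) {
        String[] languages = new Mpeg7CaptionConverter().getLanguageList(in);
        for (String language : languages) {
            System.out.println("Caption language: " + language);
        }
    }
}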

Example 19 with TextAnnotation

use of org.opencastproject.metadata.mpeg7.TextAnnotation in project opencast by opencast.

The class Mpeg7CaptionConverter, method importCaption.

/**
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@SuppressWarnings("unchecked")
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
    List<Caption> captions = new ArrayList<Caption>();
    Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
    Iterator<Audio> audioContentIterator = catalog.audioContent();
    if (audioContentIterator == null)
        return captions;
    content: while (audioContentIterator.hasNext()) {
        Audio audioContent = audioContentIterator.next();
        TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
        Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
        if (audioSegmentIterator == null)
            continue content;
        while (audioSegmentIterator.hasNext()) {
            AudioSegment segment = audioSegmentIterator.next();
            Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
            if (annotationIterator == null)
                continue content;
            while (annotationIterator.hasNext()) {
                TextAnnotation annotation = annotationIterator.next();
                if (!language.equals(annotation.getLanguage())) {
                    logger.debug("Skipping audio content '{}' because of language mismatch", audioContent.getId());
                    continue content;
                }
                List<String> captionLines = new ArrayList<String>();
                Iterator<FreeTextAnnotation> freeTextAnnotationIterator = annotation.freeTextAnnotations();
                if (freeTextAnnotationIterator == null)
                    continue;
                while (freeTextAnnotationIterator.hasNext()) {
                    FreeTextAnnotation freeTextAnnotation = freeTextAnnotationIterator.next();
                    captionLines.add(freeTextAnnotation.getText());
                }
                MediaTime segmentTime = segment.getMediaTime();
                MediaTimePoint stp = segmentTime.getMediaTimePoint();
                MediaDuration d = segmentTime.getMediaDuration();
                Calendar startCalendar = Calendar.getInstance();
                int millisAtStart = (int) (stp.getTimeInMilliseconds() - (((stp.getHour() * 60 + stp.getMinutes()) * 60 + stp.getSeconds()) * 1000));
                int millisAtEnd = (int) (d.getDurationInMilliseconds() - (((d.getHours() * 60 + d.getMinutes()) * 60 + d.getSeconds()) * 1000));
                startCalendar.set(Calendar.HOUR, stp.getHour());
                startCalendar.set(Calendar.MINUTE, stp.getMinutes());
                startCalendar.set(Calendar.SECOND, stp.getSeconds());
                startCalendar.set(Calendar.MILLISECOND, millisAtStart);
                startCalendar.add(Calendar.HOUR, d.getHours());
                startCalendar.add(Calendar.MINUTE, d.getMinutes());
                startCalendar.add(Calendar.SECOND, d.getSeconds());
                startCalendar.set(Calendar.MILLISECOND, millisAtEnd);
                try {
                    Time startTime = new TimeImpl(stp.getHour(), stp.getMinutes(), stp.getSeconds(), millisAtStart);
                    Time endTime = new TimeImpl(startCalendar.get(Calendar.HOUR), startCalendar.get(Calendar.MINUTE), startCalendar.get(Calendar.SECOND), startCalendar.get(Calendar.MILLISECOND));
                    Caption caption = new CaptionImpl(startTime, endTime, captionLines.toArray(new String[captionLines.size()]));
                    captions.add(caption);
                } catch (IllegalTimeFormatException e) {
                    logger.warn("Error setting caption time: {}", e.getMessage());
                }
            }
        }
    }
    return captions;
}
Also used : IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) Calendar(java.util.Calendar) ArrayList(java.util.ArrayList) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) Time(org.opencastproject.caption.api.Time) Caption(org.opencastproject.caption.api.Caption) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) TimeImpl(org.opencastproject.caption.impl.TimeImpl) MediaTimeImpl(org.opencastproject.metadata.mpeg7.MediaTimeImpl) Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) CaptionImpl(org.opencastproject.caption.impl.CaptionImpl) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) Iterator(java.util.Iterator) MediaDuration(org.opencastproject.metadata.mpeg7.MediaDuration) Mpeg7CatalogImpl(org.opencastproject.metadata.mpeg7.Mpeg7CatalogImpl) ArrayList(java.util.ArrayList) List(java.util.List) TemporalDecomposition(org.opencastproject.metadata.mpeg7.TemporalDecomposition) Audio(org.opencastproject.metadata.mpeg7.Audio) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) AudioSegment(org.opencastproject.metadata.mpeg7.AudioSegment)
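The Calendar arithmetic above derives the caption end time as segment start plus duration, with millisAtStart and millisAtEnd carrying the sub-second remainders of the time point and the duration. An equivalent sketch that computes the end time directly from milliseconds (the helper name is hypothetical; TimeImpl takes the same hour/minute/second/millisecond arguments used above):

// Hypothetical alternative to the Calendar math: derive the end time
// from the raw milliseconds of start + duration.
static Time endTimeOf(MediaTimePoint start, MediaDuration duration) throws IllegalTimeFormatException {
    long endMillis = start.getTimeInMilliseconds() + duration.getDurationInMilliseconds();
    int hours = (int) (endMillis / 3600000L);
    int minutes = (int) (endMillis % 3600000L / 60000L);
    int seconds = (int) (endMillis % 60000L / 1000L);
    int millis = (int) (endMillis % 1000L);
    return new TimeImpl(hours, minutes, seconds, millis);
}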

Example 20 with TextAnnotation

use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project java-vision by googleapis.

The class DetectBeta, method detectHandwrittenOcr.

// [END vision_localize_objects_gcs_beta]
// [START vision_handwritten_ocr_beta]
/**
 * Performs handwritten text detection on a local image file.
 *
 * @param filePath The path to the local file to detect handwritten text on.
 * @param out A {@link PrintStream} to write the results to.
 * @throws Exception on errors while closing the client.
 * @throws IOException on Input/Output errors.
 */
public static void detectHandwrittenOcr(String filePath, PrintStream out) throws Exception {
    List<AnnotateImageRequest> requests = new ArrayList<>();
    ByteString imgBytes = ByteString.readFrom(new FileInputStream(filePath));
    Image img = Image.newBuilder().setContent(imgBytes).build();
    Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
    // Set the Language Hint codes for handwritten OCR
    ImageContext imageContext = ImageContext.newBuilder().addLanguageHints("en-t-i0-handwrit").build();
    AnnotateImageRequest request = AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).setImageContext(imageContext).build();
    requests.add(request);
    try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
        BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
        List<AnnotateImageResponse> responses = response.getResponsesList();
        for (AnnotateImageResponse res : responses) {
            if (res.hasError()) {
                out.printf("Error: %s\n", res.getError().getMessage());
                return;
            }
            // For full list of available annotations, see http://g.co/cloud/vision/docs
            TextAnnotation annotation = res.getFullTextAnnotation();
            for (Page page : annotation.getPagesList()) {
                String pageText = "";
                for (Block block : page.getBlocksList()) {
                    String blockText = "";
                    for (Paragraph para : block.getParagraphsList()) {
                        String paraText = "";
                        for (Word word : para.getWordsList()) {
                            String wordText = "";
                            for (Symbol symbol : word.getSymbolsList()) {
                                wordText = wordText + symbol.getText();
                                out.format("Symbol text: %s (confidence: %f)\n", symbol.getText(), symbol.getConfidence());
                            }
                            out.format("Word text: %s (confidence: %f)\n\n", wordText, word.getConfidence());
                            paraText = String.format("%s %s", paraText, wordText);
                        }
                        // Output Example using Paragraph:
                        out.println("\nParagraph: \n" + paraText);
                        out.format("Paragraph Confidence: %f\n", para.getConfidence());
                        blockText = blockText + paraText;
                    }
                    pageText = pageText + blockText;
                }
            }
            out.println("\nComplete annotation:");
            out.println(annotation.getText());
        }
    }
}
Also used : Word(com.google.cloud.vision.v1p3beta1.Word) ByteString(com.google.protobuf.ByteString) Symbol(com.google.cloud.vision.v1p3beta1.Symbol) ImageAnnotatorClient(com.google.cloud.vision.v1p3beta1.ImageAnnotatorClient) ArrayList(java.util.ArrayList) Page(com.google.cloud.vision.v1p3beta1.Page) ByteString(com.google.protobuf.ByteString) Image(com.google.cloud.vision.v1p3beta1.Image) Feature(com.google.cloud.vision.v1p3beta1.Feature) FileInputStream(java.io.FileInputStream) Paragraph(com.google.cloud.vision.v1p3beta1.Paragraph) AnnotateImageRequest(com.google.cloud.vision.v1p3beta1.AnnotateImageRequest) AnnotateImageResponse(com.google.cloud.vision.v1p3beta1.AnnotateImageResponse) Block(com.google.cloud.vision.v1p3beta1.Block) TextAnnotation(com.google.cloud.vision.v1p3beta1.TextAnnotation) ImageContext(com.google.cloud.vision.v1p3beta1.ImageContext) BatchAnnotateImagesResponse(com.google.cloud.vision.v1p3beta1.BatchAnnotateImagesResponse)
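A minimal invocation sketch, assuming a local image file and that Google Cloud application default credentials are configured (the image path is an assumption):

// Illustrative entry point; the image path is an assumption.
public static void main(String[] args) throws Exception {
    detectHandwrittenOcr("resources/handwriting.jpg", System.out);
}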

Aggregations

ArrayList (java.util.ArrayList): 15
TextAnnotation (com.google.cloud.vision.v1.TextAnnotation): 12
Test (org.junit.Test): 11
ByteString (com.google.protobuf.ByteString): 9
TextAnnotation (com.google.cloud.videointelligence.v1.TextAnnotation): 8
VideoAnnotationResults (com.google.cloud.videointelligence.v1.VideoAnnotationResults): 8
List (java.util.List): 7
TextAnnotation (org.kie.workbench.common.dmn.api.definition.v1_1.TextAnnotation): 7
FreeTextAnnotation (org.opencastproject.metadata.mpeg7.FreeTextAnnotation): 7
TextAnnotation (org.opencastproject.metadata.mpeg7.TextAnnotation): 7
AnnotateImageResponse (com.google.cloud.vision.v1.AnnotateImageResponse): 6
Duration (com.google.protobuf.Duration): 6
AnnotateVideoProgress (com.google.cloud.videointelligence.v1.AnnotateVideoProgress): 4
AnnotateVideoRequest (com.google.cloud.videointelligence.v1.AnnotateVideoRequest): 4
AnnotateVideoResponse (com.google.cloud.videointelligence.v1.AnnotateVideoResponse): 4
NormalizedVertex (com.google.cloud.videointelligence.v1.NormalizedVertex): 4
TextFrame (com.google.cloud.videointelligence.v1.TextFrame): 4
TextSegment (com.google.cloud.videointelligence.v1.TextSegment): 4
VideoIntelligenceServiceClient (com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient): 4
VideoSegment (com.google.cloud.videointelligence.v1.VideoSegment): 4