Search in sources :

Example 1 with KeywordAnnotation

use of org.opencastproject.metadata.mpeg7.KeywordAnnotation in project opencast by opencast.

the class SolrIndexManager method addMpeg7Metadata.

/**
 * Adds the mpeg 7 catalog data to the solr document.
 * <p>
 * The document extent is taken from the media time of the first content element. After that, one
 * segment text field and one segment hint field are written per video segment; segments are
 * numbered consecutively in iteration order across all content tracks.
 *
 * @param doc
 *          the input document to the solr index
 * @param mediaPackage
 *          the media package whose segment preview attachments are matched against the segment
 *          start times
 * @param mpeg7
 *          the mpeg7 catalog
 */
@SuppressWarnings("unchecked")
static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) {
    // Check for multimedia content
    if (!mpeg7.multimediaContent().hasNext()) {
        logger.warn("Mpeg-7 doesn't contain  multimedia content");
        return;
    }
    // Get the content duration by looking at the first content track. This
    // of course assumes that all tracks are equally long.
    MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next();
    Iterator<?> firstTrackElements = mc.elements();
    if (!firstTrackElements.hasNext()) {
        // Without this guard next() would fail with a NoSuchElementException on an empty track
        logger.warn("Mpeg-7 multimedia content doesn't contain any elements");
        return;
    }
    MultimediaContentType mct = (MultimediaContentType) firstTrackElements.next();
    MediaTime mediaTime = mct.getMediaTime();
    Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds());
    // Check if the keywords have been filled by (manually) added dublin
    // core data. If not, look for the most relevant fields in mpeg-7.
    // NOTE(review): the set is created when the keywords are NOT the empty string, which reads as
    // the opposite of the comment above - confirm the intended polarity.
    SortedSet<TextAnnotation> sortedAnnotations = null;
    if (!"".equals(Schema.getOcKeywords(doc))) {
        // Annotations with a higher boosted relevance plus confidence sort first
        sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() {

            @Override
            public int compare(TextAnnotation a1, TextAnnotation a2) {
                if ((RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence()) > (RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence()))
                    return -1;
                else if ((RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence()) < (RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence()))
                    return 1;
                return 0;
            }
        });
    }
    // Iterate over the tracks and extract keywords and hints
    Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent();
    int segmentCount = 0;
    while (mmIter.hasNext()) {
        MultimediaContent<?> multimediaContent = mmIter.next();
        // We need to process visual segments first, due to the way they are handled in the ui.
        for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext(); ) {
            MultimediaContentType type = (MultimediaContentType) iterator.next();
            if (!(type instanceof Video) && !(type instanceof AudioVisual))
                continue;
            // for every segment in the current multimedia content track
            // NOTE(review): the filter above lets AudioVisual elements through, but the cast below
            // requires Video - confirm that every AudioVisual is also a Video, otherwise this throws
            // a ClassCastException.
            Video video = (Video) type;
            Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition().segments();
            while (vsegments.hasNext()) {
                VideoSegment segment = vsegments.next();
                // Both builders are local to this iteration, so no synchronization is needed
                StringBuilder segmentText = new StringBuilder();
                StringBuilder hintField = new StringBuilder();
                // Collect the video text elements to a segment text
                SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition();
                if (spt != null) {
                    for (VideoText videoText : spt.getVideoText()) {
                        if (segmentText.length() > 0)
                            segmentText.append(" ");
                        segmentText.append(videoText.getText().getText());
                        // TODO: Add hint on bounding box
                    }
                }
                // Add keyword annotations
                Iterator<TextAnnotation> textAnnotations = segment.textAnnotations();
                while (textAnnotations.hasNext()) {
                    TextAnnotation textAnnotation = textAnnotations.next();
                    Iterator<?> kwIter = textAnnotation.keywordAnnotations();
                    while (kwIter.hasNext()) {
                        KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next();
                        if (segmentText.length() > 0)
                            segmentText.append(" ");
                        segmentText.append(keywordAnnotation.getKeyword());
                    }
                }
                // Add free text annotations. All text annotations of the segment are visited, in a
                // second pass so that the free text still follows the keywords in the segment text.
                Iterator<TextAnnotation> freeIter = segment.textAnnotations();
                while (freeIter.hasNext()) {
                    Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations();
                    while (freeTextIter.hasNext()) {
                        FreeTextAnnotation freeTextAnnotation = freeTextIter.next();
                        if (segmentText.length() > 0)
                            segmentText.append(" ");
                        segmentText.append(freeTextAnnotation.getText());
                    }
                }
                // add segment text to solr document
                Schema.setSegmentText(doc, new DField<String>(segmentText.toString(), Integer.toString(segmentCount)));
                // get the segments time properties
                MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint();
                MediaDuration duration = segment.getMediaTime().getMediaDuration();
                // TODO: define a class with hint field constants
                hintField.append("time=").append(timepoint.getTimeInMilliseconds()).append("\n");
                hintField.append("duration=").append(duration.getDurationInMilliseconds()).append("\n");
                // Look for preview images. Their characteristics are that they are
                // attached as attachments with a flavor of preview/<something>.
                String time = timepoint.toString();
                for (Attachment slide : mediaPackage.getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) {
                    MediaPackageReference ref = slide.getReference();
                    // A preview belongs to this segment when its reference carries the segment's start time
                    if (ref != null && time.equals(ref.getProperty("time"))) {
                        hintField.append("preview");
                        hintField.append(".");
                        hintField.append(ref.getIdentifier());
                        hintField.append("=");
                        hintField.append(slide.getURI().toString());
                        hintField.append("\n");
                    }
                }
                logger.trace("Adding segment: " + time);
                Schema.setSegmentHint(doc, new DField<String>(hintField.toString(), Integer.toString(segmentCount)));
                // increase segment counter
                segmentCount++;
            }
        }
    }
    // Put the most important keywords into a special solr field
    // NOTE(review): nothing in this method adds to sortedAnnotations, so the keyword string is
    // built from an empty set here - confirm whether the annotations should be collected above.
    if (sortedAnnotations != null) {
        Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString());
    }
}
Also used : Attachment(org.opencastproject.mediapackage.Attachment) VideoText(org.opencastproject.metadata.mpeg7.VideoText) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) Comparator(java.util.Comparator) Iterator(java.util.Iterator) KeywordAnnotation(org.opencastproject.metadata.mpeg7.KeywordAnnotation) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) AudioVisual(org.opencastproject.metadata.mpeg7.AudioVisual) MultimediaContentType(org.opencastproject.metadata.mpeg7.MultimediaContentType) SpatioTemporalDecomposition(org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) MediaPackageReference(org.opencastproject.mediapackage.MediaPackageReference) VideoSegment(org.opencastproject.metadata.mpeg7.VideoSegment) Video(org.opencastproject.metadata.mpeg7.Video) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) MediaDuration(org.opencastproject.metadata.mpeg7.MediaDuration) MultimediaContent(org.opencastproject.metadata.mpeg7.MultimediaContent)

Example 2 with KeywordAnnotation

use of org.opencastproject.metadata.mpeg7.KeywordAnnotation in project opencast by opencast.

the class SolrIndexManager method getMaxRelevance.

/**
 * Finds the highest relevance among the text annotations that carry the given keyword.
 *
 * @param keyword
 *          the keyword to look for; it is compared against the lower-cased annotation keywords
 * @param sortedAnnotations
 *          the text annotations to scan
 * @return The maximum relevance value, or 0.0 if no matching annotation has a relevance above
 *         zero.
 */
static double getMaxRelevance(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
    double best = 0.0;
    for (TextAnnotation candidate : sortedAnnotations) {
        for (Iterator<KeywordAnnotation> it = candidate.keywordAnnotations(); it.hasNext(); ) {
            String lowered = it.next().getKeyword().toLowerCase();
            // Only annotations that carry the keyword may raise the maximum
            if (keyword.equals(lowered) && best < candidate.getRelevance()) {
                best = candidate.getRelevance();
            }
        }
    }
    return best;
}
Also used : KeywordAnnotation(org.opencastproject.metadata.mpeg7.KeywordAnnotation) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation)

Example 3 with KeywordAnnotation

use of org.opencastproject.metadata.mpeg7.KeywordAnnotation in project opencast by opencast.

the class SolrIndexManager method importantKeywordsString.

/**
 * Generates a string with the most important keywords from the text annotations.
 * <p>
 * Only keywords longer than MAX_CHAR characters are considered. The importance of a keyword is
 * its boosted maximum relevance plus its maximum confidence, multiplied by the number of times
 * it occurs. At most MAX_IMPORTANT_COUNT keywords are returned, most important first.
 *
 * @param sortedAnnotations
 *          the text annotations to take the keywords from
 * @return The keyword string, with the keywords in lower case and separated by single spaces;
 *         empty if no keyword qualifies.
 */
static StringBuffer importantKeywordsString(SortedSet<TextAnnotation> sortedAnnotations) {
    // important keyword:
    // - high relevance
    // - high confidence
    // - occur often
    // - more than MAX_CHAR chars
    // calculate keyword occurrences (histogram) and importance
    HashMap<String, Integer> histogram = new HashMap<String, Integer>();
    HashMap<String, Double> importance = new HashMap<String, Double>();
    for (TextAnnotation textAnnotation : sortedAnnotations) {
        Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
        while (keywordAnnotations.hasNext()) {
            String keyword = keywordAnnotations.next().getKeyword().toLowerCase();
            if (keyword.length() <= MAX_CHAR)
                continue;
            Integer seen = histogram.get(keyword);
            int occurrences = (seen == null ? 0 : seen) + 1;
            histogram.put(keyword, occurrences);
            // here the importance value is calculated
            // from relevance, confidence and frequency of occurrence.
            // It is overwritten on every occurrence, so the final value uses the total count.
            double imp = (RELEVANCE_BOOST * getMaxRelevance(keyword, sortedAnnotations) + getMaxConfidence(keyword, sortedAnnotations)) * occurrences;
            importance.put(keyword, imp);
        }
    }
    // get the MAX_IMPORTANT_COUNT most important keywords
    ArrayList<String> list = new ArrayList<String>();
    StringBuffer buf = new StringBuffer();
    while (list.size() < MAX_IMPORTANT_COUNT && !importance.isEmpty()) {
        double max = 0.0;
        String maxKeyword = null;
        // get maximum from importance list. The first entry is always accepted, so a keyword is
        // found even if no importance value is above zero; otherwise nothing would be removed and
        // the loop would never terminate.
        for (Entry<String, Double> entry : importance.entrySet()) {
            if (maxKeyword == null || max < entry.getValue()) {
                max = entry.getValue();
                maxKeyword = entry.getKey();
            }
        }
        // pop maximum and record it, so that the MAX_IMPORTANT_COUNT limit takes effect
        importance.remove(maxKeyword);
        list.add(maxKeyword);
        // append keyword to string
        if (buf.length() > 0)
            buf.append(" ");
        buf.append(maxKeyword);
    }
    return buf;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) KeywordAnnotation(org.opencastproject.metadata.mpeg7.KeywordAnnotation) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation)

Example 4 with KeywordAnnotation

use of org.opencastproject.metadata.mpeg7.KeywordAnnotation in project opencast by opencast.

the class SolrIndexManager method getMaxConfidence.

/**
 * Finds the highest confidence among the text annotations that carry the given keyword.
 *
 * @param keyword
 *          the keyword to look for; it is compared against the lower-cased annotation keywords
 * @param sortedAnnotations
 *          the text annotations to scan
 * @return The maximum confidence value, or 0.0 if no matching annotation has a confidence above
 *         zero.
 */
static double getMaxConfidence(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
    double best = 0.0;
    for (TextAnnotation candidate : sortedAnnotations) {
        for (Iterator<KeywordAnnotation> it = candidate.keywordAnnotations(); it.hasNext(); ) {
            String lowered = it.next().getKeyword().toLowerCase();
            // Only annotations that carry the keyword may raise the maximum
            if (keyword.equals(lowered) && best < candidate.getConfidence()) {
                best = candidate.getConfidence();
            }
        }
    }
    return best;
}
Also used : KeywordAnnotation(org.opencastproject.metadata.mpeg7.KeywordAnnotation) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation)

Aggregations

FreeTextAnnotation (org.opencastproject.metadata.mpeg7.FreeTextAnnotation)4 KeywordAnnotation (org.opencastproject.metadata.mpeg7.KeywordAnnotation)4 TextAnnotation (org.opencastproject.metadata.mpeg7.TextAnnotation)4 MediaTimePoint (org.opencastproject.metadata.mpeg7.MediaTimePoint)2 ArrayList (java.util.ArrayList)1 Comparator (java.util.Comparator)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 Attachment (org.opencastproject.mediapackage.Attachment)1 MediaPackageReference (org.opencastproject.mediapackage.MediaPackageReference)1 AudioVisual (org.opencastproject.metadata.mpeg7.AudioVisual)1 MediaDuration (org.opencastproject.metadata.mpeg7.MediaDuration)1 MediaTime (org.opencastproject.metadata.mpeg7.MediaTime)1 MultimediaContent (org.opencastproject.metadata.mpeg7.MultimediaContent)1 MultimediaContentType (org.opencastproject.metadata.mpeg7.MultimediaContentType)1 SpatioTemporalDecomposition (org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition)1 Video (org.opencastproject.metadata.mpeg7.Video)1 VideoSegment (org.opencastproject.metadata.mpeg7.VideoSegment)1 VideoText (org.opencastproject.metadata.mpeg7.VideoText)1