use of org.opencastproject.metadata.mpeg7.KeywordAnnotation in project opencast by opencast.
the class SolrIndexManager method addMpeg7Metadata.
/**
* Add the mpeg 7 catalog data to the solr document.
*
* @param doc
* the input document to the solr index
* @param mpeg7
* the mpeg7 catalog
*/
@SuppressWarnings("unchecked")
static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) {
// Check for multimedia content
if (!mpeg7.multimediaContent().hasNext()) {
logger.warn("Mpeg-7 doesn't contain multimedia content");
return;
}
// Get the content duration by looking at the first content track. This
// of course assumes that all tracks are equally long.
MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next();
MultimediaContentType mct = mc.elements().next();
MediaTime mediaTime = mct.getMediaTime();
Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds());
// Check if the keywords have been filled by (manually) added dublin
// core data. If not, look for the most relevant fields in mpeg-7.
SortedSet<TextAnnotation> sortedAnnotations = null;
if (!"".equals(Schema.getOcKeywords(doc))) {
sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() {
@Override
public int compare(TextAnnotation a1, TextAnnotation a2) {
if ((RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence()) > (RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence()))
return -1;
else if ((RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence()) < (RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence()))
return 1;
return 0;
}
});
}
// Iterate over the tracks and extract keywords and hints
Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent();
int segmentCount = 0;
while (mmIter.hasNext()) {
MultimediaContent<?> multimediaContent = mmIter.next();
// We need to process visual segments first, due to the way they are handled in the ui.
for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext(); ) {
MultimediaContentType type = (MultimediaContentType) iterator.next();
if (!(type instanceof Video) && !(type instanceof AudioVisual))
continue;
// for every segment in the current multimedia content track
Video video = (Video) type;
Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition().segments();
while (vsegments.hasNext()) {
VideoSegment segment = vsegments.next();
StringBuffer segmentText = new StringBuffer();
StringBuffer hintField = new StringBuffer();
// Collect the video text elements to a segment text
SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition();
if (spt != null) {
for (VideoText videoText : spt.getVideoText()) {
if (segmentText.length() > 0)
segmentText.append(" ");
segmentText.append(videoText.getText().getText());
// TODO: Add hint on bounding box
}
}
// Add keyword annotations
Iterator<TextAnnotation> textAnnotations = segment.textAnnotations();
while (textAnnotations.hasNext()) {
TextAnnotation textAnnotation = textAnnotations.next();
Iterator<?> kwIter = textAnnotation.keywordAnnotations();
while (kwIter.hasNext()) {
KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next();
if (segmentText.length() > 0)
segmentText.append(" ");
segmentText.append(keywordAnnotation.getKeyword());
}
}
// Add free text annotations
Iterator<TextAnnotation> freeIter = segment.textAnnotations();
if (freeIter.hasNext()) {
Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations();
while (freeTextIter.hasNext()) {
FreeTextAnnotation freeTextAnnotation = freeTextIter.next();
if (segmentText.length() > 0)
segmentText.append(" ");
segmentText.append(freeTextAnnotation.getText());
}
}
// add segment text to solr document
Schema.setSegmentText(doc, new DField<String>(segmentText.toString(), Integer.toString(segmentCount)));
// get the segments time properties
MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint();
MediaDuration duration = segment.getMediaTime().getMediaDuration();
// TODO: define a class with hint field constants
hintField.append("time=" + timepoint.getTimeInMilliseconds() + "\n");
hintField.append("duration=" + duration.getDurationInMilliseconds() + "\n");
// Look for preview images. Their characteristics are that they are
// attached as attachments with a flavor of preview/<something>.
String time = timepoint.toString();
for (Attachment slide : mediaPackage.getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) {
MediaPackageReference ref = slide.getReference();
if (ref != null && time.equals(ref.getProperty("time"))) {
hintField.append("preview");
hintField.append(".");
hintField.append(ref.getIdentifier());
hintField.append("=");
hintField.append(slide.getURI().toString());
hintField.append("\n");
}
}
logger.trace("Adding segment: " + timepoint.toString());
Schema.setSegmentHint(doc, new DField<String>(hintField.toString(), Integer.toString(segmentCount)));
// increase segment counter
segmentCount++;
}
}
}
// Put the most important keywords into a special solr field
if (sortedAnnotations != null) {
Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString());
}
}
use of org.opencastproject.metadata.mpeg7.KeywordAnnotation in project opencast by opencast.
the class SolrIndexManager method getMaxRelevance.
/**
* Gets the maximum relevance for a given keyword in the text annotation.
*
* @param keyword
* @param sortedAnnotations
* @return The maximum relevance value.
*/
static double getMaxRelevance(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
double max = 0.0;
String needle = null;
TextAnnotation textAnnotation = null;
Iterator<TextAnnotation> textAnnotations = sortedAnnotations.iterator();
while (textAnnotations.hasNext()) {
textAnnotation = textAnnotations.next();
Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
while (keywordAnnotations.hasNext()) {
KeywordAnnotation ann = keywordAnnotations.next();
needle = ann.getKeyword().toLowerCase();
if (keyword.equals(needle)) {
if (max < textAnnotation.getRelevance()) {
max = textAnnotation.getRelevance();
}
}
}
}
return max;
}
use of org.opencastproject.metadata.mpeg7.KeywordAnnotation in project opencast by opencast.
the class SolrIndexManager method importantKeywordsString.
/**
* Generates a string with the most important kewords from the text annotation.
*
* @param sortedAnnotations
* @return The keyword string.
*/
static StringBuffer importantKeywordsString(SortedSet<TextAnnotation> sortedAnnotations) {
// important keyword:
// - high relevance
// - high confidence
// - occur often
// - more than MAX_CHAR chars
// calculate keyword occurences (histogram) and importance
ArrayList<String> list = new ArrayList<String>();
Iterator<TextAnnotation> textAnnotations = sortedAnnotations.iterator();
TextAnnotation textAnnotation = null;
String keyword = null;
HashMap<String, Integer> histogram = new HashMap<String, Integer>();
HashMap<String, Double> importance = new HashMap<String, Double>();
int occ = 0;
double imp;
while (textAnnotations.hasNext()) {
textAnnotation = textAnnotations.next();
Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
while (keywordAnnotations.hasNext()) {
KeywordAnnotation annotation = keywordAnnotations.next();
keyword = annotation.getKeyword().toLowerCase();
if (keyword.length() > MAX_CHAR) {
occ = 0;
if (histogram.keySet().contains(keyword)) {
occ = histogram.get(keyword);
}
histogram.put(keyword, occ + 1);
// here the importance value is calculated
// from relevance, confidence and frequency of occurence.
imp = (RELEVANCE_BOOST * getMaxRelevance(keyword, sortedAnnotations) + getMaxConfidence(keyword, sortedAnnotations)) * (occ + 1);
importance.put(keyword, imp);
}
}
}
// get the MAX_IMPORTANT_COUNT most important keywords
StringBuffer buf = new StringBuffer();
while (list.size() < MAX_IMPORTANT_COUNT && importance.size() > 0) {
double max = 0.0;
String maxKeyword = null;
// get maximum from importance list
for (Entry<String, Double> entry : importance.entrySet()) {
keyword = entry.getKey();
if (max < entry.getValue()) {
max = entry.getValue();
maxKeyword = keyword;
}
}
// pop maximum
importance.remove(maxKeyword);
// append keyword to string
if (buf.length() > 0)
buf.append(" ");
buf.append(maxKeyword);
}
return buf;
}
use of org.opencastproject.metadata.mpeg7.KeywordAnnotation in project opencast by opencast.
the class SolrIndexManager method getMaxConfidence.
/**
* Gets the maximum confidence for a given keyword in the text annotation.
*
* @param keyword
* @param sortedAnnotations
* @return The maximum confidence value.
*/
static double getMaxConfidence(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
double max = 0.0;
String needle = null;
TextAnnotation textAnnotation = null;
Iterator<TextAnnotation> textAnnotations = sortedAnnotations.iterator();
while (textAnnotations.hasNext()) {
textAnnotation = textAnnotations.next();
Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
while (keywordAnnotations.hasNext()) {
KeywordAnnotation ann = keywordAnnotations.next();
needle = ann.getKeyword().toLowerCase();
if (keyword.equals(needle)) {
if (max < textAnnotation.getConfidence()) {
max = textAnnotation.getConfidence();
}
}
}
}
return max;
}
Aggregations