use of org.opencastproject.metadata.mpeg7.VideoSegment in project opencast by opencast.
the class SolrIndexManager method addMpeg7Metadata.
/**
 * Adds the mpeg-7 catalog data to the solr document.
 * <p>
 * The document extent is taken from the media time of the first content element of the first multimedia content.
 * After that, one segment text field and one segment hint field are written per video segment, numbered
 * consecutively across all multimedia content entries.
 *
 * @param doc
 *          the input document to the solr index
 * @param mediaPackage
 *          the media package whose segment preview attachments are matched against the segment start times
 * @param mpeg7
 *          the mpeg7 catalog
 */
@SuppressWarnings("unchecked")
static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) {
  // Check for multimedia content
  if (!mpeg7.multimediaContent().hasNext()) {
    logger.warn("Mpeg-7 doesn't contain multimedia content");
    return;
  }

  // Get the content duration by looking at the first content track. This
  // of course assumes that all tracks are equally long.
  MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next();
  MultimediaContentType mct = mc.elements().next();
  MediaTime mediaTime = mct.getMediaTime();
  Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds());

  // Set of text annotations ordered by descending (boosted relevance + confidence).
  // NOTE(review): nothing in this method ever adds to this set, and the condition below creates it
  // when the keywords are NOT empty, which reads inverted compared to the original comment
  // ("if not [filled], look for the most relevant fields in mpeg-7"). Behaviour is kept as is - confirm.
  SortedSet<TextAnnotation> sortedAnnotations = null;
  if (!"".equals(Schema.getOcKeywords(doc))) {
    sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() {
      @Override
      public int compare(TextAnnotation a1, TextAnnotation a2) {
        // Compute each score once; higher scores sort first
        double score1 = RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence();
        double score2 = RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence();
        if (score1 > score2)
          return -1;
        else if (score1 < score2)
          return 1;
        return 0;
      }
    });
  }

  // Iterate over the tracks and extract keywords and hints
  Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent();
  int segmentCount = 0;

  while (mmIter.hasNext()) {
    MultimediaContent<?> multimediaContent = mmIter.next();

    // We need to process visual segments first, due to the way they are handled in the ui.
    for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext(); ) {
      MultimediaContentType type = (MultimediaContentType) iterator.next();

      // Only elements that really are Video can be cast below. Guarding on Video alone avoids a
      // ClassCastException for content that passes an AudioVisual check without being a Video.
      if (!(type instanceof Video))
        continue;

      // for every segment in the current multimedia content track
      Video video = (Video) type;
      Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition().segments();
      while (vsegments.hasNext()) {
        VideoSegment segment = vsegments.next();

        // Local, single-threaded accumulation: StringBuilder is sufficient
        StringBuilder segmentText = new StringBuilder();
        StringBuilder hintField = new StringBuilder();

        // Collect the video text elements to a segment text
        SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition();
        if (spt != null) {
          for (VideoText videoText : spt.getVideoText()) {
            if (segmentText.length() > 0)
              segmentText.append(" ");
            segmentText.append(videoText.getText().getText());
            // TODO: Add hint on bounding box
          }
        }

        // Add keyword annotations
        Iterator<TextAnnotation> textAnnotations = segment.textAnnotations();
        while (textAnnotations.hasNext()) {
          TextAnnotation textAnnotation = textAnnotations.next();
          Iterator<?> kwIter = textAnnotation.keywordAnnotations();
          while (kwIter.hasNext()) {
            KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next();
            if (segmentText.length() > 0)
              segmentText.append(" ");
            segmentText.append(keywordAnnotation.getKeyword());
          }
        }

        // Add free text annotations
        // NOTE(review): only the first text annotation's free text is read here, while keywords
        // are collected from all text annotations above - confirm this is intended.
        Iterator<TextAnnotation> freeIter = segment.textAnnotations();
        if (freeIter.hasNext()) {
          Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations();
          while (freeTextIter.hasNext()) {
            FreeTextAnnotation freeTextAnnotation = freeTextIter.next();
            if (segmentText.length() > 0)
              segmentText.append(" ");
            segmentText.append(freeTextAnnotation.getText());
          }
        }

        // add segment text to solr document
        Schema.setSegmentText(doc, new DField<String>(segmentText.toString(), Integer.toString(segmentCount)));

        // get the segments time properties
        MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint();
        MediaDuration duration = segment.getMediaTime().getMediaDuration();

        // TODO: define a class with hint field constants
        hintField.append("time=").append(timepoint.getTimeInMilliseconds()).append("\n");
        hintField.append("duration=").append(duration.getDurationInMilliseconds()).append("\n");

        // Look for preview images. Their characteristics are that they are
        // attached as attachments with a flavor of preview/<something>.
        String time = timepoint.toString();
        for (Attachment slide : mediaPackage.getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) {
          MediaPackageReference ref = slide.getReference();
          // A preview belongs to this segment if its reference carries the segment's start time
          if (ref != null && time.equals(ref.getProperty("time"))) {
            hintField.append("preview");
            hintField.append(".");
            hintField.append(ref.getIdentifier());
            hintField.append("=");
            hintField.append(slide.getURI().toString());
            hintField.append("\n");
          }
        }

        logger.trace("Adding segment: {}", timepoint);
        Schema.setSegmentHint(doc, new DField<String>(hintField.toString(), Integer.toString(segmentCount)));

        // increase segment counter
        segmentCount++;
      }
    }
  }

  // Put the most important keywords into a special solr field
  if (sortedAnnotations != null) {
    Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString());
  }
}
use of org.opencastproject.metadata.mpeg7.VideoSegment in project opencast by opencast.
the class VideoSegmenterServiceImpl method segment.
/**
 * Runs the segmentation on the given video track and returns the result as an mpeg-7 catalog element whose
 * content has been stored in the workspace collection.
 * <p>
 * The method first optimizes the changes threshold by repeatedly running the FFmpeg based segmentation until
 * either <code>maxCycles</code> is reached or the absolute error is not larger than <code>maxError</code>.
 * It then tries a range of stability thresholds on the best result and finally falls back to a uniform
 * segmentation if the number of segments is outside of the absolute minimum/maximum.
 *
 * @param job
 *          the job, its identifier is used as the file name of the stored catalog
 * @param track
 *          the element to analyze
 * @return the catalog element pointing to the resulting mpeg-7 catalog
 * @throws VideoSegmenterException
 *           if the track is not a video track or if anything goes wrong during segmentation. Note that every
 *           exception raised inside the main try block that is not a VideoSegmenterException (including the
 *           MediaPackageException for a missing duration) is wrapped into a VideoSegmenterException.
 * @throws MediaPackageException
 *           declared in the signature; see the note on wrapping above
 */
protected Catalog segment(Job job, Track track) throws VideoSegmenterException, MediaPackageException {

  // Make sure the element can be analyzed using this analysis implementation
  if (!track.hasVideo()) {
    logger.warn("Element {} is not a video track", track);
    throw new VideoSegmenterException("Element is not a video track");
  }

  try {
    Mpeg7Catalog mpeg7;

    File mediaFile = null;
    URL mediaUrl = null;
    try {
      mediaFile = workspace.get(track.getURI());
      mediaUrl = mediaFile.toURI().toURL();
    } catch (NotFoundException e) {
      throw new VideoSegmenterException("Error finding the video file in the workspace", e);
    } catch (IOException e) {
      throw new VideoSegmenterException("Error reading the video file in the workspace", e);
    }

    if (track.getDuration() == null)
      throw new MediaPackageException("Track " + track + " does not have a duration");
    logger.info("Track {} loaded, duration is {} s", mediaUrl, track.getDuration() / 1000);

    MediaTime contentTime = new MediaRelTimeImpl(0, track.getDuration());
    MediaLocator contentLocator = new MediaLocatorImpl(track.getURI());

    Video videoContent;

    logger.debug("changesThreshold: {}, stabilityThreshold: {}", changesThreshold, stabilityThreshold);
    logger.debug("prefNumber: {}, maxCycles: {}", prefNumber, maxCycles);

    boolean endOptimization = false;
    int cycleCount = 0;
    LinkedList<Segment> segments;
    // results that were chosen in their iteration, kept sorted after every insertion
    LinkedList<OptimizationStep> optimizationList = new LinkedList<OptimizationStep>();
    // filtered results that lost against the unfiltered result of the same iteration
    LinkedList<OptimizationStep> unusedResultsList = new LinkedList<OptimizationStep>();
    OptimizationStep stepBest = new OptimizationStep();

    // local copy of changesThreshold, that can safely be changed over optimization iterations
    float changesThresholdLocal = changesThreshold;

    // local copies of prefNumber, absoluteMin and absoluteMax, to make a dependency on track length possible
    int prefNumberLocal = prefNumber;
    int absoluteMaxLocal = absoluteMax;
    int absoluteMinLocal = absoluteMin;

    // if the number of segments should depend on the duration of the track, scale prefNumber,
    // absoluteMax and absoluteMin with the duration of the track (in hours)
    if (durationDependent) {
      double trackDurationInHours = track.getDuration() / 3600000.0;
      prefNumberLocal = (int) Math.round(trackDurationInHours * prefNumberLocal);
      absoluteMaxLocal = (int) Math.round(trackDurationInHours * absoluteMax);
      absoluteMinLocal = (int) Math.round(trackDurationInHours * absoluteMin);

      // make sure prefNumberLocal will never be 0 or negative
      if (prefNumberLocal <= 0) {
        prefNumberLocal = 1;
      }

      logger.info("Numbers of segments are set to be relative to track duration. Therefore for {} the preferred number of segments is {}", mediaUrl, prefNumberLocal);
    }

    logger.info("Starting video segmentation of {}", mediaUrl);

    // optimization loop: adjust the changes threshold until the segmentation is close enough
    // to the desired number of segments
    while (!endOptimization) {

      mpeg7 = mpeg7CatalogService.newInstance();
      videoContent = mpeg7.addVideoContent("videosegment", contentTime, contentLocator);

      // run the segmentation with FFmpeg
      segments = runSegmentationFFmpeg(track, videoContent, mediaFile, changesThresholdLocal);

      // calculate errors for "normal" and filtered segmentation
      // and compare them to find better optimization.

      // "normal"
      OptimizationStep currentStep = new OptimizationStep(stabilityThreshold, changesThresholdLocal, segments.size(), prefNumberLocal, mpeg7, segments);

      // filtered
      LinkedList<Segment> segmentsNew = new LinkedList<Segment>();
      OptimizationStep currentStepFiltered = new OptimizationStep(stabilityThreshold, changesThresholdLocal, 0, prefNumberLocal, filterSegmentation(segments, track, segmentsNew, stabilityThreshold * 1000), segments);
      currentStepFiltered.setSegmentNumAndRecalcErrors(segmentsNew.size());

      logger.info("Segmentation yields {} segments after filtering", segmentsNew.size());

      OptimizationStep currentStepBest;

      // use the unfiltered result if its absolute error is not larger than the filtered one, or if
      // - the filtered segmentation has fewer segments than preferred
      // - and the unfiltered segmentation has more segments than the given duration based bound
      // - and the filtered segmentation is not already better than the maximum error
      // NOTE(review): "stabilityThreshold / 2" looks like an integer division (threshLow below is
      // declared int); for a stabilityThreshold of 1 the divisor would be 0 - confirm.
      if (currentStep.getErrorAbs() <= currentStepFiltered.getErrorAbs() || (segmentsNew.size() < prefNumberLocal && currentStep.getSegmentNum() > (track.getDuration() / 1000.0f) / (stabilityThreshold / 2) && !(currentStepFiltered.getErrorAbs() <= maxError))) {
        optimizationList.add(currentStep);
        Collections.sort(optimizationList);
        currentStepBest = currentStep;
        unusedResultsList.add(currentStepFiltered);
      } else {
        optimizationList.add(currentStepFiltered);
        Collections.sort(optimizationList);
        currentStepBest = currentStepFiltered;
      }

      cycleCount++;

      logger.debug("errorAbs = {}, error = {}", currentStep.getErrorAbs(), currentStep.getError());
      logger.debug("changesThreshold = {}", changesThresholdLocal);
      logger.debug("cycleCount = {}", cycleCount);

      // end optimization if maximum number of cycles is reached or if the segmentation is good enough
      if (cycleCount >= maxCycles || currentStepBest.getErrorAbs() <= maxError) {
        endOptimization = true;
        if (optimizationList.size() > 0) {
          if (optimizationList.getFirst().getErrorAbs() <= optimizationList.getLast().getErrorAbs() && optimizationList.getFirst().getError() >= 0) {
            stepBest = optimizationList.getFirst();
          } else {
            stepBest = optimizationList.getLast();
          }
        }

        // just to be sure, check if one of the unused results was better.
        // The step that was actually compared is taken, not blindly the first entry of the list.
        for (OptimizationStep currentUnusedStep : unusedResultsList) {
          if (currentUnusedStep.getErrorAbs() < stepBest.getErrorAbs()) {
            stepBest = currentUnusedStep;
          }
        }

      // continue optimization, calculate new changes threshold for next iteration of optimization
      } else {
        OptimizationStep first = optimizationList.getFirst();
        OptimizationStep last = optimizationList.getLast();

        // if this was the first iteration or there are only results with errors of the same sign,
        // estimate a new changesThreshold based on the one yielding the smallest error
        if (optimizationList.size() == 1 || first.getError() < 0 || last.getError() > 0) {
          if (currentStepBest.getError() >= 0) {
            // if the error is smaller or equal to 1, increase changes threshold weighted with the error
            if (currentStepBest.getError() <= 1) {
              changesThresholdLocal += changesThresholdLocal * currentStepBest.getError();
            } else {
              // if there are more than 2000 segments in the first iteration, set changes threshold to 0.2
              // to faster reach reasonable segment numbers
              if (cycleCount <= 1 && currentStep.getSegmentNum() > 2000) {
                changesThresholdLocal = 0.2f;
              // if the error is bigger than one, double the changes threshold, because multiplying
              // with a large error can yield a much too high changes threshold
              } else {
                changesThresholdLocal *= 2;
              }
            }
          } else {
            // negative error: halve the changes threshold
            changesThresholdLocal /= 2;
          }

          logger.debug("onesided optimization yields new changesThreshold = {}", changesThresholdLocal);

        // if there are already iterations with positive and negative errors, choose a changesThreshold between those
        } else {
          // for simplicity a linear relationship between the changesThreshold
          // and the number of generated segments is assumed and based on that
          // the expected correct changesThreshold is calculated

          // the new changesThreshold is calculated by averaging the the mean and the mean weighted with errors
          // because this seemed to yield better results in several cases
          float x = (first.getSegmentNum() - prefNumberLocal) / (float) (first.getSegmentNum() - last.getSegmentNum());
          float newX = ((x + 0.5f) * 0.5f);
          changesThresholdLocal = first.getChangesThreshold() * (1 - newX) + last.getChangesThreshold() * newX;
          logger.debug("doublesided optimization yields new changesThreshold = {}", changesThresholdLocal);
        }
      }
    }

    // after optimization of the changes threshold, the minimum duration for a segment
    // (stability threshold) is optimized if the result is still not good enough
    int threshLow = stabilityThreshold * 1000;
    int threshHigh = threshLow + (threshLow / 2);

    LinkedList<Segment> tmpSegments;
    float smallestError = Float.MAX_VALUE;
    int bestI = threshLow;
    segments = stepBest.getSegments();

    // if the error of the optimization of the changes threshold
    // is smaller than the maximum error, the stability threshold will not be optimized
    if (stepBest.getError() <= maxError) {
      threshHigh = stabilityThreshold * 1000;
    }

    // try the candidate thresholds in steps of one second and remember the one with the smallest error
    for (int i = threshLow; i <= threshHigh; i = i + 1000) {
      tmpSegments = new LinkedList<Segment>();
      filterSegmentation(segments, track, tmpSegments, i);
      float newError = OptimizationStep.calculateErrorAbs(tmpSegments.size(), prefNumberLocal);
      if (newError < smallestError) {
        smallestError = newError;
        bestI = i;
      }
    }
    tmpSegments = new LinkedList<Segment>();
    mpeg7 = filterSegmentation(segments, track, tmpSegments, bestI);

    // for debugging: output of final segmentation after optimization
    logger.debug("result segments:");
    if (logger.isDebugEnabled()) {
      // iterate instead of using get(i), which is linear on a linked list
      for (Segment resultSegment : tmpSegments) {
        MediaTime segmentTime = resultSegment.getMediaTime();
        Object[] logArguments = {
            segmentTime.getMediaTimePoint().getHour(),
            segmentTime.getMediaTimePoint().getMinutes(),
            segmentTime.getMediaTimePoint().getSeconds(),
            segmentTime.getMediaDuration().getHours(),
            segmentTime.getMediaDuration().getMinutes(),
            segmentTime.getMediaDuration().getSeconds(),
            resultSegment.getIdentifier() };
        logger.debug("s:{}:{}:{}, d:{}:{}:{}, {}", logArguments);
      }
    }

    logger.info("Optimized Segmentation yields (after {} iteration" + (cycleCount == 1 ? "" : "s") + ") {} segments", cycleCount, tmpSegments.size());

    // if no reasonable segmentation could be found, instead return a uniform segmentation
    if (tmpSegments.size() < absoluteMinLocal || tmpSegments.size() > absoluteMaxLocal) {
      mpeg7 = uniformSegmentation(track, tmpSegments, prefNumberLocal);
      logger.info("Since no reasonable segmentation could be found, a uniform segmentation was created");
    }

    Catalog mpeg7Catalog = (Catalog) MediaPackageElementBuilderFactory.newInstance().newElementBuilder().newElement(Catalog.TYPE, MediaPackageElements.SEGMENTS);
    URI uri;
    try {
      uri = workspace.putInCollection(COLLECTION_ID, job.getId() + ".xml", mpeg7CatalogService.serialize(mpeg7));
    } catch (IOException e) {
      throw new VideoSegmenterException("Unable to put the mpeg7 catalog into the workspace", e);
    }
    mpeg7Catalog.setURI(uri);

    logger.info("Finished video segmentation of {}", mediaUrl);
    return mpeg7Catalog;
  } catch (Exception e) {
    logger.warn("Error segmenting " + track, e);
    if (e instanceof VideoSegmenterException) {
      throw (VideoSegmenterException) e;
    } else {
      throw new VideoSegmenterException(e);
    }
  }
}
use of org.opencastproject.metadata.mpeg7.VideoSegment in project opencast by opencast.
the class VideoSegmenterTest method testAnalyzeSegmentMerging.
@Test
public void testAnalyzeSegmentMerging() {
  // Set up a video content element; its temporal decomposition is used to create the test segments
  Mpeg7CatalogService catalogService = vsegmenter.mpeg7CatalogService;
  MediaTime wholeTrackTime = new MediaRelTimeImpl(0, track.getDuration());
  MediaLocator trackLocator = new MediaLocatorImpl(track.getURI());
  Mpeg7Catalog catalog = catalogService.newInstance();
  Video video = catalog.addVideoContent("videosegment", wholeTrackTime, trackLocator);

  track.setDuration(47000L);

  // one row per test set: durations of consecutive segments (start times follow from those)
  int[][] inputDurations = {
      { 3000, 2000, 8000, 3000, 1000, 6000, 3000, 2000, 4000, 11000, 2000, 2000 },
      { 1000, 2000, 8000, 3000, 1000, 6000, 3000, 2000, 4000, 11000, 2000, 4000 },
      { 1000, 2000, 4000, 3000, 1000, 2000, 3000, 2000, 4000, 1000, 2000, 4000 },
      { 6000, 7000, 13000, 9000, 8000, 11000, 5000, 16000 } };

  // one row per test set: segment durations expected after filtering
  int[][] expectedDurations = {
      { 5000, 10000, 8000, 9000, 15000 },
      { 13000, 8000, 9000, 11000, 6000 },
      { 29000 },
      { 6000, 7000, 13000, 9000, 8000, 11000, 5000, 16000 } };

  // total track duration for each test set
  long[] trackDurations = { 47000L, 47000L, 29000L, 75000L };

  // segment identifiers keep counting up across all test sets
  int nextSegmentNumber = 1;

  // every test set has to be merged by "filterSegmentation" into the expected segments
  for (int set = 0; set < inputDurations.length; set++) {
    LinkedList<Segment> input = new LinkedList<Segment>();
    LinkedList<Segment> merged = new LinkedList<Segment>();
    track.setDuration(trackDurations[set]);

    int offset = 0;
    for (int length : inputDurations[set]) {
      Segment created = video.getTemporalDecomposition().createSegment("segment-" + nextSegmentNumber);
      nextSegmentNumber++;
      created.setMediaTime(new MediaRelTimeImpl(offset, length));
      input.add(created);
      offset += length;
    }

    vsegmenter.filterSegmentation(input, track, merged, 5000);

    int[] expected = expectedDurations[set];
    assertEquals("segment merging yields wrong number of segments", expected.length, merged.size());

    offset = 0;
    for (int index = 0; index < expected.length; index++) {
      Segment actual = merged.get(index);
      String startMessage = "segment " + index + " in set " + set + " has the wrong start time.";
      String durationMessage = "segment " + index + " in set " + set + " has the wrong duration.";
      assertEquals(startMessage, offset, actual.getMediaTime().getMediaTimePoint().getTimeInMilliseconds());
      assertEquals(durationMessage, expected[index], actual.getMediaTime().getMediaDuration().getDurationInMilliseconds());
      offset += expected[index];
    }
  }
}
use of org.opencastproject.metadata.mpeg7.VideoSegment in project opencast by opencast.
the class TextAnalyzerServiceImpl method extract.
/**
 * Runs the text extraction on the image and returns a catalog element that points to the resulting mpeg-7
 * catalog, which is stored in the workspace collection.
 * <p>
 * The image is deleted from the workspace when the method returns, regardless of the outcome.
 *
 * @param job
 *          the job, its identifier is used as the file name of the stored catalog
 * @param image
 *          the element to analyze
 * @return the catalog element pointing to the resulting mpeg-7 catalog
 * @throws TextAnalyzerException
 *           if the image cannot be loaded or analyzed, or if the catalog cannot be serialized or stored. Every
 *           other exception raised during extraction is wrapped into a TextAnalyzerException as well.
 * @throws MediaPackageException
 *           declared in the signature; see the note on wrapping above
 */
private Catalog extract(Job job, Attachment image) throws TextAnalyzerException, MediaPackageException {

  final URI imageUrl = image.getURI();

  try {
    Mpeg7CatalogImpl mpeg7 = Mpeg7CatalogImpl.newInstance();

    logger.info("Starting text extraction from {}", imageUrl);
    File imageFile;
    try {
      imageFile = workspace.get(imageUrl);
    } catch (NotFoundException e) {
      throw new TextAnalyzerException("Image " + imageUrl + " not found in workspace", e);
    } catch (IOException e) {
      throw new TextAnalyzerException("Unable to access " + imageUrl + " in workspace", e);
    }
    VideoText[] videoTexts = analyze(imageFile, image.getIdentifier());

    // Create a temporal decomposition
    MediaTime mediaTime = new MediaTimeImpl(0, 0);
    Video avContent = mpeg7.addVideoContent(image.getIdentifier(), mediaTime, null);
    // The content was added as video content just above, its segments are handled as video segments
    @SuppressWarnings("unchecked")
    TemporalDecomposition<VideoSegment> temporalDecomposition = (TemporalDecomposition<VideoSegment>) avContent.getTemporalDecomposition();

    // Add a segment
    VideoSegment videoSegment = temporalDecomposition.createSegment("segment-0");
    videoSegment.setMediaTime(mediaTime);

    // Add the video text to the spacio temporal decomposition of the segment
    SpatioTemporalDecomposition spatioTemporalDecomposition = videoSegment.createSpatioTemporalDecomposition(true, false);
    for (VideoText videoText : videoTexts) {
      spatioTemporalDecomposition.addVideoText(videoText);
    }

    logger.info("Text extraction of {} finished, {} lines found", image.getURI(), videoTexts.length);

    InputStream in;
    try {
      in = mpeg7CatalogService.serialize(mpeg7);
    } catch (IOException e) {
      throw new TextAnalyzerException("Error serializing mpeg7", e);
    }

    URI uri;
    try {
      uri = workspace.putInCollection(COLLECTION_ID, job.getId() + ".xml", in);
    } catch (IOException e) {
      throw new TextAnalyzerException("Unable to put mpeg7 into the workspace", e);
    } finally {
      // The serialized stream is owned by this method, release it whether or not storing succeeded
      if (in != null) {
        try {
          in.close();
        } catch (IOException closeError) {
          logger.debug("Unable to close the serialized mpeg7 stream", closeError);
        }
      }
    }

    Catalog catalog = (Catalog) MediaPackageElementBuilderFactory.newInstance().newElementBuilder().newElement(Catalog.TYPE, MediaPackageElements.TEXTS);
    catalog.setURI(uri);

    logger.debug("Created MPEG7 catalog for {}", imageUrl);

    return catalog;
  } catch (Exception e) {
    logger.warn("Error extracting text from " + imageUrl, e);
    if (e instanceof TextAnalyzerException) {
      throw (TextAnalyzerException) e;
    } else {
      throw new TextAnalyzerException(e);
    }
  } finally {
    // Best effort cleanup of the analyzed image; a failure is logged only
    try {
      workspace.delete(imageUrl);
    } catch (Exception e) {
      logger.warn("Unable to delete temporary text analysis image {}: {}", imageUrl, e);
    }
  }
}
use of com.google.cloud.videointelligence.v1.VideoSegment in project java-docs-samples by GoogleCloudPlatform.
the class Detect method analyzeShots.
/**
 * Runs shot change detection on a video stored in Cloud Storage and prints the time range of every
 * detected shot to standard output.
 *
 * @param gcsUri the path to the video file to analyze, given as "gs://bucket-name/..."
 * @throws Exception if creating the client or waiting for the annotation result fails
 */
public static void analyzeShots(String gcsUri) throws Exception {
  // The client (com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient) is closed
  // automatically at the end of the try block
  try (VideoIntelligenceServiceClient videoClient = VideoIntelligenceServiceClient.create()) {
    // Ask for shot change detection only
    AnnotateVideoRequest shotRequest =
        AnnotateVideoRequest.newBuilder()
            .setInputUri(gcsUri)
            .addFeatures(Feature.SHOT_CHANGE_DETECTION)
            .build();

    // The returned future completes once the annotation has finished
    OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> pendingAnnotation =
        videoClient.annotateVideoAsync(shotRequest);

    System.out.println("Waiting for operation to complete...");

    // Report the shots of every annotation result, or that there are none
    for (VideoAnnotationResults annotation : pendingAnnotation.get().getAnnotationResultsList()) {
      if (annotation.getShotAnnotationsCount() <= 0) {
        System.out.println("No shot changes detected in " + gcsUri);
        continue;
      }

      System.out.println("Shots: ");
      for (VideoSegment shot : annotation.getShotAnnotationsList()) {
        // Offsets are given as seconds plus nanoseconds, combine them into fractional seconds
        double shotStart = shot.getStartTimeOffset().getSeconds() + shot.getStartTimeOffset().getNanos() / 1e9;
        double shotEnd = shot.getEndTimeOffset().getSeconds() + shot.getEndTimeOffset().getNanos() / 1e9;
        System.out.printf("Location: %.3f:%.3f\n", shotStart, shotEnd);
      }
    }
  }
  // [END detect_shots]
}
Aggregations