use of org.opencastproject.metadata.mpeg7.Mpeg7Catalog in project opencast by opencast.
the class VideoSegmenterTest method testAnalyzeSegmentMerging.
@Test
public void testAnalyzeSegmentMerging() {
Mpeg7CatalogService mpeg7catalogService = vsegmenter.mpeg7CatalogService;
MediaTime contentTime = new MediaRelTimeImpl(0, track.getDuration());
MediaLocator contentLocator = new MediaLocatorImpl(track.getURI());
Mpeg7Catalog mpeg7 = mpeg7catalogService.newInstance();
Video videoContent = mpeg7.addVideoContent("videosegment", contentTime, contentLocator);
LinkedList<Segment> segments;
LinkedList<Segment> result;
int segmentcount = 1;
track.setDuration(47000L);
// list of segment durations (starttimes can be calculated from those)
int[] segmentArray1 = { 3000, 2000, 8000, 3000, 1000, 6000, 3000, 2000, 4000, 11000, 2000, 2000 };
int[] segmentArray2 = { 1000, 2000, 8000, 3000, 1000, 6000, 3000, 2000, 4000, 11000, 2000, 4000 };
int[] segmentArray3 = { 1000, 2000, 4000, 3000, 1000, 2000, 3000, 2000, 4000, 1000, 2000, 4000 };
int[] segmentArray4 = { 6000, 7000, 13000, 9000, 8000, 11000, 5000, 16000 };
// predicted outcome of filtering the segmentation
int[] prediction1 = { 5000, 10000, 8000, 9000, 15000 };
int[] prediction2 = { 13000, 8000, 9000, 11000, 6000 };
int[] prediction3 = { 29000 };
int[] prediction4 = { 6000, 7000, 13000, 9000, 8000, 11000, 5000, 16000 };
// total duration of respective segment arrays
long duration1 = 47000L;
long duration2 = 47000L;
long duration3 = 29000L;
long duration4 = 75000L;
int[][] segmentArray = { segmentArray1, segmentArray2, segmentArray3, segmentArray4 };
int[][] prediction = { prediction1, prediction2, prediction3, prediction4 };
long[] durations = { duration1, duration2, duration3, duration4 };
// check for all test segmentations if "filterSegmentation" yields the expected result
for (int k = 0; k < segmentArray.length; k++) {
segments = new LinkedList<Segment>();
result = new LinkedList<Segment>();
track.setDuration(durations[k]);
int previous = 0;
for (int i = 0; i < segmentArray[k].length; i++) {
Segment s = videoContent.getTemporalDecomposition().createSegment("segment-" + segmentcount++);
s.setMediaTime(new MediaRelTimeImpl(previous, segmentArray[k][i]));
segments.add(s);
previous += segmentArray[k][i];
}
vsegmenter.filterSegmentation(segments, track, result, 5000);
assertEquals("segment merging yields wrong number of segments", prediction[k].length, result.size());
previous = 0;
for (int i = 0; i < prediction[k].length; i++) {
String message = "segment " + i + " in set " + k + " has the wrong start time.";
String message1 = "segment " + i + " in set " + k + " has the wrong duration.";
assertEquals(message, previous, result.get(i).getMediaTime().getMediaTimePoint().getTimeInMilliseconds());
assertEquals(message1, prediction[k][i], result.get(i).getMediaTime().getMediaDuration().getDurationInMilliseconds());
previous += prediction[k][i];
}
}
}
use of org.opencastproject.metadata.mpeg7.Mpeg7Catalog in project opencast by opencast.
the class TextAnalyzerServiceImpl method extract.
/**
* Starts text extraction on the image and returns a receipt containing the final result in the form of an
* Mpeg7Catalog.
*
* @param image
* the element to analyze
* @param block
* <code>true</code> to make this operation synchronous
* @return a receipt containing the resulting mpeg-7 catalog
* @throws TextAnalyzerException
*/
private Catalog extract(Job job, Attachment image) throws TextAnalyzerException, MediaPackageException {
final Attachment attachment = image;
final URI imageUrl = attachment.getURI();
File imageFile = null;
try {
Mpeg7CatalogImpl mpeg7 = Mpeg7CatalogImpl.newInstance();
logger.info("Starting text extraction from {}", imageUrl);
try {
imageFile = workspace.get(imageUrl);
} catch (NotFoundException e) {
throw new TextAnalyzerException("Image " + imageUrl + " not found in workspace", e);
} catch (IOException e) {
throw new TextAnalyzerException("Unable to access " + imageUrl + " in workspace", e);
}
VideoText[] videoTexts = analyze(imageFile, image.getIdentifier());
// Create a temporal decomposition
MediaTime mediaTime = new MediaTimeImpl(0, 0);
Video avContent = mpeg7.addVideoContent(image.getIdentifier(), mediaTime, null);
TemporalDecomposition<VideoSegment> temporalDecomposition = (TemporalDecomposition<VideoSegment>) avContent.getTemporalDecomposition();
// Add a segment
VideoSegment videoSegment = temporalDecomposition.createSegment("segment-0");
videoSegment.setMediaTime(mediaTime);
// Add the video text to the spacio temporal decomposition of the segment
SpatioTemporalDecomposition spatioTemporalDecomposition = videoSegment.createSpatioTemporalDecomposition(true, false);
for (VideoText videoText : videoTexts) {
spatioTemporalDecomposition.addVideoText(videoText);
}
logger.info("Text extraction of {} finished, {} lines found", attachment.getURI(), videoTexts.length);
URI uri;
InputStream in;
try {
in = mpeg7CatalogService.serialize(mpeg7);
} catch (IOException e) {
throw new TextAnalyzerException("Error serializing mpeg7", e);
}
try {
uri = workspace.putInCollection(COLLECTION_ID, job.getId() + ".xml", in);
} catch (IOException e) {
throw new TextAnalyzerException("Unable to put mpeg7 into the workspace", e);
}
Catalog catalog = (Catalog) MediaPackageElementBuilderFactory.newInstance().newElementBuilder().newElement(Catalog.TYPE, MediaPackageElements.TEXTS);
catalog.setURI(uri);
logger.debug("Created MPEG7 catalog for {}", imageUrl);
return catalog;
} catch (Exception e) {
logger.warn("Error extracting text from " + imageUrl, e);
if (e instanceof TextAnalyzerException) {
throw (TextAnalyzerException) e;
} else {
throw new TextAnalyzerException(e);
}
} finally {
try {
workspace.delete(imageUrl);
} catch (Exception e) {
logger.warn("Unable to delete temporary text analysis image {}: {}", imageUrl, e);
}
}
}
use of org.opencastproject.metadata.mpeg7.Mpeg7Catalog in project opencast by opencast.
the class Mpeg7CaptionConverter method exportCaption.
@Override
public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
Mpeg7Catalog mpeg7 = Mpeg7CatalogImpl.newInstance();
MediaTime mediaTime = new MediaTimeImpl(0, 0);
Audio audioContent = mpeg7.addAudioContent("captions", mediaTime, null);
@SuppressWarnings("unchecked") TemporalDecomposition<AudioSegment> captionDecomposition = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
int segmentCount = 0;
for (Caption caption : captions) {
// Get all the words/parts for the transcript
String[] words = caption.getCaption();
if (words.length == 0)
continue;
// Create a new segment
AudioSegment segment = captionDecomposition.createSegment("segment-" + segmentCount++);
Time captionST = caption.getStartTime();
Time captionET = caption.getStopTime();
// Calculate start time
Calendar startTime = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
startTime.setTimeInMillis(0);
startTime.add(Calendar.HOUR_OF_DAY, captionST.getHours());
startTime.add(Calendar.MINUTE, captionST.getMinutes());
startTime.add(Calendar.SECOND, captionST.getSeconds());
startTime.add(Calendar.MILLISECOND, captionST.getMilliseconds());
// Calculate end time
Calendar endTime = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
endTime.setTimeInMillis(0);
endTime.add(Calendar.HOUR_OF_DAY, captionET.getHours());
endTime.add(Calendar.MINUTE, captionET.getMinutes());
endTime.add(Calendar.SECOND, captionET.getSeconds());
endTime.add(Calendar.MILLISECOND, captionET.getMilliseconds());
long startTimeInMillis = startTime.getTimeInMillis();
long endTimeInMillis = endTime.getTimeInMillis();
long duration = endTimeInMillis - startTimeInMillis;
segment.setMediaTime(new MediaTimeImpl(startTimeInMillis, duration));
TextAnnotation textAnnotation = segment.createTextAnnotation(0, 0, language);
// Collect all the words in the segment
StringBuffer captionLine = new StringBuffer();
// Add each words/parts as segment to the catalog
for (String word : words) {
if (captionLine.length() > 0)
captionLine.append(' ');
captionLine.append(word);
}
// Append the text to the annotation
textAnnotation.addFreeTextAnnotation(new FreeTextAnnotationImpl(captionLine.toString()));
}
Transformer tf = null;
try {
tf = TransformerFactory.newInstance().newTransformer();
DOMSource xmlSource = new DOMSource(mpeg7.toXml());
tf.transform(xmlSource, new StreamResult(outputStream));
} catch (TransformerConfigurationException e) {
logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
throw new IOException(e);
} catch (TransformerFactoryConfigurationError e) {
logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
throw new IOException(e);
} catch (TransformerException e) {
logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
throw new IOException(e);
} catch (ParserConfigurationException e) {
logger.warn("Error serializing mpeg7 captions catalog: {}", e.getMessage());
throw new IOException(e);
}
}
use of org.opencastproject.metadata.mpeg7.Mpeg7Catalog in project opencast by opencast.
the class TextAnalysisWorkflowOperationHandler method extractVideoText.
/**
* Runs the text analysis service on each of the video segments found.
*
* @param mediaPackage
* the original mediapackage
* @param operation
* the workflow operation
* @throws ExecutionException
* @throws InterruptedException
* @throws NotFoundException
* @throws WorkflowOperationException
*/
protected WorkflowOperationResult extractVideoText(final MediaPackage mediaPackage, WorkflowOperationInstance operation) throws EncoderException, InterruptedException, ExecutionException, IOException, NotFoundException, MediaPackageException, TextAnalyzerException, WorkflowOperationException, ServiceRegistryException {
long totalTimeInQueue = 0;
List<String> sourceTagSet = asList(operation.getConfiguration("source-tags"));
List<String> targetTagSet = asList(operation.getConfiguration("target-tags"));
// Select the catalogs according to the tags
Map<Catalog, Mpeg7Catalog> catalogs = loadSegmentCatalogs(mediaPackage, operation);
// Was there at least one matching catalog
if (catalogs.size() == 0) {
logger.debug("Mediapackage {} has no suitable mpeg-7 catalogs based on tags {} to to run text analysis", mediaPackage, sourceTagSet);
return createResult(mediaPackage, Action.CONTINUE);
}
// Loop over all existing segment catalogs
for (Entry<Catalog, Mpeg7Catalog> mapEntry : catalogs.entrySet()) {
Map<VideoSegment, Job> jobs = new HashMap<VideoSegment, Job>();
List<Attachment> images = new LinkedList<Attachment>();
Catalog segmentCatalog = mapEntry.getKey();
try {
MediaPackageReference catalogRef = segmentCatalog.getReference();
// Make sure we can figure out the source track
if (catalogRef == null) {
logger.info("Skipping catalog {} since we can't determine the source track", segmentCatalog);
} else if (mediaPackage.getElementByReference(catalogRef) == null) {
logger.info("Skipping catalog {} since we can't determine the source track", segmentCatalog);
} else if (!(mediaPackage.getElementByReference(catalogRef) instanceof Track)) {
logger.info("Skipping catalog {} since it's source was not a track", segmentCatalog);
}
logger.info("Analyzing mpeg-7 segments catalog {} for text", segmentCatalog);
// Create a copy that will contain the segments enriched with the video text elements
Mpeg7Catalog textCatalog = mapEntry.getValue().clone();
Track sourceTrack = mediaPackage.getTrack(catalogRef.getIdentifier());
// Load the temporal decomposition (segments)
Video videoContent = textCatalog.videoContent().next();
TemporalDecomposition<? extends Segment> decomposition = videoContent.getTemporalDecomposition();
Iterator<? extends Segment> segmentIterator = decomposition.segments();
// For every segment, try to find the still image and run text analysis on it
List<VideoSegment> videoSegments = new LinkedList<VideoSegment>();
while (segmentIterator.hasNext()) {
Segment segment = segmentIterator.next();
if ((segment instanceof VideoSegment))
videoSegments.add((VideoSegment) segment);
}
// argument array for image extraction
long[] times = new long[videoSegments.size()];
for (int i = 0; i < videoSegments.size(); i++) {
VideoSegment videoSegment = videoSegments.get(i);
MediaTimePoint segmentTimePoint = videoSegment.getMediaTime().getMediaTimePoint();
MediaDuration segmentDuration = videoSegment.getMediaTime().getMediaDuration();
// Choose a time
MediaPackageReference reference = null;
if (catalogRef == null)
reference = new MediaPackageReferenceImpl();
else
reference = new MediaPackageReferenceImpl(catalogRef.getType(), catalogRef.getIdentifier());
reference.setProperty("time", segmentTimePoint.toString());
// Have the time for ocr image created. To circumvent problems with slowly building slides, we take the image
// that is
// almost at the end of the segment, it should contain the most content and is stable as well.
long startTimeSeconds = segmentTimePoint.getTimeInMilliseconds() / 1000;
long durationSeconds = segmentDuration.getDurationInMilliseconds() / 1000;
times[i] = Math.max(startTimeSeconds + durationSeconds - stabilityThreshold + 1, 0);
}
// Have the ocr image(s) created.
// TODO: Note that the way of having one image extracted after the other is suited for
// the ffmpeg-based encoder. When switching to other encoding engines such as gstreamer, it might be preferable
// to pass in all timepoints to the image extraction method at once.
SortedMap<Long, Job> extractImageJobs = new TreeMap<Long, Job>();
try {
for (long time : times) {
extractImageJobs.put(time, composer.image(sourceTrack, IMAGE_EXTRACTION_PROFILE, time));
}
if (!waitForStatus(extractImageJobs.values().toArray(new Job[extractImageJobs.size()])).isSuccess())
throw new WorkflowOperationException("Extracting scene image from " + sourceTrack + " failed");
for (Map.Entry<Long, Job> entry : extractImageJobs.entrySet()) {
Job job = serviceRegistry.getJob(entry.getValue().getId());
Attachment image = (Attachment) MediaPackageElementParser.getFromXml(job.getPayload());
images.add(image);
totalTimeInQueue += job.getQueueTime();
}
} catch (EncoderException e) {
logger.error("Error creating still image(s) from {}", sourceTrack);
throw e;
}
// Run text extraction on each of the images
Iterator<VideoSegment> it = videoSegments.iterator();
for (MediaPackageElement element : images) {
Attachment image = (Attachment) element;
VideoSegment videoSegment = it.next();
jobs.put(videoSegment, analysisService.extract(image));
}
// Wait for all jobs to be finished
if (!waitForStatus(jobs.values().toArray(new Job[jobs.size()])).isSuccess()) {
throw new WorkflowOperationException("Text extraction failed on images from " + sourceTrack);
}
// Process the text extraction results
for (Map.Entry<VideoSegment, Job> entry : jobs.entrySet()) {
Job job = serviceRegistry.getJob(entry.getValue().getId());
totalTimeInQueue += job.getQueueTime();
VideoSegment videoSegment = entry.getKey();
MediaDuration segmentDuration = videoSegment.getMediaTime().getMediaDuration();
Catalog catalog = (Catalog) MediaPackageElementParser.getFromXml(job.getPayload());
if (catalog == null) {
logger.warn("Text analysis did not return a valid mpeg7 for segment {}", videoSegment);
continue;
}
Mpeg7Catalog videoTextCatalog = loadMpeg7Catalog(catalog);
if (videoTextCatalog == null)
throw new IllegalStateException("Text analysis service did not return a valid mpeg7");
// Add the spatiotemporal decompositions from the new catalog to the existing video segments
Iterator<Video> videoTextContents = videoTextCatalog.videoContent();
if (videoTextContents == null || !videoTextContents.hasNext()) {
logger.debug("Text analysis was not able to extract any text from {}", job.getArguments().get(0));
break;
}
try {
Video textVideoContent = videoTextContents.next();
VideoSegment textVideoSegment = (VideoSegment) textVideoContent.getTemporalDecomposition().segments().next();
VideoText[] videoTexts = textVideoSegment.getSpatioTemporalDecomposition().getVideoText();
SpatioTemporalDecomposition std = videoSegment.createSpatioTemporalDecomposition(true, false);
for (VideoText videoText : videoTexts) {
MediaTime mediaTime = new MediaTimeImpl(new MediaRelTimePointImpl(0), segmentDuration);
SpatioTemporalLocator locator = new SpatioTemporalLocatorImpl(mediaTime);
videoText.setSpatioTemporalLocator(locator);
std.addVideoText(videoText);
}
} catch (Exception e) {
logger.warn("The mpeg-7 structure returned by the text analyzer is not what is expected", e);
continue;
}
}
// Put the catalog into the workspace and add it to the media package
MediaPackageElementBuilder builder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder();
Catalog catalog = (Catalog) builder.newElement(MediaPackageElement.Type.Catalog, MediaPackageElements.TEXTS);
catalog.setIdentifier(null);
catalog.setReference(segmentCatalog.getReference());
// the catalog now has an ID, so we can store the file properly
mediaPackage.add(catalog);
InputStream in = mpeg7CatalogService.serialize(textCatalog);
String filename = "slidetext.xml";
URI workspaceURI = workspace.put(mediaPackage.getIdentifier().toString(), catalog.getIdentifier(), filename, in);
catalog.setURI(workspaceURI);
// Since we've enriched and stored the mpeg7 catalog, remove the original
try {
mediaPackage.remove(segmentCatalog);
workspace.delete(segmentCatalog.getURI());
} catch (Exception e) {
logger.warn("Unable to delete segment catalog {}: {}", segmentCatalog.getURI(), e);
}
// Add flavor and target tags
catalog.setFlavor(MediaPackageElements.TEXTS);
for (String tag : targetTagSet) {
catalog.addTag(tag);
}
} finally {
// Remove images that were created for text extraction
logger.debug("Removing temporary images");
for (Attachment image : images) {
try {
workspace.delete(image.getURI());
} catch (Exception e) {
logger.warn("Unable to delete temporary image {}: {}", image.getURI(), e);
}
}
// Remove the temporary text
for (Job j : jobs.values()) {
Catalog catalog = null;
try {
Job job = serviceRegistry.getJob(j.getId());
if (!Job.Status.FINISHED.equals(job.getStatus()))
continue;
catalog = (Catalog) MediaPackageElementParser.getFromXml(job.getPayload());
if (catalog != null)
workspace.delete(catalog.getURI());
} catch (Exception e) {
if (catalog != null) {
logger.warn("Unable to delete temporary text file {}: {}", catalog.getURI(), e);
} else {
logger.warn("Unable to parse textextraction payload of job {}", j.getId());
}
}
}
}
}
logger.debug("Text analysis completed");
return createResult(mediaPackage, Action.CONTINUE, totalTimeInQueue);
}
use of org.opencastproject.metadata.mpeg7.Mpeg7Catalog in project opencast by opencast.
the class TextAnalysisWorkflowOperationHandler method loadSegmentCatalogs.
/**
* Extracts the catalogs from the media package that match the requirements of flavor and tags specified in the
* operation handler.
*
* @param mediaPackage
* the media package
* @param operation
* the workflow operation
* @return a map of catalog elements and their mpeg-7 representations
* @throws IOException
* if there is a problem reading the mpeg7
*/
protected Map<Catalog, Mpeg7Catalog> loadSegmentCatalogs(MediaPackage mediaPackage, WorkflowOperationInstance operation) throws IOException {
HashMap<Catalog, Mpeg7Catalog> catalogs = new HashMap<Catalog, Mpeg7Catalog>();
String sourceFlavor = StringUtils.trimToNull(operation.getConfiguration("source-flavor"));
List<String> sourceTagSet = asList(operation.getConfiguration("source-tags"));
Catalog[] catalogsWithTags = mediaPackage.getCatalogsByTags(sourceTagSet);
for (Catalog mediaPackageCatalog : catalogsWithTags) {
if (!MediaPackageElements.SEGMENTS.equals(mediaPackageCatalog.getFlavor())) {
continue;
}
if (sourceFlavor != null) {
if (mediaPackageCatalog.getReference() == null)
continue;
Track t = mediaPackage.getTrack(mediaPackageCatalog.getReference().getIdentifier());
if (t == null || !t.getFlavor().matches(MediaPackageElementFlavor.parseFlavor(sourceFlavor)))
continue;
}
// Make sure the catalog features at least one of the required tags
if (!mediaPackageCatalog.containsTag(sourceTagSet))
continue;
Mpeg7Catalog mpeg7 = loadMpeg7Catalog(mediaPackageCatalog);
// Make sure there is video content
if (mpeg7.videoContent() == null || !mpeg7.videoContent().hasNext()) {
logger.debug("Mpeg-7 segments catalog {} does not contain any video content", mpeg7);
continue;
}
// Make sure there is a temporal decomposition
Video videoContent = mpeg7.videoContent().next();
TemporalDecomposition<? extends Segment> decomposition = videoContent.getTemporalDecomposition();
if (decomposition == null || !decomposition.hasSegments()) {
logger.debug("Mpeg-7 catalog {} does not contain a temporal decomposition", mpeg7);
continue;
}
catalogs.put(mediaPackageCatalog, mpeg7);
}
return catalogs;
}
Aggregations