Search in sources :

Example 66 with Catalog

use of org.opencastproject.mediapackage.Catalog in project opencast by opencast.

the class TextAnalysisWorkflowOperationHandler method extractVideoText.

/**
 * Runs the text analysis service on each of the video segments found.
 * <p>
 * For every mpeg-7 segments catalog selected by {@code loadSegmentCatalogs}, one still image per video segment is
 * extracted from the catalog's source track, text extraction is run on each image, and the resulting video texts
 * are added to a copy of the segments catalog. The enriched copy is stored in the workspace and added to the media
 * package, and the original segments catalog is removed. Temporary images and per-segment text catalogs are deleted
 * again, whether or not processing succeeded.
 *
 * @param mediaPackage
 *          the original mediapackage
 * @param operation
 *          the workflow operation
 * @return the operation result carrying the (possibly modified) media package; the action is always
 *         {@code Action.CONTINUE}
 * @throws EncoderException
 *           if creating the still images fails
 * @throws InterruptedException
 *           declared by this method; presumably raised while waiting for jobs
 * @throws ExecutionException
 *           declared by this method; presumably raised while waiting for jobs
 * @throws IOException
 *           if a catalog cannot be read, serialized or stored
 * @throws NotFoundException
 *           declared by this method; presumably raised when a job or workspace element cannot be found
 * @throws MediaPackageException
 *           if a job payload cannot be parsed into a media package element
 * @throws TextAnalyzerException
 *           if the text analysis service fails to accept an image
 * @throws WorkflowOperationException
 *           if the image extraction or text extraction jobs do not finish successfully
 * @throws ServiceRegistryException
 *           if a job cannot be looked up in the service registry
 */
protected WorkflowOperationResult extractVideoText(final MediaPackage mediaPackage, WorkflowOperationInstance operation) throws EncoderException, InterruptedException, ExecutionException, IOException, NotFoundException, MediaPackageException, TextAnalyzerException, WorkflowOperationException, ServiceRegistryException {
    long totalTimeInQueue = 0;
    List<String> sourceTagSet = asList(operation.getConfiguration("source-tags"));
    List<String> targetTagSet = asList(operation.getConfiguration("target-tags"));
    // Select the catalogs according to the tags
    Map<Catalog, Mpeg7Catalog> catalogs = loadSegmentCatalogs(mediaPackage, operation);
    // Was there at least one matching catalog
    if (catalogs.size() == 0) {
        logger.debug("Mediapackage {} has no suitable mpeg-7 catalogs based on tags {} to to run text analysis", mediaPackage, sourceTagSet);
        return createResult(mediaPackage, Action.CONTINUE);
    }
    // Loop over all existing segment catalogs
    for (Entry<Catalog, Mpeg7Catalog> mapEntry : catalogs.entrySet()) {
        Map<VideoSegment, Job> jobs = new HashMap<VideoSegment, Job>();
        List<Attachment> images = new LinkedList<Attachment>();
        Catalog segmentCatalog = mapEntry.getKey();
        try {
            MediaPackageReference catalogRef = segmentCatalog.getReference();
            // Make sure we can figure out the source track. Each failed check really skips the catalog; falling
            // through would dereference a null reference or a non-track element below.
            if (catalogRef == null) {
                logger.info("Skipping catalog {} since we can't determine the source track", segmentCatalog);
                continue;
            }
            MediaPackageElement referencedElement = mediaPackage.getElementByReference(catalogRef);
            if (referencedElement == null) {
                logger.info("Skipping catalog {} since we can't determine the source track", segmentCatalog);
                continue;
            }
            if (!(referencedElement instanceof Track)) {
                logger.info("Skipping catalog {} since it's source was not a track", segmentCatalog);
                continue;
            }
            logger.info("Analyzing mpeg-7 segments catalog {} for text", segmentCatalog);
            // Create a copy that will contain the segments enriched with the video text elements
            Mpeg7Catalog textCatalog = mapEntry.getValue().clone();
            Track sourceTrack = mediaPackage.getTrack(catalogRef.getIdentifier());
            // Load the temporal decomposition (segments)
            Video videoContent = textCatalog.videoContent().next();
            TemporalDecomposition<? extends Segment> decomposition = videoContent.getTemporalDecomposition();
            Iterator<? extends Segment> segmentIterator = decomposition.segments();
            // For every segment, try to find the still image and run text analysis on it
            List<VideoSegment> videoSegments = new LinkedList<VideoSegment>();
            while (segmentIterator.hasNext()) {
                Segment segment = segmentIterator.next();
                if (segment instanceof VideoSegment) {
                    videoSegments.add((VideoSegment) segment);
                }
            }
            // argument array for image extraction, one entry per video segment (in segment order)
            long[] times = new long[videoSegments.size()];
            int timeIndex = 0;
            for (VideoSegment videoSegment : videoSegments) {
                MediaTimePoint segmentTimePoint = videoSegment.getMediaTime().getMediaTimePoint();
                MediaDuration segmentDuration = videoSegment.getMediaTime().getMediaDuration();
                // Have the time for ocr image created. To circumvent problems with slowly building slides, we take
                // the image that is almost at the end of the segment, it should contain the most content and is
                // stable as well.
                long startTimeSeconds = segmentTimePoint.getTimeInMilliseconds() / 1000;
                long durationSeconds = segmentDuration.getDurationInMilliseconds() / 1000;
                times[timeIndex++] = Math.max(startTimeSeconds + durationSeconds - stabilityThreshold + 1, 0);
            }
            // Have the ocr image(s) created.
            // TODO: Note that the way of having one image extracted after the other is suited for
            // the ffmpeg-based encoder. When switching to other encoding engines such as gstreamer, it might be preferable
            // to pass in all timepoints to the image extraction method at once.
            // The jobs are kept in segment order. Keying them by time would silently drop jobs whenever two segments
            // map to the same time (e.g. several results clamped to 0) and would mispair images and segments.
            List<Job> extractImageJobs = new LinkedList<Job>();
            try {
                for (long time : times) {
                    extractImageJobs.add(composer.image(sourceTrack, IMAGE_EXTRACTION_PROFILE, time));
                }
                if (!waitForStatus(extractImageJobs.toArray(new Job[extractImageJobs.size()])).isSuccess()) {
                    throw new WorkflowOperationException("Extracting scene image from " + sourceTrack + " failed");
                }
                for (Job extractImageJob : extractImageJobs) {
                    // Reload the job to get its final payload and queue time
                    Job job = serviceRegistry.getJob(extractImageJob.getId());
                    Attachment image = (Attachment) MediaPackageElementParser.getFromXml(job.getPayload());
                    images.add(image);
                    totalTimeInQueue += job.getQueueTime();
                }
            } catch (EncoderException e) {
                logger.error("Error creating still image(s) from {}", sourceTrack);
                throw e;
            }
            // Run text extraction on each of the images; images and segments are in the same order
            Iterator<VideoSegment> it = videoSegments.iterator();
            for (Attachment image : images) {
                VideoSegment videoSegment = it.next();
                jobs.put(videoSegment, analysisService.extract(image));
            }
            // Wait for all jobs to be finished
            if (!waitForStatus(jobs.values().toArray(new Job[jobs.size()])).isSuccess()) {
                throw new WorkflowOperationException("Text extraction failed on images from " + sourceTrack);
            }
            // Process the text extraction results
            for (Map.Entry<VideoSegment, Job> entry : jobs.entrySet()) {
                Job job = serviceRegistry.getJob(entry.getValue().getId());
                totalTimeInQueue += job.getQueueTime();
                VideoSegment videoSegment = entry.getKey();
                MediaDuration segmentDuration = videoSegment.getMediaTime().getMediaDuration();
                Catalog catalog = (Catalog) MediaPackageElementParser.getFromXml(job.getPayload());
                if (catalog == null) {
                    logger.warn("Text analysis did not return a valid mpeg7 for segment {}", videoSegment);
                    continue;
                }
                Mpeg7Catalog videoTextCatalog = loadMpeg7Catalog(catalog);
                if (videoTextCatalog == null) {
                    throw new IllegalStateException("Text analysis service did not return a valid mpeg7");
                }
                // Add the spatiotemporal decompositions from the new catalog to the existing video segments
                Iterator<Video> videoTextContents = videoTextCatalog.videoContent();
                if (videoTextContents == null || !videoTextContents.hasNext()) {
                    logger.debug("Text analysis was not able to extract any text from {}", job.getArguments().get(0));
                    // Only this segment is without text; the remaining segments must still be processed
                    continue;
                }
                try {
                    Video textVideoContent = videoTextContents.next();
                    VideoSegment textVideoSegment = (VideoSegment) textVideoContent.getTemporalDecomposition().segments().next();
                    VideoText[] videoTexts = textVideoSegment.getSpatioTemporalDecomposition().getVideoText();
                    SpatioTemporalDecomposition std = videoSegment.createSpatioTemporalDecomposition(true, false);
                    for (VideoText videoText : videoTexts) {
                        MediaTime mediaTime = new MediaTimeImpl(new MediaRelTimePointImpl(0), segmentDuration);
                        SpatioTemporalLocator locator = new SpatioTemporalLocatorImpl(mediaTime);
                        videoText.setSpatioTemporalLocator(locator);
                        std.addVideoText(videoText);
                    }
                } catch (Exception e) {
                    logger.warn("The mpeg-7 structure returned by the text analyzer is not what is expected", e);
                    continue;
                }
            }
            // Put the catalog into the workspace and add it to the media package
            MediaPackageElementBuilder builder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder();
            Catalog catalog = (Catalog) builder.newElement(MediaPackageElement.Type.Catalog, MediaPackageElements.TEXTS);
            catalog.setIdentifier(null);
            catalog.setReference(segmentCatalog.getReference());
            // the catalog now has an ID, so we can store the file properly
            mediaPackage.add(catalog);
            String filename = "slidetext.xml";
            URI workspaceURI;
            // Close the serialized stream once it has been stored in the workspace
            try (InputStream in = mpeg7CatalogService.serialize(textCatalog)) {
                workspaceURI = workspace.put(mediaPackage.getIdentifier().toString(), catalog.getIdentifier(), filename, in);
            }
            catalog.setURI(workspaceURI);
            // Since we've enriched and stored the mpeg7 catalog, remove the original
            try {
                mediaPackage.remove(segmentCatalog);
                workspace.delete(segmentCatalog.getURI());
            } catch (Exception e) {
                logger.warn("Unable to delete segment catalog {}: {}", segmentCatalog.getURI(), e);
            }
            // Add flavor and target tags
            catalog.setFlavor(MediaPackageElements.TEXTS);
            for (String tag : targetTagSet) {
                catalog.addTag(tag);
            }
        } finally {
            // Remove images that were created for text extraction
            logger.debug("Removing temporary images");
            for (Attachment image : images) {
                try {
                    workspace.delete(image.getURI());
                } catch (Exception e) {
                    logger.warn("Unable to delete temporary image {}: {}", image.getURI(), e);
                }
            }
            // Remove the temporary text
            for (Job j : jobs.values()) {
                Catalog catalog = null;
                try {
                    Job job = serviceRegistry.getJob(j.getId());
                    // Only finished jobs are expected to have produced a text catalog worth deleting
                    if (!Job.Status.FINISHED.equals(job.getStatus())) {
                        continue;
                    }
                    catalog = (Catalog) MediaPackageElementParser.getFromXml(job.getPayload());
                    if (catalog != null) {
                        workspace.delete(catalog.getURI());
                    }
                } catch (Exception e) {
                    if (catalog != null) {
                        logger.warn("Unable to delete temporary text file {}: {}", catalog.getURI(), e);
                    } else {
                        logger.warn("Unable to parse textextraction payload of job {}", j.getId());
                    }
                }
            }
        }
    }
    logger.debug("Text analysis completed");
    return createResult(mediaPackage, Action.CONTINUE, totalTimeInQueue);
}
Also used : HashMap(java.util.HashMap) Attachment(org.opencastproject.mediapackage.Attachment) MediaPackageElementBuilder(org.opencastproject.mediapackage.MediaPackageElementBuilder) MediaTimeImpl(org.opencastproject.metadata.mpeg7.MediaTimeImpl) MediaRelTimePointImpl(org.opencastproject.metadata.mpeg7.MediaRelTimePointImpl) MediaPackageElement(org.opencastproject.mediapackage.MediaPackageElement) Job(org.opencastproject.job.api.Job) Catalog(org.opencastproject.mediapackage.Catalog) Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) LinkedList(java.util.LinkedList) Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) SpatioTemporalLocator(org.opencastproject.metadata.mpeg7.SpatioTemporalLocator) Video(org.opencastproject.metadata.mpeg7.Video) SpatioTemporalLocatorImpl(org.opencastproject.metadata.mpeg7.SpatioTemporalLocatorImpl) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) MediaPackageReferenceImpl(org.opencastproject.mediapackage.MediaPackageReferenceImpl) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) VideoText(org.opencastproject.metadata.mpeg7.VideoText) URI(java.net.URI) Segment(org.opencastproject.metadata.mpeg7.Segment) VideoSegment(org.opencastproject.metadata.mpeg7.VideoSegment) WorkflowOperationException(org.opencastproject.workflow.api.WorkflowOperationException) SpatioTemporalDecomposition(org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) TreeMap(java.util.TreeMap) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) ConfigurationException(org.osgi.service.cm.ConfigurationException) ServiceRegistryException(org.opencastproject.serviceregistry.api.ServiceRegistryException) WorkflowOperationException(org.opencastproject.workflow.api.WorkflowOperationException) 
TextAnalyzerException(org.opencastproject.textanalyzer.api.TextAnalyzerException) MediaPackageException(org.opencastproject.mediapackage.MediaPackageException) NotFoundException(org.opencastproject.util.NotFoundException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) EncoderException(org.opencastproject.composer.api.EncoderException) MediaPackageReference(org.opencastproject.mediapackage.MediaPackageReference) EncoderException(org.opencastproject.composer.api.EncoderException) VideoSegment(org.opencastproject.metadata.mpeg7.VideoSegment) MediaDuration(org.opencastproject.metadata.mpeg7.MediaDuration) Track(org.opencastproject.mediapackage.Track)

Example 67 with Catalog

use of org.opencastproject.mediapackage.Catalog in project opencast by opencast.

the class TextAnalysisWorkflowOperationHandler method loadSegmentCatalogs.

/**
 * Extracts the catalogs from the media package that match the requirements of flavor and tags specified in the
 * operation handler.
 * <p>
 * A catalog is returned only if it has the segments flavor, carries at least one of the configured
 * {@code source-tags}, and its mpeg-7 content holds video content with a temporal decomposition that has segments.
 * If {@code source-flavor} is configured, the catalog must additionally reference a track whose flavor matches it.
 *
 * @param mediaPackage
 *          the media package
 * @param operation
 *          the workflow operation
 * @return a map of catalog elements and their mpeg-7 representations; empty if no catalog qualifies
 * @throws IOException
 *           if there is a problem reading the mpeg7
 */
protected Map<Catalog, Mpeg7Catalog> loadSegmentCatalogs(MediaPackage mediaPackage, WorkflowOperationInstance operation) throws IOException {
    Map<Catalog, Mpeg7Catalog> catalogs = new HashMap<Catalog, Mpeg7Catalog>();
    String sourceFlavor = StringUtils.trimToNull(operation.getConfiguration("source-flavor"));
    List<String> sourceTagSet = asList(operation.getConfiguration("source-tags"));
    Catalog[] catalogsWithTags = mediaPackage.getCatalogsByTags(sourceTagSet);
    for (Catalog mediaPackageCatalog : catalogsWithTags) {
        if (!MediaPackageElements.SEGMENTS.equals(mediaPackageCatalog.getFlavor())) {
            continue;
        }
        if (sourceFlavor != null) {
            // The flavor restriction applies to the track the catalog was derived from
            if (mediaPackageCatalog.getReference() == null) {
                continue;
            }
            Track t = mediaPackage.getTrack(mediaPackageCatalog.getReference().getIdentifier());
            // A track without a flavor cannot match the configured source flavor
            if (t == null || t.getFlavor() == null || !t.getFlavor().matches(MediaPackageElementFlavor.parseFlavor(sourceFlavor))) {
                continue;
            }
        }
        // Make sure the catalog features at least one of the required tags
        if (!mediaPackageCatalog.containsTag(sourceTagSet)) {
            continue;
        }
        Mpeg7Catalog mpeg7 = loadMpeg7Catalog(mediaPackageCatalog);
        // Other callers in this class treat a null result as possible, so guard before dereferencing
        if (mpeg7 == null) {
            logger.debug("Unable to load mpeg-7 catalog {}", mediaPackageCatalog);
            continue;
        }
        // Make sure there is video content
        Iterator<Video> videoContents = mpeg7.videoContent();
        if (videoContents == null || !videoContents.hasNext()) {
            logger.debug("Mpeg-7 segments catalog {} does not contain any video content", mpeg7);
            continue;
        }
        // Make sure there is a temporal decomposition
        Video videoContent = videoContents.next();
        TemporalDecomposition<? extends Segment> decomposition = videoContent.getTemporalDecomposition();
        if (decomposition == null || !decomposition.hasSegments()) {
            logger.debug("Mpeg-7 catalog {} does not contain a temporal decomposition", mpeg7);
            continue;
        }
        catalogs.put(mediaPackageCatalog, mpeg7);
    }
    return catalogs;
}
Also used : Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) HashMap(java.util.HashMap) Video(org.opencastproject.metadata.mpeg7.Video) Catalog(org.opencastproject.mediapackage.Catalog) Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) Track(org.opencastproject.mediapackage.Track)

Example 68 with Catalog

use of org.opencastproject.mediapackage.Catalog in project opencast by opencast.

the class SchedulerServiceImpl method updateDublincCoreCatalog.

/**
 * Replaces the content of the media package's first episode catalog with the given dublincore metadata.
 * <p>
 * The serialized dublincore XML is stored in the workspace under the catalog's identifier, the catalog is pointed
 * at the new workspace URI and its checksum is reset.
 *
 * @param mp
 *          the mediapackage to update
 * @param dc
 *          the dublincore metadata to use to update the mediapackage
 * @return the updated mediapackage (the same instance that was passed in)
 * @throws IOException
 *           Thrown if an IO error occurred adding the dc catalog file
 * @throws MediaPackageException
 *           Thrown if the mediapackage does not contain an episode catalog
 */
private MediaPackage updateDublincCoreCatalog(MediaPackage mp, DublinCoreCatalog dc) throws IOException, MediaPackageException {
    try (InputStream dcStream = IOUtils.toInputStream(dc.toXmlString(), "UTF-8")) {
        final Catalog[] episodeCatalogs = mp.getCatalogs(MediaPackageElements.EPISODE);
        if (episodeCatalogs.length == 0) {
            throw new MediaPackageException("Unable to find catalog");
        }
        // Only the first episode catalog is updated
        final Catalog episodeCatalog = episodeCatalogs[0];
        final String mediaPackageId = mp.getIdentifier().toString();
        final URI storedUri = workspace.put(mediaPackageId, episodeCatalog.getIdentifier(), "dublincore.xml", dcStream);
        episodeCatalog.setURI(storedUri);
        // The catalog now points at new content, so the old checksum will most likely be invalid
        episodeCatalog.setChecksum(null);
    }
    return mp;
}
Also used : MediaPackageException(org.opencastproject.mediapackage.MediaPackageException) InputStream(java.io.InputStream) URI(java.net.URI) DublinCoreCatalog(org.opencastproject.metadata.dublincore.DublinCoreCatalog) Catalog(org.opencastproject.mediapackage.Catalog)

Example 69 with Catalog

use of org.opencastproject.mediapackage.Catalog in project opencast by opencast.

the class TagByDublinCoreTermWOHTest method testMisMatchDefaultDCTerm.

/**
 * Runs the operation with a default value ("Cairo") that differs from the match value ("Timbuktu") and checks that
 * catalog "catalog-1" ends up with exactly one tag, "archive".
 */
@Test
public void testMisMatchDefaultDCTerm() throws Exception {
    // Match != Default Value
    operation.setConfiguration(TagByDublinCoreTermWOH.SOURCE_FLAVORS_PROPERTY, "dublincore/*");
    operation.setConfiguration(TagByDublinCoreTermWOH.DCCATALOG_PROPERTY, "episode");
    operation.setConfiguration(TagByDublinCoreTermWOH.DCTERM_PROPERTY, "source");
    operation.setConfiguration(TagByDublinCoreTermWOH.DEFAULT_VALUE_PROPERTY, "Cairo");
    operation.setConfiguration(TagByDublinCoreTermWOH.MATCH_VALUE_PROPERTY, "Timbuktu");
    operation.setConfiguration(TagByDublinCoreTermWOH.TARGET_TAGS_PROPERTY, "tag1,tag2");
    operation.setConfiguration(TagByDublinCoreTermWOH.COPY_PROPERTY, "false");

    final WorkflowOperationResult outcome = operationHandler.start(instance, null);
    final Catalog taggedCatalog = outcome.getMediaPackage().getCatalog("catalog-1");

    // Only the single "archive" tag is expected on the catalog
    Assert.assertEquals(1, taggedCatalog.getTags().length);
    Assert.assertEquals("archive", taggedCatalog.getTags()[0]);
}
Also used : MediaPackage(org.opencastproject.mediapackage.MediaPackage) WorkflowOperationResult(org.opencastproject.workflow.api.WorkflowOperationResult) Catalog(org.opencastproject.mediapackage.Catalog) Test(org.junit.Test)

Example 70 with Catalog

use of org.opencastproject.mediapackage.Catalog in project opencast by opencast.

the class TagByDublinCoreTermWOHTest method testMissingNoDefaultDCTerm.

/**
 * Runs the operation without configuring a default value and checks that catalog "catalog-1" ends up with exactly
 * one tag, "archive".
 */
@Test
public void testMissingNoDefaultDCTerm() throws Exception {
    // No Default Value
    operation.setConfiguration(TagByDublinCoreTermWOH.SOURCE_FLAVORS_PROPERTY, "dublincore/*");
    operation.setConfiguration(TagByDublinCoreTermWOH.DCCATALOG_PROPERTY, "episode");
    operation.setConfiguration(TagByDublinCoreTermWOH.DCTERM_PROPERTY, "source");
    operation.setConfiguration(TagByDublinCoreTermWOH.MATCH_VALUE_PROPERTY, "Timbuktu");
    operation.setConfiguration(TagByDublinCoreTermWOH.TARGET_TAGS_PROPERTY, "tag1,tag2");
    operation.setConfiguration(TagByDublinCoreTermWOH.COPY_PROPERTY, "false");

    final WorkflowOperationResult outcome = operationHandler.start(instance, null);
    final Catalog taggedCatalog = outcome.getMediaPackage().getCatalog("catalog-1");

    // Only the single "archive" tag is expected on the catalog
    Assert.assertEquals(1, taggedCatalog.getTags().length);
    Assert.assertEquals("archive", taggedCatalog.getTags()[0]);
}
Also used : MediaPackage(org.opencastproject.mediapackage.MediaPackage) WorkflowOperationResult(org.opencastproject.workflow.api.WorkflowOperationResult) Catalog(org.opencastproject.mediapackage.Catalog) Test(org.junit.Test)

Aggregations

Catalog (org.opencastproject.mediapackage.Catalog)77 MediaPackage (org.opencastproject.mediapackage.MediaPackage)35 DublinCoreCatalog (org.opencastproject.metadata.dublincore.DublinCoreCatalog)34 URI (java.net.URI)32 Test (org.junit.Test)24 InputStream (java.io.InputStream)21 IOException (java.io.IOException)18 Job (org.opencastproject.job.api.Job)17 NotFoundException (org.opencastproject.util.NotFoundException)17 MediaPackageElementFlavor (org.opencastproject.mediapackage.MediaPackageElementFlavor)16 Track (org.opencastproject.mediapackage.Track)15 Attachment (org.opencastproject.mediapackage.Attachment)14 MediaPackageException (org.opencastproject.mediapackage.MediaPackageException)11 Mpeg7Catalog (org.opencastproject.metadata.mpeg7.Mpeg7Catalog)11 WorkflowOperationException (org.opencastproject.workflow.api.WorkflowOperationException)11 WorkflowOperationResult (org.opencastproject.workflow.api.WorkflowOperationResult)10 File (java.io.File)9 MediaPackageElement (org.opencastproject.mediapackage.MediaPackageElement)9 ArrayList (java.util.ArrayList)8 WorkflowOperationInstance (org.opencastproject.workflow.api.WorkflowOperationInstance)8