Search in sources :

Example 11 with VideoSegment

use of com.google.cloud.videointelligence.v1p2beta1.VideoSegment in project java-video-intelligence by googleapis.

the class DetectPersonGcs method detectPersonGcs.

// Detects people in a video stored in Google Cloud Storage using
// the Cloud Video Intelligence API.
public static void detectPersonGcs(String gcsUri) throws Exception {
    try (VideoIntelligenceServiceClient videoIntelligenceServiceClient = VideoIntelligenceServiceClient.create()) {
        // Reads a local video file and converts it to base64.
        PersonDetectionConfig personDetectionConfig = PersonDetectionConfig.newBuilder().setIncludeBoundingBoxes(true).setIncludePoseLandmarks(true).setIncludeAttributes(true).build();
        VideoContext videoContext = VideoContext.newBuilder().setPersonDetectionConfig(personDetectionConfig).build();
        AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder().setInputUri(gcsUri).addFeatures(Feature.PERSON_DETECTION).setVideoContext(videoContext).build();
        // Detects people in a video
        OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future = videoIntelligenceServiceClient.annotateVideoAsync(request);
        System.out.println("Waiting for operation to complete...");
        AnnotateVideoResponse response = future.get();
        // Get the first response, since we sent only one video.
        VideoAnnotationResults annotationResult = response.getAnnotationResultsList().get(0);
        // Annotations for list of people detected, tracked and recognized in video.
        for (PersonDetectionAnnotation personDetectionAnnotation : annotationResult.getPersonDetectionAnnotationsList()) {
            System.out.print("Person detected:\n");
            for (Track track : personDetectionAnnotation.getTracksList()) {
                VideoSegment segment = track.getSegment();
                System.out.printf("\tStart: %d.%.0fs\n", segment.getStartTimeOffset().getSeconds(), segment.getStartTimeOffset().getNanos() / 1e6);
                System.out.printf("\tEnd: %d.%.0fs\n", segment.getEndTimeOffset().getSeconds(), segment.getEndTimeOffset().getNanos() / 1e6);
                // Each segment includes timestamped objects that include characteristic--e.g. clothes,
                // posture of the person detected.
                TimestampedObject firstTimestampedObject = track.getTimestampedObjects(0);
                // of the person detected.
                for (DetectedAttribute attribute : firstTimestampedObject.getAttributesList()) {
                    System.out.printf("\tAttribute: %s; Value: %s\n", attribute.getName(), attribute.getValue());
                }
                // Landmarks in person detection include body parts.
                for (DetectedLandmark attribute : firstTimestampedObject.getLandmarksList()) {
                    System.out.printf("\tLandmark: %s; Vertex: %f, %f\n", attribute.getName(), attribute.getPoint().getX(), attribute.getPoint().getY());
                }
            }
        }
    }
}
Also used : AnnotateVideoRequest(com.google.cloud.videointelligence.v1.AnnotateVideoRequest) VideoContext(com.google.cloud.videointelligence.v1.VideoContext) PersonDetectionAnnotation(com.google.cloud.videointelligence.v1.PersonDetectionAnnotation) PersonDetectionConfig(com.google.cloud.videointelligence.v1.PersonDetectionConfig) VideoIntelligenceServiceClient(com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient) AnnotateVideoProgress(com.google.cloud.videointelligence.v1.AnnotateVideoProgress) VideoSegment(com.google.cloud.videointelligence.v1.VideoSegment) DetectedLandmark(com.google.cloud.videointelligence.v1.DetectedLandmark) TimestampedObject(com.google.cloud.videointelligence.v1.TimestampedObject) VideoAnnotationResults(com.google.cloud.videointelligence.v1.VideoAnnotationResults) DetectedAttribute(com.google.cloud.videointelligence.v1.DetectedAttribute) Track(com.google.cloud.videointelligence.v1.Track) AnnotateVideoResponse(com.google.cloud.videointelligence.v1.AnnotateVideoResponse)

Example 12 with VideoSegment

use of com.google.cloud.videointelligence.v1p2beta1.VideoSegment in project opencast by opencast.

the class SolrIndexManager method addMpeg7Metadata.

/**
 * Add the mpeg 7 catalog data to the solr document.
 *
 * @param doc
 *          the input document to the solr index
 * @param mpeg7
 *          the mpeg7 catalog
 */
@SuppressWarnings("unchecked")
static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) {
    // Check for multimedia content
    if (!mpeg7.multimediaContent().hasNext()) {
        logger.warn("Mpeg-7 doesn't contain  multimedia content");
        return;
    }
    // Get the content duration by looking at the first content track. This
    // of course assumes that all tracks are equally long.
    MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next();
    MultimediaContentType mct = mc.elements().next();
    MediaTime mediaTime = mct.getMediaTime();
    Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds());
    // Check if the keywords have been filled by (manually) added dublin
    // core data. If not, look for the most relevant fields in mpeg-7.
    SortedSet<TextAnnotation> sortedAnnotations = null;
    if (!"".equals(Schema.getOcKeywords(doc))) {
        sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() {

            @Override
            public int compare(TextAnnotation a1, TextAnnotation a2) {
                double v1 = RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence();
                double v2 = RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence();
                if (v1 > v2) {
                    return -1;
                } else if (v1 < v2) {
                    return 1;
                }
                return 0;
            }
        });
    }
    // Iterate over the tracks and extract keywords and hints
    Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent();
    int segmentCount = 0;
    while (mmIter.hasNext()) {
        MultimediaContent<?> multimediaContent = mmIter.next();
        // We need to process visual segments first, due to the way they are handled in the ui.
        for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext(); ) {
            MultimediaContentType type = (MultimediaContentType) iterator.next();
            if (!(type instanceof Video) && !(type instanceof AudioVisual)) {
                continue;
            }
            // for every segment in the current multimedia content track
            Video video = (Video) type;
            Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition().segments();
            while (vsegments.hasNext()) {
                VideoSegment segment = vsegments.next();
                StringBuffer segmentText = new StringBuffer();
                StringBuffer hintField = new StringBuffer();
                // Collect the video text elements to a segment text
                SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition();
                if (spt != null) {
                    for (VideoText videoText : spt.getVideoText()) {
                        if (segmentText.length() > 0) {
                            segmentText.append(" ");
                        }
                        segmentText.append(videoText.getText().getText());
                    // TODO: Add hint on bounding box
                    }
                }
                // Add keyword annotations
                Iterator<TextAnnotation> textAnnotations = segment.textAnnotations();
                while (textAnnotations.hasNext()) {
                    TextAnnotation textAnnotation = textAnnotations.next();
                    Iterator<?> kwIter = textAnnotation.keywordAnnotations();
                    while (kwIter.hasNext()) {
                        KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next();
                        if (segmentText.length() > 0) {
                            segmentText.append(" ");
                        }
                        segmentText.append(keywordAnnotation.getKeyword());
                    }
                }
                // Add free text annotations
                Iterator<TextAnnotation> freeIter = segment.textAnnotations();
                if (freeIter.hasNext()) {
                    Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations();
                    while (freeTextIter.hasNext()) {
                        FreeTextAnnotation freeTextAnnotation = freeTextIter.next();
                        if (segmentText.length() > 0) {
                            segmentText.append(" ");
                        }
                        segmentText.append(freeTextAnnotation.getText());
                    }
                }
                // add segment text to solr document
                Schema.setSegmentText(doc, new DField<String>(segmentText.toString(), Integer.toString(segmentCount)));
                // get the segments time properties
                MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint();
                MediaDuration duration = segment.getMediaTime().getMediaDuration();
                // TODO: define a class with hint field constants
                hintField.append("time=" + timepoint.getTimeInMilliseconds() + "\n");
                hintField.append("duration=" + duration.getDurationInMilliseconds() + "\n");
                // Look for preview images. Their characteristics are that they are
                // attached as attachments with a flavor of preview/<something>.
                String time = timepoint.toString();
                for (Attachment slide : mediaPackage.getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) {
                    MediaPackageReference ref = slide.getReference();
                    if (ref != null && time.equals(ref.getProperty("time"))) {
                        hintField.append("preview");
                        hintField.append(".");
                        hintField.append(ref.getIdentifier());
                        hintField.append("=");
                        hintField.append(slide.getURI().toString());
                        hintField.append("\n");
                    }
                }
                logger.trace("Adding segment: {}", timepoint);
                Schema.setSegmentHint(doc, new DField<>(hintField.toString(), Integer.toString(segmentCount)));
                // increase segment counter
                segmentCount++;
            }
        }
    }
    // Put the most important keywords into a special solr field
    if (sortedAnnotations != null) {
        Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString());
    }
}
Also used : Attachment(org.opencastproject.mediapackage.Attachment) VideoText(org.opencastproject.metadata.mpeg7.VideoText) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) Comparator(java.util.Comparator) Iterator(java.util.Iterator) KeywordAnnotation(org.opencastproject.metadata.mpeg7.KeywordAnnotation) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) AudioVisual(org.opencastproject.metadata.mpeg7.AudioVisual) MultimediaContentType(org.opencastproject.metadata.mpeg7.MultimediaContentType) SpatioTemporalDecomposition(org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) MediaPackageReference(org.opencastproject.mediapackage.MediaPackageReference) VideoSegment(org.opencastproject.metadata.mpeg7.VideoSegment) Video(org.opencastproject.metadata.mpeg7.Video) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) MediaDuration(org.opencastproject.metadata.mpeg7.MediaDuration) MultimediaContent(org.opencastproject.metadata.mpeg7.MultimediaContent)

Example 13 with VideoSegment

use of com.google.cloud.videointelligence.v1p2beta1.VideoSegment in project opencast by opencast.

the class TextAnalyzerServiceImpl method extract.

/**
 * Starts text extraction on the image and returns a receipt containing the final result in the form of an
 * Mpeg7Catalog.
 *
 * @param image
 *          the element to analyze
 * @param block
 *          <code>true</code> to make this operation synchronous
 * @return a receipt containing the resulting mpeg-7 catalog
 * @throws TextAnalyzerException
 */
private Catalog extract(Job job, Attachment image) throws TextAnalyzerException, MediaPackageException {
    final Attachment attachment = image;
    final URI imageUrl = attachment.getURI();
    File imageFile = null;
    try {
        Mpeg7CatalogImpl mpeg7 = Mpeg7CatalogImpl.newInstance();
        logger.info("Starting text extraction from {}", imageUrl);
        try {
            imageFile = workspace.get(imageUrl);
        } catch (NotFoundException e) {
            throw new TextAnalyzerException("Image " + imageUrl + " not found in workspace", e);
        } catch (IOException e) {
            throw new TextAnalyzerException("Unable to access " + imageUrl + " in workspace", e);
        }
        VideoText[] videoTexts = analyze(imageFile, image.getIdentifier());
        // Create a temporal decomposition
        MediaTime mediaTime = new MediaTimeImpl(0, 0);
        Video avContent = mpeg7.addVideoContent(image.getIdentifier(), mediaTime, null);
        TemporalDecomposition<VideoSegment> temporalDecomposition = (TemporalDecomposition<VideoSegment>) avContent.getTemporalDecomposition();
        // Add a segment
        VideoSegment videoSegment = temporalDecomposition.createSegment("segment-0");
        videoSegment.setMediaTime(mediaTime);
        // Add the video text to the spacio temporal decomposition of the segment
        SpatioTemporalDecomposition spatioTemporalDecomposition = videoSegment.createSpatioTemporalDecomposition(true, false);
        for (VideoText videoText : videoTexts) {
            spatioTemporalDecomposition.addVideoText(videoText);
        }
        logger.info("Text extraction of {} finished, {} lines found", attachment.getURI(), videoTexts.length);
        URI uri;
        InputStream in;
        try {
            in = mpeg7CatalogService.serialize(mpeg7);
        } catch (IOException e) {
            throw new TextAnalyzerException("Error serializing mpeg7", e);
        }
        try {
            uri = workspace.putInCollection(COLLECTION_ID, job.getId() + ".xml", in);
        } catch (IOException e) {
            throw new TextAnalyzerException("Unable to put mpeg7 into the workspace", e);
        }
        Catalog catalog = (Catalog) MediaPackageElementBuilderFactory.newInstance().newElementBuilder().newElement(Catalog.TYPE, MediaPackageElements.TEXTS);
        catalog.setURI(uri);
        logger.debug("Created MPEG7 catalog for {}", imageUrl);
        return catalog;
    } catch (Exception e) {
        logger.warn("Error extracting text from " + imageUrl, e);
        if (e instanceof TextAnalyzerException) {
            throw (TextAnalyzerException) e;
        } else {
            throw new TextAnalyzerException(e);
        }
    } finally {
        try {
            workspace.delete(imageUrl);
        } catch (Exception e) {
            logger.warn("Unable to delete temporary text analysis image {}: {}", imageUrl, e);
        }
    }
}
Also used : InputStream(java.io.InputStream) NotFoundException(org.opencastproject.util.NotFoundException) Attachment(org.opencastproject.mediapackage.Attachment) IOException(java.io.IOException) URI(java.net.URI) VideoText(org.opencastproject.metadata.mpeg7.VideoText) Catalog(org.opencastproject.mediapackage.Catalog) TextExtractorException(org.opencastproject.textextractor.api.TextExtractorException) ConfigurationException(org.osgi.service.cm.ConfigurationException) ServiceRegistryException(org.opencastproject.serviceregistry.api.ServiceRegistryException) TextAnalyzerException(org.opencastproject.textanalyzer.api.TextAnalyzerException) MediaPackageException(org.opencastproject.mediapackage.MediaPackageException) NotFoundException(org.opencastproject.util.NotFoundException) IOException(java.io.IOException) TextAnalyzerException(org.opencastproject.textanalyzer.api.TextAnalyzerException) VideoSegment(org.opencastproject.metadata.mpeg7.VideoSegment) MediaTimeImpl(org.opencastproject.metadata.mpeg7.MediaTimeImpl) Video(org.opencastproject.metadata.mpeg7.Video) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) Mpeg7CatalogImpl(org.opencastproject.metadata.mpeg7.Mpeg7CatalogImpl) TemporalDecomposition(org.opencastproject.metadata.mpeg7.TemporalDecomposition) SpatioTemporalDecomposition(org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition) File(java.io.File) SpatioTemporalDecomposition(org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition)

Example 14 with VideoSegment

use of com.google.cloud.videointelligence.v1p2beta1.VideoSegment in project java-docs-samples by GoogleCloudPlatform.

the class Detect method analyzeShots.

/**
 * Performs shot analysis on the video at the provided Cloud Storage path.
 *
 * @param gcsUri the path to the video file to analyze.
 */
public static void analyzeShots(String gcsUri) throws Exception {
    // Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient
    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
        // Provide path to file hosted on GCS as "gs://bucket-name/..."
        AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder().setInputUri(gcsUri).addFeatures(Feature.SHOT_CHANGE_DETECTION).build();
        // Create an operation that will contain the response when the operation completes.
        OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response = client.annotateVideoAsync(request);
        System.out.println("Waiting for operation to complete...");
        // Print detected shot changes and their location ranges in the analyzed video.
        for (VideoAnnotationResults result : response.get().getAnnotationResultsList()) {
            if (result.getShotAnnotationsCount() > 0) {
                System.out.println("Shots: ");
                for (VideoSegment segment : result.getShotAnnotationsList()) {
                    double startTime = segment.getStartTimeOffset().getSeconds() + segment.getStartTimeOffset().getNanos() / 1e9;
                    double endTime = segment.getEndTimeOffset().getSeconds() + segment.getEndTimeOffset().getNanos() / 1e9;
                    System.out.printf("Location: %.3f:%.3f\n", startTime, endTime);
                }
            } else {
                System.out.println("No shot changes detected in " + gcsUri);
            }
        }
    }
// [END detect_shots]
}
Also used : VideoIntelligenceServiceClient(com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient) AnnotateVideoProgress(com.google.cloud.videointelligence.v1.AnnotateVideoProgress) VideoSegment(com.google.cloud.videointelligence.v1.VideoSegment) AnnotateVideoRequest(com.google.cloud.videointelligence.v1.AnnotateVideoRequest) VideoAnnotationResults(com.google.cloud.videointelligence.v1.VideoAnnotationResults) AnnotateVideoResponse(com.google.cloud.videointelligence.v1.AnnotateVideoResponse)

Example 15 with VideoSegment

use of com.google.cloud.videointelligence.v1p2beta1.VideoSegment in project opencast by opencast.

the class TextAnalysisWorkflowOperationHandler method extractVideoText.

/**
 * Runs the text analysis service on each of the video segments found.
 *
 * @param mediaPackage
 *          the original mediapackage
 * @param operation
 *          the workflow operation
 * @throws ExecutionException
 * @throws InterruptedException
 * @throws NotFoundException
 * @throws WorkflowOperationException
 */
protected WorkflowOperationResult extractVideoText(final MediaPackage mediaPackage, WorkflowOperationInstance operation) throws EncoderException, InterruptedException, ExecutionException, IOException, NotFoundException, MediaPackageException, TextAnalyzerException, WorkflowOperationException, ServiceRegistryException {
    long totalTimeInQueue = 0;
    List<String> sourceTagSet = asList(operation.getConfiguration("source-tags"));
    List<String> targetTagSet = asList(operation.getConfiguration("target-tags"));
    // Select the catalogs according to the tags
    Map<Catalog, Mpeg7Catalog> catalogs = loadSegmentCatalogs(mediaPackage, operation);
    // Was there at least one matching catalog
    if (catalogs.size() == 0) {
        logger.debug("Mediapackage {} has no suitable mpeg-7 catalogs based on tags {} to to run text analysis", mediaPackage, sourceTagSet);
        return createResult(mediaPackage, Action.CONTINUE);
    }
    // Loop over all existing segment catalogs
    for (Entry<Catalog, Mpeg7Catalog> mapEntry : catalogs.entrySet()) {
        Map<VideoSegment, Job> jobs = new HashMap<VideoSegment, Job>();
        List<Attachment> images = new LinkedList<Attachment>();
        Catalog segmentCatalog = mapEntry.getKey();
        try {
            MediaPackageReference catalogRef = segmentCatalog.getReference();
            // Make sure we can figure out the source track
            if (catalogRef == null) {
                logger.info("Skipping catalog {} since we can't determine the source track", segmentCatalog);
            } else if (mediaPackage.getElementByReference(catalogRef) == null) {
                logger.info("Skipping catalog {} since we can't determine the source track", segmentCatalog);
            } else if (!(mediaPackage.getElementByReference(catalogRef) instanceof Track)) {
                logger.info("Skipping catalog {} since it's source was not a track", segmentCatalog);
            }
            logger.info("Analyzing mpeg-7 segments catalog {} for text", segmentCatalog);
            // Create a copy that will contain the segments enriched with the video text elements
            Mpeg7Catalog textCatalog = mapEntry.getValue().clone();
            Track sourceTrack = mediaPackage.getTrack(catalogRef.getIdentifier());
            // Load the temporal decomposition (segments)
            Video videoContent = textCatalog.videoContent().next();
            TemporalDecomposition<? extends Segment> decomposition = videoContent.getTemporalDecomposition();
            Iterator<? extends Segment> segmentIterator = decomposition.segments();
            // For every segment, try to find the still image and run text analysis on it
            List<VideoSegment> videoSegments = new LinkedList<VideoSegment>();
            while (segmentIterator.hasNext()) {
                Segment segment = segmentIterator.next();
                if ((segment instanceof VideoSegment)) {
                    videoSegments.add((VideoSegment) segment);
                }
            }
            // argument array for image extraction
            double[] times = new double[videoSegments.size()];
            for (int i = 0; i < videoSegments.size(); i++) {
                VideoSegment videoSegment = videoSegments.get(i);
                MediaTimePoint segmentTimePoint = videoSegment.getMediaTime().getMediaTimePoint();
                MediaDuration segmentDuration = videoSegment.getMediaTime().getMediaDuration();
                // Choose a time
                MediaPackageReference reference = null;
                if (catalogRef == null) {
                    reference = new MediaPackageReferenceImpl();
                } else {
                    reference = new MediaPackageReferenceImpl(catalogRef.getType(), catalogRef.getIdentifier());
                }
                reference.setProperty("time", segmentTimePoint.toString());
                // Have the time for ocr image created. To circumvent problems with slowly building slides, we take the image
                // that is
                // almost at the end of the segment, it should contain the most content and is stable as well.
                long startTimeSeconds = segmentTimePoint.getTimeInMilliseconds() / 1000;
                long durationSeconds = segmentDuration.getDurationInMilliseconds() / 1000;
                times[i] = Math.max(startTimeSeconds + durationSeconds - stabilityThreshold + 1, 0);
            }
            // Have the ocr image(s) created.
            Job imageJob = composer.image(sourceTrack, IMAGE_EXTRACTION_PROFILE, times);
            if (!waitForStatus(imageJob).isSuccess()) {
                throw new WorkflowOperationException("Extracting scene images from " + sourceTrack + " failed");
            }
            if (imageJob.getPayload() == null) {
                throw new WorkflowOperationException("The payload of extracting images job from " + sourceTrack + " was null");
            }
            totalTimeInQueue += imageJob.getQueueTime();
            for (MediaPackageElement imageMpe : MediaPackageElementParser.getArrayFromXml(imageJob.getPayload())) {
                Attachment image = (Attachment) imageMpe;
                images.add(image);
            }
            if (images.isEmpty() || images.size() != times.length) {
                throw new WorkflowOperationException("There are no images produced for " + sourceTrack + " or the images count isn't equal the count of the video segments.");
            }
            // Run text extraction on each of the images
            Iterator<VideoSegment> it = videoSegments.iterator();
            for (MediaPackageElement element : images) {
                Attachment image = (Attachment) element;
                VideoSegment videoSegment = it.next();
                jobs.put(videoSegment, analysisService.extract(image));
            }
            // Wait for all jobs to be finished
            if (!waitForStatus(jobs.values().toArray(new Job[jobs.size()])).isSuccess()) {
                throw new WorkflowOperationException("Text extraction failed on images from " + sourceTrack);
            }
            // Process the text extraction results
            for (Map.Entry<VideoSegment, Job> entry : jobs.entrySet()) {
                Job job = serviceRegistry.getJob(entry.getValue().getId());
                totalTimeInQueue += job.getQueueTime();
                VideoSegment videoSegment = entry.getKey();
                MediaDuration segmentDuration = videoSegment.getMediaTime().getMediaDuration();
                Catalog catalog = (Catalog) MediaPackageElementParser.getFromXml(job.getPayload());
                if (catalog == null) {
                    logger.warn("Text analysis did not return a valid mpeg7 for segment {}", videoSegment);
                    continue;
                }
                Mpeg7Catalog videoTextCatalog = loadMpeg7Catalog(catalog);
                if (videoTextCatalog == null) {
                    throw new IllegalStateException("Text analysis service did not return a valid mpeg7");
                }
                // Add the spatiotemporal decompositions from the new catalog to the existing video segments
                Iterator<Video> videoTextContents = videoTextCatalog.videoContent();
                if (videoTextContents == null || !videoTextContents.hasNext()) {
                    logger.debug("Text analysis was not able to extract any text from {}", job.getArguments().get(0));
                    break;
                }
                try {
                    Video textVideoContent = videoTextContents.next();
                    VideoSegment textVideoSegment = (VideoSegment) textVideoContent.getTemporalDecomposition().segments().next();
                    VideoText[] videoTexts = textVideoSegment.getSpatioTemporalDecomposition().getVideoText();
                    SpatioTemporalDecomposition std = videoSegment.createSpatioTemporalDecomposition(true, false);
                    for (VideoText videoText : videoTexts) {
                        MediaTime mediaTime = new MediaTimeImpl(new MediaRelTimePointImpl(0), segmentDuration);
                        SpatioTemporalLocator locator = new SpatioTemporalLocatorImpl(mediaTime);
                        videoText.setSpatioTemporalLocator(locator);
                        std.addVideoText(videoText);
                    }
                } catch (Exception e) {
                    logger.warn("The mpeg-7 structure returned by the text analyzer is not what is expected", e);
                    continue;
                }
            }
            // Put the catalog into the workspace and add it to the media package
            MediaPackageElementBuilder builder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder();
            Catalog catalog = (Catalog) builder.newElement(MediaPackageElement.Type.Catalog, MediaPackageElements.TEXTS);
            catalog.setIdentifier(null);
            catalog.setReference(segmentCatalog.getReference());
            // the catalog now has an ID, so we can store the file properly
            mediaPackage.add(catalog);
            InputStream in = mpeg7CatalogService.serialize(textCatalog);
            String filename = "slidetext.xml";
            URI workspaceURI = workspace.put(mediaPackage.getIdentifier().toString(), catalog.getIdentifier(), filename, in);
            catalog.setURI(workspaceURI);
            // Since we've enriched and stored the mpeg7 catalog, remove the original
            try {
                mediaPackage.remove(segmentCatalog);
                workspace.delete(segmentCatalog.getURI());
            } catch (Exception e) {
                logger.warn("Unable to delete segment catalog {}: {}", segmentCatalog.getURI(), e);
            }
            // Add flavor and target tags
            catalog.setFlavor(MediaPackageElements.TEXTS);
            for (String tag : targetTagSet) {
                catalog.addTag(tag);
            }
        } finally {
            // Remove images that were created for text extraction
            logger.debug("Removing temporary images");
            for (Attachment image : images) {
                try {
                    workspace.delete(image.getURI());
                } catch (Exception e) {
                    logger.warn("Unable to delete temporary image {}: {}", image.getURI(), e);
                }
            }
            // Remove the temporary text
            for (Job j : jobs.values()) {
                Catalog catalog = null;
                try {
                    Job job = serviceRegistry.getJob(j.getId());
                    if (!Job.Status.FINISHED.equals(job.getStatus())) {
                        continue;
                    }
                    catalog = (Catalog) MediaPackageElementParser.getFromXml(job.getPayload());
                    if (catalog != null) {
                        workspace.delete(catalog.getURI());
                    }
                } catch (Exception e) {
                    if (catalog != null) {
                        logger.warn("Unable to delete temporary text file {}: {}", catalog.getURI(), e);
                    } else {
                        logger.warn("Unable to parse textextraction payload of job {}", j.getId());
                    }
                }
            }
        }
    }
    logger.debug("Text analysis completed");
    return createResult(mediaPackage, Action.CONTINUE, totalTimeInQueue);
}
Also used : HashMap(java.util.HashMap) Attachment(org.opencastproject.mediapackage.Attachment) VideoText(org.opencastproject.metadata.mpeg7.VideoText) URI(java.net.URI) Segment(org.opencastproject.metadata.mpeg7.Segment) VideoSegment(org.opencastproject.metadata.mpeg7.VideoSegment) MediaPackageElementBuilder(org.opencastproject.mediapackage.MediaPackageElementBuilder) MediaTimeImpl(org.opencastproject.metadata.mpeg7.MediaTimeImpl) MediaRelTimePointImpl(org.opencastproject.metadata.mpeg7.MediaRelTimePointImpl) MediaPackageElement(org.opencastproject.mediapackage.MediaPackageElement) WorkflowOperationException(org.opencastproject.workflow.api.WorkflowOperationException) Job(org.opencastproject.job.api.Job) SpatioTemporalDecomposition(org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) Catalog(org.opencastproject.mediapackage.Catalog) Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) LinkedList(java.util.LinkedList) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) ConfigurationException(org.osgi.service.cm.ConfigurationException) ServiceRegistryException(org.opencastproject.serviceregistry.api.ServiceRegistryException) WorkflowOperationException(org.opencastproject.workflow.api.WorkflowOperationException) TextAnalyzerException(org.opencastproject.textanalyzer.api.TextAnalyzerException) MediaPackageException(org.opencastproject.mediapackage.MediaPackageException) NotFoundException(org.opencastproject.util.NotFoundException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) EncoderException(org.opencastproject.composer.api.EncoderException) Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) MediaPackageReference(org.opencastproject.mediapackage.MediaPackageReference) VideoSegment(org.opencastproject.metadata.mpeg7.VideoSegment) SpatioTemporalLocator(org.opencastproject.metadata.mpeg7.SpatioTemporalLocator) Video(org.opencastproject.metadata.mpeg7.Video) SpatioTemporalLocatorImpl(org.opencastproject.metadata.mpeg7.SpatioTemporalLocatorImpl) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) MediaDuration(org.opencastproject.metadata.mpeg7.MediaDuration) MediaPackageReferenceImpl(org.opencastproject.mediapackage.MediaPackageReferenceImpl) Map(java.util.Map) HashMap(java.util.HashMap) Track(org.opencastproject.mediapackage.Track)

Aggregations

AnnotateVideoProgress (com.google.cloud.videointelligence.v1.AnnotateVideoProgress)19 AnnotateVideoRequest (com.google.cloud.videointelligence.v1.AnnotateVideoRequest)19 AnnotateVideoResponse (com.google.cloud.videointelligence.v1.AnnotateVideoResponse)19 VideoAnnotationResults (com.google.cloud.videointelligence.v1.VideoAnnotationResults)19 VideoIntelligenceServiceClient (com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient)19 VideoSegment (com.google.cloud.videointelligence.v1.VideoSegment)19 Duration (com.google.protobuf.Duration)18 Path (java.nio.file.Path)12 DetectedAttribute (com.google.cloud.videointelligence.v1.DetectedAttribute)8 Entity (com.google.cloud.videointelligence.v1.Entity)8 NormalizedBoundingBox (com.google.cloud.videointelligence.v1.NormalizedBoundingBox)8 TimestampedObject (com.google.cloud.videointelligence.v1.TimestampedObject)8 Track (com.google.cloud.videointelligence.v1.Track)8 LogoRecognitionAnnotation (com.google.cloud.videointelligence.v1.LogoRecognitionAnnotation)4 NormalizedVertex (com.google.cloud.videointelligence.v1.NormalizedVertex)4 ObjectTrackingAnnotation (com.google.cloud.videointelligence.v1.ObjectTrackingAnnotation)4 ObjectTrackingFrame (com.google.cloud.videointelligence.v1.ObjectTrackingFrame)4 TextAnnotation (com.google.cloud.videointelligence.v1.TextAnnotation)4 TextFrame (com.google.cloud.videointelligence.v1.TextFrame)4 TextSegment (com.google.cloud.videointelligence.v1.TextSegment)4