use of com.google.cloud.videointelligence.v1p3beta1.VideoSegment in project java-video-intelligence by googleapis.
the class DetectPersonGcs method detectPersonGcs.
// Detects people in a video stored in Google Cloud Storage using
// the Cloud Video Intelligence API.
public static void detectPersonGcs(String gcsUri) throws Exception {
  try (VideoIntelligenceServiceClient videoIntelligenceServiceClient =
      VideoIntelligenceServiceClient.create()) {
    // Configure person detection: bounding boxes, pose landmarks and attributes.
    PersonDetectionConfig personDetectionConfig = PersonDetectionConfig.newBuilder()
        .setIncludeBoundingBoxes(true)
        .setIncludePoseLandmarks(true)
        .setIncludeAttributes(true)
        .build();
    VideoContext videoContext =
        VideoContext.newBuilder().setPersonDetectionConfig(personDetectionConfig).build();
    AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
        .setInputUri(gcsUri)
        .addFeatures(Feature.PERSON_DETECTION)
        .setVideoContext(videoContext)
        .build();
    // Detects people in a video
    OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
        videoIntelligenceServiceClient.annotateVideoAsync(request);
    System.out.println("Waiting for operation to complete...");
    AnnotateVideoResponse response = future.get();
    // Get the first response, since we sent only one video.
    VideoAnnotationResults annotationResult = response.getAnnotationResultsList().get(0);
    // Annotations for the people detected, tracked and recognized in the video.
    for (PersonDetectionAnnotation personDetectionAnnotation
        : annotationResult.getPersonDetectionAnnotationsList()) {
      System.out.print("Person detected:\n");
      for (Track track : personDetectionAnnotation.getTracksList()) {
        VideoSegment segment = track.getSegment();
        System.out.printf("\tStart: %d.%.0fs\n",
            segment.getStartTimeOffset().getSeconds(), segment.getStartTimeOffset().getNanos() / 1e6);
        System.out.printf("\tEnd: %d.%.0fs\n",
            segment.getEndTimeOffset().getSeconds(), segment.getEndTimeOffset().getNanos() / 1e6);
        // Each segment includes timestamped objects that include characteristics,
        // e.g. clothes or posture of the person detected. Grab the first one.
        TimestampedObject firstTimestampedObject = track.getTimestampedObjects(0);
        // Attributes include pieces of clothing, poses, or hair color
        // of the person detected.
        for (DetectedAttribute attribute : firstTimestampedObject.getAttributesList()) {
          System.out.printf("\tAttribute: %s; Value: %s\n", attribute.getName(), attribute.getValue());
        }
        // Landmarks in person detection include body parts.
        for (DetectedLandmark attribute : firstTimestampedObject.getLandmarksList()) {
          System.out.printf("\tLandmark: %s; Vertex: %f, %f\n",
              attribute.getName(), attribute.getPoint().getX(), attribute.getPoint().getY());
        }
      }
    }
  }
}
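For reference, a minimal sketch of how this sample might be invoked, assuming it lives in the same class as detectPersonGcs; the gs:// URI is a placeholder, not a path from the sample.

// Hypothetical entry point; replace the URI with a real video in your bucket.
public static void main(String[] args) throws Exception {
  String gcsUri = "gs://your-bucket/your-video.mp4"; // placeholder
  detectPersonGcs(gcsUri);
}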
use of com.google.cloud.videointelligence.v1p3beta1.VideoSegment in project opencast by opencast.
the class SolrIndexManager method addMpeg7Metadata.
/**
 * Adds the mpeg-7 catalog data to the solr document.
 *
 * @param doc
 *          the input document to the solr index
 * @param mediaPackage
 *          the media package the catalog belongs to
 * @param mpeg7
 *          the mpeg-7 catalog
 */
@SuppressWarnings("unchecked")
static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) {
  // Check for multimedia content
  if (!mpeg7.multimediaContent().hasNext()) {
    logger.warn("Mpeg-7 doesn't contain multimedia content");
    return;
  }
  // Get the content duration by looking at the first content track. This
  // of course assumes that all tracks are equally long.
  MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next();
  MultimediaContentType mct = mc.elements().next();
  MediaTime mediaTime = mct.getMediaTime();
  Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds());
  // Check if the keywords have been filled by (manually) added dublin
  // core data. If not, look for the most relevant fields in mpeg-7.
  SortedSet<TextAnnotation> sortedAnnotations = null;
  if (!"".equals(Schema.getOcKeywords(doc))) {
    sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() {
      @Override
      public int compare(TextAnnotation a1, TextAnnotation a2) {
        double v1 = RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence();
        double v2 = RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence();
        if (v1 > v2) {
          return -1;
        } else if (v1 < v2) {
          return 1;
        }
        return 0;
      }
    });
  }
  // Iterate over the tracks and extract keywords and hints
  Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent();
  int segmentCount = 0;
  while (mmIter.hasNext()) {
    MultimediaContent<?> multimediaContent = mmIter.next();
    // We need to process visual segments first, due to the way they are handled in the ui.
    for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext();) {
      MultimediaContentType type = (MultimediaContentType) iterator.next();
      if (!(type instanceof Video) && !(type instanceof AudioVisual)) {
        continue;
      }
      // for every segment in the current multimedia content track
      Video video = (Video) type;
      Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition().segments();
      while (vsegments.hasNext()) {
        VideoSegment segment = vsegments.next();
        StringBuffer segmentText = new StringBuffer();
        StringBuffer hintField = new StringBuffer();
        // Collect the video text elements to a segment text
        SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition();
        if (spt != null) {
          for (VideoText videoText : spt.getVideoText()) {
            if (segmentText.length() > 0) {
              segmentText.append(" ");
            }
            segmentText.append(videoText.getText().getText());
            // TODO: Add hint on bounding box
          }
        }
        // Add keyword annotations
        Iterator<TextAnnotation> textAnnotations = segment.textAnnotations();
        while (textAnnotations.hasNext()) {
          TextAnnotation textAnnotation = textAnnotations.next();
          Iterator<?> kwIter = textAnnotation.keywordAnnotations();
          while (kwIter.hasNext()) {
            KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next();
            if (segmentText.length() > 0) {
              segmentText.append(" ");
            }
            segmentText.append(keywordAnnotation.getKeyword());
          }
        }
        // Add free text annotations
        Iterator<TextAnnotation> freeIter = segment.textAnnotations();
        if (freeIter.hasNext()) {
          Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations();
          while (freeTextIter.hasNext()) {
            FreeTextAnnotation freeTextAnnotation = freeTextIter.next();
            if (segmentText.length() > 0) {
              segmentText.append(" ");
            }
            segmentText.append(freeTextAnnotation.getText());
          }
        }
        // add the segment text to the solr document
        Schema.setSegmentText(doc, new DField<String>(segmentText.toString(), Integer.toString(segmentCount)));
        // get the segment's time properties
        MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint();
        MediaDuration duration = segment.getMediaTime().getMediaDuration();
        // TODO: define a class with hint field constants
        hintField.append("time=" + timepoint.getTimeInMilliseconds() + "\n");
        hintField.append("duration=" + duration.getDurationInMilliseconds() + "\n");
        // Look for preview images. Their characteristic is that they are
        // attached as attachments with a flavor of preview/<something>.
        String time = timepoint.toString();
        for (Attachment slide : mediaPackage.getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) {
          MediaPackageReference ref = slide.getReference();
          if (ref != null && time.equals(ref.getProperty("time"))) {
            hintField.append("preview");
            hintField.append(".");
            hintField.append(ref.getIdentifier());
            hintField.append("=");
            hintField.append(slide.getURI().toString());
            hintField.append("\n");
          }
        }
        logger.trace("Adding segment: {}", timepoint);
        Schema.setSegmentHint(doc, new DField<>(hintField.toString(), Integer.toString(segmentCount)));
        // increase the segment counter
        segmentCount++;
      }
    }
  }
  // Put the most important keywords into a special solr field
  if (sortedAnnotations != null) {
    Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString());
  }
}
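The comparator above ranks annotations by a weighted sum, RELEVANCE_BOOST * relevance + confidence, highest first. A minimal, self-contained sketch of that ranking rule; the Annotation record and the boost value here are hypothetical stand-ins for Opencast's TextAnnotation and RELEVANCE_BOOST:

import java.util.Comparator;
import java.util.SortedSet;
import java.util.TreeSet;

class RankingSketch {
  // Hypothetical stand-in for Opencast's TextAnnotation.
  record Annotation(String keyword, double relevance, double confidence) { }

  static final double RELEVANCE_BOOST = 2.0; // assumed value, for illustration only

  public static void main(String[] args) {
    // Highest combined score first, mirroring the comparator in addMpeg7Metadata.
    SortedSet<Annotation> ranked = new TreeSet<>(
        Comparator.comparingDouble((Annotation a) -> RELEVANCE_BOOST * a.relevance() + a.confidence())
            .reversed());
    ranked.add(new Annotation("lecture", 0.9, 0.5));
    ranked.add(new Annotation("slide", 0.4, 0.9));
    ranked.forEach(a -> System.out.println(a.keyword()));
  }
}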
use of com.google.cloud.videointelligence.v1p3beta1.VideoSegment in project opencast by opencast.
the class TextAnalyzerServiceImpl method extract.
/**
 * Starts text extraction on the image and returns a receipt containing the final result in the form of an
 * Mpeg7Catalog.
 *
 * @param job
 *          the job this extraction belongs to
 * @param image
 *          the element to analyze
 * @return a receipt containing the resulting mpeg-7 catalog
 * @throws TextAnalyzerException
 *           if text extraction fails
 */
private Catalog extract(Job job, Attachment image) throws TextAnalyzerException, MediaPackageException {
  final Attachment attachment = image;
  final URI imageUrl = attachment.getURI();
  File imageFile = null;
  try {
    Mpeg7CatalogImpl mpeg7 = Mpeg7CatalogImpl.newInstance();
    logger.info("Starting text extraction from {}", imageUrl);
    try {
      imageFile = workspace.get(imageUrl);
    } catch (NotFoundException e) {
      throw new TextAnalyzerException("Image " + imageUrl + " not found in workspace", e);
    } catch (IOException e) {
      throw new TextAnalyzerException("Unable to access " + imageUrl + " in workspace", e);
    }
    VideoText[] videoTexts = analyze(imageFile, image.getIdentifier());
    // Create a temporal decomposition
    MediaTime mediaTime = new MediaTimeImpl(0, 0);
    Video avContent = mpeg7.addVideoContent(image.getIdentifier(), mediaTime, null);
    TemporalDecomposition<VideoSegment> temporalDecomposition =
        (TemporalDecomposition<VideoSegment>) avContent.getTemporalDecomposition();
    // Add a segment
    VideoSegment videoSegment = temporalDecomposition.createSegment("segment-0");
    videoSegment.setMediaTime(mediaTime);
    // Add the video text to the spatiotemporal decomposition of the segment
    SpatioTemporalDecomposition spatioTemporalDecomposition =
        videoSegment.createSpatioTemporalDecomposition(true, false);
    for (VideoText videoText : videoTexts) {
      spatioTemporalDecomposition.addVideoText(videoText);
    }
    logger.info("Text extraction of {} finished, {} lines found", attachment.getURI(), videoTexts.length);
    URI uri;
    InputStream in;
    try {
      in = mpeg7CatalogService.serialize(mpeg7);
    } catch (IOException e) {
      throw new TextAnalyzerException("Error serializing mpeg7", e);
    }
    try {
      uri = workspace.putInCollection(COLLECTION_ID, job.getId() + ".xml", in);
    } catch (IOException e) {
      throw new TextAnalyzerException("Unable to put mpeg7 into the workspace", e);
    }
    Catalog catalog = (Catalog) MediaPackageElementBuilderFactory.newInstance().newElementBuilder()
        .newElement(Catalog.TYPE, MediaPackageElements.TEXTS);
    catalog.setURI(uri);
    logger.debug("Created MPEG7 catalog for {}", imageUrl);
    return catalog;
  } catch (Exception e) {
    logger.warn("Error extracting text from " + imageUrl, e);
    if (e instanceof TextAnalyzerException) {
      throw (TextAnalyzerException) e;
    } else {
      throw new TextAnalyzerException(e);
    }
  } finally {
    try {
      workspace.delete(imageUrl);
    } catch (Exception e) {
      logger.warn("Unable to delete temporary text analysis image {}: {}", imageUrl, e);
    }
  }
}
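Condensed, the catalog assembly in this method follows a fixed sequence: create the catalog, add video content with a zero-length media time, create one segment, then attach the recognized text. The sketch below repeats only the calls that appear above to make that structure easier to see; the wildcard import is an assumption about the Opencast mpeg-7 package, and buildCatalogSketch is a hypothetical helper, not part of the project.

// Assumed package for the Opencast mpeg-7 classes used above.
import org.opencastproject.metadata.mpeg7.*;

static Mpeg7Catalog buildCatalogSketch(String elementId, VideoText[] videoTexts) {
  Mpeg7Catalog mpeg7 = Mpeg7CatalogImpl.newInstance();
  // A zero-length media time: the source is a single still image.
  MediaTime mediaTime = new MediaTimeImpl(0, 0);
  Video avContent = mpeg7.addVideoContent(elementId, mediaTime, null);
  TemporalDecomposition<VideoSegment> decomposition =
      (TemporalDecomposition<VideoSegment>) avContent.getTemporalDecomposition();
  // One segment holds all text found on the image.
  VideoSegment segment = decomposition.createSegment("segment-0");
  segment.setMediaTime(mediaTime);
  SpatioTemporalDecomposition std = segment.createSpatioTemporalDecomposition(true, false);
  for (VideoText videoText : videoTexts) {
    std.addVideoText(videoText);
  }
  return mpeg7;
}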
use of com.google.cloud.videointelligence.v1p3beta1.VideoSegment in project java-docs-samples by GoogleCloudPlatform.
the class Detect method analyzeShots.
/**
* Performs shot analysis on the video at the provided Cloud Storage path.
*
* @param gcsUri the path to the video file to analyze.
*/
public static void analyzeShots(String gcsUri) throws Exception {
  // Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient
  try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
    // Provide path to file hosted on GCS as "gs://bucket-name/..."
    AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
        .setInputUri(gcsUri)
        .addFeatures(Feature.SHOT_CHANGE_DETECTION)
        .build();
    // Create an operation that will contain the response when the operation completes.
    OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
        client.annotateVideoAsync(request);
    System.out.println("Waiting for operation to complete...");
    // Print detected shot changes and their location ranges in the analyzed video.
    for (VideoAnnotationResults result : response.get().getAnnotationResultsList()) {
      if (result.getShotAnnotationsCount() > 0) {
        System.out.println("Shots: ");
        for (VideoSegment segment : result.getShotAnnotationsList()) {
          double startTime = segment.getStartTimeOffset().getSeconds()
              + segment.getStartTimeOffset().getNanos() / 1e9;
          double endTime = segment.getEndTimeOffset().getSeconds()
              + segment.getEndTimeOffset().getNanos() / 1e9;
          System.out.printf("Location: %.3f:%.3f\n", startTime, endTime);
        }
      } else {
        System.out.println("No shot changes detected in " + gcsUri);
      }
    }
  }
}
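The start/end arithmetic above converts a protobuf Duration offset into fractional seconds. If that conversion is needed in more than one place, it can be factored into a small helper; this sketch assumes only com.google.protobuf.Duration, and the helper name is illustrative:

import com.google.protobuf.Duration;

/** Converts a protobuf Duration offset into fractional seconds, e.g. 12.5. */
static double toSeconds(Duration offset) {
  return offset.getSeconds() + offset.getNanos() / 1e9;
}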
use of com.google.cloud.videointelligence.v1p3beta1.VideoSegment in project opencast by opencast.
the class TextAnalysisWorkflowOperationHandler method extractVideoText.
/**
* Runs the text analysis service on each of the video segments found.
*
* @param mediaPackage
* the original mediapackage
* @param operation
* the workflow operation
* @throws ExecutionException
* @throws InterruptedException
* @throws NotFoundException
* @throws WorkflowOperationException
*/
protected WorkflowOperationResult extractVideoText(final MediaPackage mediaPackage,
    WorkflowOperationInstance operation)
    throws EncoderException, InterruptedException, ExecutionException, IOException, NotFoundException,
    MediaPackageException, TextAnalyzerException, WorkflowOperationException, ServiceRegistryException {
  long totalTimeInQueue = 0;
  List<String> sourceTagSet = asList(operation.getConfiguration("source-tags"));
  List<String> targetTagSet = asList(operation.getConfiguration("target-tags"));
  // Select the catalogs according to the tags
  Map<Catalog, Mpeg7Catalog> catalogs = loadSegmentCatalogs(mediaPackage, operation);
  // Was there at least one matching catalog?
  if (catalogs.size() == 0) {
    logger.debug("Mediapackage {} has no suitable mpeg-7 catalogs based on tags {} to run text analysis",
        mediaPackage, sourceTagSet);
    return createResult(mediaPackage, Action.CONTINUE);
  }
  // Loop over all existing segment catalogs
  for (Entry<Catalog, Mpeg7Catalog> mapEntry : catalogs.entrySet()) {
    Map<VideoSegment, Job> jobs = new HashMap<VideoSegment, Job>();
    List<Attachment> images = new LinkedList<Attachment>();
    Catalog segmentCatalog = mapEntry.getKey();
    try {
      MediaPackageReference catalogRef = segmentCatalog.getReference();
      // Make sure we can figure out the source track
      if (catalogRef == null) {
        logger.info("Skipping catalog {} since we can't determine the source track", segmentCatalog);
        continue;
      } else if (mediaPackage.getElementByReference(catalogRef) == null) {
        logger.info("Skipping catalog {} since we can't determine the source track", segmentCatalog);
        continue;
      } else if (!(mediaPackage.getElementByReference(catalogRef) instanceof Track)) {
        logger.info("Skipping catalog {} since its source is not a track", segmentCatalog);
        continue;
      }
      logger.info("Analyzing mpeg-7 segments catalog {} for text", segmentCatalog);
      // Create a copy that will contain the segments enriched with the video text elements
      Mpeg7Catalog textCatalog = mapEntry.getValue().clone();
      Track sourceTrack = mediaPackage.getTrack(catalogRef.getIdentifier());
      // Load the temporal decomposition (segments)
      Video videoContent = textCatalog.videoContent().next();
      TemporalDecomposition<? extends Segment> decomposition = videoContent.getTemporalDecomposition();
      Iterator<? extends Segment> segmentIterator = decomposition.segments();
      // For every segment, try to find the still image and run text analysis on it
      List<VideoSegment> videoSegments = new LinkedList<VideoSegment>();
      while (segmentIterator.hasNext()) {
        Segment segment = segmentIterator.next();
        if (segment instanceof VideoSegment) {
          videoSegments.add((VideoSegment) segment);
        }
      }
      // argument array for image extraction
      double[] times = new double[videoSegments.size()];
      for (int i = 0; i < videoSegments.size(); i++) {
        VideoSegment videoSegment = videoSegments.get(i);
        MediaTimePoint segmentTimePoint = videoSegment.getMediaTime().getMediaTimePoint();
        MediaDuration segmentDuration = videoSegment.getMediaTime().getMediaDuration();
        // Choose a time
        MediaPackageReference reference = null;
        if (catalogRef == null) {
          reference = new MediaPackageReferenceImpl();
        } else {
          reference = new MediaPackageReferenceImpl(catalogRef.getType(), catalogRef.getIdentifier());
        }
        reference.setProperty("time", segmentTimePoint.toString());
        // Choose the time for the ocr image. To circumvent problems with slowly building slides,
        // take an image close to the end of the segment; it should contain the most content
        // and be stable as well.
        long startTimeSeconds = segmentTimePoint.getTimeInMilliseconds() / 1000;
        long durationSeconds = segmentDuration.getDurationInMilliseconds() / 1000;
        times[i] = Math.max(startTimeSeconds + durationSeconds - stabilityThreshold + 1, 0);
      }
      // Have the ocr image(s) created.
      Job imageJob = composer.image(sourceTrack, IMAGE_EXTRACTION_PROFILE, times);
      if (!waitForStatus(imageJob).isSuccess()) {
        throw new WorkflowOperationException("Extracting scene images from " + sourceTrack + " failed");
      }
      if (imageJob.getPayload() == null) {
        throw new WorkflowOperationException("The payload of extracting images job from " + sourceTrack + " was null");
      }
      totalTimeInQueue += imageJob.getQueueTime();
      for (MediaPackageElement imageMpe : MediaPackageElementParser.getArrayFromXml(imageJob.getPayload())) {
        Attachment image = (Attachment) imageMpe;
        images.add(image);
      }
      if (images.isEmpty() || images.size() != times.length) {
        throw new WorkflowOperationException("There are no images produced for " + sourceTrack
            + " or the number of images doesn't match the number of video segments.");
      }
      // Run text extraction on each of the images
      Iterator<VideoSegment> it = videoSegments.iterator();
      for (MediaPackageElement element : images) {
        Attachment image = (Attachment) element;
        VideoSegment videoSegment = it.next();
        jobs.put(videoSegment, analysisService.extract(image));
      }
      // Wait for all jobs to be finished
      if (!waitForStatus(jobs.values().toArray(new Job[jobs.size()])).isSuccess()) {
        throw new WorkflowOperationException("Text extraction failed on images from " + sourceTrack);
      }
      // Process the text extraction results
      for (Map.Entry<VideoSegment, Job> entry : jobs.entrySet()) {
        Job job = serviceRegistry.getJob(entry.getValue().getId());
        totalTimeInQueue += job.getQueueTime();
        VideoSegment videoSegment = entry.getKey();
        MediaDuration segmentDuration = videoSegment.getMediaTime().getMediaDuration();
        Catalog catalog = (Catalog) MediaPackageElementParser.getFromXml(job.getPayload());
        if (catalog == null) {
          logger.warn("Text analysis did not return a valid mpeg7 for segment {}", videoSegment);
          continue;
        }
        Mpeg7Catalog videoTextCatalog = loadMpeg7Catalog(catalog);
        if (videoTextCatalog == null) {
          throw new IllegalStateException("Text analysis service did not return a valid mpeg7");
        }
        // Add the spatiotemporal decompositions from the new catalog to the existing video segments
        Iterator<Video> videoTextContents = videoTextCatalog.videoContent();
        if (videoTextContents == null || !videoTextContents.hasNext()) {
          logger.debug("Text analysis was not able to extract any text from {}", job.getArguments().get(0));
          break;
        }
        try {
          Video textVideoContent = videoTextContents.next();
          VideoSegment textVideoSegment =
              (VideoSegment) textVideoContent.getTemporalDecomposition().segments().next();
          VideoText[] videoTexts = textVideoSegment.getSpatioTemporalDecomposition().getVideoText();
          SpatioTemporalDecomposition std = videoSegment.createSpatioTemporalDecomposition(true, false);
          for (VideoText videoText : videoTexts) {
            MediaTime mediaTime = new MediaTimeImpl(new MediaRelTimePointImpl(0), segmentDuration);
            SpatioTemporalLocator locator = new SpatioTemporalLocatorImpl(mediaTime);
            videoText.setSpatioTemporalLocator(locator);
            std.addVideoText(videoText);
          }
        } catch (Exception e) {
          logger.warn("The mpeg-7 structure returned by the text analyzer is not what is expected", e);
          continue;
        }
      }
      // Put the catalog into the workspace and add it to the media package
      MediaPackageElementBuilder builder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder();
      Catalog catalog = (Catalog) builder.newElement(MediaPackageElement.Type.Catalog, MediaPackageElements.TEXTS);
      catalog.setIdentifier(null);
      catalog.setReference(segmentCatalog.getReference());
      // adding the catalog to the media package assigns it an ID, so we can store the file properly
      mediaPackage.add(catalog);
      InputStream in = mpeg7CatalogService.serialize(textCatalog);
      String filename = "slidetext.xml";
      URI workspaceURI =
          workspace.put(mediaPackage.getIdentifier().toString(), catalog.getIdentifier(), filename, in);
      catalog.setURI(workspaceURI);
      // Since we've enriched and stored the mpeg7 catalog, remove the original
      try {
        mediaPackage.remove(segmentCatalog);
        workspace.delete(segmentCatalog.getURI());
      } catch (Exception e) {
        logger.warn("Unable to delete segment catalog {}: {}", segmentCatalog.getURI(), e);
      }
      // Add flavor and target tags
      catalog.setFlavor(MediaPackageElements.TEXTS);
      for (String tag : targetTagSet) {
        catalog.addTag(tag);
      }
    } finally {
      // Remove images that were created for text extraction
      logger.debug("Removing temporary images");
      for (Attachment image : images) {
        try {
          workspace.delete(image.getURI());
        } catch (Exception e) {
          logger.warn("Unable to delete temporary image {}: {}", image.getURI(), e);
        }
      }
      // Remove the temporary text
      for (Job j : jobs.values()) {
        Catalog catalog = null;
        try {
          Job job = serviceRegistry.getJob(j.getId());
          if (!Job.Status.FINISHED.equals(job.getStatus())) {
            continue;
          }
          catalog = (Catalog) MediaPackageElementParser.getFromXml(job.getPayload());
          if (catalog != null) {
            workspace.delete(catalog.getURI());
          }
        } catch (Exception e) {
          if (catalog != null) {
            logger.warn("Unable to delete temporary text file {}: {}", catalog.getURI(), e);
          } else {
            logger.warn("Unable to parse text extraction payload of job {}", j.getId());
          }
        }
      }
    }
  }
  logger.debug("Text analysis completed");
  return createResult(mediaPackage, Action.CONTINUE, totalTimeInQueue);
}
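The times[] computation above samples the OCR frame near the end of each segment while clamping at zero. A tiny, self-contained illustration of that arithmetic; the segment values and the stability threshold are hypothetical:

public class SamplingTimeSketch {
  public static void main(String[] args) {
    long stabilityThreshold = 5; // hypothetical value, in seconds
    // Segment starting at 60 s and lasting 30 s: sample at 60 + 30 - 5 + 1 = 86 s.
    System.out.println(sampleTime(60, 30, stabilityThreshold));
    // Very short segment near the start of the video: clamped to 0 s.
    System.out.println(sampleTime(0, 2, stabilityThreshold));
  }

  // Mirrors times[i] = Math.max(start + duration - stabilityThreshold + 1, 0) above.
  static double sampleTime(long startSeconds, long durationSeconds, long stabilityThreshold) {
    return Math.max(startSeconds + durationSeconds - stabilityThreshold + 1, 0);
  }
}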