Use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project opencast by opencast.
The class SolrIndexManager, method addMpeg7Metadata.
/**
 * Adds the MPEG-7 catalog data to the Solr document.
 *
 * @param doc
 *          the input document to the Solr index
 * @param mediaPackage
 *          the media package, used to look up attached segment preview images
 * @param mpeg7
 *          the MPEG-7 catalog
 */
@SuppressWarnings("unchecked")
static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) {
// Check for multimedia content
if (!mpeg7.multimediaContent().hasNext()) {
logger.warn("Mpeg-7 doesn't contain multimedia content");
return;
}
// Get the content duration by looking at the first content track. This
// of course assumes that all tracks are equally long.
MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next();
MultimediaContentType mct = mc.elements().next();
MediaTime mediaTime = mct.getMediaTime();
Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds());
    // Check if the keywords have been filled by (manually) added dublin
    // core data. If not, look for the most relevant fields in mpeg-7.
    SortedSet<TextAnnotation> sortedAnnotations = null;
    String existingKeywords = Schema.getOcKeywords(doc);
    if (existingKeywords == null || "".equals(existingKeywords)) {
      sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() {
        @Override
        public int compare(TextAnnotation a1, TextAnnotation a2) {
          // Order by descending combined relevance/confidence score
          double v1 = RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence();
          double v2 = RELEVANCE_BOOST * a2.getRelevance() + a2.getConfidence();
          return Double.compare(v2, v1);
        }
      });
}
// Iterate over the tracks and extract keywords and hints
Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent();
int segmentCount = 0;
while (mmIter.hasNext()) {
MultimediaContent<?> multimediaContent = mmIter.next();
      // We need to process visual segments first, due to the way they are handled in the UI.
for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext(); ) {
MultimediaContentType type = (MultimediaContentType) iterator.next();
if (!(type instanceof Video) && !(type instanceof AudioVisual)) {
continue;
}
        // for every segment in the current multimedia content track
        // (the cast assumes that AudioVisual extends Video in this MPEG-7 model)
        Video video = (Video) type;
Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition().segments();
while (vsegments.hasNext()) {
VideoSegment segment = vsegments.next();
StringBuffer segmentText = new StringBuffer();
StringBuffer hintField = new StringBuffer();
// Collect the video text elements to a segment text
SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition();
if (spt != null) {
for (VideoText videoText : spt.getVideoText()) {
if (segmentText.length() > 0) {
segmentText.append(" ");
}
segmentText.append(videoText.getText().getText());
// TODO: Add hint on bounding box
}
}
// Add keyword annotations
Iterator<TextAnnotation> textAnnotations = segment.textAnnotations();
while (textAnnotations.hasNext()) {
TextAnnotation textAnnotation = textAnnotations.next();
Iterator<?> kwIter = textAnnotation.keywordAnnotations();
while (kwIter.hasNext()) {
KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next();
if (segmentText.length() > 0) {
segmentText.append(" ");
}
segmentText.append(keywordAnnotation.getKeyword());
            }
            // Remember this annotation for the keyword ranking applied below; without
            // this, sortedAnnotations would stay empty and setOcKeywords would never
            // receive any keywords.
            if (sortedAnnotations != null) {
              sortedAnnotations.add(textAnnotation);
            }
          }
// Add free text annotations
Iterator<TextAnnotation> freeIter = segment.textAnnotations();
if (freeIter.hasNext()) {
Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations();
while (freeTextIter.hasNext()) {
FreeTextAnnotation freeTextAnnotation = freeTextIter.next();
if (segmentText.length() > 0) {
segmentText.append(" ");
}
segmentText.append(freeTextAnnotation.getText());
}
}
// add segment text to solr document
Schema.setSegmentText(doc, new DField<String>(segmentText.toString(), Integer.toString(segmentCount)));
// get the segments time properties
MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint();
MediaDuration duration = segment.getMediaTime().getMediaDuration();
// TODO: define a class with hint field constants
hintField.append("time=" + timepoint.getTimeInMilliseconds() + "\n");
hintField.append("duration=" + duration.getDurationInMilliseconds() + "\n");
// Look for preview images. Their characteristics are that they are
// attached as attachments with a flavor of preview/<something>.
String time = timepoint.toString();
for (Attachment slide : mediaPackage.getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) {
MediaPackageReference ref = slide.getReference();
if (ref != null && time.equals(ref.getProperty("time"))) {
hintField.append("preview");
hintField.append(".");
hintField.append(ref.getIdentifier());
hintField.append("=");
hintField.append(slide.getURI().toString());
hintField.append("\n");
}
}
logger.trace("Adding segment: {}", timepoint);
Schema.setSegmentHint(doc, new DField<>(hintField.toString(), Integer.toString(segmentCount)));
// increase segment counter
segmentCount++;
}
}
}
// Put the most important keywords into a special solr field
if (sortedAnnotations != null) {
Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString());
}
}
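The helper importantKeywordsString is called above but not shown on this page. The following is a minimal sketch of what such a helper could look like, consistent with the TextAnnotation and KeywordAnnotation calls above; the MAX_KEYWORDS cutoff and the deduplication strategy are assumptions, not the actual Opencast implementation.

  // Hypothetical sketch; the real Opencast helper may differ.
  private static final int MAX_KEYWORDS = 10; // assumed cutoff

  static StringBuffer importantKeywordsString(SortedSet<TextAnnotation> sortedAnnotations) {
    StringBuffer buf = new StringBuffer();
    Set<String> seen = new HashSet<String>();
    int count = 0;
    // sortedAnnotations is ordered by descending relevance/confidence (see the comparator above)
    for (TextAnnotation annotation : sortedAnnotations) {
      Iterator<KeywordAnnotation> keywords = annotation.keywordAnnotations();
      while (keywords.hasNext() && count < MAX_KEYWORDS) {
        String keyword = keywords.next().getKeyword().toLowerCase();
        if (seen.add(keyword)) { // skip duplicate keywords
          if (buf.length() > 0) {
            buf.append(" ");
          }
          buf.append(keyword);
          count++;
        }
      }
    }
    return buf;
  }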
Use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project opencast by opencast.
The class SolrIndexManager, method getMaxRelevance.
/**
 * Gets the maximum relevance for a given keyword across all text annotations.
 *
 * @param keyword
 *          the keyword to look up, in lower case (stored keywords are lowercased before comparison)
 * @param sortedAnnotations
 *          the text annotations to search
 * @return the maximum relevance value, or 0.0 if the keyword does not occur
 */
static double getMaxRelevance(String keyword, SortedSet<TextAnnotation> sortedAnnotations) {
  double max = 0.0;
  for (TextAnnotation textAnnotation : sortedAnnotations) {
    Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations();
    while (keywordAnnotations.hasNext()) {
      KeywordAnnotation ann = keywordAnnotations.next();
      String needle = ann.getKeyword().toLowerCase();
      if (keyword.equals(needle) && textAnnotation.getRelevance() > max) {
        max = textAnnotation.getRelevance();
      }
    }
  }
  return max;
}
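A brief usage sketch, where sortedAnnotations is the set built in addMpeg7Metadata above; the keyword must be passed in lower case, and "lecture" is purely illustrative:

  double score = getMaxRelevance("lecture", sortedAnnotations);
  logger.debug("Maximum relevance: {}", score);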
Use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project opencast by opencast.
The class Mpeg7CaptionConverter, method getLanguageList.
/**
* @see org.opencastproject.caption.api.CaptionConverter#getLanguageList(java.io.InputStream)
*/
@SuppressWarnings("unchecked")
@Override
public String[] getLanguageList(InputStream inputStream) throws CaptionConverterException {
Set<String> languages = new HashSet<String>();
Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
Iterator<Audio> audioContentIterator = catalog.audioContent();
if (audioContentIterator == null)
return languages.toArray(new String[languages.size()]);
content: while (audioContentIterator.hasNext()) {
Audio audioContent = audioContentIterator.next();
TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
if (audioSegmentIterator == null)
continue content;
while (audioSegmentIterator.hasNext()) {
AudioSegment segment = audioSegmentIterator.next();
Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
if (annotationIterator == null)
continue content;
while (annotationIterator.hasNext()) {
TextAnnotation annotation = annotationIterator.next();
String language = annotation.getLanguage();
if (language != null)
languages.add(language);
}
}
}
return languages.toArray(new String[languages.size()]);
}
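A minimal usage sketch, assuming an MPEG-7 catalog file on disk (the file name and the harness class are illustrative) and the converter's no-argument constructor:

import java.io.FileInputStream;
import java.io.InputStream;

public class LanguageListExample { // hypothetical harness
  public static void main(String[] args) throws Exception {
    Mpeg7CaptionConverter converter = new Mpeg7CaptionConverter();
    try (InputStream in = new FileInputStream("captions.mpeg7.xml")) { // illustrative path
      for (String language : converter.getLanguageList(in)) {
        System.out.println("Found caption language: " + language);
      }
    }
  }
}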
Use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project opencast by opencast.
The class Mpeg7CaptionConverter, method importCaption.
/**
* @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
*/
@SuppressWarnings("unchecked")
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
List<Caption> captions = new ArrayList<Caption>();
Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
Iterator<Audio> audioContentIterator = catalog.audioContent();
if (audioContentIterator == null)
return captions;
content: while (audioContentIterator.hasNext()) {
Audio audioContent = audioContentIterator.next();
TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
if (audioSegmentIterator == null)
continue content;
while (audioSegmentIterator.hasNext()) {
AudioSegment segment = audioSegmentIterator.next();
Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
if (annotationIterator == null)
continue content;
while (annotationIterator.hasNext()) {
TextAnnotation annotation = annotationIterator.next();
          if (!language.equals(annotation.getLanguage())) { // null-safe: the annotation may carry no language
logger.debug("Skipping audio content '{}' because of language mismatch", audioContent.getId());
continue content;
}
List<String> captionLines = new ArrayList<String>();
Iterator<FreeTextAnnotation> freeTextAnnotationIterator = annotation.freeTextAnnotations();
if (freeTextAnnotationIterator == null)
continue;
while (freeTextAnnotationIterator.hasNext()) {
FreeTextAnnotation freeTextAnnotation = freeTextAnnotationIterator.next();
captionLines.add(freeTextAnnotation.getText());
}
MediaTime segmentTime = segment.getMediaTime();
MediaTimePoint stp = segmentTime.getMediaTimePoint();
MediaDuration d = segmentTime.getMediaDuration();
          Calendar startCalendar = Calendar.getInstance();
          int millisAtStart = (int) (stp.getTimeInMilliseconds() - (((stp.getHour() * 60 + stp.getMinutes()) * 60 + stp.getSeconds()) * 1000));
          int millisAtEnd = (int) (d.getDurationInMilliseconds() - (((d.getHours() * 60 + d.getMinutes()) * 60 + d.getSeconds()) * 1000));
          // Use the 24-hour clock and *add* the duration's millisecond remainder, so the
          // calendar carries any overflow into the seconds and minutes fields.
          startCalendar.set(Calendar.HOUR_OF_DAY, stp.getHour());
          startCalendar.set(Calendar.MINUTE, stp.getMinutes());
          startCalendar.set(Calendar.SECOND, stp.getSeconds());
          startCalendar.set(Calendar.MILLISECOND, millisAtStart);
          startCalendar.add(Calendar.HOUR_OF_DAY, d.getHours());
          startCalendar.add(Calendar.MINUTE, d.getMinutes());
          startCalendar.add(Calendar.SECOND, d.getSeconds());
          startCalendar.add(Calendar.MILLISECOND, millisAtEnd);
          try {
            Time startTime = new TimeImpl(stp.getHour(), stp.getMinutes(), stp.getSeconds(), millisAtStart);
            Time endTime = new TimeImpl(startCalendar.get(Calendar.HOUR_OF_DAY), startCalendar.get(Calendar.MINUTE), startCalendar.get(Calendar.SECOND), startCalendar.get(Calendar.MILLISECOND));
Caption caption = new CaptionImpl(startTime, endTime, captionLines.toArray(new String[captionLines.size()]));
captions.add(caption);
} catch (IllegalTimeFormatException e) {
logger.warn("Error setting caption time: {}", e.getMessage());
}
}
}
}
return captions;
}
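A minimal usage sketch, with the same illustrative file name as above; the harness class, the "en" language code, and the Caption accessors getStartTime, getStopTime, and getCaption are assumptions:

import java.io.FileInputStream;
import java.io.InputStream;

public class ImportCaptionExample { // hypothetical harness
  public static void main(String[] args) throws Exception {
    Mpeg7CaptionConverter converter = new Mpeg7CaptionConverter();
    try (InputStream in = new FileInputStream("captions.mpeg7.xml")) { // illustrative path
      for (Caption caption : converter.importCaption(in, "en")) { // "en" is an illustrative language code
        System.out.println(caption.getStartTime() + " -> " + caption.getStopTime() + ": "
            + String.join(" ", caption.getCaption()));
      }
    }
  }
}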
Use of com.google.cloud.vision.v1p3beta1.TextAnnotation in project java-vision by googleapis.
The class DetectBeta, method detectHandwrittenOcr.
// [END vision_localize_objects_gcs_beta]
// [START vision_handwritten_ocr_beta]
/**
* Performs handwritten text detection on a local image file.
*
* @param filePath The path to the local file to detect handwritten text on.
* @param out A {@link PrintStream} to write the results to.
* @throws Exception on errors while closing the client.
* @throws IOException on Input/Output errors.
*/
public static void detectHandwrittenOcr(String filePath, PrintStream out) throws Exception {
List<AnnotateImageRequest> requests = new ArrayList<>();
    ByteString imgBytes;
    try (FileInputStream fis = new FileInputStream(filePath)) { // close the stream once it has been read
      imgBytes = ByteString.readFrom(fis);
    }
Image img = Image.newBuilder().setContent(imgBytes).build();
Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
// Set the Language Hint codes for handwritten OCR
ImageContext imageContext = ImageContext.newBuilder().addLanguageHints("en-t-i0-handwrit").build();
AnnotateImageRequest request = AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).setImageContext(imageContext).build();
requests.add(request);
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
      BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
      // The try-with-resources statement closes the client automatically.
      List<AnnotateImageResponse> responses = response.getResponsesList();
for (AnnotateImageResponse res : responses) {
if (res.hasError()) {
out.printf("Error: %s\n", res.getError().getMessage());
return;
}
// For full list of available annotations, see http://g.co/cloud/vision/docs
TextAnnotation annotation = res.getFullTextAnnotation();
for (Page page : annotation.getPagesList()) {
String pageText = "";
for (Block block : page.getBlocksList()) {
String blockText = "";
for (Paragraph para : block.getParagraphsList()) {
String paraText = "";
for (Word word : para.getWordsList()) {
String wordText = "";
for (Symbol symbol : word.getSymbolsList()) {
wordText = wordText + symbol.getText();
out.format("Symbol text: %s (confidence: %f)\n", symbol.getText(), symbol.getConfidence());
}
out.format("Word text: %s (confidence: %f)\n\n", wordText, word.getConfidence());
paraText = String.format("%s %s", paraText, wordText);
}
// Output Example using Paragraph:
out.println("\nParagraph: \n" + paraText);
out.format("Paragraph Confidence: %f\n", para.getConfidence());
blockText = blockText + paraText;
}
pageText = pageText + blockText;
}
}
out.println("\nComplete annotation:");
out.println(annotation.getText());
}
}
}
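A brief invocation sketch; the image path is illustrative, and Google Cloud application default credentials must be configured for the Vision API call to succeed:

public static void main(String[] args) throws Exception {
  detectHandwrittenOcr("resources/handwritten.jpg", System.out); // illustrative path
}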