Search in sources :

Example 1 with Time

use of org.opencastproject.caption.api.Time in project opencast by opencast.

the class SubRipCaptionConverter method importCaption.

/**
 * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
 *
 * <p>
 * The input is read as UTF-8 and split into caption records on blank lines. Each record must consist of a number line,
 * a time line of the form <code>start --&gt; end</code> and at least one line of text. Records whose start time lies
 * before the end of the previously accepted caption, or whose end time is not after their start time, are skipped
 * with a warning.
 *
 * @param in
 *          stream containing the SubRip document
 * @param language
 *          ignored
 * @return list of the captions that were accepted, in document order
 * @throws CaptionConverterException
 *           if a record is malformed, a time cannot be parsed or a record has no caption text
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
    List<Caption> collection = new ArrayList<Caption>();
    // NOTE(review): the scanner is not closed because closing it would also close the supplied stream;
    // confirm that the caller owns the stream.
    Scanner scanner = new Scanner(in, "UTF-8");
    // Records are separated by one or more blank lines. The previous pattern "[\n(\r\n)]{2}" was a character class
    // and therefore also matched a single "\r\n" line ending as well as "((", "))" or "()" inside caption text.
    scanner.useDelimiter("(?:\r?\n){2,}");
    // end time of the last accepted caption, starting at 00:00:00.000
    Time time;
    try {
        time = new TimeImpl(0, 0, 0, 0);
    } catch (IllegalTimeFormatException e) {
        // do not continue with a null reference time
        throw new CaptionConverterException("Could not create initial caption time", e);
    }
    while (scanner.hasNext()) {
        String captionString = scanner.next();
        // convert line endings to \n
        captionString = captionString.replace("\r\n", "\n");
        // split to number, time and caption
        String[] captionParts = captionString.split("\n", 3);
        // check for table length
        if (captionParts.length != 3) {
            throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
        }
        // get time part
        String[] timePart = captionParts[1].split("-->");
        if (timePart.length < 2) {
            // no "-->" separator (or nothing after it): report it instead of failing on timePart[1]
            throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
        }
        // parse time
        Time inTime;
        Time outTime;
        try {
            inTime = TimeUtil.importSrt(timePart[0].trim());
            outTime = TimeUtil.importSrt(timePart[1].trim());
        } catch (IllegalTimeFormatException e) {
            // keep the original exception as cause
            throw new CaptionConverterException(e.getMessage(), e);
        }
        // check for time validity
        if (inTime.compareTo(time) < 0 || outTime.compareTo(inTime) <= 0) {
            logger.warn("Caption with invalid time encountered. Skipping...");
            continue;
        }
        time = outTime;
        // get text captions
        String[] captionLines = createCaptionLines(captionParts[2]);
        if (captionLines == null) {
            throw new CaptionConverterException("Caption does not contain any caption text: " + captionString);
        }
        // create caption object and add to caption collection
        Caption caption = new CaptionImpl(inTime, outTime, captionLines);
        collection.add(caption);
    }
    return collection;
}
Also used : Scanner(java.util.Scanner) CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) CaptionImpl(org.opencastproject.caption.impl.CaptionImpl) ArrayList(java.util.ArrayList) Time(org.opencastproject.caption.api.Time) Caption(org.opencastproject.caption.api.Caption) TimeImpl(org.opencastproject.caption.impl.TimeImpl)

Example 2 with Time

use of org.opencastproject.caption.api.Time in project opencast by opencast.

the class Mpeg7CaptionConverter method importCaption.

/**
 * Imports captions from an MPEG-7 catalog read from the given stream.
 *
 * <p>
 * Every text annotation of every audio segment becomes one caption whose lines are the annotation's free text
 * annotations. The caption starts at the segment's media time point and ends at that time point plus the segment's
 * media duration. An audio content is abandoned as soon as one of its text annotations has a language different from
 * <code>language</code>. Captions whose times are rejected by {@link TimeImpl} are skipped with a warning.
 *
 * @param inputStream
 *          stream containing the MPEG-7 catalog
 * @param language
 *          language the text annotations have to match
 * @return list of imported captions, possibly empty
 * @throws CaptionConverterException
 *           declared by the interface
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@SuppressWarnings("unchecked")
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
    List<Caption> captions = new ArrayList<Caption>();
    Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
    Iterator<Audio> audioContentIterator = catalog.audioContent();
    if (audioContentIterator == null) {
        return captions;
    }
    content: while (audioContentIterator.hasNext()) {
        Audio audioContent = audioContentIterator.next();
        TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
        Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
        if (audioSegmentIterator == null) {
            continue content;
        }
        while (audioSegmentIterator.hasNext()) {
            AudioSegment segment = audioSegmentIterator.next();
            Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
            if (annotationIterator == null) {
                // NOTE(review): this skips the remaining segments of the audio content, not just this segment
                continue content;
            }
            while (annotationIterator.hasNext()) {
                TextAnnotation annotation = annotationIterator.next();
                if (!annotation.getLanguage().equals(language)) {
                    logger.debug("Skipping audio content '{}' because of language mismatch", audioContent.getId());
                    continue content;
                }
                List<String> captionLines = new ArrayList<String>();
                Iterator<FreeTextAnnotation> freeTextAnnotationIterator = annotation.freeTextAnnotations();
                if (freeTextAnnotationIterator == null) {
                    continue;
                }
                while (freeTextAnnotationIterator.hasNext()) {
                    FreeTextAnnotation freeTextAnnotation = freeTextAnnotationIterator.next();
                    captionLines.add(freeTextAnnotation.getText());
                }
                MediaTime segmentTime = segment.getMediaTime();
                MediaTimePoint stp = segmentTime.getMediaTimePoint();
                MediaDuration d = segmentTime.getMediaDuration();
                // whole seconds covered by the hour/minute/second fields of start point and duration
                long startSeconds = (stp.getHour() * 60L + stp.getMinutes()) * 60L + stp.getSeconds();
                long durationSeconds = (d.getHours() * 60L + d.getMinutes()) * 60L + d.getSeconds();
                // millisecond remainder that is not covered by the hour/minute/second fields
                int millisAtStart = (int) (stp.getTimeInMilliseconds() - startSeconds * 1000L);
                int millisAtEnd = (int) (d.getDurationInMilliseconds() - durationSeconds * 1000L);
                // end = start + duration, computed in milliseconds. The previous Calendar based code used the
                // 12-hour field Calendar.HOUR and replaced the start milliseconds by the duration milliseconds
                // instead of adding them.
                long endMillis = (startSeconds + durationSeconds) * 1000L + millisAtStart + millisAtEnd;
                int endHours = (int) (endMillis / 3600000L);
                int endMinutes = (int) ((endMillis / 60000L) % 60L);
                int endSeconds = (int) ((endMillis / 1000L) % 60L);
                int endMilliseconds = (int) (endMillis % 1000L);
                try {
                    Time startTime = new TimeImpl(stp.getHour(), stp.getMinutes(), stp.getSeconds(), millisAtStart);
                    Time endTime = new TimeImpl(endHours, endMinutes, endSeconds, endMilliseconds);
                    Caption caption = new CaptionImpl(startTime, endTime, captionLines.toArray(new String[captionLines.size()]));
                    captions.add(caption);
                } catch (IllegalTimeFormatException e) {
                    logger.warn("Error setting caption time: {}", e.getMessage());
                }
            }
        }
    }
    return captions;
}
Also used : IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) Calendar(java.util.Calendar) ArrayList(java.util.ArrayList) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) Time(org.opencastproject.caption.api.Time) Caption(org.opencastproject.caption.api.Caption) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) TimeImpl(org.opencastproject.caption.impl.TimeImpl) MediaTimeImpl(org.opencastproject.metadata.mpeg7.MediaTimeImpl) Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) CaptionImpl(org.opencastproject.caption.impl.CaptionImpl) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) Iterator(java.util.Iterator) MediaDuration(org.opencastproject.metadata.mpeg7.MediaDuration) Mpeg7CatalogImpl(org.opencastproject.metadata.mpeg7.Mpeg7CatalogImpl) ArrayList(java.util.ArrayList) List(java.util.List) TemporalDecomposition(org.opencastproject.metadata.mpeg7.TemporalDecomposition) Audio(org.opencastproject.metadata.mpeg7.Audio) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) AudioSegment(org.opencastproject.metadata.mpeg7.AudioSegment)

Example 3 with Time

use of org.opencastproject.caption.api.Time in project opencast by opencast.

the class IBMWatsonCaptionConverterTest method importCaption.

/**
 * Imports the test input with an empty language and verifies that seven captions are returned, each with a single
 * line of text and the expected start and stop times.
 */
private void importCaption() throws Exception {
    // expected single text line of each caption, in order
    final String[] expectedTexts = {
        "in the earliest days it was a style of programming called imperative programming language ",
        "principal example of that is the language see ",
        "it is rather old because Sarah is fact stems from the late 19 seventies but he still use a great deal ",
        "in fact is the principal programming language that's taught ",
        "in a very popular ",
        "a computer science course called CS 15 see if it is up to become the largest undergraduate course herpetological ",
        "thing office who are extension "
    };
    // expected times as { hours, minutes, seconds, milliseconds }
    final int[][] expectedStarts = {
        { 0, 0, 0, 750 },
        { 0, 0, 7, 460 },
        { 0, 0, 10, 620 },
        { 0, 0, 18, 110 },
        { 0, 0, 21, 490 },
        { 0, 0, 23, 320 },
        { 0, 0, 28, 900 }
    };
    final int[][] expectedStops = {
        { 0, 0, 5, 240 },
        { 0, 0, 10, 150 },
        { 0, 0, 18, 110 },
        { 0, 0, 20, 960 },
        { 0, 0, 22, 580 },
        { 0, 0, 28, 900 },
        { 0, 0, 30, 0 }
    };

    List<Caption> imported = converter.importCaption(inputStream, "");
    Assert.assertEquals(7, imported.size());

    for (int idx = 0; idx < expectedTexts.length; idx++) {
        Caption current = imported.get(idx);

        String[] lines = current.getCaption();
        Assert.assertEquals(1, lines.length);
        Assert.assertEquals(expectedTexts[idx], lines[0]);

        Time start = current.getStartTime();
        Assert.assertEquals(expectedStarts[idx][0], start.getHours());
        Assert.assertEquals(expectedStarts[idx][1], start.getMinutes());
        Assert.assertEquals(expectedStarts[idx][2], start.getSeconds());
        Assert.assertEquals(expectedStarts[idx][3], start.getMilliseconds());

        Time stop = current.getStopTime();
        Assert.assertEquals(expectedStops[idx][0], stop.getHours());
        Assert.assertEquals(expectedStops[idx][1], stop.getMinutes());
        Assert.assertEquals(expectedStops[idx][2], stop.getSeconds());
        Assert.assertEquals(expectedStops[idx][3], stop.getMilliseconds());
    }
}
Also used : Time(org.opencastproject.caption.api.Time) Caption(org.opencastproject.caption.api.Caption)

Example 4 with Time

use of org.opencastproject.caption.api.Time in project opencast by opencast.

the class DFXPCaptionConverter method parsePElement.

/**
 * Builds a caption from a single &lt;p&gt; element.
 *
 * <p>
 * The trimmed <code>begin</code> and <code>end</code> attributes are parsed as DFXP times and the element's text is
 * split into lines at every newline.
 *
 * @param p
 *          the &lt;p&gt; element holding one caption
 * @return {@link Caption} created from the element's times and text
 * @throws IllegalTimeFormatException
 *           if the begin or end attribute is not a valid DFXP time
 */
private Caption parsePElement(Element p) throws IllegalTimeFormatException {
    String beginAttribute = p.getAttribute("begin").trim();
    Time startTime = TimeUtil.importDFXP(beginAttribute);
    String endAttribute = p.getAttribute("end").trim();
    Time stopTime = TimeUtil.importDFXP(endAttribute);
    // FIXME add logic for duration if end is absent
    // one caption line per line of text inside the element
    String elementText = getTextCore(p);
    String[] captionLines = elementText.split("\n");
    return new CaptionImpl(startTime, stopTime, captionLines);
}
Also used : CaptionImpl(org.opencastproject.caption.impl.CaptionImpl) Time(org.opencastproject.caption.api.Time)

Example 5 with Time

use of org.opencastproject.caption.api.Time in project opencast by opencast.

the class DFXPCaptionConverter method importCaption.

/**
 * {@inheritDoc} Parser used for parsing XML document is DOM parser. Language parameter will determine which language
 * is searched for and parsed. If there is no matching language, empty collection is returned. If language parameter
 * is <code>null</code> first language found is parsed.
 *
 * <p>
 * The parser is configured not to resolve external entities or load external DTDs. Captions with an unparsable time,
 * a negative start time or a stop time that is not after the start time are skipped with a warning.
 *
 * @param in
 *          stream containing the DFXP document
 * @param language
 *          value of the <code>xml:lang</code> attribute of the &lt;div&gt; to import, or <code>null</code> to import
 *          the first &lt;div&gt;
 * @return list of imported captions, possibly empty
 * @throws CaptionConverterException
 *           if the parser cannot be configured or the document cannot be read or parsed
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
    // create new collection
    List<Caption> collection = new ArrayList<Caption>();
    Document doc;
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // the document comes from outside: do not resolve external entities or fetch external DTDs (XXE)
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        factory.setXIncludeAware(false);
        DocumentBuilder builder = factory.newDocumentBuilder();
        doc = builder.parse(in);
        doc.getDocumentElement().normalize();
    } catch (ParserConfigurationException e) {
        throw new CaptionConverterException("Could not parse captions", e);
    } catch (SAXException e) {
        throw new CaptionConverterException("Could not parse captions", e);
    } catch (IOException e) {
        throw new CaptionConverterException("Could not parse captions", e);
    }
    // get all <div> elements since they contain information about language
    NodeList divElements = doc.getElementsByTagName("div");
    Element targetDiv = null;
    if (language != null) {
        // find first <div> element with matching language
        for (int i = 0; i < divElements.getLength(); i++) {
            Element n = (Element) divElements.item(i);
            if (n.getAttribute("xml:lang").equals(language)) {
                targetDiv = n;
                break;
            }
        }
    } else {
        if (divElements.getLength() > 1) {
            // more than one existing <div> element, no language specified
            logger.warn("More than one <div> element available. Parsing first one...");
        }
        if (divElements.getLength() != 0) {
            targetDiv = (Element) divElements.item(0);
        }
    }
    // check if we found node
    if (targetDiv == null) {
        logger.warn("No suitable <div> element found for language {}", language);
    } else {
        NodeList pElements = targetDiv.getElementsByTagName("p");
        // lower bound for caption start times: 00:00:00.000
        Time time;
        try {
            time = new TimeImpl(0, 0, 0, 0);
        } catch (IllegalTimeFormatException e) {
            // do not continue with a null reference time
            throw new CaptionConverterException("Could not create initial caption time", e);
        }
        for (int i = 0; i < pElements.getLength(); i++) {
            try {
                Caption caption = parsePElement((Element) pElements.item(i));
                // check time
                if (caption.getStartTime().compareTo(time) < 0 || caption.getStopTime().compareTo(caption.getStartTime()) <= 0) {
                    logger.warn("Caption with invalid time encountered. Skipping...");
                    continue;
                }
                collection.add(caption);
            } catch (IllegalTimeFormatException e) {
                logger.warn("Caption with invalid time format encountered. Skipping...");
            }
        }
    }
    // return collection
    return collection;
}
Also used : CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) NodeList(org.w3c.dom.NodeList) Element(org.w3c.dom.Element) MediaPackageElement(org.opencastproject.mediapackage.MediaPackageElement) ArrayList(java.util.ArrayList) Time(org.opencastproject.caption.api.Time) IOException(java.io.IOException) Document(org.w3c.dom.Document) Caption(org.opencastproject.caption.api.Caption) TimeImpl(org.opencastproject.caption.impl.TimeImpl) SAXException(org.xml.sax.SAXException) DocumentBuilder(javax.xml.parsers.DocumentBuilder) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException)

Aggregations

Time (org.opencastproject.caption.api.Time)6 Caption (org.opencastproject.caption.api.Caption)5 ArrayList (java.util.ArrayList)3 IllegalTimeFormatException (org.opencastproject.caption.api.IllegalTimeFormatException)3 CaptionImpl (org.opencastproject.caption.impl.CaptionImpl)3 TimeImpl (org.opencastproject.caption.impl.TimeImpl)3 IOException (java.io.IOException)2 Calendar (java.util.Calendar)2 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)2 CaptionConverterException (org.opencastproject.caption.api.CaptionConverterException)2 Audio (org.opencastproject.metadata.mpeg7.Audio)2 AudioSegment (org.opencastproject.metadata.mpeg7.AudioSegment)2 FreeTextAnnotation (org.opencastproject.metadata.mpeg7.FreeTextAnnotation)2 MediaTime (org.opencastproject.metadata.mpeg7.MediaTime)2 MediaTimeImpl (org.opencastproject.metadata.mpeg7.MediaTimeImpl)2 MediaTimePoint (org.opencastproject.metadata.mpeg7.MediaTimePoint)2 Mpeg7Catalog (org.opencastproject.metadata.mpeg7.Mpeg7Catalog)2 TemporalDecomposition (org.opencastproject.metadata.mpeg7.TemporalDecomposition)2 TextAnnotation (org.opencastproject.metadata.mpeg7.TextAnnotation)2 Iterator (java.util.Iterator)1