Search in sources :

Example 6 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class IBMWatsonCaptionConverterTest method importCaption.

/**
 * Imports the test input through the converter and checks that it yields seven single-line captions whose text and
 * start/stop times match the expected values.
 */
private void importCaption() throws Exception {
    List<Caption> imported = converter.importCaption(inputStream, "");
    Assert.assertEquals(7, imported.size());

    // arguments: caption, expected text, start seconds, start millis, stop seconds, stop millis
    assertSingleLineCaption(imported.get(0),
            "in the earliest days it was a style of programming called imperative programming language ",
            0, 750, 5, 240);
    assertSingleLineCaption(imported.get(1),
            "principal example of that is the language see ",
            7, 460, 10, 150);
    assertSingleLineCaption(imported.get(2),
            "it is rather old because Sarah is fact stems from the late 19 seventies but he still use a great deal ",
            10, 620, 18, 110);
    assertSingleLineCaption(imported.get(3),
            "in fact is the principal programming language that's taught ",
            18, 110, 20, 960);
    assertSingleLineCaption(imported.get(4),
            "in a very popular ",
            21, 490, 22, 580);
    assertSingleLineCaption(imported.get(5),
            "a computer science course called CS 15 see if it is up to become the largest undergraduate course herpetological ",
            23, 320, 28, 900);
    assertSingleLineCaption(imported.get(6),
            "thing office who are extension ",
            28, 900, 30, 0);
}

/**
 * Asserts that a caption consists of exactly one text line equal to {@code expectedText} and that its start and stop
 * times have zero hours and minutes and the given seconds/milliseconds.
 */
private void assertSingleLineCaption(Caption actual, String expectedText, int startSeconds, int startMillis,
        int stopSeconds, int stopMillis) {
    String[] lines = actual.getCaption();
    Assert.assertEquals(1, lines.length);
    Assert.assertEquals(expectedText, lines[0]);
    assertZeroHourMinuteTime(actual.getStartTime(), startSeconds, startMillis);
    assertZeroHourMinuteTime(actual.getStopTime(), stopSeconds, stopMillis);
}

/**
 * Asserts that a time has zero hours, zero minutes and the given seconds and milliseconds.
 */
private void assertZeroHourMinuteTime(Time actual, int expectedSeconds, int expectedMillis) {
    Assert.assertEquals(0, actual.getHours());
    Assert.assertEquals(0, actual.getMinutes());
    Assert.assertEquals(expectedSeconds, actual.getSeconds());
    Assert.assertEquals(expectedMillis, actual.getMilliseconds());
}
Also used : Time(org.opencastproject.caption.api.Time) Caption(org.opencastproject.caption.api.Caption)

Example 7 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class SubRipCaptionConverter method exportCaption.

/**
 * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
 * <p>
 * Every caption is written as one entry: running counter (starting at 1), line ending, start time, the separator
 * {@code " --> "}, stop time, line ending, caption text, and two line endings. The writer wrapping
 * {@code outputStream} is closed before this method returns, whether or not writing succeeded.
 *
 * @throws IOException
 *           if writing to or closing the output stream fails
 */
@Override
public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
    if (language != null) {
        logger.debug("SubRip format does not include language information. Ignoring language attribute.");
    }
    // closing the buffered writer also closes the wrapped stream writer and the output stream
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"));
    try {
        // entry numbering starts at 1
        int counter = 1;
        for (Caption caption : captions) {
            String captionString = String.format("%2$d%1$s%3$s --> %4$s%1$s%5$s%1$s%1$s", LINE_ENDING, counter, TimeUtil.exportToSrt(caption.getStartTime()), TimeUtil.exportToSrt(caption.getStopTime()), createCaptionText(caption.getCaption()));
            bw.append(captionString);
            counter++;
        }
        bw.flush();
    } finally {
        // release the stream even when writing fails part-way through
        bw.close();
    }
}
Also used : OutputStreamWriter(java.io.OutputStreamWriter) Caption(org.opencastproject.caption.api.Caption) BufferedWriter(java.io.BufferedWriter)

Example 8 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class CaptionServiceImpl method importCaptions.

/**
 * Imports captions using registered converter engine and specified language.
 *
 * @param input
 *          file containing captions
 * @param inputFormat
 *          format of imported captions
 * @param language
 *          (optional) captions' language
 * @return {@link List} of parsed captions
 * @throws UnsupportedCaptionFormatException
 *           if there is no registered engine for given format
 * @throws CaptionConverterException
 *           if the input file cannot be found or the converter fails to import the captions
 */
private List<Caption> importCaptions(File input, String inputFormat, String language) throws UnsupportedCaptionFormatException, CaptionConverterException {
    // look up the converter registered for the input format
    CaptionConverter converter = getCaptionConverter(inputFormat);
    if (converter == null) {
        logger.error("No available caption format found for {}.", inputFormat);
        throw new UnsupportedCaptionFormatException(inputFormat);
    }
    FileInputStream fileStream = null;
    try {
        fileStream = new FileInputStream(input);
        return converter.importCaption(fileStream, language);
    } catch (FileNotFoundException e) {
        // keep the original exception as cause so the failure stays diagnosable
        throw new CaptionConverterException("Could not locate file " + input, e);
    } finally {
        IOUtils.closeQuietly(fileStream);
    }
}
Also used : CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) UnsupportedCaptionFormatException(org.opencastproject.caption.api.UnsupportedCaptionFormatException) FileNotFoundException(java.io.FileNotFoundException) CaptionConverter(org.opencastproject.caption.api.CaptionConverter) FileInputStream(java.io.FileInputStream) Caption(org.opencastproject.caption.api.Caption)

Example 9 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class DFXPCaptionConverter method exportCaption.

/**
 * {@inheritDoc} DOM parser is used to parse template from which whole document is then constructed.
 * <p>
 * A single {@code <div>} carrying the language ({@code "und"} when {@code language} is <code>null</code>) is
 * appended to the template's first {@code <body>} element. Each caption becomes a {@code <p>} element with
 * {@code begin}/{@code end} attributes; its text lines are separated by {@code <br>} elements. A caption without
 * any text lines is written as an empty {@code <p>}. The document is serialized as UTF-8 and the writer wrapping
 * {@code outputStream} is closed afterwards.
 *
 * @throws IOException
 *           if writing to the output stream fails
 */
@Override
public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
    // get document builder factory and parse template
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    Document doc = null;
    InputStream is = null;
    try {
        DocumentBuilder builder = factory.newDocumentBuilder();
        // load dfxp template from file
        is = DFXPCaptionConverter.class.getResourceAsStream("/templates/template.dfxp.xml");
        doc = builder.parse(is);
    } catch (ParserConfigurationException e) {
        // should not happen
        throw new RuntimeException(e);
    } catch (SAXException e) {
        // should not happen unless template is invalid
        throw new RuntimeException(e);
    } catch (IOException e) {
        // should not happen
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeQuietly(is);
    }
    // retrieve body element
    Node bodyNode = doc.getElementsByTagName("body").item(0);
    // create new div element with specified language
    Element divNode = doc.createElement("div");
    divNode.setAttribute("xml:lang", language != null ? language : "und");
    bodyNode.appendChild(divNode);
    // update document
    for (Caption caption : captions) {
        Element newNode = doc.createElement("p");
        newNode.setAttribute("begin", TimeUtil.exportToDFXP(caption.getStartTime()));
        newNode.setAttribute("end", TimeUtil.exportToDFXP(caption.getStopTime()));
        String[] captionText = caption.getCaption();
        // text part: lines separated by <br>; a caption without text lines yields an empty <p> instead of failing
        int lineCount = captionText != null ? captionText.length : 0;
        for (int i = 0; i < lineCount; i++) {
            if (i > 0) {
                newNode.appendChild(doc.createElement("br"));
            }
            newNode.appendChild(doc.createTextNode(captionText[i]));
        }
        divNode.appendChild(newNode);
    }
    // initialize stream writer
    OutputStreamWriter osw = new OutputStreamWriter(outputStream, "UTF-8");
    StreamResult result = new StreamResult(osw);
    DOMSource source = new DOMSource(doc);
    TransformerFactory tfactory = TransformerFactory.newInstance();
    Transformer transformer;
    try {
        transformer = tfactory.newTransformer();
        transformer.transform(source, result);
        osw.flush();
    } catch (TransformerConfigurationException e) {
        // should not happen
        throw new RuntimeException(e);
    } catch (TransformerException e) {
        // should not happen
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeQuietly(osw);
    }
}
Also used : DOMSource(javax.xml.transform.dom.DOMSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) TransformerFactory(javax.xml.transform.TransformerFactory) Transformer(javax.xml.transform.Transformer) StreamResult(javax.xml.transform.stream.StreamResult) TransformerConfigurationException(javax.xml.transform.TransformerConfigurationException) InputStream(java.io.InputStream) Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) MediaPackageElement(org.opencastproject.mediapackage.MediaPackageElement) IOException(java.io.IOException) Document(org.w3c.dom.Document) Caption(org.opencastproject.caption.api.Caption) SAXException(org.xml.sax.SAXException) DocumentBuilder(javax.xml.parsers.DocumentBuilder) OutputStreamWriter(java.io.OutputStreamWriter) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) TransformerException(javax.xml.transform.TransformerException)

Example 10 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class DFXPCaptionConverter method importCaption.

/**
 * {@inheritDoc} Parser used for parsing XML document is DOM parser. Language parameter will determine which language
 * is searched for and parsed. If there is no matching language, empty collection is returned. If language parameter
 * is <code>null</code> first language found is parsed.
 * <p>
 * The parser is configured not to resolve external entities or external DTDs. Captions whose start time is negative,
 * whose stop time is not after their start time, or whose time format is invalid are skipped with a warning.
 *
 * @throws CaptionConverterException
 *           if the XML parser cannot be configured or the document cannot be parsed
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
    // create new collection
    List<Caption> collection = new ArrayList<Caption>();
    Document doc;
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // the caption document is external input: do not let it pull in external entities or DTDs (XXE)
        factory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true);
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        DocumentBuilder builder = factory.newDocumentBuilder();
        doc = builder.parse(in);
        doc.getDocumentElement().normalize();
    } catch (ParserConfigurationException e) {
        throw new CaptionConverterException("Could not parse captions", e);
    } catch (SAXException e) {
        throw new CaptionConverterException("Could not parse captions", e);
    } catch (IOException e) {
        throw new CaptionConverterException("Could not parse captions", e);
    }
    // get all <div> elements since they contain information about language
    NodeList divElements = doc.getElementsByTagName("div");
    Element targetDiv = null;
    if (language != null) {
        // find first <div> element with matching language
        for (int i = 0; i < divElements.getLength(); i++) {
            Element n = (Element) divElements.item(i);
            if (n.getAttribute("xml:lang").equals(language)) {
                targetDiv = n;
                break;
            }
        }
    } else {
        if (divElements.getLength() > 1) {
            // more than one existing <div> element, no language specified
            logger.warn("More than one <div> element available. Parsing first one...");
        }
        if (divElements.getLength() != 0) {
            targetDiv = (Element) divElements.item(0);
        }
    }
    // check if we found node
    if (targetDiv == null) {
        logger.warn("No suitable <div> element found for language {}", language);
    } else {
        NodeList pElements = targetDiv.getElementsByTagName("p");
        // lower bound (time zero) every caption start time is checked against
        Time time;
        try {
            time = new TimeImpl(0, 0, 0, 0);
        } catch (IllegalTimeFormatException e1) {
            // not expected for an all-zero time; fail explicitly rather than hit a null reference in compareTo below
            throw new CaptionConverterException("Could not initialize start time", e1);
        }
        for (int i = 0; i < pElements.getLength(); i++) {
            try {
                Caption caption = parsePElement((Element) pElements.item(i));
                // check time
                if (caption.getStartTime().compareTo(time) < 0 || caption.getStopTime().compareTo(caption.getStartTime()) <= 0) {
                    logger.warn("Caption with invalid time encountered. Skipping...");
                    continue;
                }
                collection.add(caption);
            } catch (IllegalTimeFormatException e) {
                logger.warn("Caption with invalid time format encountered. Skipping...");
            }
        }
    }
    // return collection
    return collection;
}
Also used : CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) NodeList(org.w3c.dom.NodeList) Element(org.w3c.dom.Element) MediaPackageElement(org.opencastproject.mediapackage.MediaPackageElement) ArrayList(java.util.ArrayList) Time(org.opencastproject.caption.api.Time) IOException(java.io.IOException) Document(org.w3c.dom.Document) Caption(org.opencastproject.caption.api.Caption) TimeImpl(org.opencastproject.caption.impl.TimeImpl) SAXException(org.xml.sax.SAXException) DocumentBuilder(javax.xml.parsers.DocumentBuilder) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException)

Aggregations

- Caption (org.opencastproject.caption.api.Caption): 13
- IOException (java.io.IOException): 6
- CaptionConverterException (org.opencastproject.caption.api.CaptionConverterException): 6
- ArrayList (java.util.ArrayList): 5
- IllegalTimeFormatException (org.opencastproject.caption.api.IllegalTimeFormatException): 5
- Time (org.opencastproject.caption.api.Time): 5
- CaptionImpl (org.opencastproject.caption.impl.CaptionImpl): 4
- TimeImpl (org.opencastproject.caption.impl.TimeImpl): 4
- OutputStreamWriter (java.io.OutputStreamWriter): 3
- ParserConfigurationException (javax.xml.parsers.ParserConfigurationException): 3
- MediaPackageElement (org.opencastproject.mediapackage.MediaPackageElement): 3
- BufferedWriter (java.io.BufferedWriter): 2
- FileNotFoundException (java.io.FileNotFoundException): 2
- Calendar (java.util.Calendar): 2
- DocumentBuilder (javax.xml.parsers.DocumentBuilder): 2
- DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory): 2
- Transformer (javax.xml.transform.Transformer): 2
- TransformerConfigurationException (javax.xml.transform.TransformerConfigurationException): 2
- TransformerException (javax.xml.transform.TransformerException): 2
- DOMSource (javax.xml.transform.dom.DOMSource): 2