use of org.opencastproject.caption.api.Caption in project opencast by opencast.
the class IBMWatsonCaptionConverterTest method importCaption.
/**
 * Imports the captions from {@code inputStream} through {@code converter} and verifies that seven captions are
 * returned, each with a single line of text and the expected start and stop time.
 */
private void importCaption() throws Exception {
  List<Caption> imported = converter.importCaption(inputStream, "");
  Assert.assertEquals(7, imported.size());

  assertCaption(imported.get(0),
          "in the earliest days it was a style of programming called imperative programming language ",
          0, 750, 5, 240);
  assertCaption(imported.get(1),
          "principal example of that is the language see ",
          7, 460, 10, 150);
  assertCaption(imported.get(2),
          "it is rather old because Sarah is fact stems from the late 19 seventies but he still use a great deal ",
          10, 620, 18, 110);
  assertCaption(imported.get(3),
          "in fact is the principal programming language that's taught ",
          18, 110, 20, 960);
  assertCaption(imported.get(4),
          "in a very popular ",
          21, 490, 22, 580);
  assertCaption(imported.get(5),
          "a computer science course called CS 15 see if it is up to become the largest undergraduate course herpetological ",
          23, 320, 28, 900);
  assertCaption(imported.get(6),
          "thing office who are extension ",
          28, 900, 30, 0);
}

/**
 * Verifies that the caption consists of exactly one line equal to the expected text and that its start and stop time
 * have the expected seconds and milliseconds (hours and minutes are expected to be zero).
 */
private void assertCaption(Caption caption, String expectedText, int startSeconds, int startMillis, int stopSeconds,
        int stopMillis) {
  String[] lines = caption.getCaption();
  Assert.assertEquals(1, lines.length);
  Assert.assertEquals(expectedText, lines[0]);
  assertTime(caption.getStartTime(), startSeconds, startMillis);
  assertTime(caption.getStopTime(), stopSeconds, stopMillis);
}

/**
 * Verifies that the time has zero hours and minutes and the expected seconds and milliseconds.
 */
private void assertTime(Time actual, int expectedSeconds, int expectedMillis) {
  Assert.assertEquals(0, actual.getHours());
  Assert.assertEquals(0, actual.getMinutes());
  Assert.assertEquals(expectedSeconds, actual.getSeconds());
  Assert.assertEquals(expectedMillis, actual.getMilliseconds());
}
use of org.opencastproject.caption.api.Caption in project opencast by opencast.
the class SubRipCaptionConverter method exportCaption.
/**
 * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
 * <p>
 * Every caption is written as one entry made of a running counter (starting at 1), the start and stop time and the
 * caption text, each part terminated by {@code LINE_ENDING} and the entry followed by one additional
 * {@code LINE_ENDING}. The writer wrapping {@code outputStream} is closed before this method returns, also when
 * writing fails.
 */
@Override
public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
  if (language != null) {
    logger.debug("SubRip format does not include language information. Ignoring language attribute.");
  }
  // try-with-resources closes the writer (and with it the underlying stream) even if appending a caption fails
  try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"))) {
    // initialize counter
    int counter = 1;
    for (Caption caption : captions) {
      String captionString = String.format("%2$d%1$s%3$s --> %4$s%1$s%5$s%1$s%1$s", LINE_ENDING, counter, TimeUtil.exportToSrt(caption.getStartTime()), TimeUtil.exportToSrt(caption.getStopTime()), createCaptionText(caption.getCaption()));
      bw.append(captionString);
      counter++;
    }
    bw.flush();
  }
}
use of org.opencastproject.caption.api.Caption in project opencast by opencast.
the class CaptionServiceImpl method importCaptions.
/**
 * Imports captions using registered converter engine and specified language.
 *
 * @param input
 *          file containing captions
 * @param inputFormat
 *          format of imported captions
 * @param language
 *          (optional) captions' language
 * @return {@link List} of parsed captions
 * @throws UnsupportedCaptionFormatException
 *           if there is no registered engine for given format
 * @throws CaptionConverterException
 *           if the input file cannot be found or the converter fails to import the captions
 */
private List<Caption> importCaptions(File input, String inputFormat, String language) throws UnsupportedCaptionFormatException, CaptionConverterException {
  // get input format
  CaptionConverter converter = getCaptionConverter(inputFormat);
  if (converter == null) {
    logger.error("No available caption format found for {}.", inputFormat);
    throw new UnsupportedCaptionFormatException(inputFormat);
  }
  FileInputStream fileStream = null;
  try {
    fileStream = new FileInputStream(input);
    return converter.importCaption(fileStream, language);
  } catch (FileNotFoundException e) {
    // keep the original exception as cause so the reason for the failure is not lost
    throw new CaptionConverterException("Could not locate file " + input, e);
  } finally {
    // closeQuietly tolerates a null stream, which is the case when opening the file failed
    IOUtils.closeQuietly(fileStream);
  }
}
use of org.opencastproject.caption.api.Caption in project opencast by opencast.
the class DFXPCaptionConverter method exportCaption.
/**
 * {@inheritDoc} DOM parser is used to parse template from which whole document is then constructed.
 * <p>
 * All captions are added as {@code <p>} elements to a single new {@code <div>} element that is appended to the
 * template's first {@code <body>} element. The {@code xml:lang} attribute of the {@code <div>} is set to the given
 * language, or to {@code "und"} if language is <code>null</code>. Multiple lines of one caption are separated by
 * {@code <br>} elements. The document is written UTF-8 encoded and the writer wrapping {@code outputStream} is closed
 * once the transformation has been attempted.
 *
 * @throws RuntimeException
 *           if the template cannot be loaded or parsed, or if the document cannot be transformed
 */
@Override
public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
  // get document builder factory and parse template
  DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  Document doc = null;
  InputStream is = null;
  try {
    DocumentBuilder builder = factory.newDocumentBuilder();
    // load dfxp template from file
    is = DFXPCaptionConverter.class.getResourceAsStream("/templates/template.dfxp.xml");
    doc = builder.parse(is);
  } catch (ParserConfigurationException e) {
    // should not happen
    throw new RuntimeException(e);
  } catch (SAXException e) {
    // should not happen unless template is invalid
    throw new RuntimeException(e);
  } catch (IOException e) {
    // should not happen
    throw new RuntimeException(e);
  } finally {
    IOUtils.closeQuietly(is);
  }
  // retrieve body element
  Node bodyNode = doc.getElementsByTagName("body").item(0);
  // create new div element with specified language
  Element divNode = doc.createElement("div");
  divNode.setAttribute("xml:lang", language != null ? language : "und");
  bodyNode.appendChild(divNode);
  // update document
  for (Caption caption : captions) {
    Element newNode = doc.createElement("p");
    newNode.setAttribute("begin", TimeUtil.exportToDFXP(caption.getStartTime()));
    newNode.setAttribute("end", TimeUtil.exportToDFXP(caption.getStopTime()));
    String[] captionText = caption.getCaption();
    // text part: lines are separated by <br>; iterating from index 0 means a caption without any text line yields
    // an empty <p> element instead of failing on captionText[0]
    for (int i = 0; i < captionText.length; i++) {
      if (i > 0) {
        newNode.appendChild(doc.createElement("br"));
      }
      newNode.appendChild(doc.createTextNode(captionText[i]));
    }
    divNode.appendChild(newNode);
  }
  // initialize stream writer
  OutputStreamWriter osw = new OutputStreamWriter(outputStream, "UTF-8");
  StreamResult result = new StreamResult(osw);
  DOMSource source = new DOMSource(doc);
  TransformerFactory tfactory = TransformerFactory.newInstance();
  Transformer transformer;
  try {
    transformer = tfactory.newTransformer();
    transformer.transform(source, result);
    osw.flush();
  } catch (TransformerConfigurationException e) {
    // should not happen
    throw new RuntimeException(e);
  } catch (TransformerException e) {
    // should not happen
    throw new RuntimeException(e);
  } finally {
    IOUtils.closeQuietly(osw);
  }
}
use of org.opencastproject.caption.api.Caption in project opencast by opencast.
the class DFXPCaptionConverter method importCaption.
/**
 * {@inheritDoc} Parser used for parsing XML document is DOM parser. Language parameter will determine which language
 * is searched for and parsed. If there is no matching language, empty collection is returned. If language parameter
 * is <code>null</code> first language found is parsed.
 * <p>
 * The parser is configured not to resolve external entities or external DTDs. Captions whose start time is negative,
 * whose stop time is not after their start time, or whose time format is invalid are skipped with a warning.
 *
 * @throws CaptionConverterException
 *           if the parser cannot be configured or the input cannot be read or parsed as XML
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
  // create new collection
  List<Caption> collection = new ArrayList<Caption>();
  Document doc;
  try {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // the stream may carry untrusted content: never resolve external entities or fetch external DTDs (XXE)
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    DocumentBuilder builder = factory.newDocumentBuilder();
    doc = builder.parse(in);
    doc.getDocumentElement().normalize();
  } catch (ParserConfigurationException e) {
    throw new CaptionConverterException("Could not parse captions", e);
  } catch (SAXException e) {
    throw new CaptionConverterException("Could not parse captions", e);
  } catch (IOException e) {
    throw new CaptionConverterException("Could not parse captions", e);
  }
  // get all <div> elements since they contain information about language
  NodeList divElements = doc.getElementsByTagName("div");
  Element targetDiv = null;
  if (language != null) {
    // find first <div> element with matching language
    for (int i = 0; i < divElements.getLength(); i++) {
      Element n = (Element) divElements.item(i);
      if (n.getAttribute("xml:lang").equals(language)) {
        targetDiv = n;
        break;
      }
    }
  } else {
    if (divElements.getLength() > 1) {
      // more than one existing <div> element, no language specified
      logger.warn("More than one <div> element available. Parsing first one...");
    }
    if (divElements.getLength() != 0) {
      targetDiv = (Element) divElements.item(0);
    }
  }
  // check if we found node
  if (targetDiv == null) {
    logger.warn("No suitable <div> element found for language {}", language);
  } else {
    NodeList pElements = targetDiv.getElementsByTagName("p");
    // earliest allowed start time; every caption is compared against this fixed zero time
    Time time;
    try {
      time = new TimeImpl(0, 0, 0, 0);
    } catch (IllegalTimeFormatException e) {
      // not expected for an all-zero time; fail explicitly instead of continuing with a null reference time
      throw new CaptionConverterException("Could not parse captions", e);
    }
    for (int i = 0; i < pElements.getLength(); i++) {
      try {
        Caption caption = parsePElement((Element) pElements.item(i));
        // check time
        if (caption.getStartTime().compareTo(time) < 0 || caption.getStopTime().compareTo(caption.getStartTime()) <= 0) {
          logger.warn("Caption with invalid time encountered. Skipping...");
          continue;
        }
        collection.add(caption);
      } catch (IllegalTimeFormatException e) {
        logger.warn("Caption with invalid time format encountered. Skipping...");
      }
    }
  }
  // return collection
  return collection;
}
Aggregations