use of org.opencastproject.caption.api.Time in project opencast by opencast.
the class SubRipCaptionConverter method importCaption.
/**
 * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
 * <p>
 * The stream is decoded as UTF-8 and split into caption blocks on blank lines (LF or CRLF line endings). Each block
 * must consist of a counter line, a time line of the form {@code start --> end} and at least one line of text.
 * Captions that start before the end of the previously accepted caption, or that do not end after they start, are
 * logged and skipped. The stream is not closed by this method.
 *
 * @param in
 *          stream containing the SubRip document
 * @param language
 *          ignored
 * @return the accepted captions in document order, never <code>null</code>
 * @throws CaptionConverterException
 *           if a block is structurally invalid, a time cannot be parsed, a caption has no text or the stream
 *           cannot be read
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
  List<Caption> collection = new ArrayList<Caption>();
  // The scanner is deliberately not closed: closing it would also close the caller's stream.
  Scanner scanner = new Scanner(in, "UTF-8");
  // Blocks are separated by one or more blank lines. A group is required here instead of a character class,
  // because a single CRLF already consists of two line-break characters and must not end a block.
  scanner.useDelimiter("(?:\r?\n){2,}");
  // end time of the last accepted caption, used to reject captions that go back in time
  Time time;
  try {
    time = new TimeImpl(0, 0, 0, 0);
  } catch (IllegalTimeFormatException e1) {
    throw new CaptionConverterException("Could not create initial caption time", e1);
  }
  while (scanner.hasNext()) {
    // convert line endings to \n
    String captionString = scanner.next().replace("\r\n", "\n");
    // tolerate whitespace-only residue, e.g. after the last caption
    if (captionString.trim().isEmpty()) {
      continue;
    }
    // split to number, time and caption
    String[] captionParts = captionString.split("\n", 3);
    if (captionParts.length != 3) {
      throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
    }
    // get time part
    String[] timePart = captionParts[1].split("-->");
    if (timePart.length < 2) {
      throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
    }
    // parse time
    Time inTime;
    Time outTime;
    try {
      inTime = TimeUtil.importSrt(timePart[0].trim());
      outTime = TimeUtil.importSrt(timePart[1].trim());
    } catch (IllegalTimeFormatException e) {
      // keep the original exception as cause so the stack trace is not lost
      throw new CaptionConverterException(e.getMessage(), e);
    }
    // check for time validity
    if (inTime.compareTo(time) < 0 || outTime.compareTo(inTime) <= 0) {
      logger.warn("Caption with invalid time encountered. Skipping...");
      continue;
    }
    time = outTime;
    // get text captions
    String[] captionLines = createCaptionLines(captionParts[2]);
    if (captionLines == null) {
      throw new CaptionConverterException("Caption does not contain any caption text: " + captionString);
    }
    // create caption object and add to caption collection
    collection.add(new CaptionImpl(inTime, outTime, captionLines));
  }
  // Scanner swallows read errors and simply reports "no more tokens"; surface them instead of
  // returning a silently truncated caption list.
  if (scanner.ioException() != null) {
    throw new CaptionConverterException("Could not read SubRip captions", scanner.ioException());
  }
  return collection;
}
use of org.opencastproject.caption.api.Time in project opencast by opencast.
the class Mpeg7CaptionConverter method importCaption.
/**
 * Imports captions from an MPEG-7 catalog read from the given stream.
 * <p>
 * Every audio content element is walked segment by segment. For each text annotation of a segment, one caption is
 * created from the annotation's free text entries; the caption starts at the segment's media time point and ends
 * at that time point plus the segment's media duration. As soon as an annotation with a different language is
 * found, the remainder of the current audio content is skipped. Captions whose times are rejected by
 * {@link TimeImpl} are logged and left out.
 *
 * @param inputStream
 *          stream containing the MPEG-7 catalog
 * @param language
 *          language the text annotations have to match
 * @return the imported captions, never <code>null</code>
 * @throws CaptionConverterException
 *           declared by the interface
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@SuppressWarnings("unchecked")
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
  List<Caption> captions = new ArrayList<Caption>();
  Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
  Iterator<Audio> audioContentIterator = catalog.audioContent();
  if (audioContentIterator == null) {
    return captions;
  }
  content: while (audioContentIterator.hasNext()) {
    Audio audioContent = audioContentIterator.next();
    TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
    // audio content without a temporal decomposition has nothing to import
    if (audioSegments == null) {
      continue content;
    }
    Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
    if (audioSegmentIterator == null) {
      continue content;
    }
    while (audioSegmentIterator.hasNext()) {
      AudioSegment segment = audioSegmentIterator.next();
      Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
      if (annotationIterator == null) {
        continue content;
      }
      while (annotationIterator.hasNext()) {
        TextAnnotation annotation = annotationIterator.next();
        // null-safe comparison: an annotation without language only matches a null language
        if (annotation.getLanguage() == null ? language != null : !annotation.getLanguage().equals(language)) {
          logger.debug("Skipping audio content '{}' because of language mismatch", audioContent.getId());
          continue content;
        }
        List<String> captionLines = new ArrayList<String>();
        Iterator<FreeTextAnnotation> freeTextAnnotationIterator = annotation.freeTextAnnotations();
        if (freeTextAnnotationIterator == null) {
          continue;
        }
        while (freeTextAnnotationIterator.hasNext()) {
          FreeTextAnnotation freeTextAnnotation = freeTextAnnotationIterator.next();
          captionLines.add(freeTextAnnotation.getText());
        }
        MediaTime segmentTime = segment.getMediaTime();
        MediaTimePoint stp = segmentTime.getMediaTimePoint();
        MediaDuration d = segmentTime.getMediaDuration();
        // millisecond remainders left after removing the hour/minute/second components
        int millisAtStart = (int) (stp.getTimeInMilliseconds() - (((stp.getHour() * 60 + stp.getMinutes()) * 60 + stp.getSeconds()) * 1000));
        int millisOfDuration = (int) (d.getDurationInMilliseconds() - (((d.getHours() * 60 + d.getMinutes()) * 60 + d.getSeconds()) * 1000));
        // End = start + duration, computed on plain millisecond offsets. Going through a Calendar would tie the
        // result to the current date and time zone, wrap hours at 12 (Calendar.HOUR) and makes it easy to
        // overwrite the start milliseconds instead of adding the duration's milliseconds with carry.
        long startOffset = ((stp.getHour() * 60L + stp.getMinutes()) * 60L + stp.getSeconds()) * 1000L + millisAtStart;
        long durationOffset = ((d.getHours() * 60L + d.getMinutes()) * 60L + d.getSeconds()) * 1000L + millisOfDuration;
        long endOffset = startOffset + durationOffset;
        int endHours = (int) (endOffset / 3600000L);
        int endMinutes = (int) (endOffset / 60000L % 60L);
        int endSeconds = (int) (endOffset / 1000L % 60L);
        int endMillis = (int) (endOffset % 1000L);
        try {
          Time startTime = new TimeImpl(stp.getHour(), stp.getMinutes(), stp.getSeconds(), millisAtStart);
          Time endTime = new TimeImpl(endHours, endMinutes, endSeconds, endMillis);
          Caption caption = new CaptionImpl(startTime, endTime, captionLines.toArray(new String[captionLines.size()]));
          captions.add(caption);
        } catch (IllegalTimeFormatException e) {
          logger.warn("Error setting caption time: {}", e.getMessage());
        }
      }
    }
  }
  return captions;
}
use of org.opencastproject.caption.api.Time in project opencast by opencast.
the class IBMWatsonCaptionConverterTest method importCaption.
/**
 * Runs the converter on the test input and verifies the text and the start/stop time of every imported caption.
 * All expected times lie within the first minute, so hours and minutes are always expected to be zero.
 */
private void importCaption() throws Exception {
  // expected caption texts, in document order
  final String[] expectedTexts = {
    "in the earliest days it was a style of programming called imperative programming language ",
    "principal example of that is the language see ",
    "it is rather old because Sarah is fact stems from the late 19 seventies but he still use a great deal ",
    "in fact is the principal programming language that's taught ",
    "in a very popular ",
    "a computer science course called CS 15 see if it is up to become the largest undergraduate course herpetological ",
    "thing office who are extension "
  };
  // expected times per caption: { start seconds, start millis, stop seconds, stop millis }
  final int[][] expectedTimes = {
    { 0, 750, 5, 240 },
    { 7, 460, 10, 150 },
    { 10, 620, 18, 110 },
    { 18, 110, 20, 960 },
    { 21, 490, 22, 580 },
    { 23, 320, 28, 900 },
    { 28, 900, 30, 0 }
  };

  List<Caption> captionList = converter.importCaption(inputStream, "");
  Assert.assertEquals(expectedTexts.length, captionList.size());

  for (int index = 0; index < expectedTexts.length; index++) {
    Caption current = captionList.get(index);

    String[] lines = current.getCaption();
    Assert.assertEquals(1, lines.length);
    Assert.assertEquals(expectedTexts[index], lines[0]);

    Time start = current.getStartTime();
    Assert.assertEquals(0, start.getHours());
    Assert.assertEquals(0, start.getMinutes());
    Assert.assertEquals(expectedTimes[index][0], start.getSeconds());
    Assert.assertEquals(expectedTimes[index][1], start.getMilliseconds());

    Time stop = current.getStopTime();
    Assert.assertEquals(0, stop.getHours());
    Assert.assertEquals(0, stop.getMinutes());
    Assert.assertEquals(expectedTimes[index][2], stop.getSeconds());
    Assert.assertEquals(expectedTimes[index][3], stop.getMilliseconds());
  }
}
use of org.opencastproject.caption.api.Time in project opencast by opencast.
the class DFXPCaptionConverter method parsePElement.
/**
 * Builds a caption from a single {@code <p>} element.
 * <p>
 * The trimmed {@code begin} and {@code end} attributes are parsed as DFXP times and the element's text, as returned
 * by {@code getTextCore}, is split into caption lines at line feeds.
 *
 * @param p
 *          the {@code <p>} element to convert
 * @return the caption described by the element
 * @throws IllegalTimeFormatException
 *           if the begin or end attribute is not a valid DFXP time
 */
private Caption parsePElement(Element p) throws IllegalTimeFormatException {
  String beginValue = p.getAttribute("begin").trim();
  String endValue = p.getAttribute("end").trim();
  // begin is parsed first so that an invalid begin is reported before an invalid end
  Time begin = TimeUtil.importDFXP(beginValue);
  // FIXME add logic for duration if end is absent
  Time end = TimeUtil.importDFXP(endValue);
  // one caption line per line of text inside the element
  String text = getTextCore(p);
  return new CaptionImpl(begin, end, text.split("\n"));
}
use of org.opencastproject.caption.api.Time in project opencast by opencast.
the class DFXPCaptionConverter method importCaption.
/**
 * {@inheritDoc} Parser used for parsing XML document is DOM parser. Language parameter will determine which language
 * is searched for and parsed. If there is no matching language, empty collection is returned. If language parameter
 * is <code>null</code> first language found is parsed.
 * <p>
 * The parser is configured not to resolve external entities or external DTDs. Captions with an unparsable time, a
 * start time before zero or a stop time that is not after the start time are logged and skipped.
 *
 * @param in
 *          stream containing the DFXP document
 * @param language
 *          value of the <code>xml:lang</code> attribute of the {@code <div>} to import, or <code>null</code> to
 *          import the first {@code <div>}
 * @return the imported captions, never <code>null</code>
 * @throws CaptionConverterException
 *           if the parser cannot be set up or the document cannot be read or parsed
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
  // create new collection
  List<Caption> collection = new ArrayList<Caption>();
  Document doc;
  try {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // Caption documents come from outside: never fetch external entities or DTDs (XXE protection).
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    DocumentBuilder builder = factory.newDocumentBuilder();
    doc = builder.parse(in);
    doc.getDocumentElement().normalize();
  } catch (ParserConfigurationException e) {
    throw new CaptionConverterException("Could not parse captions", e);
  } catch (SAXException e) {
    throw new CaptionConverterException("Could not parse captions", e);
  } catch (IOException e) {
    throw new CaptionConverterException("Could not parse captions", e);
  }
  // get all <div> elements since they contain information about language
  NodeList divElements = doc.getElementsByTagName("div");
  Element targetDiv = null;
  if (language != null) {
    // find first <div> element with matching language
    for (int i = 0; i < divElements.getLength(); i++) {
      Element n = (Element) divElements.item(i);
      if (n.getAttribute("xml:lang").equals(language)) {
        targetDiv = n;
        break;
      }
    }
  } else {
    if (divElements.getLength() > 1) {
      // more than one existing <div> element, no language specified
      logger.warn("More than one <div> element available. Parsing first one...");
    }
    if (divElements.getLength() != 0) {
      targetDiv = (Element) divElements.item(0);
    }
  }
  // check if we found node
  if (targetDiv == null) {
    logger.warn("No suitable <div> element found for language {}", language);
    return collection;
  }
  NodeList pElements = targetDiv.getElementsByTagName("p");
  // Earliest start time a caption may have. It is never advanced inside the loop, so overlapping or
  // out-of-order captions are accepted.
  Time time;
  try {
    time = new TimeImpl(0, 0, 0, 0);
  } catch (IllegalTimeFormatException e1) {
    // fail loudly instead of running into a NullPointerException in compareTo below
    throw new CaptionConverterException("Could not create initial caption time", e1);
  }
  for (int i = 0; i < pElements.getLength(); i++) {
    try {
      Caption caption = parsePElement((Element) pElements.item(i));
      // check time
      if (caption.getStartTime().compareTo(time) < 0 || caption.getStopTime().compareTo(caption.getStartTime()) <= 0) {
        logger.warn("Caption with invalid time encountered. Skipping...");
        continue;
      }
      collection.add(caption);
    } catch (IllegalTimeFormatException e) {
      logger.warn("Caption with invalid time format encountered. Skipping...");
    }
  }
  // return collection
  return collection;
}
Aggregations