use of org.opencastproject.caption.impl.CaptionImpl in project opencast by opencast.
the class IBMWatsonCaptionConverter method importCaption.
/**
 * Parses an IBM Watson Speech-To-Text JSON result and converts it into a list of captions.
 *
 * Only results flagged as final are used, and only the first alternative of each result. The transcript is cut
 * into lines of roughly LINE_SIZE characters (a line is closed after the word that reaches the limit); the start
 * and end time of each line are taken from the per-word timestamps of its first and last word.
 *
 * @param inputStream
 *          stream containing the JSON transcription result, read as UTF-8
 * @param language
 *          not used by this method
 * @return the captions that could be built, possibly empty
 * @throws CaptionConverterException
 *           if the input cannot be parsed or does not have the expected structure
 */
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
  List<Caption> captionList = new ArrayList<Caption>();
  JSONParser jsonParser = new JSONParser();
  try {
    // Decode explicitly as UTF-8 instead of relying on the platform default charset
    JSONObject resultsObj = (JSONObject) jsonParser.parse(new InputStreamReader(inputStream, "UTF-8"));
    String jobId = "Unknown";
    if (resultsObj.get("id") != null) {
      jobId = (String) resultsObj.get("id");
    }
    // Log warnings
    JSONArray warningsArray = (JSONArray) resultsObj.get("warnings");
    if (warningsArray != null) {
      for (Object w : warningsArray) {
        // Pass the value as an argument so that the {} placeholder is actually substituted
        logger.warn("Warning from Speech-To-Text service: {}", w);
      }
    }
    JSONArray outerResultsArray = (JSONArray) resultsObj.get("results");
    JSONObject obj = (JSONObject) outerResultsArray.get(0);
    JSONArray resultsArray = (JSONArray) obj.get("results");
    resultsLoop: for (int i = 0; i < resultsArray.size(); i++) {
      JSONObject resultElement = (JSONObject) resultsArray.get(i);
      // Ignore results that are not final
      if (!(Boolean) resultElement.get("final")) {
        continue;
      }
      JSONArray alternativesArray = (JSONArray) resultElement.get("alternatives");
      if (alternativesArray != null && alternativesArray.size() > 0) {
        JSONObject alternativeElement = (JSONObject) alternativesArray.get(0);
        String transcript = (String) alternativeElement.get("transcript");
        if (transcript != null) {
          JSONArray timestampsArray = (JSONArray) alternativeElement.get("timestamps");
          if (timestampsArray == null || timestampsArray.size() == 0) {
            logger.warn("Could not build caption object for job {}, result index {}: timestamp data not found", jobId, i);
            continue;
          }
          // Force a maximum line size of LINE_SIZE + one word
          String[] words = transcript.split("\\s+");
          StringBuilder line = new StringBuilder();
          int indexFirst = -1;
          int indexLast = -1;
          for (int j = 0; j < words.length; j++) {
            if (indexFirst == -1) {
              indexFirst = j;
            }
            line.append(words[j]);
            line.append(" ");
            if (line.length() >= LINE_SIZE || j == words.length - 1) {
              indexLast = j;
              // Create a caption
              double start = -1;
              double end = -1;
              if (indexLast < timestampsArray.size()) {
                // Each timestamp entry is expected to have three elements; index 1 is used as start time
                JSONArray wordTsArray = (JSONArray) timestampsArray.get(indexFirst);
                if (wordTsArray.size() == 3) {
                  start = ((Number) wordTsArray.get(1)).doubleValue();
                }
                // ... and index 2 of the last word's entry is used as end time
                wordTsArray = (JSONArray) timestampsArray.get(indexLast);
                if (wordTsArray.size() == 3) {
                  end = ((Number) wordTsArray.get(2)).doubleValue();
                }
              }
              if (start == -1 || end == -1) {
                logger.warn("Could not build caption object for job {}, result index {}: start/end times not found", jobId, i);
                // Give up on the remainder of this result and move on to the next one
                continue resultsLoop;
              }
              String[] captionLines = new String[1];
              captionLines[0] = line.toString().replace("%HESITATION", "...");
              // Timestamps are multiplied by 1000 before being handed to buildTime
              captionList.add(new CaptionImpl(buildTime((long) (start * 1000)), buildTime((long) (end * 1000)), captionLines));
              indexFirst = -1;
              indexLast = -1;
              line.setLength(0);
            }
          }
        }
      }
    }
  } catch (Exception e) {
    logger.warn("Error when parsing IBM Watson transcriptions result: {}", e.getMessage());
    throw new CaptionConverterException(e);
  }
  return captionList;
}
use of org.opencastproject.caption.impl.CaptionImpl in project opencast by opencast.
the class SubRipCaptionConverter method importCaption.
/**
 * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
 *
 * The input is read as UTF-8 and split into caption blocks at blank lines (both LF and CRLF line endings are
 * accepted). Each block must consist of a number line, a time line of the form {@code start --> end} and the
 * caption text. Captions whose start lies before the end of the previously accepted caption, or whose end is not
 * after their start, are skipped with a warning.
 *
 * @throws CaptionConverterException
 *           if a block does not have the expected three parts, its time line is malformed or it has no text
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
  List<Caption> collection = new ArrayList<Caption>();
  // initialize scanner object; it is deliberately left open because closing a Scanner closes the underlying stream
  Scanner scanner = new Scanner(in, "UTF-8");
  // Captions are separated by one or more blank lines. A real group is required here: the former character class
  // "[\n(\r\n)]{2}" also matched a single CRLF line ending as well as literal parentheses in the text.
  scanner.useDelimiter("(?:\r?\n){2,}");
  // create initial time
  Time time;
  try {
    time = new TimeImpl(0, 0, 0, 0);
  } catch (IllegalTimeFormatException e1) {
    // Continuing without an initial time would only fail later with a NullPointerException
    throw new IllegalStateException("Unable to create initial caption time", e1);
  }
  while (scanner.hasNext()) {
    String captionString = scanner.next();
    // convert line endings to \n
    captionString = captionString.replace("\r\n", "\n");
    // split to number, time and caption
    String[] captionParts = captionString.split("\n", 3);
    // check for table length
    if (captionParts.length != 3) {
      throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
    }
    // get time part
    String[] timePart = captionParts[1].split("-->");
    if (timePart.length < 2) {
      // Report a malformed time line as a conversion error instead of an ArrayIndexOutOfBoundsException
      throw new CaptionConverterException("Invalid time line for SubRip format: " + captionParts[1]);
    }
    // parse time
    Time inTime;
    Time outTime;
    try {
      inTime = TimeUtil.importSrt(timePart[0].trim());
      outTime = TimeUtil.importSrt(timePart[1].trim());
    } catch (IllegalTimeFormatException e) {
      throw new CaptionConverterException(e.getMessage());
    }
    // check for time validity
    if (inTime.compareTo(time) < 0 || outTime.compareTo(inTime) <= 0) {
      logger.warn("Caption with invalid time encountered. Skipping...");
      continue;
    }
    // remember the end of the last accepted caption for the next validity check
    time = outTime;
    // get text captions
    String[] captionLines = createCaptionLines(captionParts[2]);
    if (captionLines == null) {
      throw new CaptionConverterException("Caption does not contain any caption text: " + captionString);
    }
    // create caption object and add to caption collection
    Caption caption = new CaptionImpl(inTime, outTime, captionLines);
    collection.add(caption);
  }
  return collection;
}
use of org.opencastproject.caption.impl.CaptionImpl in project opencast by opencast.
the class Mpeg7CaptionConverter method importCaption.
/**
 * Imports captions from an MPEG-7 catalog.
 *
 * Every text annotation of every audio segment yields one caption whose lines are the annotation's free text
 * entries. The caption starts at the segment's media time point and ends at that time point plus the segment's
 * media duration. As soon as an annotation with a language different from {@code language} is found, the rest of
 * the enclosing audio content is skipped.
 *
 * @param inputStream
 *          stream containing the MPEG-7 catalog
 * @param language
 *          language the text annotations must have
 * @return the imported captions, possibly empty
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@SuppressWarnings("unchecked")
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
  List<Caption> captions = new ArrayList<Caption>();
  Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
  Iterator<Audio> audioContentIterator = catalog.audioContent();
  if (audioContentIterator == null)
    return captions;
  content: while (audioContentIterator.hasNext()) {
    Audio audioContent = audioContentIterator.next();
    TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
    // Audio content without a temporal decomposition has no segments to import
    if (audioSegments == null)
      continue content;
    Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
    if (audioSegmentIterator == null)
      continue content;
    while (audioSegmentIterator.hasNext()) {
      AudioSegment segment = audioSegmentIterator.next();
      Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
      if (annotationIterator == null)
        continue content;
      while (annotationIterator.hasNext()) {
        TextAnnotation annotation = annotationIterator.next();
        if (!annotation.getLanguage().equals(language)) {
          logger.debug("Skipping audio content '{}' because of language mismatch", audioContent.getId());
          continue content;
        }
        List<String> captionLines = new ArrayList<String>();
        Iterator<FreeTextAnnotation> freeTextAnnotationIterator = annotation.freeTextAnnotations();
        if (freeTextAnnotationIterator == null)
          continue;
        while (freeTextAnnotationIterator.hasNext()) {
          FreeTextAnnotation freeTextAnnotation = freeTextAnnotationIterator.next();
          captionLines.add(freeTextAnnotation.getText());
        }
        MediaTime segmentTime = segment.getMediaTime();
        MediaTimePoint stp = segmentTime.getMediaTimePoint();
        MediaDuration d = segmentTime.getMediaDuration();
        // Millisecond remainder of the start time and of the duration, i.e. the total minus the h/m/s part
        int millisAtStart = (int) (stp.getTimeInMilliseconds() - (((stp.getHour() * 60 + stp.getMinutes()) * 60 + stp.getSeconds()) * 1000));
        int millisOfDuration = (int) (d.getDurationInMilliseconds() - (((d.getHours() * 60 + d.getMinutes()) * 60 + d.getSeconds()) * 1000));
        // Compute end = start + duration in plain milliseconds. The former Calendar based computation overwrote
        // the millisecond field with the duration's milliseconds (losing the start's fraction and any carry) and
        // read the 12-hour HOUR field, which wraps around for end times of twelve hours and more.
        long startMillis = ((stp.getHour() * 60L + stp.getMinutes()) * 60L + stp.getSeconds()) * 1000L + millisAtStart;
        long durationMillis = ((d.getHours() * 60L + d.getMinutes()) * 60L + d.getSeconds()) * 1000L + millisOfDuration;
        long endMillis = startMillis + durationMillis;
        int endHour = (int) (endMillis / 3600000L);
        int endMinute = (int) ((endMillis / 60000L) % 60L);
        int endSecond = (int) ((endMillis / 1000L) % 60L);
        int endMillisecond = (int) (endMillis % 1000L);
        try {
          Time startTime = new TimeImpl(stp.getHour(), stp.getMinutes(), stp.getSeconds(), millisAtStart);
          Time endTime = new TimeImpl(endHour, endMinute, endSecond, endMillisecond);
          Caption caption = new CaptionImpl(startTime, endTime, captionLines.toArray(new String[captionLines.size()]));
          captions.add(caption);
        } catch (IllegalTimeFormatException e) {
          // A caption whose time cannot be represented is dropped; the import continues
          logger.warn("Error setting caption time: {}", e.getMessage());
        }
      }
    }
  }
  return captions;
}
use of org.opencastproject.caption.impl.CaptionImpl in project opencast by opencast.
the class DFXPCaptionConverter method parsePElement.
/**
 * Builds a single caption from a DFXP {@code <p>} element.
 *
 * The caption's start and end are read from the element's {@code begin} and {@code end} attributes; its lines are
 * the text returned by {@code getTextCore} for the element, split at line feeds.
 *
 * @param p
 *          the {@code <p>} element to be parsed
 * @return new {@link Caption} object
 * @throws IllegalTimeFormatException
 *           if time format does not match with expected format for DFXP
 */
private Caption parsePElement(Element p) throws IllegalTimeFormatException {
  final String beginAttribute = p.getAttribute("begin").trim();
  final Time captionStart = TimeUtil.importDFXP(beginAttribute);

  // FIXME add logic for duration if end is absent
  final String endAttribute = p.getAttribute("end").trim();
  final Time captionEnd = TimeUtil.importDFXP(endAttribute);

  // one caption line per line of text inside the element
  final String elementText = getTextCore(p);
  final String[] lines = elementText.split("\n");

  return new CaptionImpl(captionStart, captionEnd, lines);
}
use of org.opencastproject.caption.impl.CaptionImpl in project opencast by opencast.
the class WebVttCaptionConveterTest method testExport.
/**
 * Exports two single-line captions and compares the UTF-8 output with the expected output.
 */
@Test
public void testExport() throws Exception {
  List<Caption> captionList = new ArrayList<Caption>();

  String[] captionLines1 = { CAPTION_LINE + "1." };
  captionList.add(new CaptionImpl(new TimeImpl(0, 0, 49, 520), new TimeImpl(0, 0, 52, 961), captionLines1));

  String[] captionLines2 = { CAPTION_LINE + "2." };
  captionList.add(new CaptionImpl(new TimeImpl(0, 0, 54, 123), new TimeImpl(0, 0, 56, 456), captionLines2));

  format.exportCaption(outputStream, captionList, null);

  // assertEquals reports the expected and the actual text on failure, unlike assertTrue(a.equals(b))
  Assert.assertEquals(expectedOutput, outputStream.toString("UTF-8"));
}
Aggregations