use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.
the class IBMWatsonCaptionConverter method importCaption.
/**
 * Imports captions from an IBM Watson Speech-To-Text JSON result.
 * <p>
 * The job id (key {@code id}) is only used for log messages. Service warnings (key {@code warnings}) are logged.
 * Each final result's first alternative is split into caption lines of roughly {@code LINE_SIZE} characters (plus
 * one word); start and end times are taken from the per-word {@code timestamps} entries.
 *
 * @param inputStream
 *          stream containing the JSON transcription result, read as UTF-8
 * @param language
 *          not used by this implementation
 * @return the list of captions that could be built; results without usable timing data are skipped with a warning
 * @throws CaptionConverterException
 *           if the JSON cannot be parsed or does not have the expected structure
 */
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
  List<Caption> captionList = new ArrayList<Caption>();
  JSONParser jsonParser = new JSONParser();
  try {
    // Decode explicitly as UTF-8 rather than with the platform default charset
    JSONObject resultsObj = (JSONObject) jsonParser.parse(
            new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8));
    String jobId = "Unknown";
    if (resultsObj.get("id") != null) {
      jobId = (String) resultsObj.get("id");
    }
    // Log warnings
    JSONArray warningsArray = (JSONArray) resultsObj.get("warnings");
    if (warningsArray != null) {
      for (Object w : warningsArray) {
        logger.warn("Warning from Speech-To-Text service: {}", w);
      }
    }
    JSONArray outerResultsArray = (JSONArray) resultsObj.get("results");
    JSONObject obj = (JSONObject) outerResultsArray.get(0);
    JSONArray resultsArray = (JSONArray) obj.get("results");
    resultsLoop: for (int i = 0; i < resultsArray.size(); i++) {
      JSONObject resultElement = (JSONObject) resultsArray.get(i);
      // Ignore results that are not final (a missing flag is treated as "not final")
      if (!Boolean.TRUE.equals(resultElement.get("final"))) {
        continue;
      }
      JSONArray alternativesArray = (JSONArray) resultElement.get("alternatives");
      if (alternativesArray == null || alternativesArray.size() == 0) {
        continue;
      }
      // Only the first alternative is used
      JSONObject alternativeElement = (JSONObject) alternativesArray.get(0);
      String transcript = (String) alternativeElement.get("transcript");
      if (transcript == null) {
        continue;
      }
      JSONArray timestampsArray = (JSONArray) alternativeElement.get("timestamps");
      if (timestampsArray == null || timestampsArray.size() == 0) {
        logger.warn("Could not build caption object for job {}, result index {}: timestamp data not found", jobId, i);
        continue;
      }
      // Force a maximum line size of LINE_SIZE + one word
      String[] words = transcript.split("\\s+");
      StringBuilder line = new StringBuilder();
      int indexFirst = -1;
      int indexLast = -1;
      for (int j = 0; j < words.length; j++) {
        if (indexFirst == -1) {
          indexFirst = j;
        }
        line.append(words[j]);
        line.append(" ");
        if (line.length() >= LINE_SIZE || j == words.length - 1) {
          indexLast = j;
          // Create a caption
          double start = -1;
          double end = -1;
          // Word index j is used as index into the timestamps array; guard against fewer timestamps than words
          if (indexLast < timestampsArray.size()) {
            // Get start time of first element (timestamp entries are expected to have 3 elements)
            JSONArray wordTsArray = (JSONArray) timestampsArray.get(indexFirst);
            if (wordTsArray.size() == 3) {
              start = ((Number) wordTsArray.get(1)).doubleValue();
            }
            // Get end time of last element
            wordTsArray = (JSONArray) timestampsArray.get(indexLast);
            if (wordTsArray.size() == 3) {
              end = ((Number) wordTsArray.get(2)).doubleValue();
            }
          }
          if (start == -1 || end == -1) {
            logger.warn("Could not build caption object for job {}, result index {}: start/end times not found", jobId, i);
            // Give up on the rest of this result; captions already built from it are kept
            continue resultsLoop;
          }
          String[] captionLines = new String[1];
          captionLines[0] = line.toString().replace("%HESITATION", "...");
          // Times are multiplied by 1000 before being handed to buildTime
          captionList.add(new CaptionImpl(buildTime((long) (start * 1000)), buildTime((long) (end * 1000)), captionLines));
          indexFirst = -1;
          indexLast = -1;
          line.setLength(0);
        }
      }
    }
  } catch (Exception e) {
    logger.warn("Error when parsing IBM Watson transcriptions result: {}", e.getMessage());
    throw new CaptionConverterException(e);
  }
  return captionList;
}
use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.
the class SubRipCaptionConverter method importCaption.
/**
 * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
 * <p>
 * Captions are expected to be separated by at least one blank line (LF or CRLF line endings). Each caption must
 * consist of a number line, a time line of the form {@code start --> end} and the caption text. Captions whose
 * times are not valid (start before the previous caption's end, or end not after start) are skipped with a warning.
 *
 * @throws CaptionConverterException
 *           if a caption block is malformed, a time cannot be parsed or a caption has no text
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
  List<Caption> collection = new ArrayList<Caption>();
  // initialize scanner object
  // NOTE(review): the scanner is not closed here because that would also close 'in';
  // presumably the caller owns the stream - confirm.
  Scanner scanner = new Scanner(in, "UTF-8");
  // Captions are separated by blank lines, i.e. two or more consecutive line breaks (LF or CRLF).
  // The former pattern "[\n(\r\n)]{2}" was a character class that also matched a single CRLF or "()".
  scanner.useDelimiter("(?:\r?\n){2,}");
  // create initial time
  Time time;
  try {
    time = new TimeImpl(0, 0, 0, 0);
  } catch (IllegalTimeFormatException e1) {
    throw new CaptionConverterException("Could not create initial caption time", e1);
  }
  while (scanner.hasNext()) {
    String captionString = scanner.next();
    // convert line endings to \n
    captionString = captionString.replace("\r\n", "\n");
    // split to number, time and caption
    String[] captionParts = captionString.split("\n", 3);
    // check for table length
    if (captionParts.length != 3) {
      throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
    }
    // get time part
    String[] timePart = captionParts[1].split("-->");
    if (timePart.length < 2) {
      throw new CaptionConverterException("Invalid time line for SubRip format: " + captionParts[1]);
    }
    // parse time
    Time inTime;
    Time outTime;
    try {
      inTime = TimeUtil.importSrt(timePart[0].trim());
      outTime = TimeUtil.importSrt(timePart[1].trim());
    } catch (IllegalTimeFormatException e) {
      throw new CaptionConverterException(e.getMessage(), e);
    }
    // check for time validity
    if (inTime.compareTo(time) < 0 || outTime.compareTo(inTime) <= 0) {
      logger.warn("Caption with invalid time encountered. Skipping...");
      continue;
    }
    time = outTime;
    // get text captions
    String[] captionLines = createCaptionLines(captionParts[2]);
    if (captionLines == null) {
      throw new CaptionConverterException("Caption does not contain any caption text: " + captionString);
    }
    // create caption object and add to caption collection
    Caption caption = new CaptionImpl(inTime, outTime, captionLines);
    collection.add(caption);
  }
  return collection;
}
use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.
the class CaptionServiceImpl method getLanguageList.
/**
 * {@inheritDoc}
 * <p>
 * Fetches the element's file from the workspace and asks the converter registered for {@code format} for the
 * languages it contains. Never returns {@code null}; an empty array is returned if the converter reports none.
 *
 * @throws UnsupportedCaptionFormatException
 *           if {@code format} is {@code null} or no converter is available for it
 * @throws CaptionConverterException
 *           if the element's file cannot be found or accessed
 */
@Override
public String[] getLanguageList(MediaPackageElement input, String format) throws UnsupportedCaptionFormatException, CaptionConverterException {
  if (format == null) {
    throw new UnsupportedCaptionFormatException("<null>");
  }
  CaptionConverter converter = getCaptionConverter(format);
  if (converter == null) {
    throw new UnsupportedCaptionFormatException(format);
  }
  File captions;
  try {
    captions = workspace.get(input.getURI());
  } catch (NotFoundException e) {
    throw new CaptionConverterException("Requested media package element " + input + " could not be found.", e);
  } catch (IOException e) {
    throw new CaptionConverterException("Requested media package element " + input + " could not be accessed.", e);
  }
  FileInputStream stream = null;
  String[] languageList;
  try {
    stream = new FileInputStream(captions);
    languageList = converter.getLanguageList(stream);
  } catch (FileNotFoundException e) {
    throw new CaptionConverterException("Requested file " + captions + " could not be found.", e);
  } finally {
    IoSupport.closeQuietly(stream);
  }
  return languageList == null ? new String[0] : languageList;
}
use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.
the class CaptionServiceImpl method convert.
/**
 * Converts the captions and returns them in a new catalog.
 * <p>
 * The input file is fetched from the workspace, imported with the converter for {@code inputFormat}, exported with
 * the converter for {@code outputFormat}, and wrapped into a new element with flavor
 * {@code captions/<outputFormat>[+<language>]}. If a language is given, the tag {@code lang:<language>} is added.
 *
 * @param job
 *          the job whose id is used to name the exported file
 * @param input
 *          the element holding the captions to convert
 * @param inputFormat
 *          format of the input captions, must not be blank
 * @param outputFormat
 *          format to convert to, must not be blank
 * @param language
 *          caption language, may be {@code null}
 * @return the converted catalog
 * @throws UnsupportedCaptionFormatException
 *           if the input or output format is not supported
 * @throws CaptionConverterException
 *           if the conversion fails; any other exception (including invalid arguments) is wrapped in this type
 */
protected MediaPackageElement convert(Job job, MediaPackageElement input, String inputFormat, String outputFormat, String language) throws UnsupportedCaptionFormatException, CaptionConverterException, MediaPackageException {
  try {
    // check parameters
    if (input == null) {
      throw new IllegalArgumentException("Input element can't be null");
    }
    if (StringUtils.isBlank(inputFormat)) {
      throw new IllegalArgumentException("Input format is null");
    }
    if (StringUtils.isBlank(outputFormat)) {
      throw new IllegalArgumentException("Output format is null");
    }
    // get input file
    File captionsFile;
    try {
      captionsFile = workspace.get(input.getURI());
    } catch (NotFoundException e) {
      throw new CaptionConverterException("Requested media package element " + input + " could not be found.", e);
    } catch (IOException e) {
      throw new CaptionConverterException("Requested media package element " + input + " could not be accessed.", e);
    }
    logger.debug("Attempting to convert from {} to {}...", inputFormat, outputFormat);
    List<Caption> collection;
    try {
      collection = importCaptions(captionsFile, inputFormat, language);
      logger.debug("Parsing to collection succeeded.");
    } catch (UnsupportedCaptionFormatException e) {
      // report the format name that was requested for the input
      throw new UnsupportedCaptionFormatException(inputFormat);
    }
    URI exported;
    try {
      // the exported file is named after the job id and keeps the input file's extension
      exported = exportCaptions(collection, job.getId() + "." + FilenameUtils.getExtension(captionsFile.getAbsolutePath()), outputFormat, language);
      logger.debug("Exporting captions succeeded.");
    } catch (UnsupportedCaptionFormatException e) {
      // report the format name that was requested for the output
      throw new UnsupportedCaptionFormatException(outputFormat);
    } catch (IOException e) {
      throw new CaptionConverterException("Could not export caption collection.", e);
    }
    // create catalog and set properties
    CaptionConverter converter = getCaptionConverter(outputFormat);
    MediaPackageElementBuilder elementBuilder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder();
    MediaPackageElement mpe = elementBuilder.elementFromURI(exported, converter.getElementType(), new MediaPackageElementFlavor("captions", outputFormat + (language == null ? "" : "+" + language)));
    if (mpe.getMimeType() == null) {
      // fall back to the mime type derived from the exported file's path
      String[] mimetype = FileTypeMap.getDefaultFileTypeMap().getContentType(exported.getPath()).split("/");
      mpe.setMimeType(mimeType(mimetype[0], mimetype[1]));
    }
    if (language != null) {
      mpe.addTag("lang:" + language);
    }
    return mpe;
  } catch (Exception e) {
    logger.warn("Error converting captions in {}", input, e);
    if (e instanceof CaptionConverterException) {
      throw (CaptionConverterException) e;
    } else if (e instanceof UnsupportedCaptionFormatException) {
      throw (UnsupportedCaptionFormatException) e;
    } else {
      throw new CaptionConverterException(e);
    }
  }
}
use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.
the class CaptionServiceRemoteImpl method convert.
/**
 * Posts a conversion request to the remote caption service endpoint {@code /convert} and returns the job parsed
 * from the response body.
 *
 * @throws CaptionConverterException
 *           if the request cannot be built, no response is received, or the response cannot be processed
 * @see org.opencastproject.caption.api.CaptionService#convert(MediaPackageElement, String, String, String)
 */
@Override
public Job convert(MediaPackageElement input, String inputFormat, String outputFormat, String language) throws UnsupportedCaptionFormatException, CaptionConverterException, MediaPackageException {
  HttpPost request = new HttpPost("/convert");

  // Phase 1: assemble the form-encoded request body
  try {
    List<BasicNameValuePair> formFields = new ArrayList<BasicNameValuePair>();
    String captionsXml = MediaPackageElementParser.getAsXml(input);
    formFields.add(new BasicNameValuePair("captions", captionsXml));
    formFields.add(new BasicNameValuePair("input", inputFormat));
    formFields.add(new BasicNameValuePair("output", outputFormat));
    // the language field is optional and only sent when it carries a value
    if (!StringUtils.isBlank(language)) {
      formFields.add(new BasicNameValuePair("language", language));
    }
    UrlEncodedFormEntity body = new UrlEncodedFormEntity(formFields);
    request.setEntity(body);
  } catch (Exception e) {
    throw new CaptionConverterException(e);
  }

  // Phase 2: send the request and parse the job out of the response
  HttpResponse response = null;
  try {
    response = getResponse(request);
    if (response != null) {
      Job remoteJob = JobParser.parseJob(EntityUtils.toString(response.getEntity()));
      logger.info("Converting job {} started on a remote caption service", remoteJob.getId());
      return remoteJob;
    }
  } catch (Exception e) {
    throw new CaptionConverterException("Unable to convert catalog " + input + " using a remote caption service", e);
  } finally {
    closeConnection(response);
  }

  // Reached only when no response was obtained
  throw new CaptionConverterException("Unable to convert catalog " + input + " using a remote caption service");
}
Aggregations