Search in sources :

Example 1 with CaptionConverterException

use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.

the class IBMWatsonCaptionConverter method importCaption.

/**
 * Builds a caption list from an IBM Watson Speech-To-Text JSON result read from the given stream.
 * <p>
 * Only results flagged as final are used, and for each of them only the first alternative. The transcript is
 * broken into lines of roughly {@code LINE_SIZE} characters (a line is closed by the first word that makes it
 * reach {@code LINE_SIZE}); each line becomes one caption whose start is the start time of its first word and
 * whose end is the end time of its last word. A result whose timestamps are missing or incomplete is skipped
 * with a warning.
 *
 * @param inputStream
 *          stream providing the JSON document; it is not closed by this method
 * @param language
 *          not used by this implementation
 * @return the captions that could be built, possibly empty
 * @throws CaptionConverterException
 *           if the document cannot be read or parsed, or does not have the expected structure
 */
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
    List<Caption> captionList = new ArrayList<Caption>();
    JSONParser jsonParser = new JSONParser();
    try {
        // Decode explicitly as UTF-8 instead of relying on the platform default charset
        JSONObject resultsObj = (JSONObject) jsonParser.parse(new InputStreamReader(inputStream, "UTF-8"));
        String jobId = "Unknown";
        if (resultsObj.get("id") != null) {
            jobId = (String) resultsObj.get("id");
        }
        // Log warnings
        JSONArray warningsArray = (JSONArray) resultsObj.get("warnings");
        if (warningsArray != null) {
            for (Object w : warningsArray) {
                // Pass the warning as a parameter so that the {} placeholder is actually substituted
                logger.warn("Warning from Speech-To-Text service: {}", w);
            }
        }
        JSONArray outerResultsArray = (JSONArray) resultsObj.get("results");
        JSONObject obj = (JSONObject) outerResultsArray.get(0);
        JSONArray resultsArray = (JSONArray) obj.get("results");
        resultsLoop: for (int i = 0; i < resultsArray.size(); i++) {
            JSONObject resultElement = (JSONObject) resultsArray.get(i);
            // Ignore results that are not final
            if (!(Boolean) resultElement.get("final")) {
                continue;
            }
            JSONArray alternativesArray = (JSONArray) resultElement.get("alternatives");
            if (alternativesArray == null || alternativesArray.size() == 0) {
                continue;
            }
            // Only the first alternative is considered
            JSONObject alternativeElement = (JSONObject) alternativesArray.get(0);
            String transcript = (String) alternativeElement.get("transcript");
            if (transcript == null) {
                continue;
            }
            JSONArray timestampsArray = (JSONArray) alternativeElement.get("timestamps");
            if (timestampsArray == null || timestampsArray.size() == 0) {
                logger.warn("Could not build caption object for job {}, result index {}: timestamp data not found", jobId, i);
                continue;
            }
            // Force a maximum line size of LINE_SIZE + one word
            String[] words = transcript.split("\\s+");
            StringBuilder line = new StringBuilder();
            int indexFirst = -1;
            int indexLast = -1;
            for (int j = 0; j < words.length; j++) {
                if (indexFirst == -1) {
                    indexFirst = j;
                }
                line.append(words[j]);
                line.append(" ");
                if (line.length() >= LINE_SIZE || j == words.length - 1) {
                    indexLast = j;
                    // Create a caption
                    double start = -1;
                    double end = -1;
                    // Word indexes are used as timestamp indexes, so both must be within the timestamp array
                    if (indexLast < timestampsArray.size()) {
                        // Get start time of first element; entries are read as [<ignored>, start, end]
                        JSONArray wordTsArray = (JSONArray) timestampsArray.get(indexFirst);
                        if (wordTsArray.size() == 3) {
                            start = ((Number) wordTsArray.get(1)).doubleValue();
                        }
                        // Get end time of last element
                        wordTsArray = (JSONArray) timestampsArray.get(indexLast);
                        if (wordTsArray.size() == 3) {
                            end = ((Number) wordTsArray.get(2)).doubleValue();
                        }
                    }
                    if (start == -1 || end == -1) {
                        logger.warn("Could not build caption object for job {}, result index {}: start/end times not found", jobId, i);
                        // Give up on the remainder of this result and move on to the next one
                        continue resultsLoop;
                    }
                    String[] captionLines = new String[1];
                    captionLines[0] = line.toString().replace("%HESITATION", "...");
                    // Times are multiplied by 1000 before being handed to buildTime
                    captionList.add(new CaptionImpl(buildTime((long) (start * 1000)), buildTime((long) (end * 1000)), captionLines));
                    indexFirst = -1;
                    indexLast = -1;
                    line.setLength(0);
                }
            }
        }
    } catch (Exception e) {
        // Pass the message as a parameter so that the {} placeholder is actually substituted
        logger.warn("Error when parsing IBM Watson transcriptions result: {}", e.getMessage());
        throw new CaptionConverterException(e);
    }
    return captionList;
}
Also used : CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) JSONArray(org.json.simple.JSONArray) Caption(org.opencastproject.caption.api.Caption) IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) IOException(java.io.IOException) CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) JSONObject(org.json.simple.JSONObject) CaptionImpl(org.opencastproject.caption.impl.CaptionImpl) JSONParser(org.json.simple.parser.JSONParser) JSONObject(org.json.simple.JSONObject)

Example 2 with CaptionConverterException

use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.

the class SubRipCaptionConverter method importCaption.

/**
 * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
 * <p>
 * The input is read as UTF-8 and split into caption blocks at blank lines. Every block must consist of a
 * number line, a time line of the form {@code <in> --> <out>} and at least one text line. Captions that start
 * before the end of the previously accepted caption, or that do not end after they start, are skipped with a
 * warning.
 *
 * @throws CaptionConverterException
 *           if a caption block is malformed, a time cannot be parsed or a caption has no text
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
    List<Caption> collection = new ArrayList<Caption>();
    // initialize scanner object; it is deliberately not closed here because closing it would also close the
    // caller's stream
    Scanner scanner = new Scanner(in, "UTF-8");
    // Caption blocks are separated by a blank line, i.e. two consecutive line breaks (LF or CRLF). A character
    // class must not be used here: it would also treat '(' and ')' as separators and would split on a single CRLF.
    scanner.useDelimiter("(?:\r?\n){2}");
    // create initial time
    Time time;
    try {
        time = new TimeImpl(0, 0, 0, 0);
    } catch (IllegalTimeFormatException e) {
        // Fail loudly instead of continuing with a null reference time
        throw new CaptionConverterException("Unable to create initial caption time", e);
    }
    while (scanner.hasNext()) {
        String captionString = scanner.next();
        // convert line endings to \n
        captionString = captionString.replace("\r\n", "\n");
        // split to number, time and caption
        String[] captionParts = captionString.split("\n", 3);
        // check for table length
        if (captionParts.length != 3) {
            throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
        }
        // get time part
        String[] timePart = captionParts[1].split("-->");
        if (timePart.length < 2) {
            // Time line without "-->" separator: report it as a format error instead of failing on the array access
            throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
        }
        // parse time
        Time inTime;
        Time outTime;
        try {
            inTime = TimeUtil.importSrt(timePart[0].trim());
            outTime = TimeUtil.importSrt(timePart[1].trim());
        } catch (IllegalTimeFormatException e) {
            // keep the original exception as cause
            throw new CaptionConverterException(e.getMessage(), e);
        }
        // check for time validity
        if (inTime.compareTo(time) < 0 || outTime.compareTo(inTime) <= 0) {
            logger.warn("Caption with invalid time encountered. Skipping...");
            continue;
        }
        time = outTime;
        // get text captions
        String[] captionLines = createCaptionLines(captionParts[2]);
        if (captionLines == null) {
            throw new CaptionConverterException("Caption does not contain any caption text: " + captionString);
        }
        // create caption object and add to caption collection
        Caption caption = new CaptionImpl(inTime, outTime, captionLines);
        collection.add(caption);
    }
    return collection;
}
Also used : Scanner(java.util.Scanner) CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) CaptionImpl(org.opencastproject.caption.impl.CaptionImpl) ArrayList(java.util.ArrayList) Time(org.opencastproject.caption.api.Time) Caption(org.opencastproject.caption.api.Caption) TimeImpl(org.opencastproject.caption.impl.TimeImpl)

Example 3 with CaptionConverterException

use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.

the class CaptionServiceImpl method getLanguageList.

/**
 * {@inheritDoc}
 * <p>
 * Looks up the converter for the given format, fetches the element's file from the workspace and asks the
 * converter for the languages found in that file.
 *
 * @param input
 *          the media package element holding the captions
 * @param format
 *          the caption format used to select the converter
 * @return the languages reported by the converter; an empty array if the converter returns {@code null}
 * @throws UnsupportedCaptionFormatException
 *           if the format is {@code null} or no converter is available for it
 * @throws CaptionConverterException
 *           if the element's file cannot be found or accessed
 */
@Override
public String[] getLanguageList(MediaPackageElement input, String format) throws UnsupportedCaptionFormatException, CaptionConverterException {
    if (format == null) {
        throw new UnsupportedCaptionFormatException("<null>");
    }
    CaptionConverter converter = getCaptionConverter(format);
    if (converter == null) {
        throw new UnsupportedCaptionFormatException(format);
    }
    File captions;
    try {
        captions = workspace.get(input.getURI());
    } catch (NotFoundException e) {
        throw new CaptionConverterException("Requested media package element " + input + " could not be found.", e);
    } catch (IOException e) {
        throw new CaptionConverterException("Requested media package element " + input + " could not be accessed.", e);
    }
    FileInputStream stream = null;
    String[] languageList;
    try {
        stream = new FileInputStream(captions);
        languageList = converter.getLanguageList(stream);
    } catch (FileNotFoundException e) {
        throw new CaptionConverterException("Requested file " + captions + " could not be found.", e);
    } finally {
        // stream may still be null here if opening the file failed
        IoSupport.closeQuietly(stream);
    }
    // Never hand out null to callers
    return languageList == null ? new String[0] : languageList;
}
Also used : CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) UnsupportedCaptionFormatException(org.opencastproject.caption.api.UnsupportedCaptionFormatException) FileNotFoundException(java.io.FileNotFoundException) NotFoundException(org.opencastproject.util.NotFoundException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) CaptionConverter(org.opencastproject.caption.api.CaptionConverter) File(java.io.File) FileInputStream(java.io.FileInputStream)

Example 4 with CaptionConverterException

use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.

the class CaptionServiceImpl method convert.

/**
 * Converts the captions and returns them in a new catalog.
 * <p>
 * The captions file is fetched from the workspace, imported using the input format and exported using the
 * output format. The resulting element gets the flavor {@code captions/<outputFormat>[+<language>]}, a mime
 * type derived from the exported file's path if none is set, and a {@code lang:<language>} tag when a language
 * is given.
 *
 * @param job
 *          the job whose identifier is used to name the exported file
 * @param input
 *          the media package element holding the captions to convert
 * @param inputFormat
 *          the format of the input captions
 * @param outputFormat
 *          the format to convert to
 * @param language
 *          the caption language, may be {@code null}
 * @return the converted catalog
 * @throws UnsupportedCaptionFormatException
 *           if the input or output format is not supported
 * @throws CaptionConverterException
 *           if the input cannot be retrieved or the conversion fails; any other failure, including invalid
 *           arguments, is wrapped in this exception as well
 * @throws MediaPackageException
 *           declared by the signature; this method wraps such failures in a {@link CaptionConverterException}
 */
protected MediaPackageElement convert(Job job, MediaPackageElement input, String inputFormat, String outputFormat, String language) throws UnsupportedCaptionFormatException, CaptionConverterException, MediaPackageException {
    try {
        // check parameters
        if (input == null)
            throw new IllegalArgumentException("Input element can't be null");
        if (StringUtils.isBlank(inputFormat))
            throw new IllegalArgumentException("Input format is null");
        if (StringUtils.isBlank(outputFormat))
            throw new IllegalArgumentException("Output format is null");
        // get input file
        File captionsFile;
        try {
            captionsFile = workspace.get(input.getURI());
        } catch (NotFoundException e) {
            throw new CaptionConverterException("Requested media package element " + input + " could not be found.", e);
        } catch (IOException e) {
            throw new CaptionConverterException("Requested media package element " + input + " could not be accessed.", e);
        }
        logger.debug("Atempting to convert from {} to {}...", inputFormat, outputFormat);
        List<Caption> collection = null;
        try {
            collection = importCaptions(captionsFile, inputFormat, language);
            logger.debug("Parsing to collection succeeded.");
        } catch (UnsupportedCaptionFormatException e) {
            // Report the input format as the unsupported one
            throw new UnsupportedCaptionFormatException(inputFormat);
        }
        URI exported;
        try {
            exported = exportCaptions(collection, job.getId() + "." + FilenameUtils.getExtension(captionsFile.getAbsolutePath()), outputFormat, language);
            logger.debug("Exporting captions succeeding.");
        } catch (UnsupportedCaptionFormatException e) {
            // Report the output format as the unsupported one
            throw new UnsupportedCaptionFormatException(outputFormat);
        } catch (IOException e) {
            throw new CaptionConverterException("Could not export caption collection.", e);
        }
        // create catalog and set properties
        CaptionConverter converter = getCaptionConverter(outputFormat);
        MediaPackageElementBuilder elementBuilder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder();
        MediaPackageElement mpe = elementBuilder.elementFromURI(exported, converter.getElementType(), new MediaPackageElementFlavor("captions", outputFormat + (language == null ? "" : "+" + language)));
        if (mpe.getMimeType() == null) {
            // Derive the mime type from the exported file's path, split into type and subtype
            String[] mimetype = FileTypeMap.getDefaultFileTypeMap().getContentType(exported.getPath()).split("/");
            mpe.setMimeType(mimeType(mimetype[0], mimetype[1]));
        }
        if (language != null)
            mpe.addTag("lang:" + language);
        return mpe;
    } catch (Exception e) {
        logger.warn("Error converting captions in " + input, e);
        // Let the declared caption exceptions through unchanged and wrap everything else
        if (e instanceof CaptionConverterException) {
            throw (CaptionConverterException) e;
        } else if (e instanceof UnsupportedCaptionFormatException) {
            throw (UnsupportedCaptionFormatException) e;
        } else {
            throw new CaptionConverterException(e);
        }
    }
}
Also used : CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) UnsupportedCaptionFormatException(org.opencastproject.caption.api.UnsupportedCaptionFormatException) NotFoundException(org.opencastproject.util.NotFoundException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) URI(java.net.URI) MediaPackageElementFlavor(org.opencastproject.mediapackage.MediaPackageElementFlavor) Caption(org.opencastproject.caption.api.Caption) CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) ConfigurationException(org.osgi.service.cm.ConfigurationException) ServiceRegistryException(org.opencastproject.serviceregistry.api.ServiceRegistryException) InvalidSyntaxException(org.osgi.framework.InvalidSyntaxException) NotFoundException(org.opencastproject.util.NotFoundException) IOException(java.io.IOException) MediaPackageException(org.opencastproject.mediapackage.MediaPackageException) FileNotFoundException(java.io.FileNotFoundException) UnsupportedCaptionFormatException(org.opencastproject.caption.api.UnsupportedCaptionFormatException) MediaPackageElementBuilder(org.opencastproject.mediapackage.MediaPackageElementBuilder) MediaPackageElement(org.opencastproject.mediapackage.MediaPackageElement) CaptionConverter(org.opencastproject.caption.api.CaptionConverter) File(java.io.File)

Example 5 with CaptionConverterException

use of org.opencastproject.caption.api.CaptionConverterException in project opencast by opencast.

the class CaptionServiceRemoteImpl method convert.

/**
 * Posts a conversion request to the remote caption service's {@code /convert} endpoint and returns the job
 * parsed from the response.
 *
 * @param input
 *          the media package element holding the captions; sent in its XML form
 * @param inputFormat
 *          the format of the input captions
 * @param outputFormat
 *          the format to convert to
 * @param language
 *          the caption language; only sent if not blank
 * @return the job parsed from the remote service's response
 * @throws CaptionConverterException
 *           if the request cannot be built, no response is received, or the response cannot be processed
 * @see org.opencastproject.caption.api.CaptionService#convert(MediaPackageElement, String, String, String)
 */
@Override
public Job convert(MediaPackageElement input, String inputFormat, String outputFormat, String language) throws UnsupportedCaptionFormatException, CaptionConverterException, MediaPackageException {
    HttpPost post = new HttpPost("/convert");
    try {
        List<BasicNameValuePair> params = new ArrayList<BasicNameValuePair>();
        params.add(new BasicNameValuePair("captions", MediaPackageElementParser.getAsXml(input)));
        params.add(new BasicNameValuePair("input", inputFormat));
        params.add(new BasicNameValuePair("output", outputFormat));
        if (StringUtils.isNotBlank(language))
            params.add(new BasicNameValuePair("language", language));
        // Encode the form as UTF-8: the default (ISO-8859-1) cannot represent all characters that may occur in
        // the element XML
        post.setEntity(new UrlEncodedFormEntity(params, "UTF-8"));
    } catch (Exception e) {
        throw new CaptionConverterException(e);
    }
    HttpResponse response = null;
    try {
        response = getResponse(post);
        if (response != null) {
            String content = EntityUtils.toString(response.getEntity());
            Job r = JobParser.parseJob(content);
            logger.info("Converting job {} started on a remote caption service", r.getId());
            return r;
        }
    } catch (Exception e) {
        throw new CaptionConverterException("Unable to convert catalog " + input + " using a remote caption service", e);
    } finally {
        closeConnection(response);
    }
    // Reached only when no response was obtained
    throw new CaptionConverterException("Unable to convert catalog " + input + " using a remote caption service");
}
Also used : HttpPost(org.apache.http.client.methods.HttpPost) CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) BasicNameValuePair(org.apache.http.message.BasicNameValuePair) ArrayList(java.util.ArrayList) HttpResponse(org.apache.http.HttpResponse) UrlEncodedFormEntity(org.apache.http.client.entity.UrlEncodedFormEntity) Job(org.opencastproject.job.api.Job) CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) MediaPackageException(org.opencastproject.mediapackage.MediaPackageException) UnsupportedCaptionFormatException(org.opencastproject.caption.api.UnsupportedCaptionFormatException)

Aggregations

CaptionConverterException (org.opencastproject.caption.api.CaptionConverterException)9 IOException (java.io.IOException)5 ArrayList (java.util.ArrayList)5 Caption (org.opencastproject.caption.api.Caption)5 UnsupportedCaptionFormatException (org.opencastproject.caption.api.UnsupportedCaptionFormatException)5 FileNotFoundException (java.io.FileNotFoundException)3 CaptionConverter (org.opencastproject.caption.api.CaptionConverter)3 IllegalTimeFormatException (org.opencastproject.caption.api.IllegalTimeFormatException)3 MediaPackageException (org.opencastproject.mediapackage.MediaPackageException)3 File (java.io.File)2 FileInputStream (java.io.FileInputStream)2 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)2 HttpResponse (org.apache.http.HttpResponse)2 UrlEncodedFormEntity (org.apache.http.client.entity.UrlEncodedFormEntity)2 HttpPost (org.apache.http.client.methods.HttpPost)2 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)2 Time (org.opencastproject.caption.api.Time)2 CaptionImpl (org.opencastproject.caption.impl.CaptionImpl)2 TimeImpl (org.opencastproject.caption.impl.TimeImpl)2 MediaPackageElement (org.opencastproject.mediapackage.MediaPackageElement)2