Search in sources :

Example 1 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class IBMWatsonCaptionConverter method importCaption.

/**
 * Imports captions from an IBM Watson Speech-To-Text JSON result document.
 * <p>
 * The first element of the outer "results" array is read, and each of its inner "results" entries that is
 * marked "final" is turned into one or more captions using the first alternative's transcript and word
 * timestamps. The transcript is broken into lines of roughly {@code LINE_SIZE} characters (a line is closed by
 * the word that reaches the limit), and each line becomes a single-line caption spanning from the start of its
 * first word to the end of its last word.
 *
 * @param inputStream
 *          stream containing the JSON result, read as UTF-8
 * @param language
 *          not used by this implementation
 * @return the list of captions built from the transcription, possibly empty
 * @throws CaptionConverterException
 *           if the document cannot be read, parsed or does not have the expected structure
 */
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
    List<Caption> captionList = new ArrayList<Caption>();
    JSONParser jsonParser = new JSONParser();
    try {
        // Decode explicitly as UTF-8 instead of relying on the platform default charset
        JSONObject resultsObj = (JSONObject) jsonParser.parse(new InputStreamReader(inputStream, "UTF-8"));
        String jobId = "Unknown";
        if (resultsObj.get("id") != null)
            jobId = (String) resultsObj.get("id");
        // Log warnings
        JSONArray warningsArray = (JSONArray) resultsObj.get("warnings");
        if (warningsArray != null) {
            for (Object w : warningsArray) {
                // Pass the warning as a log argument so that the {} placeholder is actually substituted
                logger.warn("Warning from Speech-To-Text service: {}", w);
            }
        }
        JSONArray outerResultsArray = (JSONArray) resultsObj.get("results");
        JSONObject obj = (JSONObject) outerResultsArray.get(0);
        JSONArray resultsArray = (JSONArray) obj.get("results");
        resultsLoop: for (int i = 0; i < resultsArray.size(); i++) {
            JSONObject resultElement = (JSONObject) resultsArray.get(i);
            // Ignore results that are not final
            if (!(Boolean) resultElement.get("final"))
                continue;
            JSONArray alternativesArray = (JSONArray) resultElement.get("alternatives");
            if (alternativesArray != null && alternativesArray.size() > 0) {
                // Only the first alternative is considered
                JSONObject alternativeElement = (JSONObject) alternativesArray.get(0);
                String transcript = (String) alternativeElement.get("transcript");
                if (transcript != null) {
                    JSONArray timestampsArray = (JSONArray) alternativeElement.get("timestamps");
                    if (timestampsArray == null || timestampsArray.size() == 0) {
                        logger.warn("Could not build caption object for job {}, result index {}: timestamp data not found", jobId, i);
                        continue;
                    }
                    // Force a maximum line size of LINE_SIZE + one word
                    String[] words = transcript.split("\\s+");
                    StringBuilder line = new StringBuilder();
                    int indexFirst = -1;
                    int indexLast = -1;
                    for (int j = 0; j < words.length; j++) {
                        if (indexFirst == -1)
                            indexFirst = j;
                        line.append(words[j]);
                        line.append(" ");
                        if (line.length() >= LINE_SIZE || j == words.length - 1) {
                            indexLast = j;
                            // Create a caption
                            double start = -1;
                            double end = -1;
                            // indexFirst <= indexLast, so this check covers both timestamp lookups
                            if (indexLast < timestampsArray.size()) {
                                // Each timestamp entry is expected to have three elements; index 1 is used as start time
                                JSONArray wordTsArray = (JSONArray) timestampsArray.get(indexFirst);
                                if (wordTsArray.size() == 3)
                                    start = ((Number) wordTsArray.get(1)).doubleValue();
                                // Index 2 of the last word's entry is used as end time
                                wordTsArray = (JSONArray) timestampsArray.get(indexLast);
                                if (wordTsArray.size() == 3)
                                    end = ((Number) wordTsArray.get(2)).doubleValue();
                            }
                            if (start == -1 || end == -1) {
                                logger.warn("Could not build caption object for job {}, result index {}: start/end times not found", jobId, i);
                                // Give up on the rest of this result; captions already added for it are kept
                                continue resultsLoop;
                            }
                            String[] captionLines = new String[1];
                            captionLines[0] = line.toString().replace("%HESITATION", "...");
                            // Times are multiplied by 1000 before being handed to buildTime
                            captionList.add(new CaptionImpl(buildTime((long) (start * 1000)), buildTime((long) (end * 1000)), captionLines));
                            indexFirst = -1;
                            indexLast = -1;
                            line.setLength(0);
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        logger.warn("Error when parsing IBM Watson transcriptions result: {}", e.getMessage());
        throw new CaptionConverterException(e);
    }
    return captionList;
}
Also used : CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) JSONArray(org.json.simple.JSONArray) Caption(org.opencastproject.caption.api.Caption) IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) IOException(java.io.IOException) CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) JSONObject(org.json.simple.JSONObject) CaptionImpl(org.opencastproject.caption.impl.CaptionImpl) JSONParser(org.json.simple.parser.JSONParser) JSONObject(org.json.simple.JSONObject)

Example 2 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class SubRipCaptionConverter method importCaption.

/**
 * {@inheritDoc} Since srt does not store information about language, language parameter is ignored.
 * <p>
 * The input is read as UTF-8 and split into caption blocks separated by a blank line. Each block must consist
 * of a number line, a time line of the form {@code start --> end} and the caption text. Captions that start
 * before the end of the previously accepted caption, or that do not end after they start, are skipped with a
 * warning.
 *
 * @throws CaptionConverterException
 *           if a block is malformed, a time cannot be parsed or a caption has no text
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@Override
public List<Caption> importCaption(InputStream in, String language) throws CaptionConverterException {
    List<Caption> collection = new ArrayList<Caption>();
    // initialize scanner object; it is not closed here because closing it would also close the supplied stream
    Scanner scanner = new Scanner(in, "UTF-8");
    // Blocks are separated by two consecutive line breaks (LF or CRLF). A character class such as
    // [\n(\r\n)]{2} would also match a single CRLF or parentheses inside the caption text.
    scanner.useDelimiter("(?:\r?\n){2}");
    // create initial time
    Time time;
    try {
        time = new TimeImpl(0, 0, 0, 0);
    } catch (IllegalTimeFormatException e1) {
        // Do not continue with a null reference time; it would only fail later in compareTo
        throw new CaptionConverterException("Could not create initial caption time", e1);
    }
    while (scanner.hasNext()) {
        String captionString = scanner.next();
        // convert line endings to \n
        captionString = captionString.replace("\r\n", "\n");
        // split to number, time and caption
        String[] captionParts = captionString.split("\n", 3);
        // check for table length
        if (captionParts.length != 3) {
            throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
        }
        // get time part
        String[] timePart = captionParts[1].split("-->");
        if (timePart.length < 2) {
            // Missing "-->" separator or missing end time
            throw new CaptionConverterException("Invalid caption for SubRip format: " + captionString);
        }
        // parse time
        Time inTime;
        Time outTime;
        try {
            inTime = TimeUtil.importSrt(timePart[0].trim());
            outTime = TimeUtil.importSrt(timePart[1].trim());
        } catch (IllegalTimeFormatException e) {
            // keep the original message but preserve the cause
            throw new CaptionConverterException(e.getMessage(), e);
        }
        // check for time validity
        if (inTime.compareTo(time) < 0 || outTime.compareTo(inTime) <= 0) {
            logger.warn("Caption with invalid time encountered. Skipping...");
            continue;
        }
        // the next caption must not start before this one ends
        time = outTime;
        // get text captions
        String[] captionLines = createCaptionLines(captionParts[2]);
        if (captionLines == null) {
            throw new CaptionConverterException("Caption does not contain any caption text: " + captionString);
        }
        // create caption object and add to caption collection
        Caption caption = new CaptionImpl(inTime, outTime, captionLines);
        collection.add(caption);
    }
    return collection;
}
Also used : Scanner(java.util.Scanner) CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) CaptionImpl(org.opencastproject.caption.impl.CaptionImpl) ArrayList(java.util.ArrayList) Time(org.opencastproject.caption.api.Time) Caption(org.opencastproject.caption.api.Caption) TimeImpl(org.opencastproject.caption.impl.TimeImpl)

Example 3 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class CaptionServiceImpl method convert.

/**
 * Converts the captions and returns them in a new catalog.
 * <p>
 * The input element is fetched from the workspace, parsed with the converter for {@code inputFormat}, exported
 * with the converter for {@code outputFormat} and wrapped in a new media package element with the flavor
 * {@code captions/<outputFormat>[+<language>]}. If a language is given, a {@code lang:<language>} tag is added.
 *
 * @param job
 *          the job whose identifier is used to name the exported file
 * @param input
 *          the media package element holding the captions to convert, must not be {@code null}
 * @param inputFormat
 *          the format of the input captions, must not be blank
 * @param outputFormat
 *          the format to convert to, must not be blank
 * @param language
 *          the caption language, may be {@code null}
 * @return the converted catalog
 * @throws UnsupportedCaptionFormatException
 *           if the input or the output format is not supported
 * @throws CaptionConverterException
 *           if the input cannot be retrieved or any other error occurs during conversion
 */
protected MediaPackageElement convert(Job job, MediaPackageElement input, String inputFormat, String outputFormat, String language) throws UnsupportedCaptionFormatException, CaptionConverterException, MediaPackageException {
    try {
        // check parameters
        if (input == null)
            throw new IllegalArgumentException("Input element can't be null");
        if (StringUtils.isBlank(inputFormat))
            throw new IllegalArgumentException("Input format is null");
        if (StringUtils.isBlank(outputFormat))
            throw new IllegalArgumentException("Output format is null");
        // get input file
        File captionsFile;
        try {
            captionsFile = workspace.get(input.getURI());
        } catch (NotFoundException e) {
            throw new CaptionConverterException("Requested media package element " + input + " could not be found.", e);
        } catch (IOException e) {
            throw new CaptionConverterException("Requested media package element " + input + " could not be accessed.", e);
        }
        logger.debug("Atempting to convert from {} to {}...", inputFormat, outputFormat);
        List<Caption> collection = null;
        try {
            collection = importCaptions(captionsFile, inputFormat, language);
            logger.debug("Parsing to collection succeeded.");
        } catch (UnsupportedCaptionFormatException e) {
            // report the offending (input) format to the caller
            throw new UnsupportedCaptionFormatException(inputFormat);
        }
        URI exported;
        try {
            // the exported file keeps the extension of the input file
            exported = exportCaptions(collection, job.getId() + "." + FilenameUtils.getExtension(captionsFile.getAbsolutePath()), outputFormat, language);
            logger.debug("Exporting captions succeeding.");
        } catch (UnsupportedCaptionFormatException e) {
            // report the offending (output) format to the caller
            throw new UnsupportedCaptionFormatException(outputFormat);
        } catch (IOException e) {
            throw new CaptionConverterException("Could not export caption collection.", e);
        }
        // create catalog and set properties
        CaptionConverter converter = getCaptionConverter(outputFormat);
        MediaPackageElementBuilder elementBuilder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder();
        MediaPackageElement mpe = elementBuilder.elementFromURI(exported, converter.getElementType(), new MediaPackageElementFlavor("captions", outputFormat + (language == null ? "" : "+" + language)));
        if (mpe.getMimeType() == null) {
            // fall back to the mime type derived from the exported file's name
            String[] mimetype = FileTypeMap.getDefaultFileTypeMap().getContentType(exported.getPath()).split("/");
            mpe.setMimeType(mimeType(mimetype[0], mimetype[1]));
        }
        if (language != null)
            mpe.addTag("lang:" + language);
        return mpe;
    } catch (Exception e) {
        logger.warn("Error converting captions in " + input, e);
        // rethrow the declared exception types unchanged, wrap everything else
        if (e instanceof CaptionConverterException) {
            throw (CaptionConverterException) e;
        } else if (e instanceof UnsupportedCaptionFormatException) {
            throw (UnsupportedCaptionFormatException) e;
        } else {
            throw new CaptionConverterException(e);
        }
    }
}
Also used : CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) UnsupportedCaptionFormatException(org.opencastproject.caption.api.UnsupportedCaptionFormatException) NotFoundException(org.opencastproject.util.NotFoundException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) URI(java.net.URI) MediaPackageElementFlavor(org.opencastproject.mediapackage.MediaPackageElementFlavor) Caption(org.opencastproject.caption.api.Caption) CaptionConverterException(org.opencastproject.caption.api.CaptionConverterException) ConfigurationException(org.osgi.service.cm.ConfigurationException) ServiceRegistryException(org.opencastproject.serviceregistry.api.ServiceRegistryException) InvalidSyntaxException(org.osgi.framework.InvalidSyntaxException) NotFoundException(org.opencastproject.util.NotFoundException) IOException(java.io.IOException) MediaPackageException(org.opencastproject.mediapackage.MediaPackageException) FileNotFoundException(java.io.FileNotFoundException) UnsupportedCaptionFormatException(org.opencastproject.caption.api.UnsupportedCaptionFormatException) MediaPackageElementBuilder(org.opencastproject.mediapackage.MediaPackageElementBuilder) MediaPackageElement(org.opencastproject.mediapackage.MediaPackageElement) CaptionConverter(org.opencastproject.caption.api.CaptionConverter) File(java.io.File)

Example 4 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class Mpeg7CaptionConverter method importCaption.

/**
 * Imports captions from an MPEG-7 catalog.
 * <p>
 * Every text annotation of every audio segment becomes one caption whose lines are the annotation's free text
 * annotations. The caption starts at the segment's media time point and ends at that time point plus the
 * segment's media duration. An audio content is abandoned as soon as one of its text annotations has a language
 * different from {@code language}; captions already created for it are kept.
 *
 * @param inputStream
 *          stream containing the MPEG-7 catalog
 * @param language
 *          the language the text annotations must have
 * @return the list of imported captions, possibly empty
 * @see org.opencastproject.caption.api.CaptionConverter#importCaption(java.io.InputStream, java.lang.String)
 */
@SuppressWarnings("unchecked")
@Override
public List<Caption> importCaption(InputStream inputStream, String language) throws CaptionConverterException {
    List<Caption> captions = new ArrayList<Caption>();
    Mpeg7Catalog catalog = new Mpeg7CatalogImpl(inputStream);
    Iterator<Audio> audioContentIterator = catalog.audioContent();
    if (audioContentIterator == null)
        return captions;
    content: while (audioContentIterator.hasNext()) {
        Audio audioContent = audioContentIterator.next();
        TemporalDecomposition<AudioSegment> audioSegments = (TemporalDecomposition<AudioSegment>) audioContent.getTemporalDecomposition();
        Iterator<AudioSegment> audioSegmentIterator = audioSegments.segments();
        if (audioSegmentIterator == null)
            continue content;
        while (audioSegmentIterator.hasNext()) {
            AudioSegment segment = audioSegmentIterator.next();
            Iterator<TextAnnotation> annotationIterator = segment.textAnnotations();
            if (annotationIterator == null)
                continue content;
            while (annotationIterator.hasNext()) {
                TextAnnotation annotation = annotationIterator.next();
                if (!annotation.getLanguage().equals(language)) {
                    logger.debug("Skipping audio content '{}' because of language mismatch", audioContent.getId());
                    continue content;
                }
                List<String> captionLines = new ArrayList<String>();
                Iterator<FreeTextAnnotation> freeTextAnnotationIterator = annotation.freeTextAnnotations();
                if (freeTextAnnotationIterator == null)
                    continue;
                while (freeTextAnnotationIterator.hasNext()) {
                    FreeTextAnnotation freeTextAnnotation = freeTextAnnotationIterator.next();
                    captionLines.add(freeTextAnnotation.getText());
                }
                MediaTime segmentTime = segment.getMediaTime();
                MediaTimePoint stp = segmentTime.getMediaTimePoint();
                MediaDuration d = segmentTime.getMediaDuration();
                // Sub-second remainders: total milliseconds minus the whole hours/minutes/seconds part
                int millisAtStart = (int) (stp.getTimeInMilliseconds() - (((stp.getHour() * 60 + stp.getMinutes()) * 60 + stp.getSeconds()) * 1000));
                int millisAtEnd = (int) (d.getDurationInMilliseconds() - (((d.getHours() * 60 + d.getMinutes()) * 60 + d.getSeconds()) * 1000));
                // End time = start time + duration. The calendar is only used for the carry arithmetic;
                // HOUR_OF_DAY (24-hour field) is used because HOUR is the 12-hour field and wraps at 12.
                Calendar endCalendar = Calendar.getInstance();
                endCalendar.set(Calendar.HOUR_OF_DAY, stp.getHour());
                endCalendar.set(Calendar.MINUTE, stp.getMinutes());
                endCalendar.set(Calendar.SECOND, stp.getSeconds());
                endCalendar.set(Calendar.MILLISECOND, millisAtStart);
                endCalendar.add(Calendar.HOUR_OF_DAY, d.getHours());
                endCalendar.add(Calendar.MINUTE, d.getMinutes());
                endCalendar.add(Calendar.SECOND, d.getSeconds());
                // Add (not set) the duration's milliseconds so the start's milliseconds are kept and carried
                endCalendar.add(Calendar.MILLISECOND, millisAtEnd);
                try {
                    Time startTime = new TimeImpl(stp.getHour(), stp.getMinutes(), stp.getSeconds(), millisAtStart);
                    Time endTime = new TimeImpl(endCalendar.get(Calendar.HOUR_OF_DAY), endCalendar.get(Calendar.MINUTE), endCalendar.get(Calendar.SECOND), endCalendar.get(Calendar.MILLISECOND));
                    Caption caption = new CaptionImpl(startTime, endTime, captionLines.toArray(new String[captionLines.size()]));
                    captions.add(caption);
                } catch (IllegalTimeFormatException e) {
                    // The caption is dropped; the import continues with the next annotation
                    logger.warn("Error setting caption time: {}", e.getMessage());
                }
            }
        }
    }
    return captions;
}
Also used : IllegalTimeFormatException(org.opencastproject.caption.api.IllegalTimeFormatException) MediaTimePoint(org.opencastproject.metadata.mpeg7.MediaTimePoint) Calendar(java.util.Calendar) ArrayList(java.util.ArrayList) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) Time(org.opencastproject.caption.api.Time) Caption(org.opencastproject.caption.api.Caption) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) TimeImpl(org.opencastproject.caption.impl.TimeImpl) MediaTimeImpl(org.opencastproject.metadata.mpeg7.MediaTimeImpl) Mpeg7Catalog(org.opencastproject.metadata.mpeg7.Mpeg7Catalog) CaptionImpl(org.opencastproject.caption.impl.CaptionImpl) MediaTime(org.opencastproject.metadata.mpeg7.MediaTime) Iterator(java.util.Iterator) MediaDuration(org.opencastproject.metadata.mpeg7.MediaDuration) Mpeg7CatalogImpl(org.opencastproject.metadata.mpeg7.Mpeg7CatalogImpl) ArrayList(java.util.ArrayList) List(java.util.List) TemporalDecomposition(org.opencastproject.metadata.mpeg7.TemporalDecomposition) Audio(org.opencastproject.metadata.mpeg7.Audio) TextAnnotation(org.opencastproject.metadata.mpeg7.TextAnnotation) FreeTextAnnotation(org.opencastproject.metadata.mpeg7.FreeTextAnnotation) AudioSegment(org.opencastproject.metadata.mpeg7.AudioSegment)

Example 5 with Caption

use of org.opencastproject.caption.api.Caption in project opencast by opencast.

the class WebVttCaptionConverter method exportCaption.

/**
 * {@inheritDoc} Language parameter is ignored.
 * <p>
 * Writes the {@code WEBVTT} header followed by one cue per caption, encoded as UTF-8. The output stream is
 * closed when this method returns, whether or not writing succeeded.
 *
 * @throws IOException
 *           if writing to the output stream fails
 */
@Override
public void exportCaption(OutputStream outputStream, List<Caption> captions, String language) throws IOException {
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"));
    try {
        bw.append("WEBVTT\n\n");
        for (Caption caption : captions) {
            // cue: "<start> --> <stop>", the caption text, then a blank line
            String captionString = String.format("%s --> %s\n%s\n\n", TimeUtil.exportToVtt(caption.getStartTime()), TimeUtil.exportToVtt(caption.getStopTime()), createCaptionText(caption.getCaption()));
            bw.append(captionString);
            logger.trace(captionString);
        }
        bw.flush();
    } finally {
        // closing the buffered writer also closes the wrapped writer and the output stream
        bw.close();
    }
}
Also used : OutputStreamWriter(java.io.OutputStreamWriter) Caption(org.opencastproject.caption.api.Caption) BufferedWriter(java.io.BufferedWriter)

Aggregations

Caption (org.opencastproject.caption.api.Caption)13 IOException (java.io.IOException)6 CaptionConverterException (org.opencastproject.caption.api.CaptionConverterException)6 ArrayList (java.util.ArrayList)5 IllegalTimeFormatException (org.opencastproject.caption.api.IllegalTimeFormatException)5 Time (org.opencastproject.caption.api.Time)5 CaptionImpl (org.opencastproject.caption.impl.CaptionImpl)4 TimeImpl (org.opencastproject.caption.impl.TimeImpl)4 OutputStreamWriter (java.io.OutputStreamWriter)3 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)3 MediaPackageElement (org.opencastproject.mediapackage.MediaPackageElement)3 BufferedWriter (java.io.BufferedWriter)2 FileNotFoundException (java.io.FileNotFoundException)2 Calendar (java.util.Calendar)2 DocumentBuilder (javax.xml.parsers.DocumentBuilder)2 DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory)2 Transformer (javax.xml.transform.Transformer)2 TransformerConfigurationException (javax.xml.transform.TransformerConfigurationException)2 TransformerException (javax.xml.transform.TransformerException)2 DOMSource (javax.xml.transform.dom.DOMSource)2