
Example 1 with StreamAwareContentReaderProxy

Use of org.alfresco.repo.content.StreamAwareContentReaderProxy in project alfresco-repository by Alfresco.

The class AbstractContentTransformer2, method transform.

/**
 * @see org.alfresco.repo.content.transform.ContentTransformer#transform(org.alfresco.service.cmr.repository.ContentReader, org.alfresco.service.cmr.repository.ContentWriter, org.alfresco.service.cmr.repository.TransformationOptions)
 */
public void transform(ContentReader reader, ContentWriter writer, TransformationOptions options) throws ContentIOException {
    try {
        depth.set(depth.get() + 1);
        // begin timing
        long before = System.currentTimeMillis();
        String sourceMimetype = reader.getMimetype();
        String targetMimetype = writer.getMimetype();
        // check options map
        if (options == null) {
            options = new TransformationOptions();
        }
        try {
            if (transformerDebug.isEnabled()) {
                ((LegacyTransformerDebug) transformerDebug).pushTransform(this, reader.getContentUrl(), sourceMimetype, targetMimetype, reader.getSize(), options);
            }
            // MNT-16381: check the mimetype of the file supplied by the user
            // matches the sourceMimetype of the reader. Intermediate files are
            // not checked.
            strictMimetypeCheck(reader, options, sourceMimetype);
            // Check the transformability
            checkTransformable(reader, writer, options);
            // Pass on any limits to the reader
            setReaderLimits(reader, writer, options);
            // Transform
            // MNT-12238: CLONE - CLONE - Upload of PPTX causes very high memory usage leading to system instability
            // Limiting transformation up to configured amount of milliseconds to avoid very high RAM consumption
            // and OOM during transforming problematic documents
            TransformationOptionLimits limits = getLimits(reader.getMimetype(), writer.getMimetype(), options);
            // Guard against a null limits object before reading the timeout
            long timeoutMs = (null == limits) ? -1 : limits.getTimeoutMs();
            if (!useTimeoutThread || (-1 == timeoutMs)) {
                transformInternal(reader, writer, options);
            } else {
                Future<?> submittedTask = null;
                StreamAwareContentReaderProxy proxiedReader = new StreamAwareContentReaderProxy(reader);
                StreamAwareContentWriterProxy proxiedWriter = new StreamAwareContentWriterProxy(writer);
                try {
                    submittedTask = getExecutorService().submit(new TransformInternalCallable(proxiedReader, proxiedWriter, options));
                    submittedTask.get(timeoutMs + additionalThreadTimout, TimeUnit.MILLISECONDS);
                } catch (TimeoutException e) {
                    releaseResources(submittedTask, proxiedReader, proxiedWriter);
                    throw new TimeoutException("Transformation failed due to timeout limit");
                } catch (InterruptedException e) {
                    releaseResources(submittedTask, proxiedReader, proxiedWriter);
                    throw new InterruptedException("Transformation failed, because the thread of the transformation was interrupted");
                } catch (ExecutionException e) {
                    Throwable cause = e.getCause();
                    if (cause instanceof TransformInternalCallableException) {
                        cause = ((TransformInternalCallableException) cause).getCause();
                    }
                    throw cause;
                }
            }
            // record time
            long after = System.currentTimeMillis();
            recordTime(sourceMimetype, targetMimetype, after - before);
        } catch (ContentServiceTransientException cste) {
            // Transient decline: log at debug level but do not record time or an error against the transformer.
            if (logger.isDebugEnabled()) {
                logger.debug("Transformation has been transiently declined: \n" + "   reader: " + reader + "\n" + "   writer: " + writer + "\n" + "   options: " + options + "\n" + "   transformer: " + this);
            }
            // We rethrow the exception
            throw cste;
        } catch (UnsupportedTransformationException e) {
            // Don't record an error or even the time, as this is normal in compound transformations.
            transformerDebug.debug("          Failed", e);
            throw e;
        } catch (Throwable e) {
            // Record the failure time against this transformer so that transformers
            // competing for the same transformation are deprioritized if they tend to fail.
            long after = System.currentTimeMillis();
            recordError(sourceMimetype, targetMimetype, after - before);
            // Ask Tika to detect the document type and report whether
            // the claimed mime type is plausible
            String differentType = getMimetypeService().getMimetypeIfNotMatches(reader.getReader());
            // Report the error
            if (differentType == null) {
                transformerDebug.debug("          Failed", e);
                throw new ContentIOException("Content conversion failed: \n" + "   reader: " + reader + "\n" + "   writer: " + writer + "\n" + "   options: " + options.toString(false) + "\n" + "   limits: " + getLimits(reader, writer, options), e);
            } else {
                transformerDebug.debug("          Failed: Mime type was '" + differentType + "'", e);
                if (retryTransformOnDifferentMimeType) {
                    // MNT-11015 fix.
                    // Set a new reader to refresh the input stream.
                    reader = reader.getReader();
                    // Set the MIME type detected by Tika on the content reader
                    reader.setMimetype(differentType);
                    // Look up the correct transformer for the detected MIME type and
                    // retry the transformation with it
                    ContentTransformer transformer = this.registry.getTransformer(differentType, reader.getSize(), targetMimetype, options);
                    if (null != transformer) {
                        transformer.transform(reader, writer, options);
                    } else {
                        transformerDebug.debug("          Failed", e);
                        throw new ContentIOException("Content conversion failed: \n" + "   reader: " + reader + "\n" + "   writer: " + writer + "\n" + "   options: " + options.toString(false) + "\n" + "   limits: " + getLimits(reader, writer, options) + "\n" + "   claimed mime type: " + reader.getMimetype() + "\n" + "   detected mime type: " + differentType + "\n" + "   transformer not found" + "\n", e);
                    }
                } else {
                    throw new ContentIOException("Content conversion failed: \n" + "   reader: " + reader + "\n" + "   writer: " + writer + "\n" + "   options: " + options.toString(false) + "\n" + "   limits: " + getLimits(reader, writer, options) + "\n" + "   claimed mime type: " + reader.getMimetype() + "\n" + "   detected mime type: " + differentType, e);
                }
            }
        } finally {
            transformerDebug.popTransform();
            // check that the reader and writer are both closed
            if (reader.isChannelOpen()) {
                logger.error("Content reader not closed by transformer: \n" + "   reader: " + reader + "\n" + "   transformer: " + this);
            }
            if (writer.isChannelOpen()) {
                logger.error("Content writer not closed by transformer: \n" + "   writer: " + writer + "\n" + "   transformer: " + this);
            }
        }
        // done
        if (logger.isDebugEnabled()) {
            logger.debug("Completed transformation: \n" + "   reader: " + reader + "\n" + "   writer: " + writer + "\n" + "   options: " + options + "\n" + "   transformer: " + this);
        }
    } finally {
        depth.set(depth.get() - 1);
    }
}
Also used: StreamAwareContentWriterProxy (org.alfresco.repo.content.StreamAwareContentWriterProxy), ContentIOException (org.alfresco.service.cmr.repository.ContentIOException), TransformationOptions (org.alfresco.service.cmr.repository.TransformationOptions), StreamAwareContentReaderProxy (org.alfresco.repo.content.StreamAwareContentReaderProxy), TransformationOptionLimits (org.alfresco.service.cmr.repository.TransformationOptionLimits), ContentServiceTransientException (org.alfresco.service.cmr.repository.ContentServiceTransientException), ExecutionException (java.util.concurrent.ExecutionException), TimeoutException (java.util.concurrent.TimeoutException)
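
The core pattern in Example 1 is worth isolating: submit the transformation to a thread pool, wait on the Future with the configured timeout, and on timeout cancel the task and release the proxied streams so the abandoned worker cannot hold channels open or keep consuming memory. Below is a minimal, self-contained sketch of that pattern using only java.util.concurrent; the Releasable interface and the runWithTimeout helper are illustrative stand-ins for the stream-aware proxies, not Alfresco API.

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

/**
 * Minimal sketch of the timeout pattern in Example 1. Releasable and
 * runWithTimeout are illustrative stand-ins, not Alfresco API.
 */
public class TimeoutBoundedTask {

    /** Stand-in for the stream-aware proxies, which expose a release hook. */
    interface Releasable {
        void release();
    }

    static <T> T runWithTimeout(ExecutorService executor, Callable<T> work,
                                Releasable resource, long timeoutMs) throws Exception {
        Future<T> future = executor.submit(work);
        try {
            // Block until the work finishes or the timeout elapses.
            return future.get(timeoutMs, TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            // Interrupt the worker and close its streams so the abandoned
            // task cannot keep consuming memory or hold channels open.
            future.cancel(true);
            resource.release();
            throw e;
        } catch (ExecutionException e) {
            // Surface the real failure rather than the executor's wrapper.
            Throwable cause = e.getCause();
            throw (cause instanceof Exception) ? (Exception) cause : e;
        }
    }

    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        Releasable noop = () -> { };
        System.out.println(runWithTimeout(executor, () -> "transformed", noop, 1000));
        executor.shutdown();
    }
}

Note that cancel(true) only interrupts the worker thread, and blocking stream I/O does not always respond to interruption; releasing the underlying streams, which is what the proxies' release methods do in the examples above, is what actually unblocks the worker.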

Example 2 with StreamAwareContentReaderProxy

Use of org.alfresco.repo.content.StreamAwareContentReaderProxy in project alfresco-repository by Alfresco.

The class AbstractMappingMetadataExtracter, method extractRaw.

/**
 * Calls the {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader)} method
 * using the given limits.
 * <p>
 * In addition to the timeout, which is enforced by executing the extraction as a
 * {@link FutureTask} on the {@link AbstractMappingMetadataExtracter#getExecutorService()}
 * thread pool, this method enforces the maximum document size and the maximum
 * number of concurrent extractions defined by {@link MetadataExtracterLimits}.
 *
 * @param reader        the document to extract the values from.  This stream provided by
 *                      the reader must be closed if accessed directly.
 * @param limits        the limits to impose on the extraction
 * @return              Returns a map of document property values keyed by property name.
 * @throws Throwable    All underlying failures are propagated so the caller can handle them.
 */
private Map<String, Serializable> extractRaw(ContentReader reader, MetadataExtracterLimits limits) throws Throwable {
    FutureTask<Map<String, Serializable>> task = null;
    StreamAwareContentReaderProxy proxiedReader = null;
    if (reader.getSize() > limits.getMaxDocumentSizeMB() * MEGABYTE_SIZE) {
        throw new LimitExceededException("Max doc size exceeded " + limits.getMaxDocumentSizeMB() + " MB");
    }
    synchronized (CONCURRENT_EXTRACTIONS_COUNT) {
        if (logger.isDebugEnabled()) {
            logger.debug("Concurrent extractions : " + CONCURRENT_EXTRACTIONS_COUNT.get());
        }
        if (CONCURRENT_EXTRACTIONS_COUNT.get() < limits.getMaxConcurrentExtractionsCount()) {
            int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.incrementAndGet();
            if (logger.isDebugEnabled()) {
                logger.debug("New extraction accepted. Concurrent extractions : " + totalDocCount);
            }
        } else {
            throw new LimitExceededException("Reached concurrent extractions limit - " + limits.getMaxConcurrentExtractionsCount());
        }
    }
    try {
        proxiedReader = new StreamAwareContentReaderProxy(reader);
        task = new FutureTask<Map<String, Serializable>>(new ExtractRawCallable(proxiedReader));
        getExecutorService().execute(task);
        return task.get(limits.getTimeoutMs(), TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
        task.cancel(true);
        if (null != proxiedReader) {
            proxiedReader.release();
        }
        throw e;
    } catch (InterruptedException e) {
        // We were asked to stop: cancel the task and restore the interrupt flag
        task.cancel(true);
        Thread.currentThread().interrupt();
        return null;
    } catch (ExecutionException e) {
        // Unwrap our cause and throw that; instanceof already rejects null
        Throwable cause = e.getCause();
        if (cause instanceof ExtractRawCallableException) {
            cause = ((ExtractRawCallableException) cause).getCause();
        }
        // Fall back to the ExecutionException itself if there is no cause to unwrap
        throw (cause != null) ? cause : e;
    } finally {
        int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet();
        if (logger.isDebugEnabled()) {
            logger.debug("Extraction finalized. Remaining concurrent extraction : " + totalDocCount);
        }
    }
}
Also used: StreamAwareContentReaderProxy (org.alfresco.repo.content.StreamAwareContentReaderProxy), ExecutionException (java.util.concurrent.ExecutionException), Map (java.util.Map), HashMap (java.util.HashMap), TimeoutException (java.util.concurrent.TimeoutException)
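
Before the timeout-guarded extraction runs, Example 2 gates admission with a check-then-increment on CONCURRENT_EXTRACTIONS_COUNT: the check and the increment are performed together under a lock so the count can never overshoot the configured maximum, and the matching decrement sits in a finally block so a slot is always returned, even on failure. A minimal sketch of that gate follows; the ExtractionGate class and its stand-in extraction body are hypothetical, written only to illustrate the pattern.

import java.util.concurrent.atomic.AtomicInteger;

/**
 * Minimal sketch of the admission gate used in extractRaw above.
 * ExtractionGate and its extract body are illustrative stand-ins.
 */
public class ExtractionGate {

    /** Stand-in for the LimitExceededException thrown in the example above. */
    static class LimitExceededException extends RuntimeException {
        LimitExceededException(String msg) {
            super(msg);
        }
    }

    private final AtomicInteger concurrentCount = new AtomicInteger(0);
    private final int maxConcurrent;

    ExtractionGate(int maxConcurrent) {
        this.maxConcurrent = maxConcurrent;
    }

    String extract(String document) {
        // The check and the increment must happen atomically as a pair, hence
        // the synchronized block; the AtomicInteger alone only makes each
        // individual get/increment atomic, not the two together.
        synchronized (concurrentCount) {
            if (concurrentCount.get() >= maxConcurrent) {
                throw new LimitExceededException(
                        "Reached concurrent extractions limit - " + maxConcurrent);
            }
            concurrentCount.incrementAndGet();
        }
        try {
            return "metadata of " + document; // stand-in for the real extraction work
        } finally {
            // Always return the slot, whether the extraction succeeded or failed.
            concurrentCount.decrementAndGet();
        }
    }

    public static void main(String[] args) {
        ExtractionGate gate = new ExtractionGate(2);
        System.out.println(gate.extract("report.pdf"));
    }
}

A java.util.concurrent.Semaphore with tryAcquire() would express the same gate more directly; the AtomicInteger form shown here mirrors the source, where the current count is also wanted for debug logging.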

Aggregations

ExecutionException (java.util.concurrent.ExecutionException): 2
TimeoutException (java.util.concurrent.TimeoutException): 2
StreamAwareContentReaderProxy (org.alfresco.repo.content.StreamAwareContentReaderProxy): 2
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
StreamAwareContentWriterProxy (org.alfresco.repo.content.StreamAwareContentWriterProxy): 1
ContentIOException (org.alfresco.service.cmr.repository.ContentIOException): 1
ContentServiceTransientException (org.alfresco.service.cmr.repository.ContentServiceTransientException): 1
TransformationOptionLimits (org.alfresco.service.cmr.repository.TransformationOptionLimits): 1
TransformationOptions (org.alfresco.service.cmr.repository.TransformationOptions): 1