Use of org.alfresco.repo.content.StreamAwareContentReaderProxy in project alfresco-repository by Alfresco.
The class AbstractContentTransformer2, method transform.
/**
 * @see org.alfresco.repo.content.transform.ContentTransformer#transform(org.alfresco.service.cmr.repository.ContentReader, org.alfresco.service.cmr.repository.ContentWriter, org.alfresco.service.cmr.repository.TransformationOptions)
 */
public void transform(ContentReader reader, ContentWriter writer, TransformationOptions options) throws ContentIOException {
    try {
        depth.set(depth.get() + 1);
        // begin timing
        long before = System.currentTimeMillis();
        String sourceMimetype = reader.getMimetype();
        String targetMimetype = writer.getMimetype();
        // check the options map
        if (options == null) {
            options = new TransformationOptions();
        }
        try {
            if (transformerDebug.isEnabled()) {
                ((LegacyTransformerDebug) transformerDebug).pushTransform(this, reader.getContentUrl(), sourceMimetype, targetMimetype, reader.getSize(), options);
            }
            // MNT-16381: check that the mimetype of the file supplied by the user
            // matches the sourceMimetype of the reader. Intermediate files are
            // not checked.
            strictMimetypeCheck(reader, options, sourceMimetype);
            // Check the transformability
            checkTransformable(reader, writer, options);
            // Pass on any limits to the reader
            setReaderLimits(reader, writer, options);
            // Transform
            // MNT-12238: CLONE - CLONE - Upload of PPTX causes very high memory usage leading to system instability
            // Limit the transformation to the configured number of milliseconds to avoid very high RAM
            // consumption and OOM while transforming problematic documents
            TransformationOptionLimits limits = getLimits(reader.getMimetype(), writer.getMimetype(), options);
            long timeoutMs = limits.getTimeoutMs();
            if (!useTimeoutThread || (null == limits) || (-1 == timeoutMs)) {
                transformInternal(reader, writer, options);
            } else {
                Future<?> submittedTask = null;
                StreamAwareContentReaderProxy proxiedReader = new StreamAwareContentReaderProxy(reader);
                StreamAwareContentWriterProxy proxiedWriter = new StreamAwareContentWriterProxy(writer);
                try {
                    submittedTask = getExecutorService().submit(new TransformInternalCallable(proxiedReader, proxiedWriter, options));
                    submittedTask.get(timeoutMs + additionalThreadTimout, TimeUnit.MILLISECONDS);
                } catch (TimeoutException e) {
                    releaseResources(submittedTask, proxiedReader, proxiedWriter);
                    throw new TimeoutException("Transformation failed due to timeout limit");
                } catch (InterruptedException e) {
                    releaseResources(submittedTask, proxiedReader, proxiedWriter);
                    throw new InterruptedException("Transformation failed, because the thread of the transformation was interrupted");
                } catch (ExecutionException e) {
                    Throwable cause = e.getCause();
                    if (cause instanceof TransformInternalCallableException) {
                        cause = ((TransformInternalCallableException) cause).getCause();
                    }
                    throw cause;
                }
            }
            // record time
            long after = System.currentTimeMillis();
            recordTime(sourceMimetype, targetMimetype, after - before);
        } catch (ContentServiceTransientException cste) {
            // A transient failure: don't record an error or update the transformer's
            // average time; just log and rethrow.
            if (logger.isDebugEnabled()) {
                logger.debug("Transformation has been transiently declined: \n" + " reader: " + reader + "\n" + " writer: " + writer + "\n" + " options: " + options + "\n" + " transformer: " + this);
            }
            // We rethrow the exception
            throw cste;
        } catch (UnsupportedTransformationException e) {
            // Don't record an error or even the time, as this is normal in compound transformations.
            transformerDebug.debug(" Failed", e);
            throw e;
        } catch (Throwable e) {
            // Make sure that this transformation gets set back in terms of time taken.
            // This will ensure that transformers that compete for the same transformation
            // will be prejudiced against transformers that tend to fail
            long after = System.currentTimeMillis();
            recordError(sourceMimetype, targetMimetype, after - before);
            // Ask Tika to detect the document, and report back on whether
            // the current mime type is plausible
            String differentType = getMimetypeService().getMimetypeIfNotMatches(reader.getReader());
            // Report the error
            if (differentType == null) {
                transformerDebug.debug(" Failed", e);
                throw new ContentIOException("Content conversion failed: \n" + " reader: " + reader + "\n" + " writer: " + writer + "\n" + " options: " + options.toString(false) + "\n" + " limits: " + getLimits(reader, writer, options), e);
            } else {
                transformerDebug.debug(" Failed: Mime type was '" + differentType + "'", e);
                if (retryTransformOnDifferentMimeType) {
                    // MNT-11015 fix.
                    // Get a new reader to refresh the input stream.
                    reader = reader.getReader();
                    // Set the actual file MIME type detected by Tika on the content reader
                    reader.setMimetype(differentType);
                    // Look up the correct transformer for the detected MIME type and retry
                    // the transformation with it
                    ContentTransformer transformer = this.registry.getTransformer(differentType, reader.getSize(), targetMimetype, options);
                    if (null != transformer) {
                        transformer.transform(reader, writer, options);
                    } else {
                        transformerDebug.debug(" Failed", e);
                        throw new ContentIOException("Content conversion failed: \n" + " reader: " + reader + "\n" + " writer: " + writer + "\n" + " options: " + options.toString(false) + "\n" + " limits: " + getLimits(reader, writer, options) + "\n" + " claimed mime type: " + reader.getMimetype() + "\n" + " detected mime type: " + differentType + "\n" + " transformer not found" + "\n", e);
                    }
                } else {
                    throw new ContentIOException("Content conversion failed: \n" + " reader: " + reader + "\n" + " writer: " + writer + "\n" + " options: " + options.toString(false) + "\n" + " limits: " + getLimits(reader, writer, options) + "\n" + " claimed mime type: " + reader.getMimetype() + "\n" + " detected mime type: " + differentType, e);
                }
            }
        } finally {
            transformerDebug.popTransform();
            // check that the reader and writer are both closed
            if (reader.isChannelOpen()) {
                logger.error("Content reader not closed by transformer: \n" + " reader: " + reader + "\n" + " transformer: " + this);
            }
            if (writer.isChannelOpen()) {
                logger.error("Content writer not closed by transformer: \n" + " writer: " + writer + "\n" + " transformer: " + this);
            }
        }
        // done
        if (logger.isDebugEnabled()) {
            logger.debug("Completed transformation: \n" + " reader: " + reader + "\n" + " writer: " + writer + "\n" + " options: " + options + "\n" + " transformer: " + this);
        }
    } finally {
        depth.set(depth.get() - 1);
    }
}
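For context, the timeout branch above follows a general pattern: submit the work as a Callable, wait with a bounded get, and on timeout interrupt the worker and release the proxied streams it was handed so a stuck transformation cannot keep resources open. The sketch below is a minimal, self-contained illustration of that pattern using only java.util.concurrent; the ReleasableStream class and every other name in it are hypothetical stand-ins for illustration, not the real StreamAwareContentReaderProxy, TransformInternalCallable or releaseResources API.

import java.io.Closeable;
import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class TimeoutGuardedWorkDemo {

    /** Minimal stand-in for a stream-aware proxy: it remembers the stream it handed out
     *  so the caller can force it closed after a timeout (hypothetical, for illustration). */
    static class ReleasableStream {
        private volatile Closeable openStream;

        // Hand out a stream and remember it so it can be force-closed later.
        Closeable open() {
            openStream = new Closeable() {
                @Override
                public void close() throws IOException {
                    // A real proxy would close the underlying channel here; the demo stream is a no-op.
                }
            };
            return openStream;
        }

        // Force-close whatever stream is currently open, if any.
        void release() throws IOException {
            Closeable s = openStream;
            if (s != null) {
                s.close();
            }
        }
    }

    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        ReleasableStream proxied = new ReleasableStream();
        long timeoutMs = 100;

        Callable<Void> work = () -> {
            proxied.open();
            Thread.sleep(10_000); // simulate a transformation that takes far too long
            return null;
        };

        Future<Void> submitted = executor.submit(work);
        try {
            // Bounded wait, in the same spirit as submittedTask.get(timeoutMs + ..., MILLISECONDS) above
            submitted.get(timeoutMs, TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            // Same shape as the TimeoutException branch above: stop waiting, interrupt the worker,
            // and force the stream it was handed to be released.
            submitted.cancel(true);
            proxied.release();
            System.out.println("Work abandoned after " + timeoutMs + " ms");
        } catch (ExecutionException e) {
            // Unwrap the worker's own failure, as the ExecutionException branch above does
            throw new RuntimeException(e.getCause());
        } finally {
            executor.shutdownNow();
        }
    }
}

Cancelling the Future alone may not free anything if the worker is blocked in I/O; forcibly closing the stream it holds is what actually releases the resource, which is the job the stream-aware proxies perform in the Alfresco code above.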
Use of org.alfresco.repo.content.StreamAwareContentReaderProxy in project alfresco-repository by Alfresco.
The class AbstractMappingMetadataExtracter, method extractRaw.
/**
 * Calls the {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader)} method
 * using the given limits.
 * <p>
 * Currently the only limit supported by {@link MetadataExtracterLimits} is a timeout,
 * so this method uses {@link AbstractMappingMetadataExtracter#getExecutorService()}
 * to execute a {@link FutureTask} with any timeout defined.
 * <p>
 * If no timeout limit is defined or it is unlimited (-1),
 * the <code>extractRaw</code> method is called directly.
 *
 * @param reader the document to extract the values from. The stream provided by
 *               the reader must be closed if accessed directly.
 * @param limits the limits to impose on the extraction
 * @return Returns a map of document property values keyed by property name.
 * @throws Throwable All exception conditions can be handled.
 */
private Map<String, Serializable> extractRaw(ContentReader reader, MetadataExtracterLimits limits) throws Throwable {
    FutureTask<Map<String, Serializable>> task = null;
    StreamAwareContentReaderProxy proxiedReader = null;
    if (reader.getSize() > limits.getMaxDocumentSizeMB() * MEGABYTE_SIZE) {
        throw new LimitExceededException("Max doc size exceeded " + limits.getMaxDocumentSizeMB() + " MB");
    }
    synchronized (CONCURRENT_EXTRACTIONS_COUNT) {
        if (logger.isDebugEnabled()) {
            logger.debug("Concurrent extractions : " + CONCURRENT_EXTRACTIONS_COUNT.get());
        }
        if (CONCURRENT_EXTRACTIONS_COUNT.get() < limits.getMaxConcurrentExtractionsCount()) {
            int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.incrementAndGet();
            if (logger.isDebugEnabled()) {
                logger.debug("New extraction accepted. Concurrent extractions : " + totalDocCount);
            }
        } else {
            throw new LimitExceededException("Reached concurrent extractions limit - " + limits.getMaxConcurrentExtractionsCount());
        }
    }
    try {
        proxiedReader = new StreamAwareContentReaderProxy(reader);
        task = new FutureTask<Map<String, Serializable>>(new ExtractRawCallable(proxiedReader));
        getExecutorService().execute(task);
        return task.get(limits.getTimeoutMs(), TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
        task.cancel(true);
        if (null != proxiedReader) {
            proxiedReader.release();
        }
        throw e;
    } catch (InterruptedException e) {
        // We were asked to stop
        task.cancel(true);
        return null;
    } catch (ExecutionException e) {
        // Unwrap our cause and throw that
        Throwable cause = e.getCause();
        if (cause != null && cause instanceof ExtractRawCallableException) {
            cause = ((ExtractRawCallableException) cause).getCause();
        }
        throw cause;
    } finally {
        int totalDocCount = CONCURRENT_EXTRACTIONS_COUNT.decrementAndGet();
        if (logger.isDebugEnabled()) {
            logger.debug("Extraction finalized. Remaining concurrent extraction : " + totalDocCount);
        }
    }
}
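The synchronized check-then-increment on CONCURRENT_EXTRACTIONS_COUNT above acts as a small admission gate: extractions beyond the configured limit are rejected immediately rather than queued, and the slot is always returned in the finally block. Below is a minimal sketch of that gate under illustrative assumptions; the class, field and limit values are made up for the example, and IllegalStateException stands in for Alfresco's LimitExceededException.

import java.util.concurrent.atomic.AtomicInteger;

public class ExtractionGateDemo {

    // Shared counter of in-flight extractions (name and limit are illustrative,
    // not the actual Alfresco constants).
    private static final AtomicInteger CONCURRENT_EXTRACTIONS = new AtomicInteger(0);
    private static final int MAX_CONCURRENT_EXTRACTIONS = 2;

    static String guardedExtract(String doc) {
        // Admission check: reject the request rather than queueing it, mirroring the
        // LimitExceededException behaviour in extractRaw above.
        synchronized (CONCURRENT_EXTRACTIONS) {
            if (CONCURRENT_EXTRACTIONS.get() >= MAX_CONCURRENT_EXTRACTIONS) {
                throw new IllegalStateException("Reached concurrent extractions limit - " + MAX_CONCURRENT_EXTRACTIONS);
            }
            CONCURRENT_EXTRACTIONS.incrementAndGet();
        }
        try {
            return "metadata for " + doc; // stand-in for the real extraction work
        } finally {
            // The decrement lives in finally so a failed extraction still frees its slot.
            CONCURRENT_EXTRACTIONS.decrementAndGet();
        }
    }

    public static void main(String[] args) {
        System.out.println(guardedExtract("report.docx"));
    }
}

Synchronizing on the counter makes the check and the increment one atomic step, so two threads cannot both pass the check when only a single slot remains; the AtomicInteger alone would not give that guarantee.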