Use of jp.gr.java_conf.dangan.util.lha.LhaFile in the fess-crawler project by codelibs.
Class LhaExtractor, method getText.
/**
 * Extracts text from every entry of an LHA archive read from the given stream.
 *
 * <p>The stream is first copied to a temporary {@code .lzh} file, which is then opened as an
 * {@link LhaFile}. For each entry, the content type is resolved from the entry path; when an
 * extractor is registered for that type, the entry is passed to it and the returned text is
 * appended to the result, one entry per line. A failure while extracting a single entry is
 * logged at debug level and that entry is skipped.</p>
 *
 * <p>The temporary file is deleted before this method returns, whether or not extraction
 * succeeded.</p>
 *
 * @param in the archive content; must not be {@code null}
 * @param params extraction parameters (not read by this implementation)
 * @return the concatenated, trimmed text of all entries that could be extracted
 * @throws CrawlerSystemException if {@code in} is {@code null}
 * @throws MaxLengthExceededException if {@code maxContentSize} is not {@code -1} and the
 *         accumulated original size of the entries exceeds it
 * @throws ExtractException if the archive cannot be copied, opened or enumerated
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new CrawlerSystemException("The inputstream is null.");
    }

    final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper();
    final ExtractorFactory extractorFactory = getExtractorFactory();
    final StringBuilder buf = new StringBuilder(1000);
    File tempFile = null;
    LhaFile lhaFile = null;
    try {
        // LhaFile is constructed from a File, so the stream is spooled to disk first.
        tempFile = File.createTempFile("crawler-", ".lzh");
        try (FileOutputStream fos = new FileOutputStream(tempFile)) {
            CopyUtil.copy(in, fos);
        }

        lhaFile = new LhaFile(tempFile);
        // LhaFile.entries() is used as a raw Enumeration here; its elements are read as LhaHeader.
        @SuppressWarnings("unchecked")
        final Enumeration<LhaHeader> entries = lhaFile.entries();
        long contentSize = 0;
        while (entries.hasMoreElements()) {
            final LhaHeader head = entries.nextElement();

            // The limit is checked against the size declared in the entry header,
            // before any of the entry is decompressed.
            contentSize += head.getOriginalSize();
            if (maxContentSize != -1 && contentSize > maxContentSize) {
                throw new MaxLengthExceededException("Extracted size is " + contentSize + " > " + maxContentSize);
            }

            final String filename = head.getPath();
            final String mimeType = mimeTypeHelper.getContentType(null, filename);
            if (mimeType == null) {
                continue;
            }
            final Extractor extractor = extractorFactory.getExtractor(mimeType);
            if (extractor == null) {
                continue;
            }

            InputStream is = null;
            try {
                is = lhaFile.getInputStream(head);
                final Map<String, String> map = new HashMap<>();
                map.put(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                // The nested extractor gets a close-ignoring wrapper; the entry stream
                // itself is closed in the finally block below.
                final ExtractData data = extractor.getText(new IgnoreCloseInputStream(is), map);
                final String content = data.getContent();
                // StringBuilder.append(null) would add the literal text "null" to the
                // result, so entries without content are skipped instead.
                if (content != null) {
                    buf.append(content);
                    buf.append('\n');
                }
            } catch (final Exception e) {
                // Best effort: one unreadable entry must not fail the whole archive.
                // NOTE(review): this also swallows a MaxLengthExceededException raised by
                // the nested extractor - confirm that is intended.
                if (logger.isDebugEnabled()) {
                    logger.debug("Exception in an internal extractor.", e);
                }
            } finally {
                CloseableUtil.closeQuietly(is);
            }
        }
    } catch (final MaxLengthExceededException e) {
        // Propagate unchanged so it is not wrapped by the generic handler below.
        throw e;
    } catch (final Exception e) {
        throw new ExtractException("Could not extract a content.", e);
    } finally {
        if (lhaFile != null) {
            try {
                lhaFile.close();
            } catch (final IOException e) {
                // ignore: nothing useful can be done if closing the archive fails
            }
        }
        if (tempFile != null && !tempFile.delete()) {
            logger.warn("Failed to delete " + tempFile.getAbsolutePath());
        }
    }

    return new ExtractData(buf.toString().trim());
}
Aggregations