Use of org.apache.tika.io.TemporaryResources in the Apache Tika project.
Class ParserDecorator, method withFallbacks.
/**
 * Builds a virtual parser out of the given parsers. On each parse call the
 * parsers are attempted in iteration order, and the first one that completes
 * without throwing an {@link Exception} wins.
 * <p>
 * The returned decorator wraps the first parser of the collection (or
 * {@link EmptyParser#INSTANCE} when the collection is empty) and always
 * reports the supplied media types as its supported types.
 * <p>
 * Note that failures of individual parsers are currently discarded, and if
 * every parser fails the parse call returns normally without reporting it.
 * <p>
 * TODO Is this the right name?
 * TODO Is this the right place to put this? Should it be in CompositeParser? Elsewhere?
 * TODO Should we reset the Metadata if we try another parser?
 * TODO Should we reset the ContentHandler if we try another parser?
 * TODO Should we log/report failures anywhere?
 *
 * @param parsers the candidate parsers, in order of preference
 * @param types the media types the resulting parser claims to support
 * @return a decorator that tries each candidate in turn
 * @deprecated Do not use until the TODOs are resolved, see TIKA-1509
 */
public static final Parser withFallbacks(final Collection<? extends Parser> parsers, final Set<MediaType> types) {
    // The decorator needs something to wrap: use the preferred parser if there is one
    Parser wrapped = parsers.isEmpty() ? EmptyParser.INSTANCE : parsers.iterator().next();

    return new ParserDecorator(wrapped) {
        private static final long serialVersionUID = 1625187131782069683L;

        @Override
        public Set<MediaType> getSupportedTypes(ParseContext context) {
            return types;
        }

        @Override
        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
            // A TikaInputStream is required so the input can be rewound after a failed attempt.
            // Temporary resources are only owned (and later released) here when we
            // have to create the TikaInputStream ourselves.
            final TemporaryResources ownedResources;
            if (TikaInputStream.isTikaInputStream(stream)) {
                ownedResources = null;
            } else {
                ownedResources = new TemporaryResources();
            }

            try {
                TikaInputStream rewindable = TikaInputStream.get(stream, ownedResources);
                rewindable.getFile();

                // Work through the candidates in preference order
                for (Parser candidate : parsers) {
                    rewindable.mark(-1);

                    boolean succeeded;
                    try {
                        candidate.parse(rewindable, handler, metadata, context);
                        succeeded = true;
                    } catch (Exception ignored) {
                        // TODO How to log / record this failure?
                        succeeded = false;
                    }
                    if (succeeded) {
                        return;
                    }

                    // Rewind so the next candidate, if any, sees the input from the start
                    rewindable.reset();
                }
            } finally {
                if (ownedResources != null) {
                    ownedResources.dispose();
                }
            }
        }

        @Override
        public String getDecorationName() {
            return "With Fallback";
        }
    };
}
Use of org.apache.tika.io.TemporaryResources in the Apache Tika project.
Class CompositeParser, method parse.
/**
 * Hands the document over to whichever component parser
 * {@code getParser(metadata, context)} selects, recording that parser's
 * class name under the "X-Parsed-By" metadata key first. For a
 * {@link ParserDecorator} the class of the wrapped parser is recorded.
 * <p>
 * To better honor the {@link Parser} contract, any
 * {@link RuntimeException}, and any {@link IOException} or
 * {@link SAXException} that is unrelated to the given input stream or
 * content handler, is re-thrown wrapped in a {@link TikaException}.
 *
 * @param stream the document to parse
 * @param handler receiver of the parse events; may be {@code null}
 * @param metadata document metadata, also used for parser selection
 * @param context the parse context
 * @throws IOException if the tagged input stream was the cause of the failure
 * @throws SAXException if the tagged content handler was the cause of the failure
 * @throws TikaException for every other failure of the delegate parser
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    Parser parser = getParser(metadata, context);
    TemporaryResources tmp = new TemporaryResources();
    try {
        TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);

        // Only wrap the handler when the caller actually supplied one
        TaggedContentHandler taggedHandler = null;
        if (handler != null) {
            taggedHandler = new TaggedContentHandler(handler);
        }

        // Report the parser doing the real work, looking through a decorator if needed
        Class<?> parsedBy;
        if (parser instanceof ParserDecorator) {
            parsedBy = ((ParserDecorator) parser).getWrappedParser().getClass();
        } else {
            parsedBy = parser.getClass();
        }
        metadata.add("X-Parsed-By", parsedBy.getName());

        try {
            parser.parse(taggedStream, taggedHandler, metadata, context);
        } catch (RuntimeException e) {
            throw new TikaException("Unexpected RuntimeException from " + parser, e);
        } catch (IOException e) {
            // Let the tagged stream re-throw the exception if it caused it; otherwise wrap
            taggedStream.throwIfCauseOf(e);
            throw new TikaException("TIKA-198: Illegal IOException from " + parser, e);
        } catch (SAXException e) {
            // Same idea for the handler, when there is one
            if (taggedHandler != null) {
                taggedHandler.throwIfCauseOf(e);
            }
            throw new TikaException("TIKA-237: Illegal SAXException from " + parser, e);
        }
    } finally {
        tmp.dispose();
    }
}
Use of org.apache.tika.io.TemporaryResources in the Apache Tika project.
Class NetworkParser, method parse.
/**
 * Adapts the plain stream to a {@link TikaInputStream} and forwards to the
 * {@code parse} overload that takes one. Temporary resources created for
 * the adaptation are disposed of once that call finishes, whether it
 * succeeds or throws.
 *
 * @param stream the document to parse
 * @param handler receiver of the parse events
 * @param metadata document metadata
 * @param context the parse context
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream tikaStream = TikaInputStream.get(stream, resources);
        // Statically typed as TikaInputStream, so this resolves to the TikaInputStream overload
        parse(tikaStream, handler, metadata, context);
    } finally {
        resources.dispose();
    }
}
Use of org.apache.tika.io.TemporaryResources in the Apache Tika project.
Class GeographicInformationParser, method parse.
/**
 * Opens the document as a data store, copies the store's metadata and
 * passes it to {@code extract} to populate the Tika metadata and XHTML
 * output. The content type is set to {@code geoInfoType} up front.
 * <p>
 * The data store is closed before any temporary resources are disposed of,
 * so the backing file is no longer held open at that point.
 *
 * @param inputStream the document to parse
 * @param contentHandler receiver of the XHTML parse events
 * @param metadata document metadata to populate
 * @param parseContext the parse context
 * @throws IOException if the stream cannot be made available as a file
 * @throws SAXException if the content handler fails
 * @throws TikaException if the data store is unsupported or cannot be read
 */
@Override
public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
    metadata.set(Metadata.CONTENT_TYPE, geoInfoType);
    XHTMLContentHandler xhtmlContentHandler = new XHTMLContentHandler(contentHandler, metadata);
    // Only own temporary resources when the TikaInputStream is created here
    TemporaryResources tmp = TikaInputStream.isTikaInputStream(inputStream) ? null : new TemporaryResources();
    DataStore dataStore = null;
    try {
        TikaInputStream tikaInputStream = TikaInputStream.get(inputStream, tmp);
        File file = tikaInputStream.getFile();
        dataStore = DataStores.open(file);
        DefaultMetadata defaultMetadata = new DefaultMetadata(dataStore.getMetadata());
        extract(xhtmlContentHandler, metadata, defaultMetadata);
    } catch (UnsupportedStorageException e) {
        throw new TikaException("UnsupportedStorageException", e);
    } catch (DataStoreException e) {
        throw new TikaException("DataStoreException", e);
    } finally {
        try {
            // Release the store first so the file is not open when it is cleaned up
            if (dataStore != null) {
                dataStore.close();
            }
        } catch (DataStoreException ignored) {
            // Extraction has already finished (or failed with its own exception);
            // a close failure must not mask that outcome.
        } finally {
            if (tmp != null) {
                tmp.dispose();
            }
        }
    }
}
Use of org.apache.tika.io.TemporaryResources in the Apache Tika project.
Class GribParser, method parse.
/**
 * Parses a GRIB document by opening it through the NetCDF library. Global
 * attributes are copied into the metadata (string values as-is, numeric
 * values truncated to {@code int}), and the dimensions and variables are
 * written out as XHTML. The content type is set to {@code GRIB_MIME_TYPE}.
 * <p>
 * The NetCDF file is closed, and any temporary resources created here are
 * disposed of, once parsing finishes or fails.
 *
 * @param stream the document to parse
 * @param handler receiver of the XHTML parse events
 * @param metadata document metadata to populate
 * @param context the parse context
 * @throws IOException if the stream cannot be made available as a file
 * @throws SAXException if the content handler fails
 * @throws TikaException if the NetCDF library fails to read the file
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    //Set MIME type as grib2
    metadata.set(Metadata.CONTENT_TYPE, GRIB_MIME_TYPE);
    // Only own temporary resources when the TikaInputStream is created here
    TemporaryResources tmp = TikaInputStream.isTikaInputStream(stream) ? null : new TemporaryResources();
    try {
        TikaInputStream tis = TikaInputStream.get(stream, tmp);
        // Kept outside the inner try so an IOException here is not re-labelled as a NetCDF error
        File gribFile = tis.getFile();
        NetcdfFile ncFile = null;
        try {
            ncFile = NetcdfDataset.openFile(gribFile.getAbsolutePath(), null);
            // first parse out the set of global attributes
            for (Attribute attr : ncFile.getGlobalAttributes()) {
                Property property = resolveMetadataKey(attr.getFullName());
                if (attr.getDataType().isString()) {
                    metadata.add(property, attr.getStringValue());
                } else if (attr.getDataType().isNumeric()) {
                    int value = attr.getNumericValue().intValue();
                    metadata.add(property, String.valueOf(value));
                }
            }
            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();
            xhtml.newline();
            xhtml.startElement("ul");
            xhtml.characters("dimensions:");
            xhtml.newline();
            for (Dimension dim : ncFile.getDimensions()) {
                xhtml.element("li", dim.getFullName() + "=" + String.valueOf(dim.getLength()) + ";");
                xhtml.newline();
            }
            // The variables list is nested inside the dimensions list; both are closed below
            xhtml.startElement("ul");
            xhtml.characters("variables:");
            xhtml.newline();
            for (Variable var : ncFile.getVariables()) {
                xhtml.element("p", String.valueOf(var.getDataType()) + var.getNameAndDimensions() + ";");
                for (Attribute element : var.getAttributes()) {
                    xhtml.element("li", " :" + element + ";");
                    xhtml.newline();
                }
            }
            xhtml.endElement("ul");
            xhtml.endElement("ul");
            xhtml.endDocument();
        } catch (IOException e) {
            throw new TikaException("NetCDF parse error", e);
        } finally {
            if (ncFile != null) {
                try {
                    ncFile.close();
                } catch (IOException ignored) {
                    // Parsing has already finished (or failed with its own exception);
                    // a close failure must not mask that outcome.
                }
            }
        }
    } finally {
        if (tmp != null) {
            tmp.dispose();
        }
    }
}
Aggregations