use of org.apache.tika.io.TikaInputStream in project tika by apache.
the class GeographicInformationParser method parse.
/**
 * Opens the incoming stream as a file-backed data store, reads its metadata and
 * hands it to {@code extract(...)} together with an XHTML wrapper around the
 * supplied content handler.
 *
 * @param inputStream    stream containing the geographic document
 * @param contentHandler SAX handler that is wrapped in an {@link XHTMLContentHandler}
 * @param metadata       metadata object; its content type is set to {@code geoInfoType}
 * @param parseContext   parse context (not used by this method)
 * @throws IOException   if the stream cannot be spooled to a file
 * @throws SAXException  if the content handler rejects an event
 * @throws TikaException if the storage is unsupported or the data store fails
 *                       to open, read, or close
 */
@Override
public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
    metadata.set(Metadata.CONTENT_TYPE, geoInfoType);
    XHTMLContentHandler xhtmlContentHandler = new XHTMLContentHandler(contentHandler, metadata);
    // Only own temporary resources when the caller did not already supply a TikaInputStream.
    TemporaryResources tmp = TikaInputStream.isTikaInputStream(inputStream) ? null : new TemporaryResources();
    try {
        TikaInputStream tikaInputStream = TikaInputStream.get(inputStream, tmp);
        File file = tikaInputStream.getFile();
        DataStore dataStore = DataStores.open(file);
        try {
            DefaultMetadata defaultMetadata = new DefaultMetadata(dataStore.getMetadata());
            extract(xhtmlContentHandler, metadata, defaultMetadata);
        } finally {
            // Release the store before the backing temp file is disposed below;
            // a failure here is reported through the DataStoreException handler.
            dataStore.close();
        }
    } catch (UnsupportedStorageException e) {
        throw new TikaException("UnsupportedStorageException", e);
    } catch (DataStoreException e) {
        throw new TikaException("DataStoreException", e);
    } finally {
        if (tmp != null) {
            tmp.dispose();
        }
    }
}
use of org.apache.tika.io.TikaInputStream in project tika by apache.
the class GribParser method parse.
/**
 * Spools the stream to a file, opens it with the NetCDF library, copies the
 * global attributes into {@code metadata} and writes the dimensions and
 * variables to the handler as XHTML.
 *
 * @param stream   stream containing the GRIB document
 * @param handler  SAX handler that is wrapped in an {@link XHTMLContentHandler}
 * @param metadata metadata object; its content type is set to {@code GRIB_MIME_TYPE}
 * @param context  parse context (not used by this method)
 * @throws IOException   if the stream cannot be spooled to a file
 * @throws SAXException  if the content handler rejects an event
 * @throws TikaException if the NetCDF library fails to read or close the file,
 *                       or the temporary resources cannot be disposed
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    //Set MIME type as grib2
    metadata.set(Metadata.CONTENT_TYPE, GRIB_MIME_TYPE);
    // Keep a reference so that any temp file created by getFile() can be cleaned up.
    TemporaryResources tmp = new TemporaryResources();
    try {
        TikaInputStream tis = TikaInputStream.get(stream, tmp);
        File gribFile = tis.getFile();
        try {
            NetcdfFile ncFile = NetcdfDataset.openFile(gribFile.getAbsolutePath(), null);
            try {
                // first parse out the set of global attributes
                for (Attribute attr : ncFile.getGlobalAttributes()) {
                    Property property = resolveMetadataKey(attr.getFullName());
                    if (attr.getDataType().isString()) {
                        metadata.add(property, attr.getStringValue());
                    } else if (attr.getDataType().isNumeric()) {
                        int value = attr.getNumericValue().intValue();
                        metadata.add(property, String.valueOf(value));
                    }
                }
                XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
                xhtml.startDocument();
                xhtml.newline();
                xhtml.startElement("ul");
                xhtml.characters("dimensions:");
                xhtml.newline();
                for (Dimension dim : ncFile.getDimensions()) {
                    xhtml.element("li", dim.getFullName() + "=" + String.valueOf(dim.getLength()) + ";");
                    xhtml.newline();
                }
                xhtml.startElement("ul");
                xhtml.characters("variables:");
                xhtml.newline();
                for (Variable var : ncFile.getVariables()) {
                    xhtml.element("p", String.valueOf(var.getDataType()) + var.getNameAndDimensions() + ";");
                    for (Attribute element : var.getAttributes()) {
                        xhtml.element("li", " :" + element + ";");
                        xhtml.newline();
                    }
                }
                xhtml.endElement("ul");
                xhtml.endElement("ul");
                xhtml.endDocument();
            } finally {
                // Release the file handle before the temp file is disposed below.
                ncFile.close();
            }
        } catch (IOException e) {
            throw new TikaException("NetCDF parse error", e);
        }
    } finally {
        tmp.dispose();
    }
}
use of org.apache.tika.io.TikaInputStream in project tika by apache.
the class GDALParser method processCommand.
/**
 * Builds the command line to run by substituting the path of the stream's
 * backing file for {@code INPUT_FILE_TOKEN} in the configured command.
 *
 * @param stream input stream; it is cast to {@link TikaInputStream} unconditionally
 * @return the configured command with the token replaced, or the configured
 *         command unchanged when it has no token or the substitution fails
 */
public String processCommand(InputStream stream) {
    final TikaInputStream tikaStream = (TikaInputStream) stream;
    final String template = this.command;
    try {
        if (!this.command.contains(INPUT_FILE_TOKEN)) {
            return template;
        }
        final String inputPath = tikaStream.getFile().getPath();
        return this.command.replace(INPUT_FILE_TOKEN, inputPath);
    } catch (Exception e) {
        // Best effort: log the failure and fall back to the unmodified command.
        LOG.warn("exception processing command", e);
        return template;
    }
}
use of org.apache.tika.io.TikaInputStream in project tika by apache.
the class GDALParser method parse.
/**
 * Runs the configured GDAL command against the input, extracts metadata from
 * the command's output and then writes that output to the content handler.
 * Returns without doing anything when the {@code gdalinfo} check fails.
 *
 * @param stream   stream containing the document to inspect
 * @param handler  SAX handler that receives the command output
 * @param metadata metadata object populated from the command output
 * @param context  parse context (not used by this method)
 * @throws IOException   if reading the stream or running the command fails
 * @throws SAXException  if the content handler rejects an event
 * @throws TikaException if processing fails or the temporary resources
 *                       cannot be disposed
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    if (!ExternalParser.check("gdalinfo")) {
        return;
    }
    // first set up and run GDAL
    // process the command
    TemporaryResources tmp = new TemporaryResources();
    try {
        TikaInputStream tis = TikaInputStream.get(stream, tmp);
        String runCommand = processCommand(tis);
        String output = execCommand(new String[] { runCommand });
        // now extract the actual metadata params
        // from the GDAL output in the content stream
        // to do this, we need to literally process the output
        // from the invoked command b/c we can't read metadata and
        // output text from the handler in ExternalParser
        // at the same time, so for now, we can't use the
        // ExternalParser to do this and I've had to bring some of
        // that functionality directly into this class
        // TODO: investigate a way to do both using ExternalParser
        extractMetFromOutput(output, metadata);
        applyPatternsToOutput(output, metadata, getPatterns());
        // make the content handler and provide output there
        // now that we have metadata
        processOutput(handler, metadata, output);
    } finally {
        // Remove any temp file that processCommand() created via getFile().
        tmp.dispose();
    }
}
use of org.apache.tika.io.TikaInputStream in project tika by apache.
the class TiffParser method parse.
/**
 * Runs the TIFF metadata extractor on the stream's backing file, then the
 * Jempbox extractor on the stream itself, and finally emits an empty XHTML
 * document to the handler.
 *
 * @param stream   stream containing the TIFF image
 * @param handler  SAX handler that receives the (empty) XHTML document
 * @param metadata metadata object passed to both extractors
 * @param context  parse context (not used by this method)
 * @throws IOException   if the stream cannot be read or spooled to a file
 * @throws SAXException  if the content handler rejects an event
 * @throws TikaException if extraction fails or the temporary resources
 *                       cannot be disposed
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    final TemporaryResources scratch = new TemporaryResources();
    try {
        final TikaInputStream tiffStream = TikaInputStream.get(stream, scratch);

        final ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseTiff(tiffStream.getFile());

        final JempboxExtractor xmpExtractor = new JempboxExtractor(metadata);
        xmpExtractor.parse(tiffStream);
    } finally {
        // Always release whatever was registered while spooling the stream.
        scratch.dispose();
    }

    // Only metadata is extracted, so the document body stays empty.
    final XHTMLContentHandler emptyDocument = new XHTMLContentHandler(handler, metadata);
    emptyDocument.startDocument();
    emptyDocument.endDocument();
}
Aggregations