use of org.apache.tika.io.IOExceptionWithCause in project tika by apache.
the class BatchProcessBuilder method build.
/**
 * Builds a {@link BatchProcess} from runtime arguments and an
 * input stream carrying an XML configuration file. With the exception of the
 * QueueBuilder, the builders choose how to adjudicate between
 * runtime arguments and the elements in the configuration file.
 * <p>
 * The stream is parsed into a DOM document and the document element is
 * handed to {@code build(Node, Map)}.
 * <p>
 * This does not close the InputStream!
 *
 * @param is input stream of the configuration file; left open for the caller to close
 * @param runtimeAttributes incoming runtime attributes
 * @return batch process
 * @throws IOException if the stream cannot be read, or (as an
 *         {@link IOExceptionWithCause}) if the document builder cannot be
 *         obtained or the XML cannot be parsed
 */
public BatchProcess build(InputStream is, Map<String, String> runtimeAttributes) throws IOException {
    final Document config;
    try {
        config = new ParseContext().getDocumentBuilder().parse(is);
    } catch (TikaException | SAXException e) {
        // Callers only deal with IOException, so fold setup and parse failures into it.
        throw new IOExceptionWithCause(e);
    }
    return build(config.getDocumentElement(), runtimeAttributes);
}
use of org.apache.tika.io.IOExceptionWithCause in project tika by apache.
the class CommandLineParserBuilder method build.
/**
 * Reads an XML configuration from the stream and builds the command line
 * {@link Options} described by the first {@code commandline} element found
 * directly under the document element.
 * <p>
 * Each element child of that {@code commandline} element is passed to
 * {@code buildOption}; children for which it returns {@code null} are skipped.
 * If there is no {@code commandline} element, an empty {@link Options} is returned.
 *
 * @param is input stream of the XML configuration
 * @return the options that were built; never {@code null}
 * @throws IOException if the stream cannot be read, or (as an
 *         {@link IOExceptionWithCause}) if the document builder cannot be
 *         obtained or the XML cannot be parsed
 */
public Options build(InputStream is) throws IOException {
    final Document config;
    try {
        config = new ParseContext().getDocumentBuilder().parse(is);
    } catch (TikaException | SAXException e) {
        throw new IOExceptionWithCause(e);
    }

    // Locate the first <commandline> element directly under the root.
    NodeList topLevel = config.getDocumentElement().getChildNodes();
    Node commandLine = null;
    for (int idx = 0; idx < topLevel.getLength(); idx++) {
        Node candidate = topLevel.item(idx);
        if (candidate.getNodeType() == Node.ELEMENT_NODE
                && candidate.getNodeName().equals("commandline")) {
            commandLine = candidate;
            break;
        }
    }

    Options result = new Options();
    if (commandLine == null) {
        return result;
    }

    // Only element children describe options; text and comment nodes are ignored.
    NodeList entries = commandLine.getChildNodes();
    for (int idx = 0; idx < entries.getLength(); idx++) {
        Node entry = entries.item(idx);
        if (entry.getNodeType() == Node.ELEMENT_NODE) {
            Option built = buildOption(entry);
            if (built != null) {
                result.addOption(built);
            }
        }
    }
    return result;
}
use of org.apache.tika.io.IOExceptionWithCause in project tika by apache.
the class MockParser method parse.
/**
 * Parses the stream as an XML document and runs {@code executeAction} on
 * every child node of the document element, in document order, between the
 * start and end of an XHTML document written to {@code handler}.
 *
 * @param stream XML input to read
 * @param handler receiver of the XHTML output
 * @param metadata metadata handed to the XHTML handler and to each action
 * @param context parse context; supplies the {@link DocumentBuilder}
 * @throws IOException if the stream cannot be read, or (as an
 *         {@link IOExceptionWithCause}) if the input XML cannot be parsed
 * @throws SAXException if writing to the handler fails
 * @throws TikaException if the document builder cannot be obtained or an action fails
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    final Document script;
    try {
        script = context.getDocumentBuilder().parse(stream);
    } catch (SAXException e) {
        //to distinguish between SAX on read vs SAX while writing
        throw new IOExceptionWithCause(e);
    }

    NodeList steps = script.getDocumentElement().getChildNodes();
    XHTMLContentHandler out = new XHTMLContentHandler(handler, metadata);
    out.startDocument();
    for (int idx = 0; idx < steps.getLength(); idx++) {
        executeAction(steps.item(idx), metadata, context, out);
    }
    out.endDocument();
}
use of org.apache.tika.io.IOExceptionWithCause in project tika by apache.
the class CommonsDigester method digest.
/**
 * Computes the configured digests for the stream and records them via
 * {@code digestEach} / {@code digestFile} against the given metadata.
 * <p>
 * Strategy:
 * <ol>
 *   <li>If the stream is a {@link TikaInputStream} that already has a file and
 *       that file is longer than {@code markLimit}, digest the file directly.</li>
 *   <li>Otherwise digest in memory through a stream bounded at {@code markLimit},
 *       using mark/reset between algorithms.</li>
 *   <li>If {@code digestEach} reports that the bounded stream was not finished,
 *       spool to a file and digest that instead.</li>
 * </ol>
 *
 * @param is stream to digest
 * @param m metadata passed through to the digest helpers
 * @param parseContext not used by this method
 * @throws IOException if reading, spooling or digesting fails, or (as an
 *         {@link IOExceptionWithCause}) if temporary resources cannot be disposed
 */
@Override
public void digest(InputStream is, Metadata m, ParseContext parseContext) throws IOException {
    TikaInputStream tis = TikaInputStream.cast(is);
    //if the stream is already backed by a file that is longer than the
    //mark limit, just digest the underlying file.
    if (tis != null && tis.hasFile() && tis.getLength() > markLimit) {
        digestFile(tis.getFile(), m);
        return;
    }

    //try the usual mark/reset stuff.
    //however, if you actually hit the bound,
    //then stop and spool to file via TikaInputStream
    SimpleBoundedInputStream bis = new SimpleBoundedInputStream(markLimit, is);
    boolean finishedStream = false;
    for (DigestAlgorithm algorithm : algorithms) {
        bis.mark(markLimit + 1);
        finishedStream = digestEach(algorithm, bis, m);
        bis.reset();
        if (!finishedStream) {
            break;
        }
    }
    if (finishedStream) {
        return;
    }

    //spool to File and digest that.
    if (tis != null) {
        digestFile(tis.getFile(), m);
        return;
    }

    TemporaryResources tmp = new TemporaryResources();
    Exception primary = null;
    try {
        TikaInputStream tmpTikaInputStream = TikaInputStream.get(is, tmp);
        digestFile(tmpTikaInputStream.getFile(), m);
    } catch (IOException | RuntimeException e) {
        //remember the real failure so that cleanup cannot mask it
        primary = e;
        throw e;
    } finally {
        try {
            tmp.dispose();
        } catch (TikaException e) {
            if (primary == null) {
                throw new IOExceptionWithCause(e);
            }
            //throwing from finally would replace the in-flight exception;
            //attach the cleanup failure to it instead
            primary.addSuppressed(e);
        }
    }
}
Aggregations