Use of org.apache.tika.sax.TeeContentHandler in the Apache Tika project.
Class LanguageDetectingParser, method parse.
/**
 * Delegates parsing to the superclass while also routing the content events to a
 * {@link LanguageHandler}, then stores the detected language in the metadata when
 * the detection result reports itself as reasonably certain.
 *
 * @param stream   the document stream handed to the superclass parser
 * @param handler  the caller's handler; it is combined with the language handler
 *                 through a {@link TeeContentHandler}
 * @param metadata passed to the superclass parser; {@code TikaCoreProperties.LANGUAGE}
 *                 is set on it only for a reasonably certain result
 * @param context  the parse context passed through unchanged
 */
public void parse(InputStream stream, ContentHandler handler, final Metadata metadata, ParseContext context) throws SAXException, IOException, TikaException {
    LanguageHandler languageSniffer = new LanguageHandler();
    // Both the caller's handler and the language handler are given to the tee.
    super.parse(stream, new TeeContentHandler(handler, languageSniffer), metadata, context);

    LanguageResult detected = languageSniffer.getLanguage();
    if (!detected.isReasonablyCertain()) {
        // Leave the metadata untouched when the detection is not reasonably certain.
        return;
    }
    metadata.set(TikaCoreProperties.LANGUAGE, detected.getLanguage());
}
Use of org.apache.tika.sax.TeeContentHandler in the Apache Tika project.
Class ForkParser, method parse.
/**
 * Runs the "parse" call on a fork client and rethrows whatever throwable that call
 * hands back.
 *
 * <p>The client is always released in the {@code finally} block together with a flag
 * saying whether it is still considered alive: the flag is {@code true} when the call
 * returned or failed with a {@link TikaException}, and {@code false} when it failed
 * with an {@link IOException}.
 *
 * @param stream   the document stream; must not be {@code null}
 * @param handler  the caller's handler, combined with a {@code MetadataContentHandler}
 *                 through a {@link TeeContentHandler}
 * @param metadata passed both to the metadata handler and to the client call
 * @param context  the parse context passed to the client call
 * @throws NullPointerException if {@code stream} is {@code null}
 * @throws IOException   if the call returned an {@code IOException}
 * @throws SAXException  if the call returned a {@code SAXException}
 * @throws TikaException if the call threw or returned a {@code TikaException}, if
 *                       the call itself failed with an {@code IOException}, or if
 *                       it returned any other non-null throwable
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    if (stream == null) {
        throw new NullPointerException("null stream");
    }

    Throwable remoteFailure;
    boolean clientUsable = false;
    ForkClient forkClient = acquireClient();
    try {
        ContentHandler metadataSink = new MetadataContentHandler(metadata);
        ContentHandler fanOut = new TeeContentHandler(handler, metadataSink);
        remoteFailure = forkClient.call("parse", stream, fanOut, metadata, context);
        clientUsable = true;
    } catch (TikaException localProblem) {
        // The failure happened on this side, so the client is still marked alive.
        clientUsable = true;
        throw localProblem;
    } catch (IOException brokenLink) {
        // The failure happened on the other side; the flag stays false.
        throw new TikaException("Failed to communicate with a forked parser process."
                + " The process has most likely crashed due to some error"
                + " like running out of memory. A new process will be"
                + " started for the next parsing request.", brokenLink);
    } finally {
        releaseClient(forkClient, clientUsable);
    }

    if (remoteFailure == null) {
        // The call reported no throwable.
        return;
    }
    // Rethrow the declared exception types as they are; wrap everything else.
    if (remoteFailure instanceof IOException) {
        throw (IOException) remoteFailure;
    }
    if (remoteFailure instanceof SAXException) {
        throw (SAXException) remoteFailure;
    }
    if (remoteFailure instanceof TikaException) {
        throw (TikaException) remoteFailure;
    }
    throw new TikaException("Unexpected error in forked server process", remoteFailure);
}
Use of org.apache.tika.sax.TeeContentHandler in the Apache Tika project.
Class TikaGUI, method handleStream.
/**
 * Parses the given stream once into the HTML, text, main-text and XML buffers,
 * fills the corresponding views plus the sorted metadata view, and then, if the
 * stream can be reset, parses it a second time through a
 * {@code RecursiveParserWrapper} to fill the JSON view.
 *
 * @param input the document stream; it is wrapped in a progress monitor and a
 *              {@code TikaInputStream} before parsing
 * @param md    the metadata object passed to the first parse and then rendered
 *              into the metadata view
 * @throws Exception if either parse or the JSON serialization fails; an
 *                   {@code IOException} from {@code reset()} is reported in the
 *                   JSON view instead of being propagated
 */
private void handleStream(InputStream input, Metadata md) throws Exception {
    StringWriter htmlBuffer = new StringWriter();
    StringWriter textBuffer = new StringWriter();
    StringWriter textMainBuffer = new StringWriter();
    StringWriter xmlBuffer = new StringWriter();
    StringBuilder metadataBuffer = new StringBuilder();
    // One parse feeds all four output representations.
    ContentHandler handler = new TeeContentHandler(
            getHtmlHandler(htmlBuffer),
            getTextContentHandler(textBuffer),
            getTextMainContentHandler(textMainBuffer),
            getXmlContentHandler(xmlBuffer));
    context.set(DocumentSelector.class, new ImageDocumentSelector());
    input = TikaInputStream.get(new ProgressMonitorInputStream(this, "Parsing stream", input));

    if (input.markSupported()) {
        int mark = -1;
        if (input instanceof TikaInputStream && ((TikaInputStream) input).hasFile()) {
            // Clamp rather than narrow: a plain (int) cast of a length above
            // Integer.MAX_VALUE would wrap to an arbitrary, possibly negative limit.
            mark = (int) Math.min(((TikaInputStream) input).getLength(), Integer.MAX_VALUE);
        }
        if (mark == -1) {
            mark = MAX_MARK;
        }
        // Remember the start so the stream can be rewound for the second parse.
        input.mark(mark);
    }

    parser.parse(input, handler, md, context);

    // Render every metadata value as a "name: value" line, sorted by name.
    String[] names = md.names();
    Arrays.sort(names);
    for (String name : names) {
        for (String val : md.getValues(name)) {
            metadataBuffer.append(name);
            metadataBuffer.append(": ");
            metadataBuffer.append(val);
            metadataBuffer.append("\n");
        }
    }

    String name = md.get(Metadata.RESOURCE_NAME_KEY);
    if (name != null && name.length() > 0) {
        setTitle("Apache Tika: " + name);
    } else {
        setTitle("Apache Tika: unnamed document");
    }

    setText(metadata, metadataBuffer.toString());
    setText(xml, xmlBuffer.toString());
    setText(text, textBuffer.toString());
    setText(textMain, textMainBuffer.toString());
    setText(html, htmlBuffer.toString());

    if (!input.markSupported()) {
        setText(json, "InputStream does not support mark/reset for Recursive Parsing");
        layout.show(cards, "metadata");
        return;
    }

    try {
        input.reset();
    } catch (IOException e) {
        // The rewind failed, so the recursive parse is skipped and the JSON view
        // shows an explanation instead.
        setText(json, "Error during stream reset.\n" + "There's a limit of " + MAX_MARK + " bytes for this type of processing in the GUI.\n" + "Try the app with command line argument of -J.");
        layout.show(cards, "metadata");
        return;
    }

    // Second pass: recursive parse of the rewound stream, serialized as JSON.
    RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser, new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.BODY, -1));
    wrapper.parse(input, null, new Metadata(), new ParseContext());
    StringWriter jsonBuffer = new StringWriter();
    JsonMetadataList.setPrettyPrinting(true);
    JsonMetadataList.toJson(wrapper.getMetadata(), jsonBuffer);
    setText(json, jsonBuffer.toString());
    layout.show(cards, "metadata");
}
Aggregations