Use of org.apache.tika.sax.TaggedContentHandler in the Apache Tika project.
Class XMLParser, method parse.
/**
 * Parses an XML stream with the SAX parser obtained from the parse context,
 * framing the output in an XHTML document that holds a single {@code p} element.
 * <p>
 * The content type metadata is set to {@code application/xml} only when no
 * content type has been recorded yet.
 *
 * @param stream   the XML input; it is given to the SAX parser wrapped in a
 *                 {@code CloseShieldInputStream}
 * @param handler  receiver of the generated SAX events
 * @param metadata document metadata, read and possibly updated
 * @param context  parse context that supplies the SAX parser
 * @throws IOException   if the SAX parser reports an I/O failure
 * @throws SAXException  if the tagged handler rethrows the failure via
 *                       {@code throwIfCauseOf}, or if emitting the XHTML frame fails
 * @throws TikaException for every other SAX failure ("XML parse error")
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    // Respect a content type that was set before this parser was invoked.
    if (null == metadata.get(Metadata.CONTENT_TYPE)) {
        metadata.set(Metadata.CONTENT_TYPE, "application/xml");
    }

    final XHTMLContentHandler xhtmlOut = new XHTMLContentHandler(handler, metadata);
    xhtmlOut.startDocument();
    xhtmlOut.startElement("p");

    // NOTE(review): the tag decorates the caller's handler directly, not xhtmlOut — confirm intended.
    final TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
    try {
        // Locals are declared in the same order the original single expression evaluated them.
        final javax.xml.parsers.SAXParser saxParser = context.getSAXParser();
        final InputStream shielded = new CloseShieldInputStream(stream);
        final ContentHandler extractor = getContentHandler(taggedHandler, metadata, context);
        saxParser.parse(shielded, new OfflineContentHandler(new EmbeddedContentHandler(extractor)));
    } catch (SAXException e) {
        // Presumably rethrows e unchanged when it came from the tagged handler; otherwise falls through.
        taggedHandler.throwIfCauseOf(e);
        throw new TikaException("XML parse error", e);
    } finally {
        // The XHTML frame is closed whether or not parsing succeeded.
        xhtmlOut.endElement("p");
        xhtmlOut.endDocument();
    }
}
Use of org.apache.tika.sax.TaggedContentHandler in the Apache Tika project.
Class CompositeParser, method parse.
/**
 * Hands the parse request over to the component parser selected by
 * {@code getParser(metadata, context)}.
 * <p>
 * Every {@link RuntimeException} raised by that parser is wrapped in a
 * {@link TikaException}. An {@link IOException} or {@link SAXException} is
 * first offered to the tagged stream or tagged handler via
 * {@code throwIfCauseOf}; when that call does not rethrow it, the exception is
 * likewise wrapped in a {@link TikaException} to better honor the
 * {@link Parser} contract.
 * <p>
 * The name of the parser class (or of the wrapped parser, for a
 * {@code ParserDecorator}) is added to the metadata under {@code X-Parsed-By}.
 * Temporary resources created for the stream are disposed before returning.
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    final Parser delegate = getParser(metadata, context);
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream tikaStream = TikaInputStream.get(stream, resources);

        // A null handler is forwarded as null rather than being wrapped.
        TaggedContentHandler taggedHandler = null;
        if (handler != null) {
            taggedHandler = new TaggedContentHandler(handler);
        }

        // Report the wrapped parser's class when the delegate is only a decorator.
        final String parsedBy = (delegate instanceof ParserDecorator)
                ? ((ParserDecorator) delegate).getWrappedParser().getClass().getName()
                : delegate.getClass().getName();
        metadata.add("X-Parsed-By", parsedBy);

        try {
            delegate.parse(tikaStream, taggedHandler, metadata, context);
        } catch (RuntimeException e) {
            throw new TikaException("Unexpected RuntimeException from " + delegate, e);
        } catch (IOException e) {
            tikaStream.throwIfCauseOf(e);
            throw new TikaException("TIKA-198: Illegal IOException from " + delegate, e);
        } catch (SAXException e) {
            if (taggedHandler != null) {
                taggedHandler.throwIfCauseOf(e);
            }
            throw new TikaException("TIKA-237: Illegal SAXException from " + delegate, e);
        }
    } finally {
        resources.dispose();
    }
}
Use of org.apache.tika.sax.TaggedContentHandler in the Apache Tika project.
Class DIFParser, method parse.
/**
 * Parses the stream with the SAX parser obtained from the parse context and
 * frames the output in an XHTML document that holds a single {@code p} element.
 *
 * @param stream   the input; it is given to the SAX parser wrapped in a
 *                 {@code CloseShieldInputStream}
 * @param handler  receiver of the generated SAX events
 * @param metadata document metadata, forwarded to the XHTML and content handlers
 * @param context  parse context that supplies the SAX parser
 * @throws IOException   if the SAX parser reports an I/O failure
 * @throws SAXException  if the tagged handler rethrows the failure via
 *                       {@code throwIfCauseOf}, or if emitting the XHTML frame fails
 * @throws TikaException for every other SAX failure ("XML parse error")
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.startElement("p");

    // NOTE(review): the tag decorates the caller's handler directly, not the XHTML handler — confirm intended.
    final TaggedContentHandler taggedSink = new TaggedContentHandler(handler);
    try {
        // The parser is fetched first, matching the original evaluation order.
        final javax.xml.parsers.SAXParser parser = context.getSAXParser();
        parser.parse(
                new CloseShieldInputStream(stream),
                new OfflineContentHandler(
                        new EmbeddedContentHandler(
                                getContentHandler(taggedSink, metadata, context))));
    } catch (SAXException failure) {
        // Presumably rethrows the failure unchanged when it came from the tagged handler.
        taggedSink.throwIfCauseOf(failure);
        throw new TikaException("XML parse error", failure);
    } finally {
        // Always close the paragraph and the document, even after a failure.
        document.endElement("p");
        document.endDocument();
    }
}
Use of org.apache.tika.sax.TaggedContentHandler in the Apache Tika project.
Class AbstractXML2003Parser, method parse.
/**
 * Parses the stream with the SAX parser obtained from the parse context,
 * routing the extracted events through an XHTML document handler.
 * <p>
 * {@code setContentType(metadata)} is invoked before any output is produced.
 * Unlike a plain pass-through, the tagged handler here decorates the XHTML
 * handler rather than the caller's handler.
 *
 * @param stream   the input; it is given to the SAX parser wrapped in a
 *                 {@code CloseShieldInputStream}
 * @param handler  receiver of the generated SAX events
 * @param metadata document metadata, passed to {@code setContentType} and the handlers
 * @param context  parse context that supplies the SAX parser
 * @throws IOException   if the SAX parser reports an I/O failure
 * @throws SAXException  if the tagged handler rethrows the failure via
 *                       {@code throwIfCauseOf}, or if starting/ending the XHTML document fails
 * @throws TikaException for every other SAX failure ("XML parse error")
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    setContentType(metadata);

    final XHTMLContentHandler xhtmlDoc = new XHTMLContentHandler(handler, metadata);
    xhtmlDoc.startDocument();

    final TaggedContentHandler taggedXhtml = new TaggedContentHandler(xhtmlDoc);
    try {
        // Locals are declared in the same order the original single expression evaluated them.
        final javax.xml.parsers.SAXParser saxParser = context.getSAXParser();
        final InputStream guarded = new CloseShieldInputStream(stream);
        final ContentHandler inner = getContentHandler(taggedXhtml, metadata, context);
        saxParser.parse(guarded, new OfflineContentHandler(new EmbeddedContentHandler(inner)));
    } catch (SAXException e) {
        // Presumably rethrows e unchanged when it came from the tagged handler; otherwise falls through.
        taggedXhtml.throwIfCauseOf(e);
        throw new TikaException("XML parse error", e);
    } finally {
        // The document is ended whether or not parsing succeeded.
        xhtmlDoc.endDocument();
    }
}
Aggregations