Use of org.apache.tika.sax.XHTMLContentHandler in the Apache Tika project.
Class AbstractDBParser, method parse.
/**
 * Writes the tables reachable through the database connection as XHTML.
 * <p>
 * A connection is obtained with {@code getConnection} and stored in {@code connection}.
 * Every name returned by {@code getTableNames} is added to {@code metadata} under
 * {@code Database.TABLE_NAME}. Then, for each table, a {@code table} element carrying the
 * reader's table name in its {@code name} attribute is emitted, containing a {@code thead}
 * row with one {@code th} per header and a {@code tbody} that the table reader fills
 * through repeated {@code nextRow} calls.
 * <p>
 * Once the connection has been obtained, {@code close()} is invoked on every exit path.
 *
 * @param stream   input handed to {@code getConnection}
 * @param handler  receiver of the generated XHTML SAX events
 * @param metadata metadata object that receives the table names
 * @param context  parse context forwarded to the connection, table-name and table-reader helpers
 * @throws IOException   if listing the table names fails with a {@link SQLException}
 *                       (wrapped as the cause), or if a callee raises an I/O error
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException if a callee reports a parsing problem
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    connection = getConnection(stream, metadata, context);

    List<String> tableNames;
    try {
        tableNames = getTableNames(connection, metadata, context);
    } catch (SQLException e) {
        try {
            close();
        } catch (IOException | SQLException closeFailure) {
            // Cleanup is best effort: keep the original failure as the one reported and
            // attach the close problem to it instead of letting it replace the cause.
            e.addSuppressed(closeFailure);
        }
        throw new IOExceptionWithCause(e);
    }

    XHTMLContentHandler xHandler = null;
    // endDocument() must only be emitted for a document that was actually started.
    boolean documentStarted = false;
    try {
        for (String tableName : tableNames) {
            //add table names to parent metadata
            metadata.add(Database.TABLE_NAME, tableName);
        }

        xHandler = new XHTMLContentHandler(handler, metadata);
        xHandler.startDocument();
        documentStarted = true;

        for (String tableName : tableNames) {
            JDBCTableReader tableReader = getTableReader(connection, tableName, context);
            xHandler.startElement("table", "name", tableReader.getTableName());

            xHandler.startElement("thead");
            xHandler.startElement("tr");
            for (String header : tableReader.getHeaders()) {
                xHandler.startElement("th");
                xHandler.characters(header);
                xHandler.endElement("th");
            }
            xHandler.endElement("tr");
            xHandler.endElement("thead");

            xHandler.startElement("tbody");
            while (tableReader.nextRow(xHandler, context)) {
                // nextRow emits the row itself; the loop only drives it until it returns false
            }
            xHandler.endElement("tbody");

            xHandler.endElement("table");
        }
    } finally {
        try {
            close();
        } catch (IOException | SQLException ignored) {
            // best-effort cleanup: a close failure must not hide the outcome of the parse
        }
        if (documentStarted) {
            xHandler.endDocument();
        }
    }
}
Use of org.apache.tika.sax.XHTMLContentHandler in the Apache Tika project.
Class JpegParser, method parse.
/**
 * Collects JPEG metadata from {@code stream} into {@code metadata} and emits an empty
 * XHTML document on {@code handler}.
 * <p>
 * The stream is wrapped in a {@code TikaInputStream} tied to a {@code TemporaryResources}
 * instance. An {@code ImageMetadataExtractor} is run on the file obtained from that stream,
 * then a {@code JempboxExtractor} is run on the stream itself. The temporary resources are
 * disposed whether or not extraction succeeds, and only after that is the XHTML document
 * started and immediately ended.
 *
 * @param stream   the input to read
 * @param handler  receiver of the XHTML start/end document events
 * @param metadata metadata object passed to both extractors and to the XHTML handler
 * @param context  parse context (not read by this method)
 * @throws IOException   if a callee raises an I/O error
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException if a callee reports a parsing problem
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    final TemporaryResources tempResources = new TemporaryResources();
    try {
        final TikaInputStream tikaStream = TikaInputStream.get(stream, tempResources);

        // First pass works on the file obtained from the stream.
        final ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseJpeg(tikaStream.getFile());

        // Second pass reads the stream directly.
        final JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
        jempboxExtractor.parse(tikaStream);
    } finally {
        tempResources.dispose();
    }

    // No body content is produced: the document is opened and closed straight away.
    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}
Aggregations