Use of org.apache.poi.hwmf.usermodel.HwmfPicture in the project tika by apache.
The parse method of the class WMFParser.
/**
 * Extracts the text of a WMF picture and writes it to {@code handler} as XHTML.
 *
 * <p>The stream is read with POI's {@code HwmfPicture}. Every {@code extTextOut} and
 * {@code textOut} record is written as its own {@code <p>} element, in record order.
 * Text is decoded with the charset of the most recently seen {@code createFontIndirect}
 * record; {@code LocaleUtil.CHARSET_1252} is used until such a record appears, or when
 * the font does not report a charset.
 *
 * <p>If parsing fails, the exception is rethrown as a {@link TikaException} and
 * {@code endDocument()} is not called on the XHTML handler.
 *
 * @param stream   the WMF data to read
 * @param handler  receives the generated XHTML SAX events
 * @param metadata passed on to the {@code XHTMLContentHandler}
 * @param context  not used by this method
 * @throws IOException   declared by the parser contract
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException if POI throws a {@code RecordFormatException}, any other
 *                       {@code RuntimeException}, or an {@code AssertionError}
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    try {
        HwmfPicture picture = new HwmfPicture(stream);
        // Declared outside the loop on purpose: a font record and the text records that
        // follow it are separate iterations, so the charset must survive between them.
        // Resetting it per record would decode all text with the default charset.
        Charset charset = LocaleUtil.CHARSET_1252;
        for (HwmfRecord record : picture.getRecords()) {
            // This fix should be done within POI.
            // NOTE(review): this only remembers the last font created; it does not track
            // which font object is actually selected when the text record is drawn.
            if (record.getRecordType().equals(HwmfRecordType.createFontIndirect)) {
                HwmfFont font = ((HwmfText.WmfCreateFontIndirect) record).getFont();
                charset = (font.getCharSet() == null || font.getCharSet().getCharset() == null)
                        ? LocaleUtil.CHARSET_1252
                        : font.getCharSet().getCharset();
            }
            if (record.getRecordType().equals(HwmfRecordType.extTextOut)) {
                HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut) record;
                xhtml.startElement("p");
                xhtml.characters(textOut.getText(charset));
                xhtml.endElement("p");
            } else if (record.getRecordType().equals(HwmfRecordType.textOut)) {
                HwmfText.WmfTextOut textOut = (HwmfText.WmfTextOut) record;
                xhtml.startElement("p");
                xhtml.characters(textOut.getText(charset));
                xhtml.endElement("p");
            }
        }
    } catch (RecordFormatException e) {
        // POI's hwmf parser can throw these for "parse exceptions"
        throw new TikaException(e.getMessage(), e);
    } catch (RuntimeException e) {
        // wrap any other runtime failure in a TikaException, keeping the cause
        throw new TikaException(e.getMessage(), e);
    } catch (AssertionError e) {
        // POI's hwmf parser can throw these for parse exceptions
        throw new TikaException(e.getMessage(), e);
    }
    xhtml.endDocument();
}
Aggregations