Use of org.apache.poi.hemf.record.AbstractHemfComment in project poi by Apache.
Class HemfExtractorTest, method testBasicMac.
/**
 * Iterates over every record of the "SimpleEMF_mac.emf" sample and checks that
 * each multi-format comment payload starts with "%PDF" and ends with "EOF\n",
 * i.e. that the embedded data was extracted without leading or trailing junk.
 * Also checks the number of iterated records against the header's record count.
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
@Test
public void testBasicMac() throws Exception {
    InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleEMF_mac.emf");
    try {
        HemfExtractor ex = new HemfExtractor(is);
        HemfHeader header = ex.getHeader();
        int records = 0;
        boolean extractedData = false;
        for (HemfRecord record : ex) {
            if (record.getRecordType() == HemfRecordType.comment) {
                AbstractHemfComment comment = ((HemfCommentRecord) record).getComment();
                if (comment instanceof HemfCommentPublic.MultiFormats) {
                    for (HemfCommentPublic.HemfMultiFormatsData d : ((HemfCommentPublic.MultiFormats) comment).getData()) {
                        byte[] data = d.getData();
                        //fail with a readable message instead of an index error on a short payload
                        assertTrue("embedded payload is too short: " + data.length, data.length >= 4);
                        //make sure header starts at 0
                        assertEquals('%', data[0]);
                        assertEquals('P', data[1]);
                        assertEquals('D', data[2]);
                        assertEquals('F', data[3]);
                        //make sure byte array ends at EOF\n
                        assertEquals('E', data[data.length - 4]);
                        assertEquals('O', data[data.length - 3]);
                        assertEquals('F', data[data.length - 2]);
                        assertEquals('\n', data[data.length - 1]);
                        extractedData = true;
                    }
                }
            }
            records++;
        }
        //at least one multi-format payload must have been seen
        assertTrue(extractedData);
        //NOTE(review): the "- 1" presumably accounts for the header record not being
        //returned by the iterator -- confirm against HemfExtractor
        assertEquals(header.getRecords() - 1, records);
    } finally {
        //release the sample stream even when an assertion fails
        is.close();
    }
}
Use of org.apache.poi.hemf.record.AbstractHemfComment in project tika by Apache.
Class EMFParser, method parse.
/**
 * Walks the records of an EMF stream and writes the result as XHTML.
 * <p>
 * Comment records carrying multi-format data or an embedded Windows metafile
 * are passed to {@code handleMultiFormats} / {@code handleWMF} together with a
 * lazily obtained {@link EmbeddedDocumentExtractor}. Text from
 * {@code ExtTextOutW} records is accumulated in a buffer; the buffer is written
 * as a {@code <p>} element whenever the Y value of the next text record differs
 * from the previous one, and once more after the last record if it is non-empty.
 *
 * @param stream   the EMF data to read
 * @param handler  receives the generated XHTML SAX events
 * @param metadata passed to the {@link XHTMLContentHandler}
 * @param context  used to look up the embedded document extractor
 * @throws IOException   declared by the parser contract
 * @throws SAXException  if writing to {@code handler} fails
 * @throws TikaException wrapping any {@link RuntimeException} raised while
 *                       reading the records
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    EmbeddedDocumentExtractor embeddedDocumentExtractor = null;
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    try {
        HemfExtractor ex = new HemfExtractor(stream);
        //-1 means "no text record seen yet" (lastY) / "start of a new line" (lastX)
        long lastY = -1;
        long lastX = -1;
        //derive this from the font or frame/bounds information
        long fudgeFactorX = 1000;
        StringBuilder buffer = new StringBuilder();
        for (HemfRecord record : ex) {
            if (record.getRecordType() == HemfRecordType.comment) {
                AbstractHemfComment comment = ((HemfCommentRecord) record).getComment();
                boolean multiFormats = comment instanceof HemfCommentPublic.MultiFormats;
                boolean windowsMetafile = !multiFormats && comment instanceof HemfCommentPublic.WindowsMetafile;
                if (multiFormats || windowsMetafile) {
                    //only look up the extractor once, and only if embedded content exists
                    if (embeddedDocumentExtractor == null) {
                        embeddedDocumentExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
                    }
                    if (multiFormats) {
                        handleMultiFormats((HemfCommentPublic.MultiFormats) comment, xhtml, embeddedDocumentExtractor);
                    } else {
                        handleWMF((HemfCommentPublic.WindowsMetafile) comment, xhtml, embeddedDocumentExtractor);
                    }
                }
            } else if (record.getRecordType() == HemfRecordType.exttextoutw) {
                HemfText.ExtTextOutW extTextOutW = (HemfText.ExtTextOutW) record;
                long x = extTextOutW.getX();
                long y = extTextOutW.getY();
                //a change in Y is treated as a new line: flush what was collected so far
                if (lastY > -1 && lastY != y) {
                    xhtml.startElement("p");
                    xhtml.characters(buffer.toString());
                    xhtml.endElement("p");
                    buffer.setLength(0);
                    lastX = -1;
                }
                //a large horizontal jump on the same line is rendered as a single space
                if (lastX > -1 && x - lastX > fudgeFactorX) {
                    buffer.append(" ");
                }
                buffer.append(extTextOutW.getText());
                lastY = y;
                lastX = x;
            }
        }
        //write out whatever text is left after the final record
        if (buffer.length() > 0) {
            xhtml.startElement("p");
            xhtml.characters(buffer.toString());
            xhtml.endElement("p");
        }
    } catch (RuntimeException e) {
        //POI's hemf parser reports parse problems as RecordFormatException, which
        //is itself a RuntimeException per POI's API docs; it and any other runtime
        //failure are surfaced to callers as a TikaException with the cause preserved
        throw new TikaException(e.getMessage(), e);
    }
    xhtml.endDocument();
}
Aggregations