Use of org.apache.poi.util.ReplacingInputStream in project poi by apache.
From the class XSSFVMLDrawing, the method read.
/**
 * Parses a VML drawing from the given stream and rebuilds the {@code _qnames} and
 * {@code _items} lists from the elements selected by {@code $this/xml/*}.
 *
 * <p>Each selected element is converted according to its qualified name:
 * shape layouts, shape types and shapes are parsed into their typed beans, and
 * every other element is re-read through a DOM document and kept as a generic
 * {@link XmlObject}. Along the way {@code _shapeTypeId} is set from each shape
 * type and {@code _shapeId} is raised to the largest numeric id matched by
 * {@code ptrn_shapeId}.
 *
 * <p>This method does not close {@code is}.
 *
 * @param is the stream to read the drawing XML from
 * @throws IOException declared for failures while reading the stream
 * @throws XmlException if the content cannot be parsed; a {@link SAXException}
 *         raised by the DOM reader is wrapped with its message and cause
 */
@SuppressWarnings("resource")
protected void read(InputStream is) throws IOException, XmlException {
    // Some .xlsx files embed raw, unescaped HTML such as <br>, which is not
    // well-formed XML and breaks parsing. The stream is therefore wrapped so
    // that every "<br>" is handed to the parser as "<br/>" instead.
    InputStream patched = new ReplacingInputStream(is, "<br>", "<br/>");

    Document dom;
    try {
        dom = DocumentHelper.readDocument(patched);
    } catch (SAXException saxFailure) {
        throw new XmlException(saxFailure.getMessage(), saxFailure);
    }

    XmlObject root = XmlObject.Factory.parse(dom, DEFAULT_XML_OPTIONS);
    _qnames = new ArrayList<QName>();
    _items = new ArrayList<XmlObject>();

    XmlObject[] children = root.selectPath("$this/xml/*");
    for (XmlObject child : children) {
        Node node = child.getDomNode();
        QName qname = new QName(node.getNamespaceURI(), node.getLocalName());
        // Every branch below works from the serialized form of the element.
        String xml = child.xmlText();

        if (qname.equals(QNAME_SHAPE_LAYOUT)) {
            _items.add(CTShapeLayout.Factory.parse(xml, DEFAULT_XML_OPTIONS));
        } else if (qname.equals(QNAME_SHAPE_TYPE)) {
            CTShapetype shapeType = CTShapetype.Factory.parse(xml, DEFAULT_XML_OPTIONS);
            _items.add(shapeType);
            _shapeTypeId = shapeType.getId();
        } else if (qname.equals(QNAME_SHAPE)) {
            CTShape shape = CTShape.Factory.parse(xml, DEFAULT_XML_OPTIONS);
            String shapeId = shape.getId();
            Matcher idMatcher = (shapeId == null) ? null : ptrn_shapeId.matcher(shapeId);
            if (idMatcher != null && idMatcher.find()) {
                // Track the highest numeric shape id seen so far.
                int numericId = Integer.parseInt(idMatcher.group(1));
                _shapeId = Math.max(_shapeId, numericId);
            }
            _items.add(shape);
        } else {
            // Unrecognised element: round-trip it through a DOM document and
            // keep it as an untyped XmlObject.
            Document childDom;
            try {
                childDom = DocumentHelper.readDocument(new InputSource(new StringReader(xml)));
            } catch (SAXException saxFailure) {
                throw new XmlException(saxFailure.getMessage(), saxFailure);
            }
            _items.add(XmlObject.Factory.parse(childDom, DEFAULT_XML_OPTIONS));
        }

        // _qnames stays index-aligned with _items.
        _qnames.add(qname);
    }
}
Aggregations