Use of org.apache.poi.sl.usermodel.Placeholder in the Apache Tika project:
class XSLFPowerPointExtractorDecorator, method extractContent.
/**
 * Walks a list of slide shapes and writes their content to the XHTML handler.
 *
 * <ul>
 *   <li>Text shapes: each paragraph becomes a {@code p} element; a run whose
 *       hyperlink address is non-null and does not contain {@code "#_ftn"}
 *       is wrapped in an {@code a} element with that address as {@code href}.</li>
 *   <li>Group shapes: handled by recursing into the group's child shapes.</li>
 *   <li>Tables: delegated to {@code extractTable}.</li>
 *   <li>Graphic frames: one empty {@code div class="embedded"} per
 *       {@code p:oleObj} descendant that carries a relationship id.</li>
 *   <li>Picture shapes: one empty {@code div class="embedded"} for the blip's
 *       embed relationship id, only when {@code skipPlaceholders} is false.</li>
 * </ul>
 *
 * @param shapes           shapes to process, in order
 * @param skipPlaceholders when true, text shapes that report a placeholder type
 *                         are skipped and picture shapes emit nothing
 * @param xhtml            handler that receives the generated SAX events
 * @param slideDesc        prefix prepended to every emitted relationship id;
 *                         may be null, in which case the id is used unprefixed
 * @throws SAXException if the handler rejects an event
 */
private void extractContent(List<? extends XSLFShape> shapes, boolean skipPlaceholders, XHTMLContentHandler xhtml, String slideDesc) throws SAXException {
    for (XSLFShape sh : shapes) {
        if (sh instanceof XSLFTextShape) {
            XSLFTextShape txt = (XSLFTextShape) sh;
            Placeholder ph = txt.getTextType();
            if (skipPlaceholders && ph != null) {
                continue;
            }
            for (XSLFTextParagraph p : txt.getTextParagraphs()) {
                xhtml.startElement("p");
                for (XSLFTextRun run : p.getTextRuns()) {
                    //TODO: add check for targetmode=external into POI
                    //then check to confirm that the urls are actually
                    //external and not footnote refs via the current hack
                    Hyperlink hyperlink = run.getHyperlink();
                    String address = (hyperlink == null) ? null : hyperlink.getAddress();
                    // Per-run flag: an anchor never spans more than one run.
                    boolean inHyperlink = address != null && !address.contains("#_ftn");
                    if (inHyperlink) {
                        xhtml.startElement("a", "href", address);
                    }
                    xhtml.characters(run.getRawText());
                    if (inHyperlink) {
                        xhtml.endElement("a");
                    }
                }
                xhtml.endElement("p");
            }
        } else if (sh instanceof XSLFGroupShape) {
            // recurse into groups of shapes
            XSLFGroupShape group = (XSLFGroupShape) sh;
            extractContent(group.getShapes(), skipPlaceholders, xhtml, slideDesc);
        } else if (sh instanceof XSLFTable) {
            //unlike tables in Word, ppt/x can't have recursive tables...I don't think
            // NOTE: this test must stay ahead of the XSLFGraphicFrame test below;
            // the original ordering is preserved deliberately.
            extractTable((XSLFTable) sh, xhtml);
        } else if (sh instanceof XSLFGraphicFrame) {
            XSLFGraphicFrame frame = (XSLFGraphicFrame) sh;
            XmlObject[] sp = frame.getXmlObject().selectPath("declare namespace p='http://schemas.openxmlformats.org/presentationml/2006/main' .//*/p:oleObj");
            if (sp != null) {
                for (XmlObject emb : sp) {
                    XmlObject relIDAtt = emb.selectAttribute(new QName("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id"));
                    if (relIDAtt != null) {
                        emitEmbeddedPlaceholder(xhtml, slideDesc, relIDAtt.getDomNode().getNodeValue());
                    }
                }
            }
        } else if (sh instanceof XSLFPictureShape) {
            if (!skipPlaceholders && (sh.getXmlObject() instanceof CTPicture)) {
                CTPicture ctPic = ((CTPicture) sh.getXmlObject());
                if (ctPic.getBlipFill() != null && ctPic.getBlipFill().getBlip() != null) {
                    String relID = ctPic.getBlipFill().getBlip().getEmbed();
                    if (relID != null) {
                        emitEmbeddedPlaceholder(xhtml, slideDesc, relID);
                    }
                }
            }
        }
    }
}

/**
 * Writes an empty {@code <div class="embedded" id="...">} element whose id is
 * the relationship id, prefixed with {@code slideDesc} when that is non-null.
 * Presumably a later pass uses this id to tie the marker to the embedded
 * part -- confirm against the callers of this class.
 *
 * @param xhtml     handler that receives the element
 * @param slideDesc optional id prefix; may be null
 * @param relID     relationship id of the embedded part
 * @throws SAXException if the handler rejects an event
 */
private void emitEmbeddedPlaceholder(XHTMLContentHandler xhtml, String slideDesc, String relID) throws SAXException {
    String id = (slideDesc == null) ? relID : slideDesc + relID;
    AttributesImpl attributes = new AttributesImpl();
    attributes.addAttribute("", "class", "class", "CDATA", "embedded");
    attributes.addAttribute("", "id", "id", "CDATA", id);
    xhtml.startElement("div", attributes);
    xhtml.endElement("div");
}
Aggregations