use of org.apache.poi.hslf.usermodel.HSLFTable in project tika by apache.
the class HSLFExtractor method parse.
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
HSLFSlideShow ss = new HSLFSlideShow(root);
List<HSLFSlide> _slides = ss.getSlides();
xhtml.startElement("div", "class", "slideShow");
/* Iterate over slides and extract text */
for (HSLFSlide slide : _slides) {
xhtml.startElement("div", "class", "slide");
// Slide header, if present
HeadersFooters hf = slide.getHeadersFooters();
if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
xhtml.startElement("p", "class", "slide-header");
xhtml.characters(hf.getHeaderText());
xhtml.endElement("p");
}
// Slide master, if present
extractMaster(xhtml, slide.getMasterSheet());
// Slide text
{
xhtml.startElement("div", "class", "slide-content");
textRunsToText(xhtml, slide.getTextParagraphs());
xhtml.endElement("div");
}
// Table text
for (HSLFShape shape : slide.getShapes()) {
if (shape instanceof HSLFTable) {
extractTableText(xhtml, (HSLFTable) shape);
}
}
// Slide footer, if present
if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
xhtml.startElement("p", "class", "slide-footer");
xhtml.characters(hf.getFooterText());
xhtml.endElement("p");
}
// Comments, if present
StringBuilder authorStringBuilder = new StringBuilder();
for (Comment comment : slide.getComments()) {
authorStringBuilder.setLength(0);
xhtml.startElement("p", "class", "slide-comment");
if (comment.getAuthor() != null) {
authorStringBuilder.append(comment.getAuthor());
}
if (comment.getAuthorInitials() != null) {
if (authorStringBuilder.length() > 0) {
authorStringBuilder.append(" ");
}
authorStringBuilder.append("(" + comment.getAuthorInitials() + ")");
}
if (authorStringBuilder.length() > 0) {
if (comment.getText() != null) {
authorStringBuilder.append(" - ");
}
xhtml.startElement("b");
xhtml.characters(authorStringBuilder.toString());
xhtml.endElement("b");
}
if (comment.getText() != null) {
xhtml.characters(comment.getText());
}
xhtml.endElement("p");
}
// Now any embedded resources
handleSlideEmbeddedResources(slide, xhtml);
// Find the Notes for this slide and extract inline
HSLFNotes notes = slide.getNotes();
if (notes != null) {
xhtml.startElement("div", "class", "slide-notes");
textRunsToText(xhtml, notes.getTextParagraphs());
xhtml.endElement("div");
}
// Slide complete
xhtml.endElement("div");
}
// All slides done
xhtml.endElement("div");
/* notes */
xhtml.startElement("div", "class", "slide-notes");
HashSet<Integer> seenNotes = new HashSet<>();
HeadersFooters hf = ss.getNotesHeadersFooters();
for (HSLFSlide slide : _slides) {
HSLFNotes notes = slide.getNotes();
if (notes == null) {
continue;
}
Integer id = notes._getSheetNumber();
if (seenNotes.contains(id)) {
continue;
}
seenNotes.add(id);
// Repeat the Notes header, if set
if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
xhtml.startElement("p", "class", "slide-note-header");
xhtml.characters(hf.getHeaderText());
xhtml.endElement("p");
}
// Notes text
textRunsToText(xhtml, notes.getTextParagraphs());
// Repeat the notes footer, if set
if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
xhtml.startElement("p", "class", "slide-note-footer");
xhtml.characters(hf.getFooterText());
xhtml.endElement("p");
}
}
handleSlideEmbeddedPictures(ss, xhtml);
xhtml.endElement("div");
}
Aggregations