Use of org.apache.poi.hslf.usermodel.HSLFNotes in project poi by Apache.
The method getText of the class PowerPointExtractor.
/**
 * Collects text from the slide show into a single string.
 *
 * <p>When {@code getSlideText} is set, the output contains, for every slide in
 * {@code _slides}: the slide header (if visible), the slide's text paragraphs,
 * the text of any {@link HSLFTable} shapes, the slide footer (if visible) and,
 * when {@code getCommentText} is also set, one {@code "author - text"} line per
 * comment. Master text and comments are only considered inside the slide-text
 * branch, so {@code getMasterText} and {@code getCommentText} have no effect
 * when {@code getSlideText} is {@code false}.
 *
 * <p>When {@code getNoteText} is set, the notes of each slide are appended
 * afterwards, each notes sheet at most once (tracked by sheet number), wrapped
 * in the notes header/footer when those are visible.
 *
 * @param getSlideText   whether to include slide text
 * @param getNoteText    whether to include notes text
 * @param getCommentText whether to include slide comments (requires {@code getSlideText})
 * @param getMasterText  whether to include slide master text (requires {@code getSlideText})
 * @return the collected text
 */
public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText, boolean getMasterText) {
    StringBuffer out = new StringBuffer();

    if (getSlideText) {
        if (getMasterText) {
            for (HSLFSlideMaster master : _show.getSlideMasters()) {
                for (HSLFShape masterShape : master.getShapes()) {
                    if (!(masterShape instanceof HSLFTextShape)) {
                        continue;
                    }
                    final String masterText = ((HSLFTextShape) masterShape).getText();
                    // Skip shapes with no text, or with only the "*" marker
                    if (masterText == null || masterText.isEmpty() || "*".equals(masterText)) {
                        continue;
                    }
                    // Placeholder text is boiler plate unless the shape carries a metro blob
                    if (HSLFMasterSheet.isPlaceholder(masterShape)
                            && !new HSLFMetroShape<HSLFShape>(masterShape).hasMetroBlob()) {
                        LOG.log(POILogger.INFO, "Ignoring boiler plate (placeholder) text on slide master:", masterText);
                        continue;
                    }
                    out.append(masterText);
                    // Make sure every master text ends its own line
                    if (!masterText.endsWith("\n")) {
                        out.append("\n");
                    }
                }
            }
        }

        for (HSLFSlide slide : _slides) {
            HeadersFooters slideHf = slide.getHeadersFooters();
            String slideHeader = (slideHf != null && slideHf.isHeaderVisible())
                    ? safeLine(slideHf.getHeaderText()) : "";
            String slideFooter = (slideHf != null && slideHf.isFooterVisible())
                    ? safeLine(slideHf.getFooterText()) : "";

            // Header, body text, table text, footer - in that order
            out.append(slideHeader);
            textRunsToText(out, slide.getTextParagraphs());
            for (HSLFShape candidate : slide.getShapes()) {
                if (candidate instanceof HSLFTable) {
                    extractTableText(out, (HSLFTable) candidate);
                }
            }
            out.append(slideFooter);

            // Comments are optional
            if (!getCommentText) {
                continue;
            }
            for (Comment remark : slide.getComments()) {
                out.append(remark.getAuthor()).append(" - ").append(remark.getText()).append("\n");
            }
        }

        // Separate the slide section from the notes section
        if (getNoteText) {
            out.append('\n');
        }
    }

    if (getNoteText) {
        // _notes is deliberately not used here, as it can contain the notes of
        // master sheets. Walk the slides instead and remember which notes
        // sheets were already written so that none is emitted twice.
        Set<Integer> writtenSheets = new HashSet<Integer>();

        HeadersFooters notesHf = _show.getNotesHeadersFooters();
        String notesHeader = (notesHf != null && notesHf.isHeaderVisible())
                ? safeLine(notesHf.getHeaderText()) : "";
        String notesFooter = (notesHf != null && notesHf.isFooterVisible())
                ? safeLine(notesHf.getFooterText()) : "";

        for (HSLFSlide slide : _slides) {
            HSLFNotes notes = slide.getNotes();
            // add() returns false when the sheet number was recorded before
            if (notes == null || !writtenSheets.add(Integer.valueOf(notes._getSheetNumber()))) {
                continue;
            }
            // The notes header and footer are repeated around every notes sheet
            out.append(notesHeader);
            textRunsToText(out, notes.getTextParagraphs());
            out.append(notesFooter);
        }
    }

    return out.toString();
}
Use of org.apache.poi.hslf.usermodel.HSLFNotes in project tika by Apache.
The method parse of the class HSLFExtractor.
/**
 * Opens the slide show stored under {@code root} and writes its content to
 * {@code xhtml}.
 *
 * <p>The output is a {@code div.slideShow} holding one {@code div.slide} per
 * slide (header, master, slide content, tables, footer, comments, embedded
 * resources and the slide's notes inline), followed by a separate
 * {@code div.slide-notes} that lists each notes sheet once (tracked by sheet
 * number) together with the notes header/footer, and finally the embedded
 * pictures of the slide show. Note that notes text is therefore emitted both
 * inline with its slide and again in the trailing block.
 *
 * @param root  directory node the slide show is read from
 * @param xhtml handler receiving the generated XHTML events
 * @throws IOException   propagated from the underlying calls
 * @throws SAXException  propagated from the content handler
 * @throws TikaException propagated from the underlying calls
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    HSLFSlideShow show = new HSLFSlideShow(root);
    List<HSLFSlide> slides = show.getSlides();

    xhtml.startElement("div", "class", "slideShow");

    // ---- per-slide content ----
    for (HSLFSlide slide : slides) {
        xhtml.startElement("div", "class", "slide");

        // Header: only when present, visible and non-null
        HeadersFooters slideHf = slide.getHeadersFooters();
        boolean hasSlideHeader = slideHf != null
                && slideHf.isHeaderVisible()
                && slideHf.getHeaderText() != null;
        if (hasSlideHeader) {
            xhtml.startElement("p", "class", "slide-header");
            xhtml.characters(slideHf.getHeaderText());
            xhtml.endElement("p");
        }

        // Master sheet content
        extractMaster(xhtml, slide.getMasterSheet());

        // Text of the slide itself
        xhtml.startElement("div", "class", "slide-content");
        textRunsToText(xhtml, slide.getTextParagraphs());
        xhtml.endElement("div");

        // Text held in tables
        for (HSLFShape candidate : slide.getShapes()) {
            if (candidate instanceof HSLFTable) {
                extractTableText(xhtml, (HSLFTable) candidate);
            }
        }

        // Footer: same rules as the header
        boolean hasSlideFooter = slideHf != null
                && slideHf.isFooterVisible()
                && slideHf.getFooterText() != null;
        if (hasSlideFooter) {
            xhtml.startElement("p", "class", "slide-footer");
            xhtml.characters(slideHf.getFooterText());
            xhtml.endElement("p");
        }

        // Comments: optional bold "Author (initials) - " prefix, then the comment text
        for (Comment remark : slide.getComments()) {
            StringBuilder byline = new StringBuilder();
            xhtml.startElement("p", "class", "slide-comment");
            if (remark.getAuthor() != null) {
                byline.append(remark.getAuthor());
            }
            if (remark.getAuthorInitials() != null) {
                if (byline.length() > 0) {
                    byline.append(" ");
                }
                byline.append("(").append(remark.getAuthorInitials()).append(")");
            }
            if (byline.length() > 0) {
                // The separator is only needed when comment text follows
                if (remark.getText() != null) {
                    byline.append(" - ");
                }
                xhtml.startElement("b");
                xhtml.characters(byline.toString());
                xhtml.endElement("b");
            }
            if (remark.getText() != null) {
                xhtml.characters(remark.getText());
            }
            xhtml.endElement("p");
        }

        // Embedded resources of this slide
        handleSlideEmbeddedResources(slide, xhtml);

        // Notes belonging to this slide, emitted inline
        HSLFNotes inlineNotes = slide.getNotes();
        if (inlineNotes != null) {
            xhtml.startElement("div", "class", "slide-notes");
            textRunsToText(xhtml, inlineNotes.getTextParagraphs());
            xhtml.endElement("div");
        }

        xhtml.endElement("div");
    }

    xhtml.endElement("div");

    // ---- notes section ----
    xhtml.startElement("div", "class", "slide-notes");
    HashSet<Integer> writtenSheets = new HashSet<>();
    HeadersFooters notesHf = show.getNotesHeadersFooters();
    for (HSLFSlide slide : slides) {
        HSLFNotes notes = slide.getNotes();
        // add() returns false when this sheet number was already handled
        if (notes == null || !writtenSheets.add(notes._getSheetNumber())) {
            continue;
        }

        // The notes header is repeated for every notes sheet
        boolean hasNotesHeader = notesHf != null
                && notesHf.isHeaderVisible()
                && notesHf.getHeaderText() != null;
        if (hasNotesHeader) {
            xhtml.startElement("p", "class", "slide-note-header");
            xhtml.characters(notesHf.getHeaderText());
            xhtml.endElement("p");
        }

        textRunsToText(xhtml, notes.getTextParagraphs());

        // ... and so is the notes footer
        boolean hasNotesFooter = notesHf != null
                && notesHf.isFooterVisible()
                && notesHf.getFooterText() != null;
        if (hasNotesFooter) {
            xhtml.startElement("p", "class", "slide-note-footer");
            xhtml.characters(notesHf.getFooterText());
            xhtml.endElement("p");
        }
    }

    // Embedded pictures are written inside the trailing notes div
    handleSlideEmbeddedPictures(show, xhtml);
    xhtml.endElement("div");
}
Aggregations