Search in sources :

Example 1 with HSLFNotes

Use of org.apache.poi.hslf.usermodel.HSLFNotes in the Apache POI project.

The method getText of the class PowerPointExtractor.

/**
 * Builds one string out of the text found in the slideshow, with the
 * included parts selected by the flags.
 *
 * <p>Slide master text and comments are only visited while slide text is
 * being collected, because both are handled inside the
 * {@code getSlideText} branch. Notes are collected per slide and each
 * notes sheet is emitted at most once, keyed by its sheet number.
 *
 * @param getSlideText   include the text of every slide (headers, body
 *                       text, table text, footers)
 * @param getNoteText    include the notes text, each sheet wrapped in the
 *                       notes header/footer when those are visible
 * @param getCommentText include slide comments as {@code author - text}
 *                       lines; only honoured when {@code getSlideText} is set
 * @param getMasterText  include non-placeholder text from the slide
 *                       masters; only honoured when {@code getSlideText} is set
 * @return the collected text, or an empty string when neither slide text
 *         nor note text was requested
 */
public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText, boolean getMasterText) {
    StringBuffer out = new StringBuffer();

    if (getSlideText) {
        if (getMasterText) {
            for (HSLFSlideMaster master : _show.getSlideMasters()) {
                for (HSLFShape masterShape : master.getShapes()) {
                    if (!(masterShape instanceof HSLFTextShape)) {
                        continue;
                    }
                    final String text = ((HSLFTextShape) masterShape).getText();
                    // Nothing useful to emit for missing, empty or "*" text
                    if (text == null || text.isEmpty() || "*".equals(text)) {
                        continue;
                    }
                    // Placeholder text is boiler plate unless the shape is a
                    // metro shape (complex placeholder)
                    if (HSLFMasterSheet.isPlaceholder(masterShape)
                            && !new HSLFMetroShape<HSLFShape>(masterShape).hasMetroBlob()) {
                        LOG.log(POILogger.INFO, "Ignoring boiler plate (placeholder) text on slide master:", text);
                        continue;
                    }
                    out.append(text);
                    // Keep every master text on its own line
                    if (!text.endsWith("\n")) {
                        out.append("\n");
                    }
                }
            }
        }

        for (HSLFSlide slide : _slides) {
            // Resolve header and footer up front; an invisible or absent
            // one contributes an empty string
            HeadersFooters slideHf = slide.getHeadersFooters();
            String slideHeader = (slideHf != null && slideHf.isHeaderVisible())
                    ? safeLine(slideHf.getHeaderText())
                    : "";
            String slideFooter = (slideHf != null && slideHf.isFooterVisible())
                    ? safeLine(slideHf.getFooterText())
                    : "";

            out.append(slideHeader);

            // Body text of the slide
            textRunsToText(out, slide.getTextParagraphs());

            // Text held in tables
            for (HSLFShape shape : slide.getShapes()) {
                if (shape instanceof HSLFTable) {
                    extractTableText(out, (HSLFTable) shape);
                }
            }

            out.append(slideFooter);

            // One "author - text" line per comment, when requested
            if (getCommentText) {
                for (Comment comment : slide.getComments()) {
                    String author = comment.getAuthor();
                    String body = comment.getText();
                    out.append(author).append(" - ").append(body).append("\n");
                }
            }
        }

        // Separate the slide section from the notes section
        if (getNoteText) {
            out.append('\n');
        }
    }

    if (getNoteText) {
        // _notes is deliberately not used here, since it can also contain
        // the notes of master sheets. Walk the slides instead and remember
        // which notes sheets were already written.
        Set<Integer> writtenNotes = new HashSet<Integer>();

        HeadersFooters notesHf = _show.getNotesHeadersFooters();
        String notesHeader = (notesHf != null && notesHf.isHeaderVisible())
                ? safeLine(notesHf.getHeaderText())
                : "";
        String notesFooter = (notesHf != null && notesHf.isFooterVisible())
                ? safeLine(notesHf.getFooterText())
                : "";

        for (HSLFSlide slide : _slides) {
            HSLFNotes notes = slide.getNotes();
            // add() returns false when the sheet number was seen before
            if (notes == null || !writtenNotes.add(Integer.valueOf(notes._getSheetNumber()))) {
                continue;
            }
            // Header, notes text and footer are repeated for every sheet
            out.append(notesHeader);
            textRunsToText(out, notes.getTextParagraphs());
            out.append(notesFooter);
        }
    }

    return out.toString();
}
Also used : Comment(org.apache.poi.hslf.model.Comment) HSLFSlideMaster(org.apache.poi.hslf.usermodel.HSLFSlideMaster) HeadersFooters(org.apache.poi.hslf.model.HeadersFooters) HSLFNotes(org.apache.poi.hslf.usermodel.HSLFNotes) HSLFShape(org.apache.poi.hslf.usermodel.HSLFShape) HSLFTable(org.apache.poi.hslf.usermodel.HSLFTable) HSLFTextShape(org.apache.poi.hslf.usermodel.HSLFTextShape) HSLFSlide(org.apache.poi.hslf.usermodel.HSLFSlide) HashSet(java.util.HashSet)

Example 2 with HSLFNotes

Use of org.apache.poi.hslf.usermodel.HSLFNotes in the Apache Tika project.

The method parse of the class HSLFExtractor.

/**
 * Reads a PowerPoint slideshow from the given directory node and writes
 * its content to the XHTML handler.
 *
 * <p>Output layout, in order: a {@code div.slideShow} holding one
 * {@code div.slide} per slide (header, master content, slide text, table
 * text, footer, comments, embedded resources and the slide's notes
 * inline), followed by a separate {@code div.slide-notes} that lists every
 * notes sheet once (de-duplicated by sheet number) with the notes
 * header/footer repeated around each. The slideshow's embedded pictures
 * are handled inside that trailing div, just before it is closed.
 *
 * @param root  directory node the slideshow is opened from
 * @param xhtml handler receiving the generated XHTML events
 * @throws IOException   declared; presumably raised when the slideshow
 *                       cannot be read - confirm against HSLFSlideShow
 * @throws SAXException  declared; presumably raised by the XHTML handler
 * @throws TikaException declared; presumably raised by the extraction helpers
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    HSLFSlideShow slideShow = new HSLFSlideShow(root);
    List<HSLFSlide> slides = slideShow.getSlides();

    xhtml.startElement("div", "class", "slideShow");

    /* Per-slide content */
    for (HSLFSlide slide : slides) {
        xhtml.startElement("div", "class", "slide");

        HeadersFooters slideHf = slide.getHeadersFooters();

        // Header: only when visible and actually set
        String slideHeader = (slideHf == null || !slideHf.isHeaderVisible()) ? null : slideHf.getHeaderText();
        if (slideHeader != null) {
            xhtml.startElement("p", "class", "slide-header");
            xhtml.characters(slideHeader);
            xhtml.endElement("p");
        }

        // Content coming from the slide's master sheet
        extractMaster(xhtml, slide.getMasterSheet());

        // The slide's own text
        xhtml.startElement("div", "class", "slide-content");
        textRunsToText(xhtml, slide.getTextParagraphs());
        xhtml.endElement("div");

        // Text held in tables
        for (HSLFShape shape : slide.getShapes()) {
            if (shape instanceof HSLFTable) {
                extractTableText(xhtml, (HSLFTable) shape);
            }
        }

        // Footer: only when visible and actually set
        String slideFooter = (slideHf == null || !slideHf.isFooterVisible()) ? null : slideHf.getFooterText();
        if (slideFooter != null) {
            xhtml.startElement("p", "class", "slide-footer");
            xhtml.characters(slideFooter);
            xhtml.endElement("p");
        }

        // Comments: bold "Author (initials) - " label followed by the text
        for (Comment comment : slide.getComments()) {
            xhtml.startElement("p", "class", "slide-comment");

            StringBuilder label = new StringBuilder();
            String author = comment.getAuthor();
            if (author != null) {
                label.append(author);
            }
            String initials = comment.getAuthorInitials();
            if (initials != null) {
                if (label.length() > 0) {
                    label.append(" ");
                }
                label.append("(").append(initials).append(")");
            }

            String commentText = comment.getText();
            if (label.length() > 0) {
                // The separator is only needed when text follows the label
                if (commentText != null) {
                    label.append(" - ");
                }
                xhtml.startElement("b");
                xhtml.characters(label.toString());
                xhtml.endElement("b");
            }
            if (commentText != null) {
                xhtml.characters(commentText);
            }

            xhtml.endElement("p");
        }

        // Embedded resources belonging to this slide
        handleSlideEmbeddedResources(slide, xhtml);

        // This slide's notes, written inline
        HSLFNotes inlineNotes = slide.getNotes();
        if (inlineNotes != null) {
            xhtml.startElement("div", "class", "slide-notes");
            textRunsToText(xhtml, inlineNotes.getTextParagraphs());
            xhtml.endElement("div");
        }

        // End of this slide
        xhtml.endElement("div");
    }

    // End of the slideShow div
    xhtml.endElement("div");

    /* Separate notes section, each notes sheet written once */
    xhtml.startElement("div", "class", "slide-notes");
    HashSet<Integer> writtenNotes = new HashSet<>();
    HeadersFooters notesHf = slideShow.getNotesHeadersFooters();
    for (HSLFSlide slide : slides) {
        HSLFNotes notes = slide.getNotes();
        // add() returns false when the sheet number was seen before
        if (notes == null || !writtenNotes.add(notes._getSheetNumber())) {
            continue;
        }

        // Notes header, repeated for every sheet when visible and set
        String notesHeader = (notesHf == null || !notesHf.isHeaderVisible()) ? null : notesHf.getHeaderText();
        if (notesHeader != null) {
            xhtml.startElement("p", "class", "slide-note-header");
            xhtml.characters(notesHeader);
            xhtml.endElement("p");
        }

        // The notes text itself
        textRunsToText(xhtml, notes.getTextParagraphs());

        // Notes footer, repeated for every sheet when visible and set
        String notesFooter = (notesHf == null || !notesHf.isFooterVisible()) ? null : notesHf.getFooterText();
        if (notesFooter != null) {
            xhtml.startElement("p", "class", "slide-note-footer");
            xhtml.characters(notesFooter);
            xhtml.endElement("p");
        }
    }
    // Pictures are handled before the trailing div is closed
    handleSlideEmbeddedPictures(slideShow, xhtml);
    xhtml.endElement("div");
}
Also used : HeadersFooters(org.apache.poi.hslf.model.HeadersFooters) HSLFNotes(org.apache.poi.hslf.usermodel.HSLFNotes) Comment(org.apache.poi.hslf.model.Comment) HSLFShape(org.apache.poi.hslf.usermodel.HSLFShape) HSLFTable(org.apache.poi.hslf.usermodel.HSLFTable) HSLFSlideShow(org.apache.poi.hslf.usermodel.HSLFSlideShow) HSLFSlide(org.apache.poi.hslf.usermodel.HSLFSlide) HashSet(java.util.HashSet)

Aggregations

HashSet (java.util.HashSet): 2, Comment (org.apache.poi.hslf.model.Comment): 2, HeadersFooters (org.apache.poi.hslf.model.HeadersFooters): 2, HSLFNotes (org.apache.poi.hslf.usermodel.HSLFNotes): 2, HSLFShape (org.apache.poi.hslf.usermodel.HSLFShape): 2, HSLFSlide (org.apache.poi.hslf.usermodel.HSLFSlide): 2, HSLFTable (org.apache.poi.hslf.usermodel.HSLFTable): 2, HSLFSlideMaster (org.apache.poi.hslf.usermodel.HSLFSlideMaster): 1, HSLFSlideShow (org.apache.poi.hslf.usermodel.HSLFSlideShow): 1, HSLFTextShape (org.apache.poi.hslf.usermodel.HSLFTextShape): 1