Search in sources :

Example 6 with HSLFTable

Use of org.apache.poi.hslf.usermodel.HSLFTable in the Apache Tika project.

From the class HSLFExtractor, method parse:

/**
 * Converts the slide show stored under {@code root} into XHTML SAX events.
 *
 * <p>The output consists of two top-level {@code div} elements:
 * <ol>
 *   <li>{@code div class="slideShow"}, holding one {@code div class="slide"} per slide.
 *       Each slide emits, in this order: the slide header (when visible and non-null),
 *       the master sheet content, the slide's own text, the text of every table shape,
 *       the slide footer (when visible and non-null), the comments, the embedded
 *       resources and finally the slide's notes inline.</li>
 *   <li>{@code div class="slide-notes"}, holding every notes sheet once (sheets are
 *       de-duplicated by sheet number), each wrapped in the notes header/footer when
 *       those are visible and non-null, followed by the embedded pictures of the
 *       whole slide show.</li>
 * </ol>
 *
 * @param root  directory node the {@link HSLFSlideShow} is constructed from
 * @param xhtml handler receiving the generated elements and character data
 * @throws IOException   declared by the method; may be raised while reading the slide show
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException declared by the method; may be raised by the extraction helpers
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    HSLFSlideShow ss = new HSLFSlideShow(root);
    List<HSLFSlide> slides = ss.getSlides();

    xhtml.startElement("div", "class", "slideShow");

    for (HSLFSlide current : slides) {
        xhtml.startElement("div", "class", "slide");

        // Header of this slide; the same HeadersFooters object is reused for the footer below.
        HeadersFooters slideHf = current.getHeadersFooters();
        if (slideHf != null && slideHf.isHeaderVisible() && slideHf.getHeaderText() != null) {
            writeClassedParagraph(xhtml, "slide-header", slideHf.getHeaderText());
        }

        // Content inherited from the master sheet.
        extractMaster(xhtml, current.getMasterSheet());

        // The slide's own text paragraphs.
        xhtml.startElement("div", "class", "slide-content");
        textRunsToText(xhtml, current.getTextParagraphs());
        xhtml.endElement("div");

        // Only table shapes are handled here; every other shape type is skipped.
        for (HSLFShape candidate : current.getShapes()) {
            if (!(candidate instanceof HSLFTable)) {
                continue;
            }
            extractTableText(xhtml, (HSLFTable) candidate);
        }

        // Footer of this slide.
        if (slideHf != null && slideHf.isFooterVisible() && slideHf.getFooterText() != null) {
            writeClassedParagraph(xhtml, "slide-footer", slideHf.getFooterText());
        }

        // Comments are rendered as "<b>Author (initials) - </b>text"; every part is optional.
        for (Comment remark : current.getComments()) {
            xhtml.startElement("p", "class", "slide-comment");

            String author = remark.getAuthor();
            String initials = remark.getAuthorInitials();
            String body = remark.getText();

            StringBuilder byline = new StringBuilder();
            if (author != null) {
                byline.append(author);
            }
            if (initials != null) {
                if (byline.length() > 0) {
                    byline.append(" ");
                }
                byline.append("(").append(initials).append(")");
            }

            if (byline.length() > 0) {
                // The separator is only added when comment text follows the byline.
                if (body != null) {
                    byline.append(" - ");
                }
                xhtml.startElement("b");
                xhtml.characters(byline.toString());
                xhtml.endElement("b");
            }
            if (body != null) {
                xhtml.characters(body);
            }

            xhtml.endElement("p");
        }

        // Resources embedded in this slide.
        handleSlideEmbeddedResources(current, xhtml);

        // Notes attached to this slide, written inline.
        HSLFNotes inlineNotes = current.getNotes();
        if (inlineNotes != null) {
            xhtml.startElement("div", "class", "slide-notes");
            textRunsToText(xhtml, inlineNotes.getTextParagraphs());
            xhtml.endElement("div");
        }

        xhtml.endElement("div"); // closes div.slide
    }

    xhtml.endElement("div"); // closes div.slideShow

    // Second pass: all notes sheets gathered in one trailing section.
    xhtml.startElement("div", "class", "slide-notes");
    HashSet<Integer> emittedNoteIds = new HashSet<>();
    HeadersFooters notesHf = ss.getNotesHeadersFooters();

    for (HSLFSlide owner : slides) {
        HSLFNotes sheet = owner.getNotes();
        if (sheet == null) {
            continue;
        }
        // add() reports false for a sheet number already recorded, so each sheet is written once.
        Integer sheetId = sheet._getSheetNumber();
        if (!emittedNoteIds.add(sheetId)) {
            continue;
        }

        if (notesHf != null && notesHf.isHeaderVisible() && notesHf.getHeaderText() != null) {
            writeClassedParagraph(xhtml, "slide-note-header", notesHf.getHeaderText());
        }

        textRunsToText(xhtml, sheet.getTextParagraphs());

        if (notesHf != null && notesHf.isFooterVisible() && notesHf.getFooterText() != null) {
            writeClassedParagraph(xhtml, "slide-note-footer", notesHf.getFooterText());
        }
    }

    handleSlideEmbeddedPictures(ss, xhtml);
    xhtml.endElement("div");
}

/**
 * Writes one {@code p} element carrying the given {@code class} attribute value and text.
 *
 * @param xhtml    handler receiving the events
 * @param cssClass value of the {@code class} attribute
 * @param text     character content of the paragraph
 * @throws SAXException if the handler rejects an event
 */
private void writeClassedParagraph(XHTMLContentHandler xhtml, String cssClass, String text) throws SAXException {
    xhtml.startElement("p", "class", cssClass);
    xhtml.characters(text);
    xhtml.endElement("p");
}
Also used: HeadersFooters (org.apache.poi.hslf.model.HeadersFooters), HSLFNotes (org.apache.poi.hslf.usermodel.HSLFNotes), Comment (org.apache.poi.hslf.model.Comment), HSLFShape (org.apache.poi.hslf.usermodel.HSLFShape), HSLFTable (org.apache.poi.hslf.usermodel.HSLFTable), HSLFSlideShow (org.apache.poi.hslf.usermodel.HSLFSlideShow), HSLFSlide (org.apache.poi.hslf.usermodel.HSLFSlide), HashSet (java.util.HashSet)

Aggregations

HSLFTable (org.apache.poi.hslf.usermodel.HSLFTable): 6; HSLFShape (org.apache.poi.hslf.usermodel.HSLFShape): 4; HSLFSlide (org.apache.poi.hslf.usermodel.HSLFSlide): 4; HSLFSlideShow (org.apache.poi.hslf.usermodel.HSLFSlideShow): 3; HSLFTableCell (org.apache.poi.hslf.usermodel.HSLFTableCell): 3; Color (java.awt.Color): 2; HashSet (java.util.HashSet): 2; Comment (org.apache.poi.hslf.model.Comment): 2; HeadersFooters (org.apache.poi.hslf.model.HeadersFooters): 2; HSLFNotes (org.apache.poi.hslf.usermodel.HSLFNotes): 2; HSLFTextRun (org.apache.poi.hslf.usermodel.HSLFTextRun): 2; DrawTableShape (org.apache.poi.sl.draw.DrawTableShape): 2; Test (org.junit.Test): 2; ByteArrayInputStream (java.io.ByteArrayInputStream): 1; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1; HSLFSlideMaster (org.apache.poi.hslf.usermodel.HSLFSlideMaster): 1; HSLFTextShape (org.apache.poi.hslf.usermodel.HSLFTextShape): 1