Search in sources :

Example 1 with XSSFBReader

use of org.apache.poi.xssf.eventusermodel.XSSFBReader in project poi by apache.

the class XSSFBEventBasedExcelExtractor method getText.

/**
 * Processes the file and returns the text.
 * <p>
 * Sheets are visited in the order produced by the reader's sheet iterator.
 * For each sheet the output is, in order: the sheet name (if enabled),
 * header text (if enabled), cell text, text box / shape text (if enabled)
 * and footer text (if enabled).
 *
 * @return the extracted text, or {@code null} if an {@link IOException},
 *         {@link SAXException} or {@link OpenXML4JException} occurs; the
 *         exception is logged at WARN level rather than propagated
 */
public String getText() {
    try {
        XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(getPackage());
        XSSFBReader xssfbReader = new XSSFBReader(getPackage());
        XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
        XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
        // The buffer is handed to the sheet extractor and to processShapes below,
        // so its type is kept as-is to match whatever those helpers declare.
        StringBuffer text = new StringBuffer();
        SheetTextExtractor sheetExtractor = new SheetTextExtractor();
        XSSFBHyperlinksTable hyperlinksTable = null;
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            try {
                if (getIncludeSheetNames()) {
                    text.append(iter.getSheetName());
                    text.append('\n');
                }
                if (handleHyperlinksInCells) {
                    // NOTE(review): the table is built here but not passed to
                    // processSheet in this method - confirm whether it should be.
                    hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
                }
                XSSFBCommentsTable comments = getIncludeCellComments() ? iter.getXSSFBSheetComments() : null;
                processSheet(sheetExtractor, styles, comments, strings, stream);
                if (getIncludeHeadersFooters()) {
                    sheetExtractor.appendHeaderText(text);
                }
                sheetExtractor.appendCellText(text);
                if (getIncludeTextBoxes()) {
                    processShapes(iter.getShapes(), text);
                }
                if (getIncludeHeadersFooters()) {
                    sheetExtractor.appendFooterText(text);
                }
                // The same extractor instance is reused for the next sheet.
                sheetExtractor.reset();
            } finally {
                // Release the sheet stream even when processing this sheet fails;
                // previously it was only closed on the success path.
                stream.close();
            }
        }
        return text.toString();
    } catch (IOException e) {
        LOGGER.log(POILogger.WARN, e);
        return null;
    } catch (SAXException se) {
        LOGGER.log(POILogger.WARN, se);
        return null;
    } catch (OpenXML4JException o4je) {
        LOGGER.log(POILogger.WARN, o4je);
        return null;
    }
}
Also used : XSSFBReader(org.apache.poi.xssf.eventusermodel.XSSFBReader) XSSFBCommentsTable(org.apache.poi.xssf.binary.XSSFBCommentsTable) InputStream(java.io.InputStream) IOException(java.io.IOException) XSSFBStylesTable(org.apache.poi.xssf.binary.XSSFBStylesTable) SAXException(org.xml.sax.SAXException) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XSSFBHyperlinksTable(org.apache.poi.xssf.binary.XSSFBHyperlinksTable) XSSFBSharedStringsTable(org.apache.poi.xssf.binary.XSSFBSharedStringsTable)

Example 2 with XSSFBReader

use of org.apache.poi.xssf.eventusermodel.XSSFBReader in project poi by apache.

the class TestXSSFBSheetHyperlinkManager method testBasic.

/**
 * Opens {@code hyperlink.xlsb}, builds the hyperlinks table for the first
 * sheet returned by the reader and checks that cell address (0, 0) carries
 * exactly one hyperlink record with the expected location and relationship id.
 */
@Test
public void testBasic() throws Exception {
    OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb"));
    try {
        XSSFBReader reader = new XSSFBReader(pkg);
        XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData();
        // Advance to the first sheet so that getSheetPart() refers to it.
        it.next();
        XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart());
        List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0));
        assertNotNull(records);
        assertEquals(1, records.size());
        XSSFHyperlinkRecord record = records.get(0);
        assertEquals("http://tika.apache.org/", record.getLocation());
        assertEquals("rId2", record.getRelId());
    } finally {
        // Release the package without saving; it was previously left open.
        pkg.revert();
    }
}
Also used : CellAddress(org.apache.poi.ss.util.CellAddress) XSSFBReader(org.apache.poi.xssf.eventusermodel.XSSFBReader) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader) Test(org.junit.Test)

Example 3 with XSSFBReader

use of org.apache.poi.xssf.eventusermodel.XSSFBReader in project tika by apache.

the class XSSFBExcelExtractorDecorator method buildXHTML.

/**
 * Writes every sheet of the wrapped extractor's package to the given handler.
 * <p>
 * Each sheet is emitted as a {@code div} containing an {@code h1} with the
 * sheet name, a {@code table}/{@code tbody} with the sheet contents, then the
 * collected headers and footers, the sheet's shapes and finally the sheet's
 * hyperlinks.
 *
 * @param xhtml handler that receives the generated XHTML events
 * @throws SAXException if writing to the handler or processing a sheet fails
 * @throws XmlException if the reader, styles table, sheet iterator or shared
 *         strings table cannot be created (wraps the underlying
 *         {@link OpenXML4JException})
 * @throws IOException  if reading a sheet or closing its stream fails
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (OpenXML4JException oe) {
        // InvalidFormatException is a subclass of OpenXML4JException and was
        // handled identically, so a single clause covers both.
        throw new XmlException(oe);
    }
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        try {
            PackagePart sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();
            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, stream);
            xhtml.endElement("tbody");
            xhtml.endElement("table");
            // Headers and footers are only available once the sheet has been
            // processed, so they are written after the table rather than
            // before the contents.
            for (String header : sheetExtractor.headers) {
                extractHeaderFooter(header, xhtml);
            }
            for (String footer : sheetExtractor.footers) {
                extractHeaderFooter(footer, xhtml);
            }
            List<XSSFShape> shapes = iter.getShapes();
            processShapes(shapes, xhtml);
            //for now dump sheet hyperlinks at bottom of page
            //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
            //step 1: extract hyperlink info from bottom of page
            //step 2: process as we do now, but with cached hyperlink relationship info
            extractHyperLinks(sheetPart, xhtml);
            // All done with this sheet
            xhtml.endElement("div");
        } finally {
            // The sheet stream was previously never closed; release it whether
            // or not processing of this sheet succeeded.
            stream.close();
        }
    }
}
Also used : XSSFBReader(org.apache.poi.xssf.eventusermodel.XSSFBReader) XSSFBCommentsTable(org.apache.poi.xssf.binary.XSSFBCommentsTable) InputStream(java.io.InputStream) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) XSSFBStylesTable(org.apache.poi.xssf.binary.XSSFBStylesTable) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) XSSFBSharedStringsTable(org.apache.poi.xssf.binary.XSSFBSharedStringsTable) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage)

Aggregations

XSSFBReader (org.apache.poi.xssf.eventusermodel.XSSFBReader)3 InputStream (java.io.InputStream)2 OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)2 OPCPackage (org.apache.poi.openxml4j.opc.OPCPackage)2 XSSFBCommentsTable (org.apache.poi.xssf.binary.XSSFBCommentsTable)2 XSSFBSharedStringsTable (org.apache.poi.xssf.binary.XSSFBSharedStringsTable)2 XSSFBStylesTable (org.apache.poi.xssf.binary.XSSFBStylesTable)2 IOException (java.io.IOException)1 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)1 PackagePart (org.apache.poi.openxml4j.opc.PackagePart)1 CellAddress (org.apache.poi.ss.util.CellAddress)1 XSSFBHyperlinksTable (org.apache.poi.xssf.binary.XSSFBHyperlinksTable)1 XSSFReader (org.apache.poi.xssf.eventusermodel.XSSFReader)1 XSSFShape (org.apache.poi.xssf.usermodel.XSSFShape)1 XmlException (org.apache.xmlbeans.XmlException)1 Test (org.junit.Test)1 SAXException (org.xml.sax.SAXException)1