Search in sources :

Example 1 with XSSFBStylesTable

use of org.apache.poi.xssf.binary.XSSFBStylesTable in project poi by apache.

In class XSSFBEventBasedExcelExtractor, method getText.

/**
 * Processes the file and returns the text.
 * <p>
 * Every sheet supplied by the reader's sheet iterator is visited in turn. For each sheet the
 * sheet name, header text, cell text, shape text and footer text are appended to the result,
 * each one only when the corresponding {@code getInclude...()} setting is enabled (cell text
 * is always appended).
 *
 * @return the extracted text, or {@code null} if an {@link IOException}, a
 *         {@link SAXException} or an {@link OpenXML4JException} was raised while reading
 *         (the exception is logged at WARN level)
 */
public String getText() {
    try {
        XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(getPackage());
        XSSFBReader xssfbReader = new XSSFBReader(getPackage());
        XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
        XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
        StringBuffer text = new StringBuffer();
        SheetTextExtractor sheetExtractor = new SheetTextExtractor();
        XSSFBHyperlinksTable hyperlinksTable = null;
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            // The stream is released in the finally block so that a failure while
            // processing one sheet does not leak it.
            try {
                if (getIncludeSheetNames()) {
                    text.append(iter.getSheetName());
                    text.append('\n');
                }
                if (handleHyperlinksInCells) {
                    // NOTE(review): the table is built here but never handed to anything
                    // in this method - confirm whether processSheet should receive it.
                    hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
                }
                XSSFBCommentsTable comments = getIncludeCellComments() ? iter.getXSSFBSheetComments() : null;
                processSheet(sheetExtractor, styles, comments, strings, stream);
                if (getIncludeHeadersFooters()) {
                    sheetExtractor.appendHeaderText(text);
                }
                sheetExtractor.appendCellText(text);
                if (getIncludeTextBoxes()) {
                    processShapes(iter.getShapes(), text);
                }
                if (getIncludeHeadersFooters()) {
                    sheetExtractor.appendFooterText(text);
                }
                // The same extractor instance is reused for the next sheet.
                sheetExtractor.reset();
            } finally {
                stream.close();
            }
        }
        return text.toString();
    } catch (IOException e) {
        LOGGER.log(POILogger.WARN, e);
        return null;
    } catch (SAXException se) {
        LOGGER.log(POILogger.WARN, se);
        return null;
    } catch (OpenXML4JException o4je) {
        LOGGER.log(POILogger.WARN, o4je);
        return null;
    }
}
Also used : XSSFBReader(org.apache.poi.xssf.eventusermodel.XSSFBReader) XSSFBCommentsTable(org.apache.poi.xssf.binary.XSSFBCommentsTable) InputStream(java.io.InputStream) IOException(java.io.IOException) XSSFBStylesTable(org.apache.poi.xssf.binary.XSSFBStylesTable) SAXException(org.xml.sax.SAXException) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XSSFBHyperlinksTable(org.apache.poi.xssf.binary.XSSFBHyperlinksTable) XSSFBSharedStringsTable(org.apache.poi.xssf.binary.XSSFBSharedStringsTable)

Example 2 with XSSFBStylesTable

use of org.apache.poi.xssf.binary.XSSFBStylesTable in project poi by apache.

In class TestXSSFBReader, method getSheets.

/**
 * Opens the named test resource as an OPC package and renders each of its sheets to text.
 * <p>
 * Each sheet is parsed with an {@link XSSFBSheetHandler} that reports into a fresh
 * {@code TestSheetHandler}; the handler's {@code toString()} is collected per sheet.
 *
 * @param testFileName name of the test resource to open
 * @return one string per sheet, in iteration order
 * @throws Exception if the package cannot be opened or a sheet cannot be parsed
 */
private List<String> getSheets(String testFileName) throws Exception {
    OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName));
    try {
        List<String> sheetTexts = new ArrayList<String>();
        XSSFBReader r = new XSSFBReader(pkg);
        XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
        assertNotNull(xssfbStylesTable);
        XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
        XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator) r.getSheetsData();
        while (it.hasNext()) {
            InputStream is = it.next();
            try {
                String name = it.getSheetName();
                TestSheetHandler testSheetHandler = new TestSheetHandler();
                testSheetHandler.startSheet(name);
                XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is, xssfbStylesTable, it.getXSSFBSheetComments(), sst, testSheetHandler, new DataFormatter(), false);
                sheetHandler.parse();
                testSheetHandler.endSheet();
                sheetTexts.add(testSheetHandler.toString());
            } finally {
                // Release the sheet stream even when parsing fails.
                is.close();
            }
        }
        return sheetTexts;
    } finally {
        // The package is only read here, so discard it without saving anything back.
        pkg.revert();
    }
}
Also used : InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) XSSFBStylesTable(org.apache.poi.xssf.binary.XSSFBStylesTable) XSSFBSharedStringsTable(org.apache.poi.xssf.binary.XSSFBSharedStringsTable) XSSFBSheetHandler(org.apache.poi.xssf.binary.XSSFBSheetHandler) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) DataFormatter(org.apache.poi.ss.usermodel.DataFormatter)

Example 3 with XSSFBStylesTable

use of org.apache.poi.xssf.binary.XSSFBStylesTable in project tika by apache.

In class XSSFBExcelExtractorDecorator, method buildXHTML.

/**
 * Writes the workbook to the given handler, one {@code div} per sheet.
 * <p>
 * For each sheet this emits an {@code h1} with the sheet name, a {@code table} holding the
 * sheet contents, then the collected headers and footers, the shapes, and finally the
 * sheet's hyperlinks.
 *
 * @param xhtml handler that receives the generated XHTML events
 * @throws SAXException if the handler rejects an event
 * @throws XmlException if the reader, styles table, sheet iterator or shared strings table
 *         cannot be created because of an {@link OpenXML4JException}
 * @throws IOException if reading from the package fails
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (OpenXML4JException oe) {
        // Also covers InvalidFormatException, which is a subclass and was handled identically.
        throw new XmlException(oe);
    }
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        // The stream is released in the finally block so that a failure while
        // emitting one sheet does not leak it.
        try {
            PackagePart sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();
            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, stream);
            xhtml.endElement("tbody");
            xhtml.endElement("table");
            // Headers and footers are both written after the table body
            for (String header : sheetExtractor.headers) {
                extractHeaderFooter(header, xhtml);
            }
            for (String footer : sheetExtractor.footers) {
                extractHeaderFooter(footer, xhtml);
            }
            List<XSSFShape> shapes = iter.getShapes();
            processShapes(shapes, xhtml);
            //for now dump sheet hyperlinks at bottom of page
            //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
            //step 1: extract hyperlink info from bottom of page
            //step 2: process as we do now, but with cached hyperlink relationship info
            extractHyperLinks(sheetPart, xhtml);
            // All done with this sheet
            xhtml.endElement("div");
        } finally {
            stream.close();
        }
    }
}
Also used : XSSFBReader(org.apache.poi.xssf.eventusermodel.XSSFBReader) XSSFBCommentsTable(org.apache.poi.xssf.binary.XSSFBCommentsTable) InputStream(java.io.InputStream) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) XSSFBStylesTable(org.apache.poi.xssf.binary.XSSFBStylesTable) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) XSSFBSharedStringsTable(org.apache.poi.xssf.binary.XSSFBSharedStringsTable) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage)

Aggregations

InputStream (java.io.InputStream)3 XSSFBSharedStringsTable (org.apache.poi.xssf.binary.XSSFBSharedStringsTable)3 XSSFBStylesTable (org.apache.poi.xssf.binary.XSSFBStylesTable)3 OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)2 OPCPackage (org.apache.poi.openxml4j.opc.OPCPackage)2 XSSFBCommentsTable (org.apache.poi.xssf.binary.XSSFBCommentsTable)2 XSSFBReader (org.apache.poi.xssf.eventusermodel.XSSFBReader)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)1 PackagePart (org.apache.poi.openxml4j.opc.PackagePart)1 DataFormatter (org.apache.poi.ss.usermodel.DataFormatter)1 XSSFBHyperlinksTable (org.apache.poi.xssf.binary.XSSFBHyperlinksTable)1 XSSFBSheetHandler (org.apache.poi.xssf.binary.XSSFBSheetHandler)1 XSSFShape (org.apache.poi.xssf.usermodel.XSSFShape)1 XmlException (org.apache.xmlbeans.XmlException)1 SAXException (org.xml.sax.SAXException)1