Search in sources :

Example 56 with OPCPackage

use of org.apache.poi.openxml4j.opc.OPCPackage in project poi by apache.

the class TestXSSFReader method testStrings.

/**
 * Opens SampleSS.xlsx through {@link XSSFReader} and checks its shared
 * strings table: it must hold 11 items and its first entry must read
 * "Test spreadsheet".
 *
 * @throws Exception if the sample package cannot be opened or read
 */
public void testStrings() throws Exception {
    OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("SampleSS.xlsx"));
    try {
        XSSFReader r = new XSSFReader(pkg);
        assertEquals(11, r.getSharedStringsTable().getItems().size());
        assertEquals("Test spreadsheet", new XSSFRichTextString(r.getSharedStringsTable().getEntryAt(0)).toString());
    } finally {
        // Always release the package, including when an assertion above fails.
        pkg.close();
    }
}
Also used : XSSFRichTextString(org.apache.poi.xssf.usermodel.XSSFRichTextString) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage)

Example 57 with OPCPackage

use of org.apache.poi.openxml4j.opc.OPCPackage in project poi by apache.

the class TestXSSFReader method test58747.

/**
 * NPE from XSSFReader$SheetIterator.<init> on XLSX files generated by
 * the openpyxl library.
 * <p>
 * Builds the shared strings table, the styles table and the sheet iterator
 * for 58747.xlsx, then checks that the iterator yields exactly one sheet,
 * named "Orders".
 *
 * @throws Exception if the sample package cannot be opened or read
 */
public void test58747() throws Exception {
    OPCPackage pkg = XSSFTestDataSamples.openSamplePackage("58747.xlsx");
    try {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
        assertNotNull(strings);
        XSSFReader reader = new XSSFReader(pkg);
        StylesTable styles = reader.getStylesTable();
        assertNotNull(styles);
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) reader.getSheetsData();
        assertEquals(true, iter.hasNext());
        iter.next();
        assertEquals(false, iter.hasNext());
        // The sheet name is read after next(), i.e. for the sheet just returned.
        assertEquals("Orders", iter.getSheetName());
    } finally {
        // Close in finally so a failing assertion does not leave the package open.
        pkg.close();
    }
}
Also used : StylesTable(org.apache.poi.xssf.model.StylesTable) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage)

Example 58 with OPCPackage

use of org.apache.poi.openxml4j.opc.OPCPackage in project poi by apache.

the class TestXSSFBugs method bug45431.

/**
 * We should carry vba macros over after save.
 * <p>
 * The macro-enabled sample is checked as loaded, then written out and read
 * back twice; after each round trip the same checks are repeated on the
 * freshly read workbook.
 */
@Test
public void bug45431() throws IOException, InvalidFormatException {
    // As loaded from the sample file
    XSSFWorkbook loaded = XSSFTestDataSamples.openSampleWorkbook("45431.xlsm");
    OPCPackage loadedPkg = loaded.getPackage();
    assertMacroPartsPresent(loaded, loadedPkg);

    // First save / re-open round trip
    XSSFWorkbook firstCopy = XSSFTestDataSamples.writeOutAndReadBack(loaded);
    loadedPkg.close();
    loaded.close();
    OPCPackage firstCopyPkg = firstCopy.getPackage();
    assertMacroPartsPresent(firstCopy, firstCopyPkg);

    // Second round trip, just to be sure
    XSSFWorkbook secondCopy = XSSFTestDataSamples.writeOutAndReadBack(firstCopy);
    firstCopyPkg.close();
    firstCopy.close();
    OPCPackage secondCopyPkg = secondCopy.getPackage();
    assertMacroPartsPresent(secondCopy, secondCopyPkg);

    secondCopyPkg.close();
    secondCopy.close();
}

/**
 * Asserts that the workbook reports itself as macro enabled and that its
 * package contains both the VBA project part and the VML drawing part.
 */
private static void assertMacroPartsPresent(XSSFWorkbook workbook, OPCPackage pkg) throws InvalidFormatException {
    assertTrue(workbook.isMacroEnabled());
    // The macro project itself
    PackagePart vbaPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/vbaProject.bin"));
    assertNotNull(vbaPart);
    // And the drawing bit
    PackagePart drawingPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/drawings/vmlDrawing1.vml"));
    assertNotNull(drawingPart);
}
Also used : SXSSFWorkbook(org.apache.poi.xssf.streaming.SXSSFWorkbook) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) Test(org.junit.Test)

Example 59 with OPCPackage

use of org.apache.poi.openxml4j.opc.OPCPackage in project tika by apache.

the class XSSFBExcelExtractorDecorator method buildXHTML.

/**
 * Writes every sheet of the package held by {@code extractor} to the given
 * XHTML handler. Each sheet becomes a {@code div} holding an {@code h1} with
 * the sheet name and a {@code table} with the sheet contents, followed by
 * the sheet's headers and footers, its shapes and its hyperlinks.
 *
 * @param xhtml handler that receives the generated XHTML
 * @throws SAXException if writing to {@code xhtml} fails
 * @throws XmlException if the package cannot be read as OOXML; wraps the
 *                      underlying {@link OpenXML4JException}
 * @throws IOException  if reading or closing a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBStylesTable styles;
    try {
        XSSFBReader xssfReader = new XSSFBReader(container);
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is a subclass of OpenXML4JException and was
        // wrapped the same way, so one catch clause covers both.
        throw new XmlException(e);
    }
    while (iter.hasNext()) {
        PackagePart sheetPart;
        SheetTextAsHTML sheetExtractor;
        // try-with-resources so the sheet stream is closed even if processing
        // throws, matching the XSSF (non-binary) decorator.
        try (InputStream stream = iter.next()) {
            sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            sheetExtractor = new SheetTextAsHTML(xhtml);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();
            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, stream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");
        // Headers and footers are collected by sheetExtractor while the sheet
        // is processed, so they are written after the contents.
        for (String header : sheetExtractor.headers) {
            extractHeaderFooter(header, xhtml);
        }
        for (String footer : sheetExtractor.footers) {
            extractHeaderFooter(footer, xhtml);
        }
        List<XSSFShape> shapes = iter.getShapes();
        processShapes(shapes, xhtml);
        //for now dump sheet hyperlinks at bottom of page
        //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
        //step 1: extract hyperlink info from bottom of page
        //step 2: process as we do now, but with cached hyperlink relationship info
        extractHyperLinks(sheetPart, xhtml);
        // All done with this sheet
        xhtml.endElement("div");
    }
}
Also used : XSSFBReader(org.apache.poi.xssf.eventusermodel.XSSFBReader) XSSFBCommentsTable(org.apache.poi.xssf.binary.XSSFBCommentsTable) InputStream(java.io.InputStream) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) XSSFBStylesTable(org.apache.poi.xssf.binary.XSSFBStylesTable) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) XSSFBSharedStringsTable(org.apache.poi.xssf.binary.XSSFBSharedStringsTable) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage)

Example 60 with OPCPackage

use of org.apache.poi.openxml4j.opc.OPCPackage in project tika by apache.

the class XSSFExcelExtractorDecorator method buildXHTML.

/**
 * Renders each sheet of the package held by {@code extractor} as XHTML: a
 * {@code div} with the sheet name in an {@code h1}, the cell contents in a
 * {@code table}, then headers and footers, optionally shape text, and
 * finally the sheet's hyperlinks. A sheet part whose name has already been
 * handled is skipped.
 *
 * @param xhtml handler that receives the generated XHTML
 * @throws SAXException if writing to {@code xhtml} fails
 * @throws XmlException if the package cannot be read as OOXML; wraps the
 *                      underlying {@link OpenXML4JException}
 * @throws IOException  if reading or closing a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    ReadOnlySharedStringsTable strings;
    XSSFReader.SheetIterator iter;
    StylesTable styles;
    try {
        XSSFReader reader = new XSSFReader(container);
        styles = reader.getStylesTable();
        iter = (XSSFReader.SheetIterator) reader.getSheetsData();
        strings = new ReadOnlySharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // Also covers InvalidFormatException, which is a subclass.
        throw new XmlException(e);
    }
    //temporary workaround for POI-61034
    //remove once POI 3.17-beta1 is released
    Set<String> visitedPartNames = new HashSet<>();
    while (iter.hasNext()) {
        SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
        PackagePart sheetPart = null;
        try (InputStream sheetStream = iter.next()) {
            sheetPart = iter.getSheetPart();
            // add() returns false when the name was already recorded, in
            // which case this part has been emitted before and is skipped.
            if (!visitedPartNames.add(sheetPart.getPartName().toString())) {
                continue;
            }
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            CommentsTable sheetComments = iter.getSheetComments();
            // Open the sheet wrapper and write its name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Write the cell contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, sheetComments, styles, strings, sheetStream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");
        // Headers and footers are written after the sheet contents
        for (String headerText : sheetExtractor.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : sheetExtractor.footers) {
            extractHeaderFooter(footerText, xhtml);
        }
        // Text held in shapes is only emitted when the config asks for it
        if (config.getIncludeShapeBasedContent()) {
            processShapes(iter.getShapes(), xhtml);
        }
        //for now dump sheet hyperlinks at bottom of page
        //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
        //step 1: extract hyperlink info from bottom of page
        //step 2: process as we do now, but with cached hyperlink relationship info
        extractHyperLinks(sheetPart, xhtml);
        // Close the sheet wrapper
        xhtml.endElement("div");
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputStream(java.io.InputStream) StylesTable(org.apache.poi.xssf.model.StylesTable) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) CommentsTable(org.apache.poi.xssf.model.CommentsTable) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader) HashSet(java.util.HashSet)

Aggregations

OPCPackage (org.apache.poi.openxml4j.opc.OPCPackage)116 Test (org.junit.Test)54 InputStream (java.io.InputStream)29 PackagePart (org.apache.poi.openxml4j.opc.PackagePart)25 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)21 File (java.io.File)19 IOException (java.io.IOException)17 PackagePartName (org.apache.poi.openxml4j.opc.PackagePartName)16 ByteArrayInputStream (java.io.ByteArrayInputStream)14 ByteArrayOutputStream (java.io.ByteArrayOutputStream)13 XSSFWorkbook (org.apache.poi.xssf.usermodel.XSSFWorkbook)13 SignatureConfig (org.apache.poi.poifs.crypt.dsig.SignatureConfig)10 SignatureInfo (org.apache.poi.poifs.crypt.dsig.SignatureInfo)10 XSSFReader (org.apache.poi.xssf.eventusermodel.XSSFReader)10 OutputStream (java.io.OutputStream)9 ArrayList (java.util.ArrayList)9 FileOutputStream (java.io.FileOutputStream)8 InvalidOperationException (org.apache.poi.openxml4j.exceptions.InvalidOperationException)8 XmlException (org.apache.xmlbeans.XmlException)8 FileInputStream (java.io.FileInputStream)7