Use of org.apache.poi.openxml4j.opc.OPCPackage in project poi by Apache.
Class TestXSSFReader, method testStrings.
/**
 * Reads the shared strings table of SampleSS.xlsx through {@link XSSFReader}
 * and checks that it holds 11 items, the first of which is "Test spreadsheet".
 *
 * @throws Exception if the package cannot be opened or read
 */
public void testStrings() throws Exception {
    // try-with-resources: the package is released even when an assertion fails
    try (OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("SampleSS.xlsx"))) {
        XSSFReader r = new XSSFReader(pkg);
        assertEquals(11, r.getSharedStringsTable().getItems().size());
        assertEquals("Test spreadsheet", new XSSFRichTextString(r.getSharedStringsTable().getEntryAt(0)).toString());
    }
}
Use of org.apache.poi.openxml4j.opc.OPCPackage in project poi by Apache.
Class TestXSSFReader, method test58747.
/**
 * NPE from XSSFReader$SheetIterator.<init> on XLSX files generated by
 * the openpyxl library.
 * <p>
 * Opens 58747.xlsx and checks that the shared strings table, the styles
 * table and the sheet iterator can all be obtained, and that the iterator
 * yields exactly one sheet, named "Orders".
 *
 * @throws Exception if the sample package cannot be opened or read
 */
public void test58747() throws Exception {
    // try-with-resources: previously the package was only closed when every
    // assertion passed, leaking it on failure
    try (OPCPackage pkg = XSSFTestDataSamples.openSamplePackage("58747.xlsx")) {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
        assertNotNull(strings);
        XSSFReader reader = new XSSFReader(pkg);
        StylesTable styles = reader.getStylesTable();
        assertNotNull(styles);
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) reader.getSheetsData();
        assertEquals(true, iter.hasNext());
        // advance to the only sheet; its name is queried from the iterator afterwards
        iter.next();
        assertEquals(false, iter.hasNext());
        assertEquals("Orders", iter.getSheetName());
    }
}
Use of org.apache.poi.openxml4j.opc.OPCPackage in project poi by Apache.
Class TestXSSFBugs, method bug45431.
/**
 * We should carry vba macros over after save.
 * <p>
 * Opens 45431.xlsm, then writes it out and reads it back twice; after every
 * step the workbook must still report itself as macro-enabled and the VBA
 * project and VML drawing parts must still be present in its package.
 */
@Test
public void bug45431() throws IOException, InvalidFormatException {
    // Original file: macro related parts must be there
    XSSFWorkbook original = XSSFTestDataSamples.openSampleWorkbook("45431.xlsm");
    OPCPackage originalPkg = original.getPackage();
    assertTrue(original.isMacroEnabled());
    assertMacroPartsPresent(originalPkg);

    // First round trip: save and re-open, both still there
    XSSFWorkbook firstCopy = XSSFTestDataSamples.writeOutAndReadBack(original);
    originalPkg.close();
    original.close();
    OPCPackage firstCopyPkg = firstCopy.getPackage();
    assertTrue(firstCopy.isMacroEnabled());
    assertMacroPartsPresent(firstCopyPkg);

    // Second round trip, just to be sure
    XSSFWorkbook secondCopy = XSSFTestDataSamples.writeOutAndReadBack(firstCopy);
    firstCopyPkg.close();
    firstCopy.close();
    OPCPackage secondCopyPkg = secondCopy.getPackage();
    assertTrue(secondCopy.isMacroEnabled());
    assertMacroPartsPresent(secondCopyPkg);
    secondCopyPkg.close();
    secondCopy.close();
}

/**
 * Asserts that the package contains the VBA project part and the VML drawing part.
 */
private static void assertMacroPartsPresent(OPCPackage pkg) throws InvalidFormatException {
    // The macro storage itself
    assertNotNull(pkg.getPart(PackagingURIHelper.createPartName("/xl/vbaProject.bin")));
    // And the drawing bit
    assertNotNull(pkg.getPart(PackagingURIHelper.createPartName("/xl/drawings/vmlDrawing1.vml")));
}
Use of org.apache.poi.openxml4j.opc.OPCPackage in project tika by Apache.
Class XSSFBExcelExtractorDecorator, method buildXHTML.
/**
 * Writes every sheet of the extractor's package to the given handler.
 * <p>
 * For each sheet a {@code div} is emitted containing an {@code h1} with the
 * sheet name, a {@code table} with the sheet contents, and then the sheet's
 * headers, footers, shapes and hyperlinks, in that order.
 *
 * @param xhtml handler receiving the generated XHTML events
 * @throws SAXException propagated from the handler or from sheet processing
 * @throws XmlException if setting up the reader, styles, sheet iterator or
 *                      shared strings raises an {@code OpenXML4JException}
 * @throws IOException  if reading the package or closing a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is an OpenXML4JException, so one handler covers both
        throw new XmlException(e);
    }
    while (iter.hasNext()) {
        SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
        PackagePart sheetPart;
        // try-with-resources: the sheet stream is closed as soon as the sheet
        // contents have been processed, even if processing fails
        try (InputStream stream = iter.next()) {
            sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();
            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, stream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");
        // Headers and footers collected while processing the sheet are
        // written out after the table
        for (String header : sheetExtractor.headers) {
            extractHeaderFooter(header, xhtml);
        }
        for (String footer : sheetExtractor.footers) {
            extractHeaderFooter(footer, xhtml);
        }
        List<XSSFShape> shapes = iter.getShapes();
        processShapes(shapes, xhtml);
        //for now dump sheet hyperlinks at bottom of page
        //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
        //step 1: extract hyperlink info from bottom of page
        //step 2: process as we do now, but with cached hyperlink relationship info
        extractHyperLinks(sheetPart, xhtml);
        // All done with this sheet
        xhtml.endElement("div");
    }
}
Use of org.apache.poi.openxml4j.opc.OPCPackage in project tika by Apache.
Class XSSFExcelExtractorDecorator, method buildXHTML.
/**
 * Streams every sheet of the extractor's package to the given handler.
 * <p>
 * Each sheet part is written at most once as a {@code div} holding an
 * {@code h1} with the sheet name and a {@code table} with the contents,
 * followed by headers, footers, optionally shape text, and hyperlinks.
 *
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFReader xssfReader;
    StylesTable styleTable;
    XSSFReader.SheetIterator sheets;
    ReadOnlySharedStringsTable sharedStrings;
    try {
        xssfReader = new XSSFReader(container);
        styleTable = xssfReader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        sharedStrings = new ReadOnlySharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // also covers InvalidFormatException, which is an OpenXML4JException
        throw new XmlException(e);
    }

    // Temporary workaround for POI-61034 (remove once POI 3.17-beta1 is
    // released): remember part names so no sheet part is emitted twice.
    Set<String> emittedPartNames = new HashSet<>();

    while (sheets.hasNext()) {
        SheetTextAsHTML sheetHandler = new SheetTextAsHTML(xhtml);
        PackagePart currentPart = null;
        try (InputStream sheetStream = sheets.next()) {
            currentPart = sheets.getSheetPart();
            // Set.add returns false when the name was already recorded
            if (!emittedPartNames.add(currentPart.getPartName().toString())) {
                continue;
            }
            addDrawingHyperLinks(currentPart);
            sheetParts.add(currentPart);
            CommentsTable sheetComments = sheets.getSheetComments();

            // Open the sheet container and print its name
            xhtml.startElement("div");
            xhtml.element("h1", sheets.getSheetName());

            // Main sheet contents go into a table
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetHandler, sheetComments, styleTable, sharedStrings, sheetStream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // Headers, then footers, gathered while the sheet was processed
        for (String headerText : sheetHandler.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : sheetHandler.footers) {
            extractHeaderFooter(footerText, xhtml);
        }

        // Text held in shapes, only when the configuration asks for it
        if (config.getIncludeShapeBasedContent()) {
            processShapes(sheets.getShapes(), xhtml);
        }

        // For now the sheet hyperlinks are dumped at the bottom of the page.
        // A double pass over the input stream could reunite hyperlinks with
        // their cells/textboxes:
        //   step 1: extract hyperlink info from bottom of page
        //   step 2: process as we do now, but with cached hyperlink relationship info
        extractHyperLinks(currentPart, xhtml);

        // Sheet finished
        xhtml.endElement("div");
    }
}
Aggregations