Use of org.apache.poi.xssf.binary.XSSFBSharedStringsTable in project poi by Apache.
From the class XSSFBEventBasedExcelExtractor, method getText.
/**
 * Processes the file and returns the text.
 *
 * <p>Walks every sheet of the package in iterator order. For each sheet the
 * output is, subject to the corresponding include-flags: the sheet name,
 * header text, cell text, text-box/shape text and finally footer text.
 *
 * @return the concatenated text of all sheets, or {@code null} if an
 *         {@link IOException}, {@link SAXException} or
 *         {@link OpenXML4JException} occurred (the exception is logged at
 *         WARN level and not rethrown)
 */
public String getText() {
    try {
        XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(getPackage());
        XSSFBReader xssfbReader = new XSSFBReader(getPackage());
        XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
        XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
        StringBuffer text = new StringBuffer();
        SheetTextExtractor sheetExtractor = new SheetTextExtractor();
        // NOTE(review): the hyperlinks table is built when requested but is not
        // handed to processSheet below; kept as-is so that any parse failure
        // in its constructor still surfaces exactly as before.
        XSSFBHyperlinksTable hyperlinksTable = null;
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            // Close the sheet stream even when processing fails part-way;
            // previously it was only closed on the success path.
            try {
                if (getIncludeSheetNames()) {
                    text.append(iter.getSheetName());
                    text.append('\n');
                }
                if (handleHyperlinksInCells) {
                    hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
                }
                XSSFBCommentsTable comments = getIncludeCellComments() ? iter.getXSSFBSheetComments() : null;
                processSheet(sheetExtractor, styles, comments, strings, stream);
                if (getIncludeHeadersFooters()) {
                    sheetExtractor.appendHeaderText(text);
                }
                sheetExtractor.appendCellText(text);
                if (getIncludeTextBoxes()) {
                    processShapes(iter.getShapes(), text);
                }
                if (getIncludeHeadersFooters()) {
                    sheetExtractor.appendFooterText(text);
                }
                // The extractor is shared across sheets, so clear it before the next one.
                sheetExtractor.reset();
            } finally {
                stream.close();
            }
        }
        return text.toString();
    } catch (IOException e) {
        LOGGER.log(POILogger.WARN, e);
        return null;
    } catch (SAXException se) {
        LOGGER.log(POILogger.WARN, se);
        return null;
    } catch (OpenXML4JException o4je) {
        LOGGER.log(POILogger.WARN, o4je);
        return null;
    }
}
Use of org.apache.poi.xssf.binary.XSSFBSharedStringsTable in project poi by Apache.
From the class TestXSSFBReader, method getSheets.
/**
 * Opens the named test resource as an XLSB package and renders every sheet
 * to a string via {@code TestSheetHandler}.
 *
 * @param testFileName name of the spreadsheet test resource to open
 * @return one entry per sheet, in sheet-iterator order, holding the
 *         {@code toString()} of the handler after that sheet was parsed
 * @throws Exception if the package cannot be opened or a sheet cannot be parsed
 */
private List<String> getSheets(String testFileName) throws Exception {
    OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName));
    try {
        List<String> sheetTexts = new ArrayList<String>();
        XSSFBReader r = new XSSFBReader(pkg);
        // assertNotNull(r.getWorkbookData());
        // assertNotNull(r.getSharedStringsData());
        // Fetch the styles table once; it is both asserted on and used for parsing.
        XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
        assertNotNull(xssfbStylesTable);
        XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
        XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator) r.getSheetsData();
        while (it.hasNext()) {
            InputStream is = it.next();
            try {
                String name = it.getSheetName();
                TestSheetHandler testSheetHandler = new TestSheetHandler();
                testSheetHandler.startSheet(name);
                XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is, xssfbStylesTable, it.getXSSFBSheetComments(), sst, testSheetHandler, new DataFormatter(), false);
                sheetHandler.parse();
                testSheetHandler.endSheet();
                sheetTexts.add(testSheetHandler.toString());
            } finally {
                // Release the sheet part's stream whether or not parsing succeeded.
                is.close();
            }
        }
        return sheetTexts;
    } finally {
        // The package is only read here, so discard it without saving.
        pkg.revert();
    }
}
Use of org.apache.poi.xssf.binary.XSSFBSharedStringsTable in project tika by Apache.
From the class XSSFBExcelExtractorDecorator, method buildXHTML.
/**
 * Writes every sheet of the wrapped extractor's package to the given handler.
 *
 * <p>Each sheet becomes a {@code div} containing, in order: an {@code h1}
 * with the sheet name, a {@code table}/{@code tbody} with the sheet contents,
 * the collected headers, the collected footers, the sheet's shapes, and
 * finally the sheet's hyperlinks.
 *
 * @param xhtml handler receiving the generated XHTML events
 * @throws SAXException if emitting XHTML or parsing a sheet fails
 * @throws XmlException wrapping any {@link OpenXML4JException} raised while
 *         opening the reader, styles, sheet iterator or shared strings
 * @throws IOException if reading from the package fails
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (OpenXML4JException oe) {
        // InvalidFormatException is a subtype of OpenXML4JException and was
        // handled identically, so a single handler covers both.
        throw new XmlException(oe);
    }
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        // Make sure the sheet stream is released even if handling this sheet fails.
        try {
            PackagePart sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();
            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, stream);
            xhtml.endElement("tbody");
            xhtml.endElement("table");
            // Headers and footers were collected while the sheet was processed;
            // emit them after the table, headers first.
            for (String header : sheetExtractor.headers) {
                extractHeaderFooter(header, xhtml);
            }
            for (String footer : sheetExtractor.footers) {
                extractHeaderFooter(footer, xhtml);
            }
            List<XSSFShape> shapes = iter.getShapes();
            processShapes(shapes, xhtml);
            //for now dump sheet hyperlinks at bottom of page
            //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
            //step 1: extract hyperlink info from bottom of page
            //step 2: process as we do now, but with cached hyperlink relationship info
            extractHyperLinks(sheetPart, xhtml);
            // All done with this sheet
            xhtml.endElement("div");
        } finally {
            stream.close();
        }
    }
}
Aggregations