Use of org.apache.poi.xssf.eventusermodel.XSSFBReader in project poi by apache.
The class XSSFBEventBasedExcelExtractor, method getText.
/**
 * Processes the file and returns the text.
 *
 * <p>Sheets are visited in the order supplied by the reader's sheet iterator.
 * For each sheet the output is, in order: the sheet name (if enabled), the
 * header text (if enabled), the cell text, the text of any shapes (if text
 * boxes are enabled) and the footer text (if enabled).
 *
 * @return the extracted text, or {@code null} if an {@link IOException},
 *         {@code SAXException} or {@code OpenXML4JException} was raised while
 *         reading the package (the exception is logged at WARN level)
 */
public String getText() {
    try {
        XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(getPackage());
        XSSFBReader xssfbReader = new XSSFBReader(getPackage());
        XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
        XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
        StringBuffer text = new StringBuffer();
        SheetTextExtractor sheetExtractor = new SheetTextExtractor();
        // NOTE(review): the hyperlinks table is built when requested but is never
        // handed to processSheet(...) below -- confirm whether that is intended.
        XSSFBHyperlinksTable hyperlinksTable = null;
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            try {
                if (getIncludeSheetNames()) {
                    text.append(iter.getSheetName());
                    text.append('\n');
                }
                if (handleHyperlinksInCells) {
                    hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
                }
                XSSFBCommentsTable comments = getIncludeCellComments() ? iter.getXSSFBSheetComments() : null;
                processSheet(sheetExtractor, styles, comments, strings, stream);
                if (getIncludeHeadersFooters()) {
                    sheetExtractor.appendHeaderText(text);
                }
                sheetExtractor.appendCellText(text);
                if (getIncludeTextBoxes()) {
                    processShapes(iter.getShapes(), text);
                }
                if (getIncludeHeadersFooters()) {
                    sheetExtractor.appendFooterText(text);
                }
                // The extractor is reused for the next sheet, so clear its state.
                sheetExtractor.reset();
            } finally {
                // Close the sheet stream even when processing fails part-way,
                // so a bad sheet does not leak the underlying resource.
                stream.close();
            }
        }
        return text.toString();
    } catch (IOException e) {
        LOGGER.log(POILogger.WARN, e);
        return null;
    } catch (SAXException se) {
        LOGGER.log(POILogger.WARN, se);
        return null;
    } catch (OpenXML4JException o4je) {
        LOGGER.log(POILogger.WARN, o4je);
        return null;
    }
}
Use of org.apache.poi.xssf.eventusermodel.XSSFBReader in project poi by apache.
The class TestXSSFBSheetHyperlinkManager, method testBasic.
/**
 * Opens {@code hyperlink.xlsb}, builds the hyperlinks table for the first
 * sheet returned by the reader, and checks that cell (0, 0) carries exactly
 * one hyperlink record with the expected location and relationship id.
 */
@Test
public void testBasic() throws Exception {
    OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb"));
    try {
        XSSFBReader reader = new XSSFBReader(pkg);
        XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData();
        // Advance to the first sheet; only its part is needed, not the stream contents.
        it.next();
        XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart());
        List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0));
        assertNotNull(records);
        assertEquals(1, records.size());
        XSSFHyperlinkRecord record = records.get(0);
        assertEquals("http://tika.apache.org/", record.getLocation());
        assertEquals("rId2", record.getRelId());
    } finally {
        // Release the package without saving; the test only reads from it.
        pkg.revert();
    }
}
Use of org.apache.poi.xssf.eventusermodel.XSSFBReader in project tika by apache.
The class XSSFBExcelExtractorDecorator, method buildXHTML.
/**
 * Writes every sheet of the wrapped extractor's package to the given handler.
 *
 * <p>Each sheet becomes a {@code div} containing, in order: an {@code h1}
 * with the sheet name, a {@code table}/{@code tbody} with the sheet contents,
 * the sheet's headers and footers, its shapes, and finally its hyperlinks.
 *
 * @param xhtml handler that receives the generated XHTML events
 * @throws SAXException if the handler rejects an event
 * @throws XmlException if the package cannot be opened for reading; wraps the
 *         underlying {@code OpenXML4JException}
 * @throws IOException if reading or closing a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (OpenXML4JException oe) {
        // Also covers InvalidFormatException, which was previously caught
        // separately with an identical handler.
        throw new XmlException(oe);
    }
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        try {
            PackagePart sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();
            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, stream);
            xhtml.endElement("tbody");
            xhtml.endElement("table");
            // Headers and footers are collected while the sheet is processed,
            // so they are written out after the table rather than before it.
            for (String header : sheetExtractor.headers) {
                extractHeaderFooter(header, xhtml);
            }
            for (String footer : sheetExtractor.footers) {
                extractHeaderFooter(footer, xhtml);
            }
            List<XSSFShape> shapes = iter.getShapes();
            processShapes(shapes, xhtml);
            //for now dump sheet hyperlinks at bottom of page
            //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
            //step 1: extract hyperlink info from bottom of page
            //step 2: process as we do now, but with cached hyperlink relationship info
            extractHyperLinks(sheetPart, xhtml);
            // All done with this sheet
            xhtml.endElement("div");
        } finally {
            // Release the sheet stream whether or not the sheet was processed
            // successfully; it was previously never closed.
            stream.close();
        }
    }
}
Aggregations