use of org.apache.poi.xssf.eventusermodel.XSSFReader in project tika by apache.
the class XSSFExcelExtractorDecorator method buildXHTML.
/**
 * Writes every worksheet of the extractor's package to the given XHTML handler.
 * <p>
 * For each distinct sheet part this emits a {@code div} holding an {@code h1} with the
 * sheet name and a {@code table}/{@code tbody} filled by {@code processSheet}, then the
 * sheet's headers and footers, the text of its shapes (only when the config asks for
 * shape-based content) and finally its hyperlinks.
 *
 * @param xhtml handler that receives the generated XHTML events
 * @throws XmlException if POI reports an {@link OpenXML4JException} while opening the
 *                      reader, styles, sheet iterator or shared strings table
 * @throws SAXException declared for the handler and parsing calls made here
 * @throws IOException  declared for the stream handling done here
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage pkg = extractor.getPackage();
    StylesTable styleTable;
    XSSFReader.SheetIterator sheets;
    ReadOnlySharedStringsTable sharedStrings;
    try {
        XSSFReader reader = new XSSFReader(pkg);
        styleTable = reader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        sharedStrings = new ReadOnlySharedStringsTable(pkg);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is a subclass of OpenXML4JException, so one clause covers both
        throw new XmlException(e);
    }

    // temporary workaround for POI-61034
    // remove once POI 3.17-beta1 is released
    Set<String> visitedPartNames = new HashSet<>();
    while (sheets.hasNext()) {
        SheetTextAsHTML sheetHandler = new SheetTextAsHTML(xhtml);
        PackagePart part = null;
        try (InputStream sheetStream = sheets.next()) {
            part = sheets.getSheetPart();
            // Set.add returns false when this part name was already recorded: skip the duplicate
            if (!visitedPartNames.add(part.getPartName().toString())) {
                continue;
            }
            addDrawingHyperLinks(part);
            sheetParts.add(part);
            CommentsTable sheetComments = sheets.getSheetComments();

            // Open the sheet container and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", sheets.getSheetName());

            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetHandler, sheetComments, styleTable, sharedStrings, sheetStream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // Headers and footers collected while processing the sheet are emitted after the table
        for (String headerText : sheetHandler.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : sheetHandler.footers) {
            extractHeaderFooter(footerText, xhtml);
        }

        // Text held in shapes, if required
        if (config.getIncludeShapeBasedContent()) {
            List<XSSFShape> sheetShapes = sheets.getShapes();
            processShapes(sheetShapes, xhtml);
        }

        // For now the sheet hyperlinks are dumped at the bottom of the page.
        // A double pass over the input stream could reunite hyperlinks with cells/textboxes:
        //   step 1: extract hyperlink info from the bottom of the page
        //   step 2: process as is done now, but with cached hyperlink relationship info
        extractHyperLinks(part, xhtml);

        // All done with this sheet
        xhtml.endElement("div");
    }
}
use of org.apache.poi.xssf.eventusermodel.XSSFReader in project translationstudio8 by heartsome.
the class XlsxRowReader method readRows.
/**
 * Reads the cells of the first worksheet of {@code xlsxFile}.
 * <p>
 * The workbook is opened read-only and the first sheet's XML stream is handed to
 * {@code readCells} together with the shared strings table. Only the first sheet is
 * processed; any further sheets are ignored.
 * <p>
 * All setup happens inside the guarded region, so the package is closed and the monitor
 * is marked done even when opening the reader or reading the cells fails.
 *
 * @param monitor progress monitor; a task of 10 units is started on it, 1 unit is reported
 *                up front and 9 are delegated to a sub-monitor for reading the cells
 * @throws ParserConfigurationException declared for the parsing done by the called helpers
 * @throws SAXException                 declared for the parsing done by the called helpers
 * @throws IOException                  declared for package and stream handling
 * @throws OpenXML4JException           declared for the POI package/reader calls
 */
public void readRows(IProgressMonitor monitor) throws ParserConfigurationException, SAXException, IOException, OpenXML4JException {
    monitor.beginTask("", 10);
    OPCPackage p = null;
    try {
        monitor.worked(1);
        p = OPCPackage.open(xlsxFile, PackageAccess.READ);
        ReadOnlySharedStringsTable shareString = new ReadOnlySharedStringsTable(p);
        XSSFReader xssfReader = new XSSFReader(p);
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        // Currently only the first sheet is processed
        if (iter.hasNext()) {
            InputStream stream = iter.next();
            try {
                readCells(stream, shareString, new SubProgressMonitor(monitor, 9));
            } finally {
                // close the sheet stream even when reading the cells fails
                stream.close();
            }
        }
    } finally {
        try {
            // p stays null when opening the package itself failed
            if (p != null) {
                p.close();
            }
        } finally {
            // always finish the task, even if closing the package throws
            monitor.done();
        }
    }
}
use of org.apache.poi.xssf.eventusermodel.XSSFReader in project cubrid-manager by CUBRID.
the class XlsxRowNumberHandler method process.
/**
 * The thread method: counts the rows of every sheet in the workbook {@code fileName}.
 * <p>
 * Resets {@code numberAllRow}, {@code firstRowLst} and {@code itemsNumberOfSheets}, opens
 * the file as an OPC package and SAX-parses each sheet with this object registered as the
 * content handler. After each sheet, the difference between {@code numberAllRow} and the
 * sum of the counts recorded so far is appended to {@code itemsNumberOfSheets}
 * (presumably {@code numberAllRow} is advanced by this handler's SAX callbacks; confirm
 * against the callback methods).
 * <p>
 * {@code isEnd} is set to {@code true} when the method finishes, whether or not it failed.
 *
 * @throws RuntimeException wrapping any exception raised while opening or parsing the file
 */
public void process() {
    numberAllRow = 0;
    firstRowLst.clear();
    InputStream stream = null;
    itemsNumberOfSheets = new ArrayList<Integer>();
    try {
        stream = new BufferedInputStream(new FileInputStream(fileName));
        OPCPackage pkg = OPCPackage.open(stream);
        XSSFReader reader = new XSSFReader(pkg);
        sharedStringTable = reader.getSharedStringsTable();
        XMLReader xmlReader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); //$NON-NLS-1$
        xmlReader.setContentHandler(this);
        Iterator<InputStream> sheets = reader.getSheetsData();
        int sheetNum = 0;
        while (sheets.hasNext()) {
            InputStream sheet = sheets.next();
            InputSource sheetSource = new InputSource(sheet);
            try {
                xmlReader.parse(sheetSource);
            } finally {
                // best-effort close: a failure here is logged and must not mask a parse error
                try {
                    if (sheet != null) {
                        sheet.close();
                    }
                } catch (Exception e) {
                    LOGGER.error("", e);
                }
            }
            if (sheetNum == 0) {
                itemsNumberOfSheets.add(numberAllRow);
            } else {
                // rows of this sheet = running total minus everything recorded for earlier sheets
                int numberBefore = 0;
                for (int recorded : itemsNumberOfSheets) {
                    numberBefore += recorded;
                }
                int items = numberAllRow - numberBefore;
                itemsNumberOfSheets.add(items);
            }
            sheetNum++;
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        isEnd = true;
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ex) {
                // pass the exception itself so the stack trace is kept (the message may be null)
                LOGGER.error(ex.getMessage(), ex);
            }
        }
    }
}
use of org.apache.poi.xssf.eventusermodel.XSSFReader in project tdi-studio-se by Talend.
the class ExcelReader method call.
/**
 * Parses the requested sheets of the workbook and feeds them to the sheet contents handler.
 * <p>
 * The package is opened from {@code fileURL} when that is set, otherwise from the input
 * stream {@code is}. Every sheet whose name matches an entry of {@code sheetNames} (as a
 * regular expression when the corresponding {@code asRegexs} flag is true, by equality
 * otherwise) is collected first, in workbook order; the collected sheets are then
 * SAX-parsed one by one using {@code charset} as the declared encoding.
 * <p>
 * On exit, successful or not, the package is reverted and
 * {@code cache.notifyErrorOccurred()} is invoked.
 *
 * @return always {@code null}
 * @throws RuntimeException with message "No match sheets" when no sheet name matches
 * @throws Exception        any failure raised while opening or parsing the workbook
 */
public Object call() throws Exception {
    OPCPackage pkg = null;
    try {
        pkg = (fileURL != null) ? OPCPackage.open(fileURL) : PackageHelper.open(is);

        XSSFReader reader = new XSSFReader(pkg);
        StylesTable styles = reader.getStylesTable();
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
        sheetContentsHandler = new DefaultTalendSheetContentsHandler(cache);
        DataFormatter formatter = new DataFormatter();
        XMLReader parser = XMLReaderFactory.createXMLReader();
        // the trailing 'false' is formulasNotResults: cell results are reported, not formulas
        ContentHandler handler = new TalendXSSFSheetXMLHandler(styles, strings, sheetContentsHandler, formatter, false);
        parser.setContentHandler(handler);

        // First pass: keep the streams of the sheets that were asked for, close the others
        XSSFReader.SheetIterator sheetIterator = (XSSFReader.SheetIterator) reader.getSheetsData();
        LinkedHashMap<String, InputStream> selectedSheets = new LinkedHashMap<String, InputStream>();
        while (sheetIterator.hasNext()) {
            InputStream sheetStream = sheetIterator.next();
            String sheetName = sheetIterator.getSheetName();

            boolean wanted = false;
            for (int i = 0; i < sheetNames.size() && !wanted; i++) {
                wanted = asRegexs.get(i)
                        ? sheetName.matches(sheetNames.get(i))
                        : sheetName.equals(sheetNames.get(i));
            }

            if (wanted) {
                selectedSheets.put(sheetName, sheetStream);
            } else {
                sheetStream.close();
            }
        }

        if (selectedSheets.isEmpty()) {
            throw new RuntimeException("No match sheets");
        }

        // Second pass: parse the selected sheets in insertion (workbook) order
        for (InputStream sheetStream : selectedSheets.values()) {
            try {
                InputSource source = new InputSource(sheetStream);
                source.setEncoding(charset);
                parser.parse(source);
            } finally {
                sheetStream.close();
            }
        }
    } finally {
        if (pkg != null) {
            pkg.revert();
        }
        cache.notifyErrorOccurred();
    }
    return null;
}
use of org.apache.poi.xssf.eventusermodel.XSSFReader in project poi by apache.
the class XSSFFileHandler method checkXSSFReader.
/**
 * Exercises the event-model {@link XSSFReader} accessors on the given package.
 * <p>
 * Requests the shared strings, styles, themes and workbook data as well as every sheet
 * stream, closing each stream that is handed out. The shared strings, styles and themes
 * streams may be {@code null}; the workbook data is asserted to be present.
 *
 * @param p the package to read
 * @throws IOException        declared for the stream handling done here
 * @throws OpenXML4JException declared for the POI reader calls
 */
private void checkXSSFReader(OPCPackage p) throws IOException, OpenXML4JException {
    XSSFReader reader = new XSSFReader(p);

    // these can be null...
    InputStream sharedStringsData = reader.getSharedStringsData();
    if (sharedStringsData != null) {
        sharedStringsData.close();
    }
    reader.getSharedStringsTable();

    InputStream stylesData = reader.getStylesData();
    if (stylesData != null) {
        stylesData.close();
    }
    reader.getStylesTable();

    InputStream themesData = reader.getThemesData();
    if (themesData != null) {
        themesData.close();
    }

    // the workbook part must exist; close its stream like all the others instead of leaking it
    InputStream workbookData = reader.getWorkbookData();
    assertNotNull(workbookData);
    workbookData.close();

    Iterator<InputStream> sheetsData = reader.getSheetsData();
    while (sheetsData.hasNext()) {
        InputStream str = sheetsData.next();
        str.close();
    }
}
Aggregations