Use of org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler in project poi by apache.
Class XSSFEventBasedExcelExtractor, method processSheet.
/**
 * Processes the given sheet by streaming its XML through a SAX parser that
 * feeds an {@link XSSFSheetXMLHandler}.
 *
 * @param sheetContentsExtractor callback passed to the sheet handler
 * @param styles styles table passed to the sheet handler
 * @param comments comments table passed to the sheet handler
 * @param strings shared-strings table passed to the sheet handler
 * @param sheetInputStream raw XML of the sheet; this method does not explicitly close it
 * @throws IOException propagated from the XML parser
 * @throws SAXException propagated from the XML parser or its creation
 * @throws RuntimeException if the SAX parser cannot be configured
 */
public void processSheet(SheetContentsHandler sheetContentsExtractor, StylesTable styles, CommentsTable comments, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws IOException, SAXException {
    // Use the configured locale when one has been set, the default formatter otherwise.
    DataFormatter formatter = (locale == null) ? new DataFormatter() : new DataFormatter(locale);
    InputSource sheetSource = new InputSource(sheetInputStream);
    try {
        XMLReader sheetParser = SAXHelper.newXMLReader();
        ContentHandler handler = new XSSFSheetXMLHandler(styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults);
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
    } catch (ParserConfigurationException e) {
        // Chain the original exception so its stack trace is not lost.
        throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
    }
}
Use of org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler in project poi by apache.
Class HybridStreaming, method main.
/**
 * Opens {@code workbook.xlsx} with a workbook subclass that skips the DOM parse
 * of the sheet named by {@code SHEET_TO_STREAM}, then builds a streaming
 * {@link XSSFSheetXMLHandler} for that sheet.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the file cannot be opened, read or closed
 * @throws SAXException declared for the streaming set-up calls
 */
public static void main(String[] args) throws IOException, SAXException {
    // try-with-resources closes the workbook first and then the file stream,
    // even when workbook construction or the set-up below throws.
    try (InputStream sourceBytes = new FileInputStream("workbook.xlsx");
            XSSFWorkbook workbook = new XSSFWorkbook(sourceBytes) {
                /** Avoid DOM parse of large sheet */
                @Override
                public void parseSheet(java.util.Map<String, XSSFSheet> shIdMap, CTSheet ctSheet) {
                    if (!SHEET_TO_STREAM.equals(ctSheet.getName())) {
                        super.parseSheet(shIdMap, ctSheet);
                    }
                }
            }) {
        // Having avoided a DOM-based parse of the sheet, we can stream it instead.
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(workbook.getPackage());
        new XSSFSheetXMLHandler(workbook.getStylesSource(), strings, createSheetContentsHandler(), false);
    }
}
Use of org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler in project jeesuite-libs by vakinge.
Class XLSX2CSV, method processSheet.
/**
 * Parses the content of one sheet using the specified styles and
 * shared-strings tables, reporting it to the supplied handler.
 *
 * @param styles styles table passed to the sheet XML handler
 * @param strings shared-strings table passed to the sheet XML handler
 * @param sheetHandler callback passed to the sheet XML handler
 * @param sheetInputStream raw XML of the sheet; this method does not explicitly close it
 * @throws IOException propagated from the XML parser
 * @throws ParserConfigurationException declared for callers; a configuration
 *         failure inside this method is rethrown as a {@link RuntimeException}
 * @throws SAXException propagated from the XML parser or its creation
 */
public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler, InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException {
    DataFormatter formatter = new DataFormatter();
    InputSource sheetSource = new InputSource(sheetInputStream);
    try {
        XMLReader sheetParser = SAXHelper.newXMLReader();
        // No comments table is supplied (null); formulasNotResults is false.
        ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
    } catch (ParserConfigurationException e) {
        // Chain the original exception so its stack trace is not lost.
        throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
    }
}
Use of org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler in project tika by apache.
Class XSSFExcelExtractorDecorator, method processSheet.
/**
 * Streams one sheet through the XML reader obtained from the parse context,
 * and records in the metadata whether the sheet is protected.
 *
 * @param sheetContentsExtractor callback passed to the sheet XML handler
 * @param comments comments table passed to the sheet XML handler
 * @param styles styles table passed to the sheet XML handler
 * @param strings shared-strings table passed to the sheet XML handler
 * @param sheetInputStream raw XML of the sheet; always closed by this method
 * @throws IOException propagated from the XML parser or from closing the stream
 * @throws SAXException propagated from the XML parser
 * @throws RuntimeException if the parse context cannot supply an XML reader
 */
public void processSheet(SheetContentsHandler sheetContentsExtractor, CommentsTable comments, StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws IOException, SAXException {
    // try-with-resources releases the sheet stream even when parsing fails;
    // previously it was closed only after a successful parse.
    try (InputStream sheetStream = sheetInputStream) {
        InputSource sheetSource = new InputSource(sheetStream);
        XMLReader sheetParser = parseContext.getXMLReader();
        XSSFSheetInterestingPartsCapturer handler = new XSSFSheetInterestingPartsCapturer(new XSSFSheetXMLHandler(styles, comments, strings, sheetContentsExtractor, formatter, false));
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
        if (handler.hasProtection) {
            metadata.set(TikaMetadataKeys.PROTECTED, "true");
        }
    } catch (TikaException e) {
        // Chain the original exception so its stack trace is not lost.
        throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
    }
}
Use of org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler in project nifi by apache.
Class ConvertExcelToCSVProcessor, method handleExcelSheet.
/**
 * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile.
 *
 * @param session
 *            The NiFi ProcessSession instance for the current invocation.
 * @param originalParentFF
 *            The flowfile the new per-sheet flowfile is created from.
 * @param sheetInputStream
 *            Raw XML of the sheet; always closed before this method returns.
 * @param readConfig
 *            Supplies the styles, shared strings, sheet name and value-formatting flag.
 * @param csvFormat
 *            CSV format passed to the sheet-to-CSV handler.
 * @throws IOException
 *             propagated from writing the flowfile content or closing the stream.
 */
private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, final InputStream sheetInputStream, ExcelSheetReadConfig readConfig, CSVFormat csvFormat) throws IOException {
    FlowFile ff = session.create(originalParentFF);
    try {
        final DataFormatter formatter = new DataFormatter();
        final InputSource sheetSource = new InputSource(sheetInputStream);
        final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat);
        final XMLReader parser = SAXHelper.newXMLReader();

        // If Value Formatting is set to false then don't pass in the styles table.
        // This will cause the XSSF Handler to return the raw value instead of the formatted one.
        final StylesTable sst = readConfig.getFormatValues() ? readConfig.getStyles() : null;
        final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(sst, null, readConfig.getSharedStringsTable(), sheetHandler, formatter, false);
        parser.setContentHandler(handler);

        ff = session.write(ff, new OutputStreamCallback() {
            @Override
            public void process(OutputStream out) throws IOException {
                PrintStream outPrint = new PrintStream(out);
                sheetHandler.setOutput(outPrint);
                try {
                    parser.parse(sheetSource);
                    sheetInputStream.close();
                    sheetHandler.close();
                    outPrint.close();
                } catch (SAXException se) {
                    // NOTE(review): the parse error is only logged, so the flowfile is still
                    // routed to SUCCESS below with whatever was written so far - confirm intended.
                    getLogger().error("Error occurred while processing Excel sheet {}", new Object[] { readConfig.getSheetName() }, se);
                }
            }
        });

        ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName());
        // Long.toString avoids the deprecated boxing constructor and yields the same text.
        ff = session.putAttribute(ff, ROW_NUM, Long.toString(sheetHandler.getRowCount()));

        // Record the parent's filename as the source, or a placeholder when it has none.
        final String parentFilename = originalParentFF.getAttribute(CoreAttributes.FILENAME.key());
        if (StringUtils.isNotEmpty(parentFilename)) {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME, parentFilename);
        } else {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME);
        }

        // Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE
        ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()), ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName()));
        ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
        session.transfer(ff, SUCCESS);
    } catch (SAXException | ParserConfigurationException saxE) {
        // Reached when the XML reader itself cannot be created; route to FAILURE with the reason attached.
        getLogger().error("Failed to create instance of Parser.", saxE);
        ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage());
        session.transfer(ff, FAILURE);
    } finally {
        // Covers the paths where the callback did not get to close the stream.
        sheetInputStream.close();
    }
}
Aggregations