Use of org.talend.dataprep.schema.xls.streaming.StreamingSheet in project data-prep by Talend.
The example below is the parseAllSheetsStream method of the XlsSchemaParser class.
private List<Schema.SheetContent> parseAllSheetsStream(Request request) {
    Workbook workbook = StreamingReader.builder() //
            .bufferSize(4096) //
            .rowCacheSize(1) //
            .open(request.getContent());
    try {
        List<Schema.SheetContent> schemas = new ArrayList<>();
        int sheetNumber = 0;
        for (Sheet sheet : workbook) {
            List<ColumnMetadata> columnsMetadata = createMetadataFromFirstNonEmptyRowAndInitSheet(sheet);
            int totalColumnsNumber = getTotalColumnsNumber((StreamingSheet) sheet);

            /*
             * Protecting the app against too large data sets => It would break mongo by submitting too large empty
             * column metadata or saturate the memory during analysis.
             *
             * @see https://jira.talendforge.org/browse/TDP-3459
             */
            if (totalColumnsNumber > maxNumberOfColumns) {
                throw new TDPException(DataSetErrorCodes.DATASET_HAS_TOO_MANY_COLUMNS,
                        ExceptionContext.build()
                                .put("number-of-columns", totalColumnsNumber)
                                .put("max-allowed", maxNumberOfColumns));
            }

            String sheetName = sheet.getSheetName();
            Schema.SheetContent sheetContent = new Schema.SheetContent(
                    StringUtils.isEmpty(sheetName) ? "sheet-" + sheetNumber : sheetName, columnsMetadata);

            // if fewer columns were found than the metadata declares, complete with empty column metadata
            completeWithEmptyColumnsMetadata(columnsMetadata, totalColumnsNumber);

            schemas.add(sheetContent);
        }
        return schemas;
    } finally {
        try {
            workbook.close();
        } catch (IOException e) {
            LOGGER.error("Unable to close excel file.", e);
        }
    }
}
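For context, below is a minimal, self-contained sketch of the same streaming pattern. It assumes the upstream xlsx-streamer library (com.monitorjbl.xlsx.StreamingReader), which the classes under org.talend.dataprep.schema.xls.streaming mirror, and it uses a hypothetical widestRow helper that counts columns through the generic POI Sheet/Row API; it is not the actual getTotalColumnsNumber implementation shown above.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

import com.monitorjbl.xlsx.StreamingReader;

public class StreamingColumnCount {

    // Hypothetical helper: derives a column count by scanning rows through the
    // generic POI Sheet/Row API that the streaming sheet implements.
    // This is NOT the actual XlsSchemaParser.getTotalColumnsNumber implementation.
    static int widestRow(Sheet sheet) {
        int max = 0;
        for (Row row : sheet) {
            // getLastCellNum() is 1-based and returns -1 for an empty row
            max = Math.max(max, row.getLastCellNum());
        }
        return max;
    }

    public static void main(String[] args) throws IOException {
        try (InputStream in = new FileInputStream(args[0]);
                Workbook workbook = StreamingReader.builder()
                        .bufferSize(4096) // read-ahead buffer, same value as in parseAllSheetsStream
                        .rowCacheSize(1)  // keep only one row in memory at a time
                        .open(in)) {
            for (Sheet sheet : workbook) {
                System.out.println(sheet.getSheetName() + ": " + widestRow(sheet) + " columns");
            }
        }
    }
}

Because the rows are streamed and only one row is cached at a time, a sheet can only be iterated forward once; parseAllSheetsStream therefore derives the column count before it finishes building the sheet metadata rather than re-reading the sheet.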