use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project Gargoyle by callakrsos.
the class DocFileParser method DocFileContentParser.
/**
 * Extracts the text content of a legacy binary Office document.
 *
 * <p>The file is opened as a POIFS container and the extractor is chosen from
 * the file name suffix: {@code .doc}, {@code .xls} or {@code .ppt}.
 *
 * @param fileName path of the file to read
 * @return the extracted text, or an empty string when the suffix is not one of
 *         the three handled ones or when any exception is raised while reading
 *         or parsing the file
 */
public String DocFileContentParser(String fileName) {
    FileInputStream in = null;
    try {
        in = new FileInputStream(fileName);
        POIFSFileSystem fs = new POIFSFileSystem(in);
        if (fileName.endsWith(".doc")) {
            HWPFDocument doc = new HWPFDocument(fs);
            WordExtractor we = new WordExtractor(doc);
            return we.getText();
        } else if (fileName.endsWith(".xls")) {
            ExcelExtractor ex = new ExcelExtractor(fs);
            ex.setFormulasNotResults(true);
            ex.setIncludeSheetNames(true);
            return ex.getText();
        } else if (fileName.endsWith(".ppt")) {
            PowerPointExtractor extractor = new PowerPointExtractor(fs);
            return extractor.getText();
        }
    } catch (Exception e) {
        // Deliberately best-effort: an unreadable file yields "" rather than
        // an error, but keep the cause in the log so failures can be traced.
        LOGGER.debug("document file cant be indexed", e);
    } finally {
        // Release the file handle on every path, including early returns.
        if (in != null) {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }
    return "";
}
use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project OpenClinica by OpenClinica.
the class SpreadsheetPreview method main.
/**
 * Ad-hoc driver: loads a hard-coded spreadsheet, builds the "sections" map
 * through {@code createItemsOrSectionMap} and walks its entries.
 *
 * <p>The loop currently produces no output because its logging statement is
 * commented out.
 *
 * @param args ignored
 * @throws IOException if the spreadsheet cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    // Simple3.xls , Cancer_History5.xls , Can3.xls
    FileInputStream in = new FileInputStream(new File("/Users/bruceperry/work/OpenClinica-Cancer-Demo-Study/Cancer_History5.xls"));
    try {
        POIFSFileSystem fs = new POIFSFileSystem(in);
        HSSFWorkbook wb = new HSSFWorkbook(fs);
        SpreadsheetPreview prev = new SpreadsheetPreview();
        // createSectionsMap createItemsMap
        Map map = prev.createItemsOrSectionMap(wb, "sections");
        for (Iterator iter = map.entrySet().iterator(); iter.hasNext(); ) {
            Map.Entry me = (Map.Entry) iter.next();
            Map mp = (Map) me.getValue();
            // logger.info(me.getKey() + ": " + me.getValue());
        }
    } finally {
        // Always release the file handle, even if parsing fails.
        in.close();
    }
}
use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project OpenClinica by OpenClinica.
the class SpreadsheetPreviewNw method main.
/**
 * Ad-hoc driver: loads a hard-coded spreadsheet, builds the map returned by
 * {@code createCrfMetaObject} and logs every key/value pair at debug level.
 *
 * @param args ignored
 * @throws IOException if the spreadsheet cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    // Simple3.xls , Cancer_History5.xls , Can3.xls
    FileInputStream in = new FileInputStream(new File("d:/23TestComma2.xls"));
    try {
        POIFSFileSystem fs = new POIFSFileSystem(in);
        HSSFWorkbook wb = new HSSFWorkbook(fs);
        SpreadsheetPreviewNw spnw = new SpreadsheetPreviewNw();
        // createSectionsMap createItemsMap
        Map map = spnw.createCrfMetaObject(wb);
        // Map map2 = spnw.createItemsOrSectionMap(wb,"items");
        for (Iterator iter = map.entrySet().iterator(); iter.hasNext(); ) {
            Map.Entry me = (Map.Entry) iter.next();
            Map mp = (Map) me.getValue();
            logger.debug(me.getKey() + ": " + me.getValue());
        }
    } finally {
        // Always release the file handle, even if parsing fails.
        in.close();
    }
}
use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project h2o-2 by h2oai.
the class XlsParser method streamParse.
/**
 * Parses a binary Excel stream by feeding its workbook records through the
 * HSSF event API, with this parser registered (behind a missing-record-aware
 * and a format-tracking listener) for all records.
 *
 * <p>The input stream is closed before returning, whether or not parsing
 * succeeded; a failure to close is ignored.
 *
 * @param is   stream containing the workbook
 * @param dout sink stored in {@code _dout} for the duration of the parse
 * @return the same {@code dout} instance that was passed in
 * @throws Exception whatever the POIFS / HSSF event processing raises
 */
@Override
public DataOut streamParse(final InputStream is, final DataOut dout) throws Exception {
    _dout = dout;
    _firstRow = true;
    try {
        _fs = new POIFSFileSystem(is);
        _formatListener = new FormatTrackingHSSFListener(new MissingRecordAwareHSSFListener(this));

        HSSFEventFactory eventFactory = new HSSFEventFactory();
        HSSFRequest allRecords = new HSSFRequest();
        allRecords.addListenerForAllRecords(_formatListener);
        eventFactory.processWorkbookEvents(allRecords, _fs);
    } finally {
        try {
            is.close();
        } catch (IOException ignored) {
            // Best-effort close; nothing to recover here.
        }
    }
    return dout;
}
use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project OpenRefine by OpenRefine.
the class ExcelImporter method parseOneFile.
/**
 * Imports the selected sheets of one Excel file into the project.
 *
 * <p>The workbook is opened as XSSF when the stream carries an OOXML header
 * and as HSSF (via POIFS) otherwise. If it cannot be opened, an
 * {@code ImportException} describing the failure is appended to
 * {@code exceptions} and the method returns without importing anything.
 * Each sheet index listed under the {@code "sheets"} option is then read row
 * by row and handed to {@code TabularImportingParserBase.readTable}.
 *
 * @param project     passed through to {@code readTable}
 * @param metadata    passed through to {@code readTable}
 * @param job         passed through to {@code readTable}
 * @param fileSource  source label; the sheet name is appended after a {@code #}
 * @param inputStream the Excel data; wrapped in a pushback stream when it does
 *                    not support mark/reset
 * @param limit       passed through to {@code readTable}
 * @param options     import options; the {@code "sheets"} int array is read here
 * @param exceptions  collector for failures encountered while opening the workbook
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, InputStream inputStream, int limit, JSONObject options, List<Exception> exceptions) {
    if (!inputStream.markSupported()) {
        inputStream = new PushbackInputStream(inputStream, 8);
    }

    Workbook workbook = null;
    try {
        if (POIXMLDocument.hasOOXMLHeader(inputStream)) {
            workbook = new XSSFWorkbook(inputStream);
        } else {
            workbook = new HSSFWorkbook(new POIFSFileSystem(inputStream));
        }
    } catch (IOException e) {
        exceptions.add(new ImportException(
                "Attempted to parse as an Excel file but failed. "
                        + "Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
                e));
        return;
    } catch (ArrayIndexOutOfBoundsException e) {
        exceptions.add(new ImportException(
                "Attempted to parse file as an Excel file but failed. "
                        + "This is probably caused by a corrupt excel file, or due to the file having previously been created or saved by a non-Microsoft application. "
                        + "Please try opening the file in Microsoft Excel and resaving it, then try re-uploading the file. "
                        + "See https://issues.apache.org/bugzilla/show_bug.cgi?id=48261 for further details",
                e));
        return;
    } catch (IllegalArgumentException e) {
        exceptions.add(new ImportException(
                "Attempted to parse as an Excel file but failed. "
                        + "Only Excel 97 and later formats are supported.",
                e));
        return;
    } catch (POIXMLException e) {
        exceptions.add(new ImportException(
                "Attempted to parse as an Excel file but failed. "
                        + "Invalid XML.",
                e));
        return;
    }

    int[] selectedSheets = JSONUtilities.getIntArray(options, "sheets");
    for (int selected : selectedSheets) {
        final Sheet sheet = workbook.getSheetAt(selected);
        final int lastRowIndex = sheet.getLastRowNum();

        // Reader that yields one list of cells per sheet row, then null once
        // the last row index has been passed.
        TableDataReader rowReader = new TableDataReader() {
            int rowCursor = 0;
            Map<String, Recon> recons = new HashMap<String, Recon>();

            @Override
            public List<Object> getNextRowOfCells() throws IOException {
                if (rowCursor > lastRowIndex) {
                    return null;
                }

                List<Object> rowCells = new ArrayList<Object>();
                org.apache.poi.ss.usermodel.Row poiRow = sheet.getRow(rowCursor++);
                if (poiRow == null) {
                    // A row the sheet does not return comes back as an empty list.
                    return rowCells;
                }

                short cellCount = poiRow.getLastCellNum();
                for (short col = 0; col < cellCount; col++) {
                    org.apache.poi.ss.usermodel.Cell poiCell = poiRow.getCell(col);
                    // Missing cells keep their position as a null entry.
                    Cell converted = (poiCell == null) ? null : extractCell(poiCell, recons);
                    rowCells.add(converted);
                }
                return rowCells;
            }
        };

        String tableSource = fileSource + "#" + sheet.getSheetName();
        TabularImportingParserBase.readTable(project, metadata, job, rowReader, tableSource, limit, options, exceptions);
    }
}
Aggregations