Example use of org.apache.poi.openxml4j.opc.OPCPackage in the project Gargoyle by callakrsos.
From the class DocxFileParser, method docxFileContentParser.
/**
 * Extracts the text of an OOXML document, choosing the extractor from the
 * file name suffix ({@code .docx}, {@code .pptx} or {@code .xlsx}).
 *
 * <p>For {@code .xlsx} files the extractor is configured with
 * {@code setFormulasNotResults(true)} and {@code setIncludeSheetNames(true)}.
 *
 * @param fileName path of the file to read; the suffix match is case-sensitive
 * @return the extracted text, or an empty string when the suffix is not one of
 *         the three supported ones or when any exception occurs (the exception
 *         message is printed to standard output)
 */
public String docxFileContentParser(String fileName) {
    // try-with-resources guarantees the file handle is released on every path.
    try (FileInputStream fs = new FileInputStream(new File(fileName))) {
        OPCPackage d = OPCPackage.open(fs);
        try {
            if (fileName.endsWith(".docx")) {
                XWPFWordExtractor xw = new XWPFWordExtractor(d);
                return xw.getText();
            } else if (fileName.endsWith(".pptx")) {
                XSLFPowerPointExtractor xp = new XSLFPowerPointExtractor(d);
                return xp.getText();
            } else if (fileName.endsWith(".xlsx")) {
                XSSFExcelExtractor xe = new XSSFExcelExtractor(d);
                xe.setFormulasNotResults(true);
                xe.setIncludeSheetNames(true);
                return xe.getText();
            }
        } finally {
            // The package is only read here, so release it without saving.
            d.revert();
        }
    } catch (Exception e) {
        System.out.println("# DocxFileParser Error :" + e.getMessage());
    }
    return "";
}
Example use of org.apache.poi.openxml4j.opc.OPCPackage in the project translationstudio8 by heartsome.
From the class XlsxRowReader, method readRows.
/**
 * Opens {@code xlsxFile} read-only and passes the first sheet's stream,
 * together with the shared-strings table, to {@code readCells}.
 *
 * <p>Only the first sheet returned by the reader is processed. The package,
 * the sheet stream and the progress monitor are released even when opening or
 * reading fails.
 *
 * @param monitor progress monitor; a task of 10 units is started, 1 unit is
 *                reported up front and 9 are delegated to a sub-monitor
 * @throws ParserConfigurationException propagated from the calls made here
 * @throws SAXException propagated from the calls made here
 * @throws IOException propagated from the calls made here, including closing the sheet stream
 * @throws OpenXML4JException propagated from the calls made here
 */
public void readRows(IProgressMonitor monitor) throws ParserConfigurationException, SAXException, IOException, OpenXML4JException {
    monitor.beginTask("", 10);
    OPCPackage p = null;
    try {
        monitor.worked(1);
        p = OPCPackage.open(xlsxFile, PackageAccess.READ);
        ReadOnlySharedStringsTable shareString = new ReadOnlySharedStringsTable(p);
        XSSFReader xssfReader = new XSSFReader(p);
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        // Currently only the first sheet is processed.
        if (iter.hasNext()) {
            InputStream stream = iter.next();
            try {
                readCells(stream, shareString, new SubProgressMonitor(monitor, 9));
            } finally {
                stream.close();
            }
        }
    } finally {
        if (p != null) {
            // The package was opened with PackageAccess.READ, so release it with
            // revert(); close() is meant for saving writable packages.
            p.revert();
        }
        monitor.done();
    }
}
Example use of org.apache.poi.openxml4j.opc.OPCPackage in the project cubrid-manager by CUBRID.
From the class XlsxRowNumberHandler, method process.
/**
 * The thread method: parses every sheet of the workbook at {@code fileName}
 * with this object as the SAX content handler and records, per sheet, how
 * much {@code numberAllRow} grew while that sheet was parsed.
 *
 * <p>Resets {@code numberAllRow}, {@code firstRowLst} and
 * {@code itemsNumberOfSheets} before starting, and sets {@code isEnd} to
 * {@code true} when finished, whether or not parsing succeeded.
 *
 * @throws RuntimeException wrapping any exception raised while opening or parsing the file
 */
public void process() {
    numberAllRow = 0;
    firstRowLst.clear();
    InputStream stream = null;
    OPCPackage pkg = null;
    itemsNumberOfSheets = new ArrayList<Integer>();
    try {
        stream = new BufferedInputStream(new FileInputStream(fileName));
        pkg = OPCPackage.open(stream);
        XSSFReader reader = new XSSFReader(pkg);
        sharedStringTable = reader.getSharedStringsTable();
        XMLReader xmlReader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); //$NON-NLS-1$
        xmlReader.setContentHandler(this);
        Iterator<InputStream> sheets = reader.getSheetsData();
        // Value of numberAllRow after the previous sheet; equal to the sum of
        // the entries already stored in itemsNumberOfSheets.
        int rowsBeforeSheet = 0;
        while (sheets.hasNext()) {
            InputStream sheet = sheets.next();
            InputSource sheetSource = new InputSource(sheet);
            try {
                xmlReader.parse(sheetSource);
            } finally {
                try {
                    if (sheet != null) {
                        sheet.close();
                    }
                } catch (Exception e) {
                    LOGGER.error("", e);
                }
            }
            itemsNumberOfSheets.add(numberAllRow - rowsBeforeSheet);
            rowsBeforeSheet = numberAllRow;
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        isEnd = true;
        if (pkg != null) {
            // Release the package without saving; it is only read here.
            pkg.revert();
        }
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ex) {
                LOGGER.error(ex.getMessage(), ex);
            }
        }
    }
}
Example use of org.apache.poi.openxml4j.opc.OPCPackage in the project tdi-studio-se by Talend.
From the class ExcelReader, method call.
/**
 * Opens the workbook (from {@code fileURL} when it is set, otherwise from the
 * {@code is} stream), selects the sheets whose names match the configured
 * {@code sheetNames}, and SAX-parses each selected sheet into
 * {@code sheetContentsHandler}.
 *
 * <p>Entry {@code i} of {@code sheetNames} is treated as a regular expression
 * when {@code asRegexs.get(i)} is true and as an exact name otherwise. Sheets
 * are parsed in the order the workbook iterator returns them.
 *
 * <p>On every exit path the sheet streams are closed, the package is reverted
 * and {@code cache.notifyErrorOccurred()} is invoked.
 *
 * @return always {@code null}
 * @throws RuntimeException with message {@code "No match sheets"} when no sheet matches
 * @throws Exception any failure raised while opening or parsing the workbook
 */
public Object call() throws Exception {
    OPCPackage pkg = null;
    // Declared outside the try so the finally block can close streams that a
    // failed run never got round to parsing.
    LinkedHashMap<String, InputStream> issmap = new LinkedHashMap<String, InputStream>();
    try {
        if (fileURL != null) {
            pkg = OPCPackage.open(fileURL);
        } else {
            pkg = PackageHelper.open(is);
        }
        XSSFReader r = new XSSFReader(pkg);
        StylesTable styles = r.getStylesTable();
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
        sheetContentsHandler = new DefaultTalendSheetContentsHandler(cache);
        DataFormatter formatter = new DataFormatter();
        boolean formulasNotResults = false;
        XMLReader parser = XMLReaderFactory.createXMLReader();
        ContentHandler handler = new TalendXSSFSheetXMLHandler(styles, strings, sheetContentsHandler, formatter, formulasNotResults);
        parser.setContentHandler(handler);
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) r.getSheetsData();
        while (sheets.hasNext()) {
            InputStream sheet = sheets.next();
            String sheetName = sheets.getSheetName();
            boolean match = false;
            for (int i = 0; i < sheetNames.size() && !match; i++) {
                String wanted = sheetNames.get(i);
                match = asRegexs.get(i) ? sheetName.matches(wanted) : sheetName.equals(wanted);
            }
            if (match) {
                issmap.put(sheetName, sheet);
            } else {
                sheet.close();
            }
        }
        if (issmap.size() < 1) {
            throw new RuntimeException("No match sheets");
        }
        // Named sheetStream so the loop variable does not shadow the "is" field used above.
        for (InputStream sheetStream : issmap.values()) {
            try {
                InputSource sheetSource = new InputSource(sheetStream);
                sheetSource.setEncoding(charset);
                parser.parse(sheetSource);
            } finally {
                sheetStream.close();
            }
        }
    } finally {
        // Best-effort close of matched streams left open by an exception;
        // streams that were already closed above are simply closed again.
        for (InputStream leftover : issmap.values()) {
            try {
                leftover.close();
            } catch (Exception ignored) {
                // Nothing useful can be done here, and a close failure must not
                // mask the exception that is already propagating.
            }
        }
        if (pkg != null) {
            pkg.revert();
        }
        cache.notifyErrorOccurred();
    }
    return null;
}
Example use of org.apache.poi.openxml4j.opc.OPCPackage in the project poi by apache.
From the class XSSFBFileHandler, method handleFile.
/**
 * Buffers the whole input in memory, then opens it as an OPC package twice:
 * once for {@code testOne} (that package is closed afterwards) and once for
 * {@code testNotHandledByWorkbookException}.
 *
 * @param stream the file content to test
 * @param path   not used by this method
 * @throws Exception any failure raised while copying, opening or testing
 */
@Override
public void handleFile(InputStream stream, String path) throws Exception {
    // Buffer the content so that two independent packages can be opened from it.
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    IOUtils.copy(stream, buffer);
    final byte[] content = buffer.toByteArray();

    final OPCPackage firstPackage = OPCPackage.open(new ByteArrayInputStream(content));
    try {
        testOne(firstPackage);
    } finally {
        firstPackage.close();
    }

    final OPCPackage secondPackage = OPCPackage.open(new ByteArrayInputStream(content));
    testNotHandledByWorkbookException(secondPackage);
}
Aggregations