Use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project translationstudio8 by heartsome.
Class Xlsx2TmxHelper, method parseXlsxFileAndWriteTmxBody.
/**
 * Opens the given xlsx file read-only and passes the first sheet's stream,
 * together with the workbook's shared strings, to {@code parse}.
 *
 * <p>The progress monitor is started with a total derived from the file
 * length and is always marked done, even when parsing fails. The sheet
 * stream and the package are likewise released on every exit path.
 *
 * @param fileName  path of the xlsx file to read
 * @param tmxWriter writer handed to {@code parse}; also stored on this instance
 * @param monitor   progress monitor; also stored on this instance
 * @throws ParserConfigurationException propagated from parsing
 * @throws SAXException                 propagated from parsing
 * @throws IOException                  if the file or a sheet stream cannot be read or closed
 * @throws OpenXML4JException           if the package cannot be opened or read
 */
public void parseXlsxFileAndWriteTmxBody(String fileName, AbstractWriter tmxWriter, IProgressMonitor monitor) throws ParserConfigurationException, SAXException, IOException, OpenXML4JException {
    this.tmxWriter = tmxWriter;
    this.monitor = monitor;
    File file = new File(fileName);
    long length = file.length();
    monitor.beginTask("", countTotal(length));
    try {
        OPCPackage p = OPCPackage.open(fileName, PackageAccess.READ);
        try {
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(p);
            XSSFReader xssfReader = new XSSFReader(p);
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            // Only the first sheet is processed for now.
            if (iter.hasNext()) {
                InputStream stream = iter.next();
                try {
                    parse(stream, strings, tmxWriter);
                } finally {
                    // Release the sheet stream even when parsing fails.
                    stream.close();
                }
            }
        } finally {
            // Covers failures while reading shared strings or sheet data too.
            p.close();
        }
    } finally {
        // A task that was begun must be reported as done on every path.
        monitor.done();
    }
}
Use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project poi by apache.
Class HybridStreaming, method main.
/**
 * Opens {@code workbook.xlsx} with a workbook subclass that skips the normal
 * sheet parse for the sheet named {@code SHEET_TO_STREAM}, then builds a
 * read-only shared-strings table and a streaming sheet handler for it.
 *
 * <p>Both the workbook and the source stream are closed on every exit path.
 *
 * @param args unused
 * @throws IOException  if the file cannot be read or a resource cannot be closed
 * @throws SAXException if the shared strings cannot be parsed
 */
public static void main(String[] args) throws IOException, SAXException {
    InputStream sourceBytes = new FileInputStream("workbook.xlsx");
    try {
        XSSFWorkbook workbook = new XSSFWorkbook(sourceBytes) {
            /** Avoid DOM parse of large sheet */
            @Override
            public void parseSheet(java.util.Map<String, XSSFSheet> shIdMap, CTSheet ctSheet) {
                if (!SHEET_TO_STREAM.equals(ctSheet.getName())) {
                    super.parseSheet(shIdMap, ctSheet);
                }
            }
        };
        try {
            // Having avoided a DOM-based parse of the sheet, we can stream it instead.
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(workbook.getPackage());
            // The handler is only constructed here; this method does not attach it to a parser.
            new XSSFSheetXMLHandler(workbook.getStylesSource(), strings, createSheetContentsHandler(), false);
        } finally {
            workbook.close();
        }
    } finally {
        // Closed last, and also when the workbook itself fails to open.
        sourceBytes.close();
    }
}
Use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project tika by apache.
Class XSSFExcelExtractorDecorator, method buildXHTML.
/**
 * Streams every sheet of the extractor's package into the given XHTML handler.
 *
 * <p>For each sheet part not already visited, this writes a {@code div}
 * holding an {@code h1} with the sheet name and a {@code table} with the
 * sheet contents. It then writes the headers and footers collected while
 * processing the sheet, optionally the shape text, and finally the sheet's
 * hyperlinks.
 *
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage pkg = extractor.getPackage();
    ReadOnlySharedStringsTable sharedStrings;
    XSSFReader.SheetIterator sheets;
    StylesTable styles;
    try {
        XSSFReader reader = new XSSFReader(pkg);
        styles = reader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        sharedStrings = new ReadOnlySharedStringsTable(pkg);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is an OpenXML4JException, so one handler covers both.
        throw new XmlException(e);
    }

    // Temporary workaround for POI-61034: the iterator may yield the same part twice.
    // Remove once POI 3.17-beta1 is released.
    Set<String> visitedParts = new HashSet<>();
    while (sheets.hasNext()) {
        SheetTextAsHTML sheetText = new SheetTextAsHTML(xhtml);
        PackagePart part = null;
        try (InputStream sheetStream = sheets.next()) {
            part = sheets.getSheetPart();
            // add() returns false when the part name was already recorded.
            if (!visitedParts.add(part.getPartName().toString())) {
                continue;
            }
            addDrawingHyperLinks(part);
            sheetParts.add(part);
            CommentsTable sheetComments = sheets.getSheetComments();

            // Open the sheet container and emit the sheet name.
            xhtml.startElement("div");
            xhtml.element("h1", sheets.getSheetName());

            // Main sheet contents go into a table.
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetText, sheetComments, styles, sharedStrings, sheetStream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // Header and footer text is emitted after the sheet's table.
        for (String headerText : sheetText.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : sheetText.footers) {
            extractHeaderFooter(footerText, xhtml);
        }

        // Text held in shapes, only when the configuration asks for it.
        if (config.getIncludeShapeBasedContent()) {
            List<XSSFShape> sheetShapes = sheets.getShapes();
            processShapes(sheetShapes, xhtml);
        }

        // For now, sheet hyperlinks are dumped at the bottom of the sheet.
        // A possible improvement is a double pass over the input stream to
        // reunite hyperlinks with their cells/textboxes:
        //   step 1: extract hyperlink info from the bottom of the page
        //   step 2: process as now, but with cached hyperlink relationship info
        extractHyperLinks(part, xhtml);

        // Close the sheet container.
        xhtml.endElement("div");
    }
}
Use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project translationstudio8 by heartsome.
Class XlsxRowReader, method readRows.
/**
 * Opens {@code xlsxFile} read-only and passes the first sheet's stream,
 * together with the workbook's shared strings, to {@code readCells}.
 *
 * <p>The monitor is given 10 units of work: one is reported up front and the
 * remaining nine are delegated to a sub-monitor for the cell read. The
 * monitor is always marked done, and the sheet stream and package are always
 * released, even when opening or reading fails.
 *
 * @param monitor progress monitor for the whole read
 * @throws ParserConfigurationException propagated from reading cells
 * @throws SAXException                 propagated from reading cells
 * @throws IOException                  if the file or a sheet stream cannot be read or closed
 * @throws OpenXML4JException           if the package cannot be opened or read
 */
public void readRows(IProgressMonitor monitor) throws ParserConfigurationException, SAXException, IOException, OpenXML4JException {
    monitor.beginTask("", 10);
    try {
        monitor.worked(1);
        OPCPackage p = OPCPackage.open(xlsxFile, PackageAccess.READ);
        try {
            ReadOnlySharedStringsTable shareString = new ReadOnlySharedStringsTable(p);
            XSSFReader xssfReader = new XSSFReader(p);
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            // Only the first sheet is processed for now.
            if (iter.hasNext()) {
                InputStream stream = iter.next();
                try {
                    readCells(stream, shareString, new SubProgressMonitor(monitor, 9));
                } finally {
                    // Release the sheet stream even when reading fails.
                    stream.close();
                }
            }
        } finally {
            // Covers failures while reading shared strings or sheet data too.
            p.close();
        }
    } finally {
        // Runs even if opening or closing the package throws.
        monitor.done();
    }
}
Use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project tdi-studio-se by Talend.
Class ExcelReader, method call.
/**
 * Opens the workbook from {@code fileURL} when it is set, otherwise from the
 * input stream {@code is}. Selects the sheets whose names match the
 * configured {@code sheetNames} entries and runs each selected sheet through
 * a SAX parser wired to a {@code TalendXSSFSheetXMLHandler}.
 *
 * <p>Each entry of {@code sheetNames} is treated as a regular expression when
 * the entry of {@code asRegexs} at the same index is true, and as an exact
 * name otherwise. Whatever the outcome, the package is reverted and
 * {@code cache.notifyErrorOccurred()} is invoked before returning.
 *
 * @return always {@code null}
 * @throws Exception if opening or parsing fails, or if no sheet matches
 */
public Object call() throws Exception {
    OPCPackage pkg = null;
    try {
        pkg = (fileURL != null) ? OPCPackage.open(fileURL) : PackageHelper.open(is);

        XSSFReader reader = new XSSFReader(pkg);
        StylesTable styles = reader.getStylesTable();
        ReadOnlySharedStringsTable sharedStrings = new ReadOnlySharedStringsTable(pkg);
        sheetContentsHandler = new DefaultTalendSheetContentsHandler(cache);
        DataFormatter formatter = new DataFormatter();
        XMLReader parser = XMLReaderFactory.createXMLReader();
        // Last argument is "formulasNotResults": false here.
        ContentHandler handler = new TalendXSSFSheetXMLHandler(styles, sharedStrings, sheetContentsHandler, formatter, false);
        parser.setContentHandler(handler);

        // Collect the streams of the selected sheets, keeping workbook order.
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        LinkedHashMap<String, InputStream> selectedSheets = new LinkedHashMap<String, InputStream>();
        while (sheets.hasNext()) {
            InputStream sheetStream = sheets.next();
            String sheetName = sheets.getSheetName();

            boolean selected = false;
            for (int i = 0; i < sheetNames.size() && !selected; i++) {
                selected = asRegexs.get(i)
                        ? sheetName.matches(sheetNames.get(i))
                        : sheetName.equals(sheetNames.get(i));
            }

            if (selected) {
                selectedSheets.put(sheetName, sheetStream);
            } else {
                // Sheets that are not selected are released immediately.
                sheetStream.close();
            }
        }

        if (selectedSheets.isEmpty()) {
            throw new RuntimeException("No match sheets");
        }

        for (InputStream sheetStream : selectedSheets.values()) {
            try {
                InputSource source = new InputSource(sheetStream);
                source.setEncoding(charset);
                parser.parse(source);
            } finally {
                sheetStream.close();
            }
        }
    } finally {
        if (pkg != null) {
            pkg.revert();
        }
        cache.notifyErrorOccurred();
    }
    return null;
}
Aggregations