Search in sources :

Example 1 with ReadOnlySharedStringsTable

use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project translationstudio8 by heartsome.

the class Xlsx2TmxHelper method parseXlsxFileAndWriteTmxBody.

/**
 * Opens the given XLSX file read-only, streams its first worksheet through
 * {@code parse(...)} and reports progress on the supplied monitor.
 *
 * <p>The writer and monitor are also stored in the corresponding instance fields
 * before any work starts.
 *
 * @param fileName  path of the XLSX file to read
 * @param tmxWriter writer that is handed to {@code parse(...)} together with the sheet stream
 * @param monitor   progress monitor; {@code beginTask} is called up front and {@code done()}
 *                  is always called before this method returns or throws
 * @throws ParserConfigurationException propagated from the parsing step
 * @throws SAXException                 propagated from the shared-strings table or the parsing step
 * @throws IOException                  if the file or one of its parts cannot be read or closed
 * @throws OpenXML4JException           if the package cannot be opened or navigated
 */
public void parseXlsxFileAndWriteTmxBody(String fileName, AbstractWriter tmxWriter, IProgressMonitor monitor) throws ParserConfigurationException, SAXException, IOException, OpenXML4JException {
    this.tmxWriter = tmxWriter;
    this.monitor = monitor;
    File file = new File(fileName);
    long length = file.length();
    monitor.beginTask("", countTotal(length));
    try {
        OPCPackage p = OPCPackage.open(fileName, PackageAccess.READ);
        try {
            // Built inside the try so the package is released even if these constructors fail.
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(p);
            XSSFReader xssfReader = new XSSFReader(p);
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            // Currently only the first sheet is processed.
            if (iter.hasNext()) {
                InputStream stream = iter.next();
                try {
                    parse(stream, strings, tmxWriter);
                } finally {
                    // Close the sheet stream even when parsing fails.
                    stream.close();
                }
            }
        } finally {
            p.close();
        }
    } finally {
        // The monitor must be completed on both the success and the failure path.
        monitor.done();
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputStream(java.io.InputStream) File(java.io.File) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader)

Example 2 with ReadOnlySharedStringsTable

use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project poi by apache.

the class HybridStreaming method main.

/**
 * Demonstrates a hybrid load: the workbook in {@code workbook.xlsx} is opened through the
 * usermodel API, except that the sheet named {@code SHEET_TO_STREAM} is skipped during
 * parsing so that it can be handled by the streaming (SAX) API instead.
 *
 * <p>The workbook and the source stream are closed even if setting up the streaming
 * handler fails.
 *
 * @param args ignored
 * @throws IOException  if the file cannot be opened, read or closed
 * @throws SAXException if the shared-strings table cannot be parsed
 */
public static void main(String[] args) throws IOException, SAXException {
    InputStream sourceBytes = new FileInputStream("workbook.xlsx");
    try {
        XSSFWorkbook workbook = new XSSFWorkbook(sourceBytes) {

            /** Avoid DOM parse of large sheet */
            @Override
            public void parseSheet(java.util.Map<String, XSSFSheet> shIdMap, CTSheet ctSheet) {
                if (!SHEET_TO_STREAM.equals(ctSheet.getName())) {
                    super.parseSheet(shIdMap, ctSheet);
                }
            }
        };
        try {
            // Having avoided a DOM-based parse of the sheet, we can stream it instead.
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(workbook.getPackage());
            // The handler is only constructed here; this example does not attach it to a parser.
            new XSSFSheetXMLHandler(workbook.getStylesSource(), strings, createSheetContentsHandler(), false);
        } finally {
            workbook.close();
        }
    } finally {
        sourceBytes.close();
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) CTSheet(org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) XSSFWorkbook(org.apache.poi.xssf.usermodel.XSSFWorkbook) XSSFSheetXMLHandler(org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler)

Example 3 with ReadOnlySharedStringsTable

use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project tika by apache.

the class XSSFExcelExtractorDecorator method buildXHTML.

/**
 * Writes every worksheet of the extractor's package to the given XHTML handler.
 *
 * <p>For each distinct sheet part this emits a {@code div} containing the sheet name as
 * an {@code h1}, a {@code table}/{@code tbody} with the streamed sheet contents, the
 * collected headers and footers, optionally the shape text, and finally the sheet's
 * hyperlinks.
 *
 * @param xhtml handler receiving the generated XHTML events
 * @throws SAXException if the handler or the sheet processing reports a SAX error
 * @throws XmlException if the package cannot be read (wraps any {@code OpenXML4JException})
 * @throws IOException  if a sheet stream cannot be read or closed
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    ReadOnlySharedStringsTable sharedStrings;
    XSSFReader.SheetIterator sheets;
    StylesTable styleTable;
    try {
        XSSFReader reader = new XSSFReader(container);
        styleTable = reader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        sharedStrings = new ReadOnlySharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // Also covers InvalidFormatException, which is handled identically.
        throw new XmlException(e);
    }

    // Temporary workaround for POI-61034: the iterator may yield the same part twice.
    // Remove once POI 3.17-beta1 is released.
    Set<String> visitedParts = new HashSet<>();
    while (sheets.hasNext()) {
        SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
        PackagePart currentPart = null;
        try (InputStream sheetStream = sheets.next()) {
            currentPart = sheets.getSheetPart();
            // add() returns false when the part name was already recorded.
            if (!visitedParts.add(currentPart.getPartName().toString())) {
                continue;
            }
            addDrawingHyperLinks(currentPart);
            sheetParts.add(currentPart);
            CommentsTable sheetComments = sheets.getSheetComments();

            // Open the sheet container and print the sheet name.
            xhtml.startElement("div");
            xhtml.element("h1", sheets.getSheetName());

            // Stream the cell contents into a table body.
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, sheetComments, styleTable, sharedStrings, sheetStream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // Headers, then footers, collected while processing are emitted after the table.
        for (String headerText : sheetExtractor.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : sheetExtractor.footers) {
            extractHeaderFooter(footerText, xhtml);
        }

        // Text held in shapes is included only when the configuration asks for it.
        if (config.getIncludeShapeBasedContent()) {
            List<XSSFShape> sheetShapes = sheets.getShapes();
            processShapes(sheetShapes, xhtml);
        }

        // For now the sheet hyperlinks are dumped at the bottom of the page.
        // Possible improvement: a double pass over the input stream to reunite
        // hyperlinks with their cells/textboxes:
        //   step 1: extract hyperlink info from the bottom of the page
        //   step 2: process as now, but with cached hyperlink relationship info
        extractHyperLinks(currentPart, xhtml);

        // Close the sheet container.
        xhtml.endElement("div");
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputStream(java.io.InputStream) StylesTable(org.apache.poi.xssf.model.StylesTable) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) CommentsTable(org.apache.poi.xssf.model.CommentsTable) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader) HashSet(java.util.HashSet)

Example 4 with ReadOnlySharedStringsTable

use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project translationstudio8 by heartsome.

the class XlsxRowReader method readRows.

/**
 * Opens {@code xlsxFile} read-only and passes its first worksheet to
 * {@code readCells(...)}, reporting progress on the supplied monitor.
 *
 * <p>The task is started with 10 work units: one is reported immediately and the
 * remaining nine are delegated to a sub-monitor for the cell reading step.
 *
 * @param monitor progress monitor; {@code done()} is always called before this method
 *                returns or throws
 * @throws ParserConfigurationException propagated from the cell reading step
 * @throws SAXException                 propagated from the shared-strings table or the cell reading step
 * @throws IOException                  if the file or one of its parts cannot be read or closed
 * @throws OpenXML4JException           if the package cannot be opened or navigated
 */
public void readRows(IProgressMonitor monitor) throws ParserConfigurationException, SAXException, IOException, OpenXML4JException {
    monitor.beginTask("", 10);
    try {
        monitor.worked(1);
        OPCPackage p = OPCPackage.open(xlsxFile, PackageAccess.READ);
        try {
            // Built inside the try so the package is released even if these constructors fail.
            ReadOnlySharedStringsTable shareString = new ReadOnlySharedStringsTable(p);
            XSSFReader xssfReader = new XSSFReader(p);
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            // Currently only the first sheet is processed.
            if (iter.hasNext()) {
                InputStream stream = iter.next();
                try {
                    readCells(stream, shareString, new SubProgressMonitor(monitor, 9));
                } finally {
                    // Close the sheet stream even when reading fails.
                    stream.close();
                }
            }
        } finally {
            p.close();
        }
    } finally {
        // Complete the monitor even if the package could not be opened or closed.
        monitor.done();
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputStream(java.io.InputStream) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) SubProgressMonitor(org.eclipse.core.runtime.SubProgressMonitor) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader)

Example 5 with ReadOnlySharedStringsTable

use of org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable in project tdi-studio-se by Talend.

the class ExcelReader method call.

/**
 * Opens the workbook (from {@code fileURL} when it is set, otherwise from the stream
 * field {@code is}), selects the sheets whose names match the configured
 * {@code sheetNames}, and SAX-parses each selected sheet in workbook order.
 *
 * <p>An entry of {@code sheetNames} is treated as a regular expression when the
 * {@code asRegexs} entry at the same index is true, and as a literal name otherwise.
 * Sheets that match no entry are closed immediately.
 *
 * <p>Whether or not parsing succeeds, the package is reverted and
 * {@code cache.notifyErrorOccurred()} is invoked before this method exits.
 *
 * @return always {@code null}
 * @throws RuntimeException if no sheet matches any configured name
 * @throws Exception        any failure raised while opening or parsing the workbook
 */
public Object call() throws Exception {
    OPCPackage pkg = null;
    try {
        pkg = (fileURL != null) ? OPCPackage.open(fileURL) : PackageHelper.open(is);

        XSSFReader reader = new XSSFReader(pkg);
        StylesTable styles = reader.getStylesTable();
        ReadOnlySharedStringsTable sharedStrings = new ReadOnlySharedStringsTable(pkg);
        sheetContentsHandler = new DefaultTalendSheetContentsHandler(cache);
        DataFormatter formatter = new DataFormatter();
        boolean formulasNotResults = false;
        XMLReader parser = XMLReaderFactory.createXMLReader();
        ContentHandler handler = new TalendXSSFSheetXMLHandler(styles, sharedStrings, sheetContentsHandler, formatter, formulasNotResults);
        parser.setContentHandler(handler);

        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        // Insertion-ordered so the selected sheets are parsed in workbook order.
        LinkedHashMap<String, InputStream> selectedSheets = new LinkedHashMap<String, InputStream>();
        while (sheets.hasNext()) {
            InputStream sheet = sheets.next();
            String sheetName = sheets.getSheetName();

            // Stop at the first configured name that accepts this sheet.
            boolean matched = false;
            for (int i = 0; i < sheetNames.size(); i++) {
                matched = asRegexs.get(i)
                        ? sheetName.matches(sheetNames.get(i))
                        : sheetName.equals(sheetNames.get(i));
                if (matched) {
                    break;
                }
            }

            if (matched) {
                selectedSheets.put(sheetName, sheet);
            } else {
                // Not requested: release the stream right away.
                sheet.close();
            }
        }

        if (selectedSheets.isEmpty()) {
            throw new RuntimeException("No match sheets");
        }

        for (InputStream sheetStream : selectedSheets.values()) {
            try {
                InputSource sheetSource = new InputSource(sheetStream);
                sheetSource.setEncoding(charset);
                parser.parse(sheetSource);
            } finally {
                sheetStream.close();
            }
        }
    } finally {
        if (pkg != null) {
            // NOTE(review): revert() presumably closes the package without saving — confirm against the POI docs.
            pkg.revert();
        }
        // Runs on success and failure alike, despite the method's name.
        cache.notifyErrorOccurred();
    }
    return null;
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputSource(org.xml.sax.InputSource) InputStream(java.io.InputStream) StylesTable(org.apache.poi.xssf.model.StylesTable) ContentHandler(org.xml.sax.ContentHandler) LinkedHashMap(java.util.LinkedHashMap) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XMLReader(org.xml.sax.XMLReader) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader) DataFormatter(org.apache.poi.ss.usermodel.DataFormatter)

Aggregations

InputStream (java.io.InputStream)6 ReadOnlySharedStringsTable (org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable)6 XSSFReader (org.apache.poi.xssf.eventusermodel.XSSFReader)5 OPCPackage (org.apache.poi.openxml4j.opc.OPCPackage)4 StylesTable (org.apache.poi.xssf.model.StylesTable)3 OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)2 CommentsTable (org.apache.poi.xssf.model.CommentsTable)2 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 HashSet (java.util.HashSet)1 LinkedHashMap (java.util.LinkedHashMap)1 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)1 PackagePart (org.apache.poi.openxml4j.opc.PackagePart)1 DataFormatter (org.apache.poi.ss.usermodel.DataFormatter)1 XSSFSheetXMLHandler (org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler)1 XSSFShape (org.apache.poi.xssf.usermodel.XSSFShape)1 XSSFWorkbook (org.apache.poi.xssf.usermodel.XSSFWorkbook)1 XmlException (org.apache.xmlbeans.XmlException)1 SubProgressMonitor (org.eclipse.core.runtime.SubProgressMonitor)1