Search in sources :

Example 6 with XSSFReader

use of org.apache.poi.xssf.eventusermodel.XSSFReader in project tika by apache.

the class XSSFExcelExtractorDecorator method buildXHTML.

/**
 * Writes every worksheet of the wrapped package to the given handler as XHTML.
 * Each sheet becomes a {@code div} holding an {@code h1} with the sheet name, a
 * table with the cell contents, then headers, footers, optional shape text and
 * finally the sheet's hyperlinks.
 *
 * @param xhtml handler receiving the generated XHTML events
 * @throws SAXException if emitting XHTML fails
 * @throws XmlException if the package cannot be opened by the streaming reader
 * @throws IOException if reading a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFReader xssfReader;
    StylesTable styles;
    XSSFReader.SheetIterator sheets;
    ReadOnlySharedStringsTable strings;
    try {
        xssfReader = new XSSFReader(container);
        styles = xssfReader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        strings = new ReadOnlySharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is an OpenXML4JException, so one handler covers both
        throw new XmlException(e);
    }

    // Temporary workaround for POI-61034: the iterator may hand out the same
    // part more than once. Remove once POI 3.17-beta1 is released.
    Set<String> visitedPartNames = new HashSet<>();

    while (sheets.hasNext()) {
        SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
        PackagePart sheetPart = null;
        try (InputStream sheetStream = sheets.next()) {
            sheetPart = sheets.getSheetPart();
            // Set.add returns false when the name was already recorded
            if (!visitedPartNames.add(sheetPart.getPartName().toString())) {
                continue;
            }
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            CommentsTable comments = sheets.getSheetComments();

            // Open the sheet container and announce the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", sheets.getSheetName());

            // Main sheet contents go into a table body
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, sheetStream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // Headers and footers are read from the sheet extractor after the sheet
        // has been processed, so they are emitted after the table contents.
        for (String headerText : sheetExtractor.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : sheetExtractor.footers) {
            extractHeaderFooter(footerText, xhtml);
        }

        // Text held in shapes is only emitted when the configuration asks for it
        if (config.getIncludeShapeBasedContent()) {
            processShapes(sheets.getShapes(), xhtml);
        }

        // For now the sheet hyperlinks are dumped at the bottom of the page.
        // Consider a double pass over the input stream to reunite hyperlinks
        // with their cells/textboxes:
        //   step 1: extract hyperlink info from the bottom of the page
        //   step 2: process as now, but with cached hyperlink relationship info
        extractHyperLinks(sheetPart, xhtml);

        // Sheet finished
        xhtml.endElement("div");
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputStream(java.io.InputStream) StylesTable(org.apache.poi.xssf.model.StylesTable) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) CommentsTable(org.apache.poi.xssf.model.CommentsTable) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader) HashSet(java.util.HashSet)

Example 7 with XSSFReader

use of org.apache.poi.xssf.eventusermodel.XSSFReader in project translationstudio8 by heartsome.

the class XlsxRowReader method readRows.

/**
 * Reads the cells of the first sheet of {@code xlsxFile} via {@code readCells},
 * reporting progress on the supplied monitor.
 *
 * <p>The package is opened read-only. The package, the sheet stream and the
 * monitor are released even when reading fails part-way through.
 *
 * @param monitor progress monitor; {@code done()} is always called before returning
 * @throws ParserConfigurationException declared for {@code readCells}
 * @throws SAXException if the shared strings or the sheet XML cannot be parsed
 * @throws IOException if the file or one of its parts cannot be read or closed
 * @throws OpenXML4JException if the file is not a valid OOXML package
 */
public void readRows(IProgressMonitor monitor) throws ParserConfigurationException, SAXException, IOException, OpenXML4JException {
    monitor.beginTask("", 10);
    try {
        monitor.worked(1);
        OPCPackage p = OPCPackage.open(xlsxFile, PackageAccess.READ);
        try {
            // These constructors can throw; they must run inside the try so the
            // package is still closed on failure.
            ReadOnlySharedStringsTable shareString = new ReadOnlySharedStringsTable(p);
            XSSFReader xssfReader = new XSSFReader(p);
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            // Only the first sheet is processed for now
            if (iter.hasNext()) {
                InputStream stream = iter.next();
                try {
                    readCells(stream, shareString, new SubProgressMonitor(monitor, 9));
                } finally {
                    stream.close();
                }
            }
        } finally {
            p.close();
        }
    } finally {
        monitor.done();
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputStream(java.io.InputStream) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) SubProgressMonitor(org.eclipse.core.runtime.SubProgressMonitor) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader)

Example 8 with XSSFReader

use of org.apache.poi.xssf.eventusermodel.XSSFReader in project cubrid-manager by CUBRID.

the class XlsxRowNumberHandler method process.

/**
 * The thread method: parses every sheet of the workbook at {@code fileName}
 * with this object as the SAX content handler and records, per sheet, how much
 * {@code numberAllRow} grew while that sheet was parsed.
 *
 * <p>{@code isEnd} is set to {@code true} when the method finishes, whether it
 * succeeded or failed. Any failure is rethrown wrapped in a
 * {@link RuntimeException}.
 */
public void process() {
    numberAllRow = 0;
    firstRowLst.clear();
    InputStream stream = null;
    OPCPackage pkg = null;
    itemsNumberOfSheets = new ArrayList<Integer>();
    try {
        stream = new BufferedInputStream(new FileInputStream(fileName));
        pkg = OPCPackage.open(stream);
        XSSFReader reader = new XSSFReader(pkg);
        sharedStringTable = reader.getSharedStringsTable();
        //$NON-NLS-1$
        XMLReader xmlReader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        xmlReader.setContentHandler(this);
        Iterator<InputStream> sheets = reader.getSheetsData();
        // Value of numberAllRow after the previous sheet; the difference to the
        // current value is the contribution of the sheet just parsed.
        int rowsBeforeSheet = 0;
        while (sheets.hasNext()) {
            InputStream sheet = sheets.next();
            InputSource sheetSource = new InputSource(sheet);
            try {
                xmlReader.parse(sheetSource);
            } finally {
                try {
                    if (sheet != null) {
                        sheet.close();
                    }
                } catch (Exception e) {
                    LOGGER.error("", e);
                }
            }
            int rowsAfterSheet = numberAllRow;
            itemsNumberOfSheets.add(rowsAfterSheet - rowsBeforeSheet);
            rowsBeforeSheet = rowsAfterSheet;
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        isEnd = true;
        try {
            if (pkg != null) {
                // Nothing was modified: discard the package without saving
                pkg.revert();
            }
        } finally {
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException ex) {
                    LOGGER.error(ex.getMessage(), ex);
                }
            }
        }
    }
}
Also used : InputSource(org.xml.sax.InputSource) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) SAXException(org.xml.sax.SAXException) BufferedInputStream(java.io.BufferedInputStream) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XMLReader(org.xml.sax.XMLReader) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader)

Example 9 with XSSFReader

use of org.apache.poi.xssf.eventusermodel.XSSFReader in project tdi-studio-se by Talend.

the class ExcelReader method call.

/**
 * Opens the workbook (from {@code fileURL} when set, otherwise from the input
 * stream field {@code is}), selects the sheets whose names match
 * {@code sheetNames} (as regex or literal, per {@code asRegexs}) and parses each
 * selected sheet with a {@code TalendXSSFSheetXMLHandler} that feeds
 * {@code sheetContentsHandler}.
 *
 * <p>All sheet streams are closed and the package is reverted before returning,
 * also when matching or parsing fails.
 *
 * @return always {@code null}
 * @throws Exception if the package cannot be opened, no sheet matches, or a
 *         sheet cannot be parsed
 */
public Object call() throws Exception {
    OPCPackage pkg = null;
    // Matched sheets in workbook order; declared outside the try so the finally
    // block can close whatever was collected.
    LinkedHashMap<String, InputStream> issmap = new LinkedHashMap<String, InputStream>();
    try {
        if (fileURL != null) {
            pkg = OPCPackage.open(fileURL);
        } else {
            pkg = PackageHelper.open(is);
        }
        XSSFReader r = new XSSFReader(pkg);
        StylesTable styles = r.getStylesTable();
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
        sheetContentsHandler = new DefaultTalendSheetContentsHandler(cache);
        DataFormatter formatter = new DataFormatter();
        boolean formulasNotResults = false;
        XMLReader parser = XMLReaderFactory.createXMLReader();
        ContentHandler handler = new TalendXSSFSheetXMLHandler(styles, strings, sheetContentsHandler, formatter, formulasNotResults);
        parser.setContentHandler(handler);
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) r.getSheetsData();
        while (sheets.hasNext()) {
            InputStream sheet = sheets.next();
            boolean match = false;
            try {
                String sheetName = sheets.getSheetName();
                for (int i = 0; i < sheetNames.size(); i++) {
                    if ((asRegexs.get(i) && sheetName.matches(sheetNames.get(i))) || (!asRegexs.get(i) && sheetName.equals(sheetNames.get(i)))) {
                        issmap.put(sheetName, sheet);
                        match = true;
                        break;
                    }
                }
            } finally {
                // Also runs when name matching throws (e.g. an invalid regex),
                // so an unselected stream is never left open.
                if (!match) {
                    sheet.close();
                }
            }
        }
        if (issmap.size() < 1) {
            throw new RuntimeException("No match sheets");
        }
        for (InputStream sheetStream : issmap.values()) {
            try {
                InputSource sheetSource = new InputSource(sheetStream);
                sheetSource.setEncoding(charset);
                parser.parse(sheetSource);
            } finally {
                sheetStream.close();
            }
        }
    } finally {
        // Close every collected stream, including those never reached because an
        // earlier sheet failed. Closing an already closed stream has no effect.
        for (InputStream pending : issmap.values()) {
            try {
                pending.close();
            } catch (java.io.IOException ignored) {
                // best-effort cleanup; the primary outcome must not be masked
            }
        }
        if (pkg != null) {
            pkg.revert();
        }
        // NOTE(review): invoked on success as well as failure; presumably this
        // tells the cache consumer that reading has ended - confirm.
        cache.notifyErrorOccurred();
    }
    return null;
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputSource(org.xml.sax.InputSource) InputStream(java.io.InputStream) StylesTable(org.apache.poi.xssf.model.StylesTable) ContentHandler(org.xml.sax.ContentHandler) LinkedHashMap(java.util.LinkedHashMap) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XMLReader(org.xml.sax.XMLReader) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader) DataFormatter(org.apache.poi.ss.usermodel.DataFormatter)

Example 10 with XSSFReader

use of org.apache.poi.xssf.eventusermodel.XSSFReader in project poi by apache.

the class XSSFFileHandler method checkXSSFReader.

/**
 * Smoke-tests the streaming reader on the given package: requests the shared
 * strings, styles, themes, workbook and sheet data and closes every stream it
 * obtains.
 *
 * @param p the opened package to check
 * @throws IOException if a part cannot be read or a stream cannot be closed
 * @throws OpenXML4JException if the package structure is invalid
 */
private void checkXSSFReader(OPCPackage p) throws IOException, OpenXML4JException {
    XSSFReader reader = new XSSFReader(p);

    // these can be null...
    InputStream sharedStringsData = reader.getSharedStringsData();
    if (sharedStringsData != null) {
        sharedStringsData.close();
    }
    reader.getSharedStringsTable();

    InputStream stylesData = reader.getStylesData();
    if (stylesData != null) {
        stylesData.close();
    }
    reader.getStylesTable();

    InputStream themesData = reader.getThemesData();
    if (themesData != null) {
        themesData.close();
    }

    // The workbook stream must exist; close it once checked so it is not leaked
    InputStream workbookData = reader.getWorkbookData();
    try {
        assertNotNull(workbookData);
    } finally {
        if (workbookData != null) {
            workbookData.close();
        }
    }

    Iterator<InputStream> sheetsData = reader.getSheetsData();
    while (sheetsData.hasNext()) {
        InputStream str = sheetsData.next();
        str.close();
    }
}
Also used : BufferedInputStream(java.io.BufferedInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader)

Aggregations

XSSFReader (org.apache.poi.xssf.eventusermodel.XSSFReader)11 InputStream (java.io.InputStream)10 OPCPackage (org.apache.poi.openxml4j.opc.OPCPackage)9 ReadOnlySharedStringsTable (org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable)5 InputSource (org.xml.sax.InputSource)4 XMLReader (org.xml.sax.XMLReader)4 BufferedInputStream (java.io.BufferedInputStream)3 FileInputStream (java.io.FileInputStream)3 StylesTable (org.apache.poi.xssf.model.StylesTable)3 IOException (java.io.IOException)2 OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)2 CommentsTable (org.apache.poi.xssf.model.CommentsTable)2 SharedStringsTable (org.apache.poi.xssf.model.SharedStringsTable)2 SAXException (org.xml.sax.SAXException)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 File (java.io.File)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 LinkedHashMap (java.util.LinkedHashMap)1 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)1