Search in sources :

Example 6 with OpenXML4JException

use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project tika by apache.

the class XSSFBExcelExtractorDecorator method buildXHTML.

/**
 * Writes every sheet of the binary workbook package to {@code xhtml}.
 *
 * <p>For each sheet the output is one {@code div} holding an {@code h1} with the
 * sheet name, a {@code table}/{@code tbody} filled in by {@code processSheet},
 * then the collected headers and footers, the shapes, and finally the sheet's
 * hyperlinks.
 *
 * @param xhtml handler that receives the generated XHTML events
 * @throws SAXException if the handler rejects an event
 * @throws XmlException if the package readers cannot be created; wraps the
 *         underlying {@code OpenXML4JException}
 * @throws IOException if reading or closing a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is an OpenXML4JException in POI, so a single
        // handler covers both of the cases that used to be caught separately.
        throw new XmlException(e);
    }
    while (iter.hasNext()) {
        // try-with-resources guarantees the sheet stream is released even when
        // processing fails part-way through the sheet.
        try (InputStream stream = iter.next()) {
            PackagePart sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();
            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, stream);
            xhtml.endElement("tbody");
            xhtml.endElement("table");
            // Headers and footers gathered while processing the sheet are
            // emitted after the table
            for (String header : sheetExtractor.headers) {
                extractHeaderFooter(header, xhtml);
            }
            for (String footer : sheetExtractor.footers) {
                extractHeaderFooter(footer, xhtml);
            }
            List<XSSFShape> shapes = iter.getShapes();
            processShapes(shapes, xhtml);
            //for now dump sheet hyperlinks at bottom of page
            //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
            //step 1: extract hyperlink info from bottom of page
            //step 2: process as we do now, but with cached hyperlink relationship info
            extractHyperLinks(sheetPart, xhtml);
            // All done with this sheet
            xhtml.endElement("div");
        }
    }
}
Also used : XSSFBReader(org.apache.poi.xssf.eventusermodel.XSSFBReader) XSSFBCommentsTable(org.apache.poi.xssf.binary.XSSFBCommentsTable) InputStream(java.io.InputStream) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) XSSFBStylesTable(org.apache.poi.xssf.binary.XSSFBStylesTable) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) XSSFBSharedStringsTable(org.apache.poi.xssf.binary.XSSFBSharedStringsTable) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage)

Example 7 with OpenXML4JException

use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project tika by apache.

the class XSSFExcelExtractorDecorator method buildXHTML.

/**
 * Renders each worksheet of the package into {@code xhtml} as a {@code div}
 * containing the sheet name, a table with the sheet contents, the headers and
 * footers, optionally the shape text, and the sheet hyperlinks.
 *
 * @param xhtml handler that receives the generated XHTML events
 * @throws SAXException if the handler rejects an event
 * @throws XmlException if the package readers cannot be created; wraps the
 *         underlying {@code OpenXML4JException}
 * @throws IOException if reading or closing a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage pkg = extractor.getPackage();
    XSSFReader reader;
    StylesTable styleTable;
    XSSFReader.SheetIterator sheets;
    ReadOnlySharedStringsTable sharedStrings;
    try {
        reader = new XSSFReader(pkg);
        styleTable = reader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        sharedStrings = new ReadOnlySharedStringsTable(pkg);
    } catch (OpenXML4JException e) {
        // Also handles InvalidFormatException, which is an OpenXML4JException in POI
        throw new XmlException(e);
    }

    //temporary workaround for POI-61034
    //remove once POI 3.17-beta1 is released
    Set<String> visitedPartNames = new HashSet<>();

    while (sheets.hasNext()) {
        SheetTextAsHTML textCollector = new SheetTextAsHTML(xhtml);
        PackagePart currentPart = null;
        try (InputStream sheetStream = sheets.next()) {
            currentPart = sheets.getSheetPart();
            // add() returns false when the part name was already recorded,
            // i.e. this sheet part has been emitted before
            if (!visitedPartNames.add(currentPart.getPartName().toString())) {
                continue;
            }
            addDrawingHyperLinks(currentPart);
            sheetParts.add(currentPart);
            CommentsTable sheetComments = sheets.getSheetComments();

            // Open the sheet wrapper and write its title
            xhtml.startElement("div");
            xhtml.element("h1", sheets.getSheetName());

            // Cell contents go into a table body
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(textCollector, sheetComments, styleTable, sharedStrings, sheetStream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // Headers, then footers, collected while the sheet was processed
        for (String headerText : textCollector.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : textCollector.footers) {
            extractHeaderFooter(footerText, xhtml);
        }

        // Shape text is only emitted when the configuration asks for it
        if (config.getIncludeShapeBasedContent()) {
            processShapes(sheets.getShapes(), xhtml);
        }

        //for now dump sheet hyperlinks at bottom of page
        //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
        //step 1: extract hyperlink info from bottom of page
        //step 2: process as we do now, but with cached hyperlink relationship info
        extractHyperLinks(currentPart, xhtml);

        // Close the sheet wrapper
        xhtml.endElement("div");
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputStream(java.io.InputStream) StylesTable(org.apache.poi.xssf.model.StylesTable) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) CommentsTable(org.apache.poi.xssf.model.CommentsTable) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader) HashSet(java.util.HashSet)

Example 8 with OpenXML4JException

use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project tika by apache.

the class XWPFEventBasedWordExtractor method loadNumbering.

/**
 * Loads the numbering definitions related to {@code packagePart}.
 *
 * @param packagePart part whose numbering relationship is looked up
 * @return the numbering built from the first numbering relationship, or
 *         {@code null} when there is no such relationship, the relationship
 *         or its target part is missing, or loading fails (the failure is
 *         logged as a warning)
 */
private XWPFNumbering loadNumbering(PackagePart packagePart) {
    try {
        PackageRelationshipCollection relationships =
                packagePart.getRelationshipsByType(XWPFRelation.NUMBERING.getRelation());
        if (relationships.size() <= 0) {
            return null;
        }
        // Only the first numbering relationship is consulted
        PackageRelationship firstRelationship = relationships.getRelationship(0);
        if (firstRelationship == null) {
            return null;
        }
        PackagePart targetPart = container.getPart(firstRelationship);
        return targetPart == null ? null : new XWPFNumbering(targetPart);
    } catch (IOException | OpenXML4JException e) {
        LOG.warn("Couldn't load numbering", e);
    }
    return null;
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XWPFNumbering(org.apache.poi.xwpf.usermodel.XWPFNumbering) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) IOException(java.io.IOException) PackagePart(org.apache.poi.openxml4j.opc.PackagePart)

Example 9 with OpenXML4JException

use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project translationstudio8 by heartsome.

the class Xlsx2TbxConverter method doConvert.

/**
 * Reads the rows of the configured XLSX file and writes the converted output
 * to {@code targetFile}.
 *
 * <p>The output stream is stored in the {@code out} field and is always closed
 * before this method returns. After the rows have been read, the string
 * returned by {@code generateTbxEnd()} is written if it is non-empty.
 *
 * @param targetFile path of the file to create
 * @param monitor progress monitor handed to the row reader
 * @throws Exception with a localized message when reading the workbook fails
 *         or a recognised language-code problem is reported; the original
 *         exception is attached as the cause
 */
@Override
public void doConvert(String targetFile, IProgressMonitor monitor) throws Exception {
    try {
        out = new FileOutputStream(new File(targetFile));
        XlsxRowReader reader = new XlsxRowReader(this.xlsxFile, 20, handler);
        try {
            reader.readRows(monitor);
        } catch (ParserConfigurationException e) {
            LOGGER.error("", e);
            throw new Exception(Messages.getString("converter.xlsx2tmx.parseExcle.error"), e);
        } catch (SAXException e) {
            LOGGER.error("", e);
            // The SAX message carries an error code that selects the user-facing text
            String code = e.getMessage();
            if ("LANG-CODE-ERORR".equals(code) || "EMPTY-LANG-CODE".equals(code)) {
                throw new Exception(Messages.getString("converter.common.vaild.langcode.error"), e);
            } else if ("DIFF--SRC-LANG-CODE".equals(code)) {
                throw new Exception(Messages.getString("converter.common.appendtmx.diffsrcLang.error"), e);
            } else if ("DUPLICATE-LANG-CODE-ERORR".equals(code)) {
                throw new Exception(Messages.getString("converter.common.vaild.duplicatelangcode.error"), e);
            }
            // NOTE(review): any other SAXException is only logged and the
            // conversion carries on to write the closing section -- confirm
            // this is intended rather than an oversight.
        } catch (IOException e) {
            LOGGER.error("", e);
            throw new Exception(Messages.getString("converter.xlsx2tmx.parseExcle.error"), e);
        } catch (OpenXML4JException e) {
            LOGGER.error("", e);
            throw new Exception(Messages.getString("converter.xlsx2tmx.parseExcle.error"), e);
        }
        String s = generateTbxEnd();
        if (s != null && s.length() != 0) {
            writeString(s);
        }
    } catch (FileNotFoundException e) {
        // NOTE(review): the target file could not be opened; this is logged
        // but not reported to the caller -- confirm that is intended.
        LOGGER.error("", e);
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException ignored) {
                // Best-effort close: nothing useful can be done if it fails
            }
        }
    }
}
Also used : OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) FileOutputStream(java.io.FileOutputStream) XlsxRowReader(net.heartsome.cat.document.xlsx.XlsxRowReader) FileNotFoundException(java.io.FileNotFoundException) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) IOException(java.io.IOException) File(java.io.File) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) SAXException(org.xml.sax.SAXException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) SAXException(org.xml.sax.SAXException)

Example 10 with OpenXML4JException

use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project poi by apache.

the class ZipPackagePropertiesMarshaller method marshall.

/**
 * Writes the package properties of {@code part} as a new entry of the given
 * ZIP stream.
 *
 * @param part part whose name determines the ZIP entry name
 * @param out destination stream; must be a {@link ZipOutputStream}
 * @return {@code true} if the XML document was saved into the entry,
 *         {@code false} if {@code StreamHelper.saveXmlInStream} reported failure
 * @throws IllegalArgumentException if {@code out} is not a {@link ZipOutputStream}
 * @throws OpenXML4JException if marshalling fails or an I/O error occurs while
 *         writing the entry
 */
@Override
public boolean marshall(PackagePart part, OutputStream out) throws OpenXML4JException {
    if (!(out instanceof ZipOutputStream)) {
        throw new IllegalArgumentException("ZipOutputStream expected!");
    }
    ZipOutputStream zos = (ZipOutputStream) out;
    // Saving the part in the zip file
    ZipEntry ctEntry = new ZipEntry(ZipHelper.getZipItemNameFromOPCName(part.getPartName().getURI().toString()));
    try {
        // Add entry in ZIP
        zos.putNextEntry(ctEntry);
        try {
            // Marshall the properties inside a XML Document
            super.marshall(part, out);
            return StreamHelper.saveXmlInStream(xmlDoc, out);
        } finally {
            // Always close the entry, including when saving reports failure
            // or marshalling throws, so the stream is not left mid-entry.
            zos.closeEntry();
        }
    } catch (IOException e) {
        throw new OpenXML4JException(e.getLocalizedMessage(), e);
    }
}
Also used : OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) ZipOutputStream(java.util.zip.ZipOutputStream) ZipEntry(java.util.zip.ZipEntry) IOException(java.io.IOException)

Aggregations

OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)16 IOException (java.io.IOException)13 InputStream (java.io.InputStream)7 XmlException (org.apache.xmlbeans.XmlException)7 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)5 PackagePart (org.apache.poi.openxml4j.opc.PackagePart)5 FileNotFoundException (java.io.FileNotFoundException)4 OPCPackage (org.apache.poi.openxml4j.opc.OPCPackage)4 SAXException (org.xml.sax.SAXException)4 ZipOutputStream (java.util.zip.ZipOutputStream)3 PackageRelationshipCollection (org.apache.poi.openxml4j.opc.PackageRelationshipCollection)3 ZipEntry (java.util.zip.ZipEntry)2 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)2 InvalidOperationException (org.apache.poi.openxml4j.exceptions.InvalidOperationException)2 PackageRelationship (org.apache.poi.openxml4j.opc.PackageRelationship)2 XSSFBCommentsTable (org.apache.poi.xssf.binary.XSSFBCommentsTable)2 XSSFBSharedStringsTable (org.apache.poi.xssf.binary.XSSFBSharedStringsTable)2 XSSFBStylesTable (org.apache.poi.xssf.binary.XSSFBStylesTable)2 XSSFBReader (org.apache.poi.xssf.eventusermodel.XSSFBReader)2 XSSFBEventBasedExcelExtractor (org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor)2