Search in sources :

Example 21 with XmlException

use of org.apache.xmlbeans.XmlException in project poi by apache.

the class TestZipPackage method testZipEntityExpansionSharedStringTableEvents.

/**
 * Runs text extraction over {@code poc-shared-strings.xlsx} with the
 * per-thread "prefer event extractors" flag switched on, and restores the
 * previous flag value afterwards.
 * <p>
 * Two outcomes are tolerated: {@code getText()} throwing an
 * {@link IllegalStateException}, or an {@link XmlException} that passes
 * {@code assertEntityLimitReached}.
 */
@Test
public void testZipEntityExpansionSharedStringTableEvents() throws Exception {
    boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
    ExtractorFactory.setThreadPrefersEventExtractors(true);
    try {
        POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx"));
        // Assert before entering the try/finally: with the check inside it, a
        // null extractor made close() throw a NullPointerException from the
        // finally block, hiding the real assertion failure.
        assertNotNull(extractor);
        try {
            extractor.getText();
        } catch (IllegalStateException ignored) {
            // expected due to shared strings expansion
        } finally {
            extractor.close();
        }
    } catch (XmlException e) {
        assertEntityLimitReached(e);
    } finally {
        // always put the thread-level preference back the way we found it
        ExtractorFactory.setThreadPrefersEventExtractors(before);
    }
}
Also used : POITextExtractor(org.apache.poi.POITextExtractor) XmlException(org.apache.xmlbeans.XmlException) Test(org.junit.Test)

Example 22 with XmlException

use of org.apache.xmlbeans.XmlException in project tika by apache.

the class XSSFBExcelExtractorDecorator method buildXHTML.

/**
 * Writes one {@code div} per sheet of the extractor's package to the given
 * handler: the sheet name as an {@code h1}, the sheet contents as a table,
 * then headers, footers, shapes and finally the sheet's hyperlinks.
 *
 * @param xhtml handler that receives the generated elements
 * @throws SAXException declared; propagated from the handler or helper calls
 * @throws XmlException if the reader, styles table, sheet iterator or shared
 *         strings table cannot be created (wraps the {@link OpenXML4JException})
 * @throws IOException if closing a sheet stream fails, or propagated from a helper
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is an OpenXML4JException and was handled
        // identically, so a single handler covers both.
        throw new XmlException(e);
    }
    while (iter.hasNext()) {
        // try-with-resources: the sheet stream was previously never closed,
        // leaking one stream per sheet (and on every failure path).
        try (InputStream stream = iter.next()) {
            PackagePart sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(xhtml);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();
            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, stream);
            xhtml.endElement("tbody");
            xhtml.endElement("table");
            // Headers and footers are collected while the sheet is processed,
            // so they can only be written out after the table.
            for (String header : sheetExtractor.headers) {
                extractHeaderFooter(header, xhtml);
            }
            for (String footer : sheetExtractor.footers) {
                extractHeaderFooter(footer, xhtml);
            }
            List<XSSFShape> shapes = iter.getShapes();
            processShapes(shapes, xhtml);
            //for now dump sheet hyperlinks at bottom of page
            //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
            //step 1: extract hyperlink info from bottom of page
            //step 2: process as we do now, but with cached hyperlink relationship info
            extractHyperLinks(sheetPart, xhtml);
            // All done with this sheet
            xhtml.endElement("div");
        }
    }
}
Also used : XSSFBReader(org.apache.poi.xssf.eventusermodel.XSSFBReader) XSSFBCommentsTable(org.apache.poi.xssf.binary.XSSFBCommentsTable) InputStream(java.io.InputStream) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) XSSFBStylesTable(org.apache.poi.xssf.binary.XSSFBStylesTable) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) XSSFBSharedStringsTable(org.apache.poi.xssf.binary.XSSFBSharedStringsTable) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage)

Example 23 with XmlException

use of org.apache.xmlbeans.XmlException in project tika by apache.

the class XSSFExcelExtractorDecorator method buildXHTML.

/**
 * Renders every distinct sheet of the extractor's package as a {@code div}
 * on the given handler: sheet name, table of contents, headers, footers,
 * optionally shape text, and the sheet's hyperlinks last.
 *
 * @param xhtml handler that receives the generated elements
 * @throws XmlException if the reader, styles table, sheet iterator or shared
 *         strings table cannot be created (wraps the {@link OpenXML4JException})
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage pkg = extractor.getPackage();
    ReadOnlySharedStringsTable sharedStrings;
    XSSFReader.SheetIterator sheets;
    XSSFReader reader;
    StylesTable stylesTable;
    try {
        reader = new XSSFReader(pkg);
        stylesTable = reader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        sharedStrings = new ReadOnlySharedStringsTable(pkg);
    } catch (OpenXML4JException cause) {
        // also covers InvalidFormatException, which is an OpenXML4JException
        throw new XmlException(cause);
    }
    //temporary workaround for POI-61034
    //remove once POI 3.17-beta1 is released
    Set<String> visitedParts = new HashSet<>();
    while (sheets.hasNext()) {
        SheetTextAsHTML htmlSheet = new SheetTextAsHTML(xhtml);
        PackagePart part = null;
        try (InputStream sheetStream = sheets.next()) {
            part = sheets.getSheetPart();
            // add() reports false when the part name was already recorded,
            // i.e. the iterator handed us a sheet we have processed before
            if (!visitedParts.add(part.getPartName().toString())) {
                continue;
            }
            addDrawingHyperLinks(part);
            sheetParts.add(part);
            CommentsTable sheetComments = sheets.getSheetComments();
            // Open the sheet container and emit its name
            xhtml.startElement("div");
            xhtml.element("h1", sheets.getSheetName());
            // Main sheet contents go into a table
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(htmlSheet, sheetComments, stylesTable, sharedStrings, sheetStream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");
        // Headers and footers are emitted after the table contents
        for (String headerText : htmlSheet.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : htmlSheet.footers) {
            extractHeaderFooter(footerText, xhtml);
        }
        // Text held in shapes is only emitted when the config asks for it
        if (config.getIncludeShapeBasedContent()) {
            processShapes(sheets.getShapes(), xhtml);
        }
        //for now dump sheet hyperlinks at bottom of page
        //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
        //step 1: extract hyperlink info from bottom of page
        //step 2: process as we do now, but with cached hyperlink relationship info
        extractHyperLinks(part, xhtml);
        // Sheet finished
        xhtml.endElement("div");
    }
}
Also used : ReadOnlySharedStringsTable(org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable) InputStream(java.io.InputStream) StylesTable(org.apache.poi.xssf.model.StylesTable) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) CommentsTable(org.apache.poi.xssf.model.CommentsTable) XSSFShape(org.apache.poi.xssf.usermodel.XSSFShape) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XmlException(org.apache.xmlbeans.XmlException) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XSSFReader(org.apache.poi.xssf.eventusermodel.XSSFReader) HashSet(java.util.HashSet)

Example 24 with XmlException

use of org.apache.xmlbeans.XmlException in project knime-core by knime.

the class XML2PMMLNodeModel method createColRearranger.

/**
 * Builds a {@link ColumnRearranger} that converts the configured XML column
 * into a PMML column, either replacing the source column or appending a new
 * one depending on the "replace column" setting.
 * <p>
 * Per row, the generated cell factory returns a missing cell for missing
 * input, a PMML cell for a valid document, and otherwise either throws a
 * {@link RuntimeException} (fail-on-invalid enabled) or returns a
 * {@link MissingCell} carrying the failure text. Every invalid row increments
 * {@code m_failCounter}.
 *
 * @param spec the input table spec
 * @return the rearranger producing the PMML column
 * @throws InvalidSettingsException if the selected column does not exist or
 *         is not compatible with {@link StringValue}
 */
private ColumnRearranger createColRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    if (m_xmlColumnName.getStringValue() == null) {
        guessDefaultXMLColumn(spec);
    }
    String xmlColName = m_xmlColumnName.getStringValue();
    String newColName = m_newColumnName.getStringValue();
    final int colIndex = spec.findColumnIndex(xmlColName);
    CheckUtils.checkSetting(colIndex >= 0, "Column: '%s' does not exist anymore.", xmlColName);
    final DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    CheckUtils.checkSetting(colSpec.getType().isCompatible(StringValue.class), "Selected column '%s' is not string/xml-compatible", xmlColName);
    DataColumnSpecCreator colSpecCreator;
    if (newColName != null && !m_replaceColumn.getBooleanValue()) {
        String newName = DataTableSpec.getUniqueColumnName(spec, newColName);
        colSpecCreator = new DataColumnSpecCreator(newName, PMMLCell.TYPE);
    } else {
        // Reuse the source column's spec but retype it and drop handlers and domain.
        colSpecCreator = new DataColumnSpecCreator(colSpec);
        colSpecCreator.setType(PMMLCell.TYPE);
        colSpecCreator.removeAllHandlers();
        colSpecCreator.setDomain(null);
    }
    DataColumnSpec outColumnSpec = colSpecCreator.createSpec();
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    CellFactory fac = new SingleCellFactory(outColumnSpec) {

        @Override
        public DataCell getCell(final DataRow row) {
            DataCell cell = row.getCell(colIndex);
            if (cell.isMissing()) {
                return DataType.getMissingCell();
            }
            PMMLDocument pmmlDoc = null;
            String failure = null;
            XmlException parseError = null;
            // NOTE(review): the column was only checked for StringValue
            // compatibility above, yet the cell is cast to XMLValue here —
            // confirm that non-XML string cells cannot reach this point.
            try (LockedSupplier<Document> supplier = ((XMLValue<Document>) cell).getDocumentSupplier()) {
                XmlObject xmlDoc = XmlObject.Factory.parse(supplier.get().cloneNode(true));
                if (xmlDoc instanceof PMMLDocument) {
                    pmmlDoc = (PMMLDocument) xmlDoc;
                } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
                    String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
                    /* Parse the modified document and assign it to a
                     * PMMLDocument.*/
                    pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
                } else {
                    failure = "No valid PMML v 3.x/4.0/4.1 document";
                }
            } catch (XmlException e) {
                if (!m_failOnInvalid.getBooleanValue()) {
                    LOGGER.error("Invalid PMML in row " + row.getKey() + ": " + e.getMessage(), e);
                }
                // A null message must not be mistaken for "no failure":
                // that would skip the counter and the fail-on-invalid check.
                failure = e.getMessage() != null ? e.getMessage() : e.getClass().getName();
                parseError = e;
            }
            if (failure != null) {
                m_failCounter.incrementAndGet();
                if (m_failOnInvalid.getBooleanValue()) {
                    // keep the parse exception (if any) as the cause for diagnostics
                    throw new RuntimeException("Invalid PMML in row " + row.getKey() + ": " + failure, parseError);
                }
                return new MissingCell(failure);
            }
            try {
                return PMMLCellFactory.create(pmmlDoc.toString());
            } catch (Exception e) {
                return new MissingCell(e.getMessage());
            }
        }
    };
    if (m_replaceColumn.getBooleanValue()) {
        rearranger.replace(fac, colIndex);
    } else {
        rearranger.append(fac);
    }
    return rearranger;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) PMMLDocument(org.dmg.pmml.PMMLDocument) Document(org.w3c.dom.Document) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) XmlException(org.apache.xmlbeans.XmlException) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) MissingCell(org.knime.core.data.MissingCell) XmlException(org.apache.xmlbeans.XmlException) DataCell(org.knime.core.data.DataCell) XmlObject(org.apache.xmlbeans.XmlObject) PMMLDocument(org.dmg.pmml.PMMLDocument) XMLValue(org.knime.core.data.xml.XMLValue) StringValue(org.knime.core.data.StringValue) PMMLCellFactory(org.knime.core.data.xml.PMMLCellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 25 with XmlException

use of org.apache.xmlbeans.XmlException in project arctic-sea by 52North.

the class GetResultTemplateResponseEncoder method encodeResultStructure.

/**
 * Writes the result structure of the response into a new child object of
 * {@code json} stored under {@link JSONConstants#RESULT_STRUCTURE}.
 * <p>
 * An already decoded structure is used as is; otherwise its XML is parsed
 * and run through the SWE 2.0 decoder from the decoder repository. Only
 * {@link SweDataRecord} structures are encoded; anything else is logged as
 * unsupported.
 *
 * @param t    the response holding the result structure
 * @param json the node that receives the result structure object
 * @throws EncodingException if the XML cannot be parsed or decoded
 */
private void encodeResultStructure(GetResultTemplateResponse t, ObjectNode json) throws EncodingException {
    ObjectNode target = json.putObject(JSONConstants.RESULT_STRUCTURE);
    SosResultStructure resultStructure = t.getResultStructure();
    SweAbstractDataComponent structure;
    if (!resultStructure.isDecoded()) {
        try {
            XmlNamespaceDecoderKey decoderKey = new XmlNamespaceDecoderKey(SweConstants.NS_SWE_20, SweAbstractDataComponent.class);
            Decoder<SweAbstractDataComponent, XmlObject> sweDecoder = this.decoderRepository.getDecoder(decoderKey);
            if (sweDecoder == null) {
                throw new NoDecoderForKeyException(decoderKey);
            }
            XmlObject parsed = XmlObject.Factory.parse(resultStructure.getXml().get());
            structure = sweDecoder.decode(parsed);
        } catch (XmlException | DecodingException ex) {
            throw new EncodingException(ex);
        }
    } else {
        structure = t.getResultStructure().get().get();
    }
    if (!(structure instanceof SweDataRecord)) {
        // structure may be null here, so guard the getClass() call
        Object structureType = structure == null ? null : structure.getClass();
        LOG.warn("Unsupported structure: {}", structureType);
        return;
    }
    encodeSweDataRecord(structure, target);
}
Also used : NoDecoderForKeyException(org.n52.svalbard.decode.exception.NoDecoderForKeyException) XmlNamespaceDecoderKey(org.n52.svalbard.decode.XmlNamespaceDecoderKey) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) EncodingException(org.n52.svalbard.encode.exception.EncodingException) SweDataRecord(org.n52.shetland.ogc.swe.SweDataRecord) XmlException(org.apache.xmlbeans.XmlException) SweAbstractDataComponent(org.n52.shetland.ogc.swe.SweAbstractDataComponent) XmlObject(org.apache.xmlbeans.XmlObject) DecodingException(org.n52.svalbard.decode.exception.DecodingException) SosResultStructure(org.n52.shetland.ogc.sos.SosResultStructure)

Aggregations

XmlException (org.apache.xmlbeans.XmlException): 112; XmlObject (org.apache.xmlbeans.XmlObject): 45; IOException (java.io.IOException): 35; DecodingException (org.n52.svalbard.decode.exception.DecodingException): 19; EncodingException (org.n52.svalbard.encode.exception.EncodingException): 17; POIXMLException (org.apache.poi.POIXMLException): 15; InputStream (java.io.InputStream): 11; ArrayList (java.util.ArrayList): 10; XmlCursor (org.apache.xmlbeans.XmlCursor): 10; XmlOptions (org.apache.xmlbeans.XmlOptions): 10; OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException): 8; POIXMLDocumentPart (org.apache.poi.POIXMLDocumentPart): 7; AbstractFeature (org.n52.shetland.ogc.gml.AbstractFeature): 7; Geometry (org.locationtech.jts.geom.Geometry): 6; Document (org.w3c.dom.Document): 6; Node (org.w3c.dom.Node): 6; DataAccessException (com.centurylink.mdw.common.exception.DataAccessException): 5; ByteArrayInputStream (java.io.ByteArrayInputStream): 5; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5; File (java.io.File): 5