Search in sources :

Example 61 with XmlException

use of org.apache.xmlbeans.XmlException in project poi by apache.

the class XWPFHeader method onDocumentRead.

/**
 * Reads the header part and populates this object's paragraph, table and
 * body-element lists from the direct children of the parsed header element.
 *
 * @throws IOException if the superclass read or the part's input stream fails
 * @throws POIXMLException if the part content cannot be parsed as a header document
 */
@Override
protected void onDocumentRead() throws IOException {
    super.onDocumentRead();
    InputStream is = null;
    try {
        is = getPackagePart().getInputStream();
        HdrDocument hdrDocument = HdrDocument.Factory.parse(is, DEFAULT_XML_OPTIONS);
        headerFooter = hdrDocument.getHdr();
        // parse the document with cursor and add
        // the XmlObject to its lists
        XmlCursor cursor = headerFooter.newCursor();
        try {
            cursor.selectPath("./*");
            while (cursor.toNextSelection()) {
                XmlObject o = cursor.getObject();
                if (o instanceof CTP) {
                    XWPFParagraph p = new XWPFParagraph((CTP) o, this);
                    paragraphs.add(p);
                    bodyElements.add(p);
                }
                if (o instanceof CTTbl) {
                    XWPFTable t = new XWPFTable((CTTbl) o, this);
                    tables.add(t);
                    bodyElements.add(t);
                }
                if (o instanceof CTSdtBlock) {
                    XWPFSDT c = new XWPFSDT((CTSdtBlock) o, this);
                    bodyElements.add(c);
                }
            }
        } finally {
            // Release the cursor even when building a body element throws;
            // previously it was only disposed on the success path.
            cursor.dispose();
        }
    } catch (XmlException e) {
        throw new POIXMLException(e);
    } finally {
        if (is != null) {
            is.close();
        }
    }
}
Also used : HdrDocument(org.openxmlformats.schemas.wordprocessingml.x2006.main.HdrDocument) InputStream(java.io.InputStream) XmlException(org.apache.xmlbeans.XmlException) XmlObject(org.apache.xmlbeans.XmlObject) CTTbl(org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl) POIXMLException(org.apache.poi.POIXMLException) CTP(org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP) XmlCursor(org.apache.xmlbeans.XmlCursor) CTSdtBlock(org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock)

Example 62 with XmlException

use of org.apache.xmlbeans.XmlException in project tika by apache.

the class OOXMLExtractorFactory method parse.

/**
 * Picks a POI OOXML text extractor for the given stream, wraps it in the
 * matching Tika decorator, and emits metadata followed by XHTML content.
 * Unsupported or undetected OOXML types are handed to the empty parser.
 *
 * @param stream      the document stream; reused as a TikaInputStream when possible
 * @param baseHandler receives the generated XHTML SAX events
 * @param metadata    receives the content type and extracted metadata
 * @param context     supplies the Locale and the OfficeParserConfig
 * @throws TikaException if no suitable extractor can be created
 */
public static void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    Locale locale = context.get(Locale.class, Locale.getDefault());
    ExtractorFactory.setThreadPrefersEventExtractors(true);
    try {
        // Locate or open the OPCPackage for the file
        final OPCPackage opcPackage;
        TikaInputStream tikaStream = TikaInputStream.cast(stream);
        if (tikaStream != null && tikaStream.getOpenContainer() instanceof OPCPackage) {
            opcPackage = (OPCPackage) tikaStream.getOpenContainer();
        } else if (tikaStream != null && tikaStream.hasFile()) {
            opcPackage = OPCPackage.open(tikaStream.getFile().getPath(), PackageAccess.READ);
            tikaStream.setOpenContainer(opcPackage);
        } else {
            InputStream shielded = new CloseShieldInputStream(stream);
            opcPackage = OPCPackage.open(shielded);
        }

        // Detect the type and bail out to the empty parser when it is not one we handle
        MediaType mediaType = ZipContainerDetector.detectOfficeOpenXML(opcPackage);
        if (mediaType == null || OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(mediaType)) {
            EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context);
            return;
        }
        metadata.set(Metadata.CONTENT_TYPE, mediaType.toString());

        // This has already been set by OOXMLParser's call to configure(),
        // so it can be relied on to be non-null.
        OfficeParserConfig officeConfig = context.get(OfficeParserConfig.class);

        // Try the SAX-based extractors first (when enabled), then fall back to POI's factory
        POIXMLTextExtractor textExtractor = null;
        if (officeConfig.getUseSAXDocxExtractor()) {
            textExtractor = trySXWPF(opcPackage);
        }
        if (textExtractor == null && officeConfig.getUseSAXPptxExtractor()) {
            textExtractor = trySXSLF(opcPackage);
        }
        if (textExtractor == null) {
            textExtractor = ExtractorFactory.createExtractor(opcPackage);
        }

        // Choose the decorator; the order of these checks is significant
        POIXMLDocument poiDocument = textExtractor.getDocument();
        final OOXMLExtractor decorator;
        if (textExtractor instanceof XSSFBEventBasedExcelExtractor) {
            decorator = new XSSFBExcelExtractorDecorator(context, textExtractor, locale);
        } else if (textExtractor instanceof XSSFEventBasedExcelExtractor) {
            decorator = new XSSFExcelExtractorDecorator(context, textExtractor, locale);
        } else if (textExtractor instanceof XWPFEventBasedWordExtractor) {
            decorator = new SXWPFWordExtractorDecorator(metadata, context, (XWPFEventBasedWordExtractor) textExtractor);
            metadata.add("X-Parsed-By", XWPFEventBasedWordExtractor.class.getCanonicalName());
        } else if (textExtractor instanceof XSLFEventBasedPowerPointExtractor) {
            decorator = new SXSLFPowerPointExtractorDecorator(metadata, context, (XSLFEventBasedPowerPointExtractor) textExtractor);
            metadata.add("X-Parsed-By", XSLFEventBasedPowerPointExtractor.class.getCanonicalName());
        } else if (poiDocument == null) {
            throw new TikaException("Expecting UserModel based POI OOXML extractor with a document, but none found. " + "The extractor returned was a " + textExtractor);
        } else if (poiDocument instanceof XMLSlideShow) {
            decorator = new XSLFPowerPointExtractorDecorator(context, (org.apache.poi.xslf.extractor.XSLFPowerPointExtractor) textExtractor);
        } else if (poiDocument instanceof XWPFDocument) {
            decorator = new XWPFWordExtractorDecorator(context, (XWPFWordExtractor) textExtractor);
        } else {
            decorator = new POIXMLTextExtractorDecorator(context, textExtractor);
        }

        // Get the bulk of the metadata first, so that it's accessible during
        //  parsing if desired by the client (see TIKA-1109)
        decorator.getMetadataExtractor().extract(metadata);
        // Extract the text, along with any in-document metadata
        decorator.getXHTML(baseHandler, metadata, context);
    } catch (IllegalArgumentException e) {
        String detail = e.getMessage();
        if (detail != null && detail.startsWith("No supported documents found")) {
            throw new TikaException("TIKA-418: RuntimeException while getting content" + " for thmx and xps file types", e);
        }
        throw new TikaException("Error creating OOXML extractor", e);
    } catch (OpenXML4JException | XmlException e) {
        // InvalidFormatException is an OpenXML4JException, so it is covered here too
        throw new TikaException("Error creating OOXML extractor", e);
    }
}
Also used : Locale(java.util.Locale) TikaInputStream(org.apache.tika.io.TikaInputStream) XWPFEventBasedWordExtractor(org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XSSFEventBasedExcelExtractor(org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor) OfficeParserConfig(org.apache.tika.parser.microsoft.OfficeParserConfig) MediaType(org.apache.tika.mime.MediaType) XWPFDocument(org.apache.poi.xwpf.usermodel.XWPFDocument) XSLFEventBasedPowerPointExtractor(org.apache.tika.parser.microsoft.ooxml.xslf.XSLFEventBasedPowerPointExtractor) TikaException(org.apache.tika.exception.TikaException) XSSFBEventBasedExcelExtractor(org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream) TikaInputStream(org.apache.tika.io.TikaInputStream) InputStream(java.io.InputStream) XWPFWordExtractor(org.apache.poi.xwpf.extractor.XWPFWordExtractor) POIXMLDocument(org.apache.poi.POIXMLDocument) POIXMLTextExtractor(org.apache.poi.POIXMLTextExtractor) XmlException(org.apache.xmlbeans.XmlException) XMLSlideShow(org.apache.poi.xslf.usermodel.XMLSlideShow) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream)

Example 63 with XmlException

use of org.apache.xmlbeans.XmlException in project tika by apache.

the class XSLFPowerPointExtractorDecorator method getMainDocumentParts.

/**
 * In PowerPoint files, slides have things embedded in them,
 * and slide drawings which have the images.
 *
 * Collects the parts for every referenced slide (via addSlideParts), the
 * main document part, and any parts related to it through the slide-master
 * or handout-master relationship types.
 *
 * @return the collected package parts
 * @throws TikaException if the slide show cannot be opened or a slide part
 *         cannot be resolved
 */
@Override
protected List<PackagePart> getMainDocumentParts() throws TikaException {
    List<PackagePart> parts = new ArrayList<>();
    XSLFSlideShow document;
    try {
        document = new XSLFSlideShow(extractor.getPackage());
    } catch (Exception e) {
        // Shouldn't happen; keep the original exception as the cause so the
        // stack trace is not lost if it does.
        throw new TikaException(e.getMessage(), e);
    }
    CTSlideIdList ctSlideIdList = document.getSlideReferences();
    if (ctSlideIdList != null) {
        for (int i = 0; i < ctSlideIdList.sizeOfSldIdArray(); i++) {
            CTSlideIdListEntry ctSlide = ctSlideIdList.getSldIdArray(i);
            // Add the slide
            PackagePart slidePart;
            try {
                slidePart = document.getSlidePart(ctSlide);
            } catch (IOException | XmlException e) {
                throw new TikaException("Broken OOXML file", e);
            }
            addSlideParts(slidePart, parts);
        }
    }
    //add full document to include macros
    parts.add(document.getPackagePart());
    for (String rel : new String[] { XSLFRelation.SLIDE_MASTER.getRelation(), HANDOUT_MASTER }) {
        try {
            PackageRelationshipCollection prc = document.getPackagePart().getRelationshipsByType(rel);
            for (int i = 0; i < prc.size(); i++) {
                PackagePart pp = document.getPackagePart().getRelatedPart(prc.getRelationship(i));
                if (pp != null) {
                    parts.add(pp);
                }
            }
        } catch (InvalidFormatException e) {
            // Best effort: a broken master relationship must not prevent the
            // slide parts collected so far from being returned.
            // TODO: log this once a logger is available here.
        }
    }
    return parts;
}
Also used : TikaException(org.apache.tika.exception.TikaException) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) ArrayList(java.util.ArrayList) IOException(java.io.IOException) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) CTSlideIdListEntry(org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) TikaException(org.apache.tika.exception.TikaException) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) IOException(java.io.IOException) XmlException(org.apache.xmlbeans.XmlException) SAXException(org.xml.sax.SAXException) CTSlideIdList(org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList) XmlException(org.apache.xmlbeans.XmlException)

Example 64 with XmlException

use of org.apache.xmlbeans.XmlException in project poi by apache.

the class XSSFCellStyle method cloneStyleFrom.

/**
 * Copies every piece of style information from another XSSFCellStyle onto
 * this one. Afterwards both styles carry the same properties but can be
 * edited independently. Anything previously set on this style is lost.
 *
 * The source style may belong to a different XSSFWorkbook, which makes it
 * possible to copy styles between workbooks.
 *
 * @param source the style to copy from; must be an XSSFCellStyle
 * @throws IllegalArgumentException if source is not an XSSFCellStyle
 * @throws POIXMLException if the source XML cannot be re-parsed
 */
@Override
public void cloneStyleFrom(CellStyle source) {
    if (!(source instanceof XSSFCellStyle)) {
        throw new IllegalArgumentException("Can only clone from one XSSFCellStyle to another, not between HSSFCellStyle and XSSFCellStyle");
    }
    XSSFCellStyle other = (XSSFCellStyle) source;

    if (other._stylesSource == _stylesSource) {
        // Same styles table: the Xf records can be copied across directly
        _cellXf.set(other.getCoreXf());
        _cellStyleXf.set(other.getStyleXf());
    } else {
        // Different styles table: rebuild the Xf, fill and border from XML
        try {
            // Drop children of the current Xf first to avoid orphaned nodes
            if (_cellXf.isSetAlignment()) {
                _cellXf.unsetAlignment();
            }
            if (_cellXf.isSetExtLst()) {
                _cellXf.unsetExtLst();
            }

            // Create a new Xf with the same contents
            String coreXfXml = other.getCoreXf().toString();
            _cellXf = CTXf.Factory.parse(coreXfXml, DEFAULT_XML_OPTIONS);

            // bug 56295: ensure that the fills is available and set correctly
            String fillXml = other.getCTFill().toString();
            addFill(CTFill.Factory.parse(fillXml, DEFAULT_XML_OPTIONS));

            // bug 58084: set borders correctly
            String borderXml = other.getCTBorder().toString();
            addBorder(CTBorder.Factory.parse(borderXml, DEFAULT_XML_OPTIONS));

            // Swap it over
            _stylesSource.replaceCellXfAt(_cellXfId, _cellXf);
        } catch (XmlException e) {
            throw new POIXMLException(e);
        }

        // Copy the format
        String formatString = other.getDataFormatString();
        XSSFDataFormat dataFormat = new XSSFDataFormat(_stylesSource);
        setDataFormat(dataFormat.getFormat(formatString));

        // Copy the font
        try {
            String fontXml = other.getFont().getCTFont().toString();
            XSSFFont clonedFont = new XSSFFont(CTFont.Factory.parse(fontXml, DEFAULT_XML_OPTIONS));
            clonedFont.registerTo(_stylesSource);
            setFont(clonedFont);
        } catch (XmlException e) {
            throw new POIXMLException(e);
        }
    }

    // Clear out cached details
    _font = null;
    _cellAlignment = null;
}
Also used : CTFill(org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFill) CTFont(org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFont) CTBorder(org.openxmlformats.schemas.spreadsheetml.x2006.main.CTBorder) XmlException(org.apache.xmlbeans.XmlException) POIXMLException(org.apache.poi.POIXMLException)

Example 65 with XmlException

use of org.apache.xmlbeans.XmlException in project poi by apache.

the class CalculationChain method readFrom.

/**
 * Parses a calculation-chain document from the given stream and stores its
 * calc chain in this object.
 *
 * @param is the stream to parse; it is not closed by this method
 * @throws IOException if the stream cannot be parsed; an XML parse failure
 *         is reported as an IOException with the XmlException as its cause
 */
public void readFrom(InputStream is) throws IOException {
    try {
        CalcChainDocument doc = CalcChainDocument.Factory.parse(is, DEFAULT_XML_OPTIONS);
        chain = doc.getCalcChain();
    } catch (XmlException e) {
        // Chain the original exception so the parse location and stack trace survive
        throw new IOException(e.getLocalizedMessage(), e);
    }
}
Also used : XmlException(org.apache.xmlbeans.XmlException) IOException(java.io.IOException) CalcChainDocument(org.openxmlformats.schemas.spreadsheetml.x2006.main.CalcChainDocument)

Aggregations

XmlException (org.apache.xmlbeans.XmlException)112 XmlObject (org.apache.xmlbeans.XmlObject)45 IOException (java.io.IOException)35 DecodingException (org.n52.svalbard.decode.exception.DecodingException)19 EncodingException (org.n52.svalbard.encode.exception.EncodingException)17 POIXMLException (org.apache.poi.POIXMLException)15 InputStream (java.io.InputStream)11 ArrayList (java.util.ArrayList)10 XmlCursor (org.apache.xmlbeans.XmlCursor)10 XmlOptions (org.apache.xmlbeans.XmlOptions)10 OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)8 POIXMLDocumentPart (org.apache.poi.POIXMLDocumentPart)7 AbstractFeature (org.n52.shetland.ogc.gml.AbstractFeature)7 Geometry (org.locationtech.jts.geom.Geometry)6 Document (org.w3c.dom.Document)6 Node (org.w3c.dom.Node)6 DataAccessException (com.centurylink.mdw.common.exception.DataAccessException)5 ByteArrayInputStream (java.io.ByteArrayInputStream)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 File (java.io.File)5