Search in sources :

Example 11 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project tika by apache.

the class XWPFEventBasedWordExtractor method loadNumbering.

/**
 * Resolves the numbering definitions related to the given part.
 *
 * Only the first relationship of the numbering type is considered.
 *
 * @param packagePart part whose numbering relationship is looked up
 * @return the numbering wrapper, or {@code null} when there is no numbering
 *         relationship, the related part cannot be found, or loading fails
 */
private XWPFNumbering loadNumbering(PackagePart packagePart) {
    XWPFNumbering result = null;
    try {
        PackageRelationshipCollection rels = packagePart.getRelationshipsByType(XWPFRelation.NUMBERING.getRelation());
        // Each step yields null as soon as the previous one had nothing to offer.
        PackageRelationship firstRel = rels.size() > 0 ? rels.getRelationship(0) : null;
        PackagePart target = firstRel != null ? container.getPart(firstRel) : null;
        if (target != null) {
            result = new XWPFNumbering(target);
        }
    } catch (IOException | OpenXML4JException e) {
        // Best effort: a failure here is logged and reported as "no numbering".
        LOG.warn("Couldn't load numbering", e);
    }
    return result;
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XWPFNumbering(org.apache.poi.xwpf.usermodel.XWPFNumbering) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) IOException(java.io.IOException) PackagePart(org.apache.poi.openxml4j.opc.PackagePart)

Example 12 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project tika by apache.

the class XWPFEventBasedWordExtractor method handleDocumentPart.

/**
 * Processes the main document part together with its related parts.
 *
 * Headers are handled first, then the main document, and finally footnotes,
 * comments, footers and endnotes (in that order).
 *
 * @param documentPart the main document part
 * @param sb buffer passed through to {@code handlePart}
 * @throws IOException propagated from {@code handlePart}
 * @throws SAXException propagated from {@code handlePart}
 */
private void handleDocumentPart(PackagePart documentPart, StringBuilder sb) throws IOException, SAXException {
    // The list manager is built from the numbering of the main document part.
    XWPFListManager listManager = new XWPFListManager(loadNumbering(documentPart));

    // headers
    handleRelatedParts(documentPart, XWPFRelation.HEADER, listManager, sb);

    // main document
    handlePart(documentPart, listManager, sb);

    // for now, just dump other components at end
    XWPFRelation[] trailing = { XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE };
    for (XWPFRelation relation : trailing) {
        handleRelatedParts(documentPart, relation, listManager, sb);
    }
}

/**
 * Handles every part related to {@code documentPart} through the given relation type.
 * An {@link InvalidFormatException} is logged and ends processing of this relation type only.
 */
private void handleRelatedParts(PackagePart documentPart, XWPFRelation relation, XWPFListManager listManager, StringBuilder sb) throws IOException, SAXException {
    try {
        PackageRelationshipCollection related = documentPart.getRelationshipsByType(relation.getRelation());
        if (related == null) {
            return;
        }
        for (int idx = 0; idx < related.size(); idx++) {
            PackagePart relatedPart = documentPart.getRelatedPart(related.getRelationship(idx));
            handlePart(relatedPart, listManager, sb);
        }
    } catch (InvalidFormatException e) {
        LOG.warn("Invalid format", e);
    }
}
Also used : XWPFRelation(org.apache.poi.xwpf.usermodel.XWPFRelation) XWPFNumbering(org.apache.poi.xwpf.usermodel.XWPFNumbering) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) XWPFListManager(org.apache.tika.parser.microsoft.ooxml.XWPFListManager) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException)

Example 13 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project poi by apache.

the class SignatureInfo method writeDocument.

/**
 * Write XML signature into the OPC package.
 *
 * The signature document is saved into the part {@code /_xmlsignatures/sig1.xml}
 * (created if absent), the marker part {@code /_xmlsignatures/origin.sigs} is
 * created if absent, and the digital-signature-origin relationships of the
 * package are replaced by a single one pointing at the marker part.
 *
 * @param document the xml signature document
 * @throws MarshalException if a part name is invalid or the signature
 *         document cannot be written
 */
protected void writeDocument(Document document) throws MarshalException {
    XmlOptions xo = new XmlOptions();
    // The configured map is inverted here: its values become keys and vice versa.
    Map<String, String> namespaceMap = new HashMap<String, String>();
    for (Map.Entry<String, String> entry : signatureConfig.getNamespacePrefixes().entrySet()) {
        namespaceMap.put(entry.getValue(), entry.getKey());
    }
    xo.setSaveSuggestedPrefixes(namespaceMap);
    xo.setUseDefaultNamespace();
    LOG.log(POILogger.DEBUG, "output signed Office OpenXML document");
    /*
     * Copy the original OOXML content to the signed OOXML package. During
     * copying some files need to be changed.
     */
    OPCPackage pkg = signatureConfig.getOpcPackage();
    PackagePartName sigPartName, sigsPartName;
    try {
        // <Override PartName="/_xmlsignatures/sig1.xml" ContentType="application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml"/>
        sigPartName = PackagingURIHelper.createPartName("/_xmlsignatures/sig1.xml");
        // <Default Extension="sigs" ContentType="application/vnd.openxmlformats-package.digital-signature-origin"/>
        sigsPartName = PackagingURIHelper.createPartName("/_xmlsignatures/origin.sigs");
    } catch (InvalidFormatException e) {
        throw new MarshalException(e);
    }
    PackagePart sigPart = pkg.getPart(sigPartName);
    if (sigPart == null) {
        sigPart = pkg.createPart(sigPartName, ContentTypes.DIGITAL_SIGNATURE_XML_SIGNATURE_PART);
    }
    try {
        OutputStream os = sigPart.getOutputStream();
        try {
            SignatureDocument sigDoc = SignatureDocument.Factory.parse(document, DEFAULT_XML_OPTIONS);
            sigDoc.save(os, xo);
        } finally {
            // Close the stream even when parsing or saving fails, so it is never leaked.
            os.close();
        }
    } catch (Exception e) {
        throw new MarshalException("Unable to write signature document", e);
    }
    PackagePart sigsPart = pkg.getPart(sigsPartName);
    if (sigsPart == null) {
        // touch empty marker file
        sigsPart = pkg.createPart(sigsPartName, ContentTypes.DIGITAL_SIGNATURE_ORIGIN_PART);
    }
    // Drop existing origin relationships before adding the new one.
    PackageRelationshipCollection relCol = pkg.getRelationshipsByType(PackageRelationshipTypes.DIGITAL_SIGNATURE_ORIGIN);
    for (PackageRelationship pr : relCol) {
        pkg.removeRelationship(pr.getId());
    }
    pkg.addRelationship(sigsPartName, TargetMode.INTERNAL, PackageRelationshipTypes.DIGITAL_SIGNATURE_ORIGIN);
    sigsPart.addRelationship(sigPartName, TargetMode.INTERNAL, PackageRelationshipTypes.DIGITAL_SIGNATURE);
}
Also used : PackagePartName(org.apache.poi.openxml4j.opc.PackagePartName) MarshalException(javax.xml.crypto.MarshalException) SignatureDocument(org.w3.x2000.x09.xmldsig.SignatureDocument) HashMap(java.util.HashMap) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) XmlOptions(org.apache.xmlbeans.XmlOptions) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) XPathExpressionException(javax.xml.xpath.XPathExpressionException) GeneralSecurityException(java.security.GeneralSecurityException) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) SAXException(org.xml.sax.SAXException) MarshalException(javax.xml.crypto.MarshalException) XMLSignatureException(javax.xml.crypto.dsig.XMLSignatureException) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) XmlException(org.apache.xmlbeans.XmlException) EncryptedDocumentException(org.apache.poi.EncryptedDocumentException) PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) Map(java.util.Map) HashMap(java.util.HashMap) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage)

Example 14 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project poi by apache.

the class OOXMLSignatureFacet method addManifestReferences.

/**
 * Adds manifest references for the signed relationships of the package.
 *
 * For every relationships part of the package, each internal relationship
 * accepted by {@code isSignedRelationship} is registered with the relationship
 * transform, and its target part is added as a reference (at most once per
 * part name). A reference to the relationships part itself is added when at
 * least one of its relationships was registered.
 *
 * @param manifestReferences list the created references are appended to
 * @throws XMLSignatureException if a relationships part cannot be parsed, a
 *         target URI or part name is invalid, or a relationship points to a
 *         part that does not exist in the package
 */
@SuppressWarnings("resource")
protected void addManifestReferences(List<Reference> manifestReferences) throws XMLSignatureException {
    OPCPackage ooxml = signatureConfig.getOpcPackage();
    List<PackagePart> relsEntryNames = ooxml.getPartsByContentType(ContentTypes.RELATIONSHIPS_PART);
    Set<String> digestedPartNames = new HashSet<String>();
    for (PackagePart pp : relsEntryNames) {
        // Everything before "/_rels/" in the relationships part name.
        String baseUri = pp.getPartName().getName().replaceFirst("(.*)/_rels/.*", "$1");
        PackageRelationshipCollection prc;
        try {
            prc = new PackageRelationshipCollection(ooxml);
            prc.parseRelationshipsPart(pp);
        } catch (InvalidFormatException e) {
            throw new XMLSignatureException("Invalid relationship descriptor: " + pp.getPartName().getName(), e);
        }
        RelationshipTransformParameterSpec parameterSpec = new RelationshipTransformParameterSpec();
        for (PackageRelationship relationship : prc) {
            String relationshipType = relationship.getRelationshipType();
            /*
             * ECMA-376 Part 2 - 3rd edition
             * 13.2.4.16 Manifest Element
             * "The producer shall not create a Manifest element that references any data outside of the package."
             */
            if (TargetMode.EXTERNAL == relationship.getTargetMode()) {
                continue;
            }
            if (!isSignedRelationship(relationshipType)) {
                continue;
            }
            parameterSpec.addRelationshipReference(relationship.getId());
            // TODO: find a better way ...
            String partName = relationship.getTargetURI().toString();
            if (!partName.startsWith(baseUri)) {
                partName = baseUri + partName;
            }
            try {
                partName = new URI(partName).normalize().getPath().replace('\\', '/');
                LOG.log(POILogger.DEBUG, "part name: " + partName);
            } catch (URISyntaxException e) {
                throw new XMLSignatureException(e);
            }
            String contentType;
            try {
                PackagePartName relName = PackagingURIHelper.createPartName(partName);
                PackagePart pp2 = ooxml.getPart(relName);
                if (pp2 == null) {
                    // Report a dangling relationship through the declared exception
                    // type instead of failing with a NullPointerException.
                    throw new XMLSignatureException("Relationship target part not found: " + partName);
                }
                contentType = pp2.getContentType();
            } catch (InvalidFormatException e) {
                throw new XMLSignatureException(e);
            }
            if (relationshipType.endsWith("customXml") && !(contentType.equals("inkml+xml") || contentType.equals("text/xml"))) {
                LOG.log(POILogger.DEBUG, "skipping customXml with content type: " + contentType);
                continue;
            }
            if (!digestedPartNames.contains(partName)) {
                // We only digest a part once.
                String uri = partName + "?ContentType=" + contentType;
                Reference reference = newReference(uri, null, null, null, null);
                manifestReferences.add(reference);
                digestedPartNames.add(partName);
            }
        }
        if (parameterSpec.hasSourceIds()) {
            List<Transform> transforms = new ArrayList<Transform>();
            transforms.add(newTransform(RelationshipTransformService.TRANSFORM_URI, parameterSpec));
            transforms.add(newTransform(CanonicalizationMethod.INCLUSIVE));
            String uri = pp.getPartName().getName() + "?ContentType=application/vnd.openxmlformats-package.relationships+xml";
            Reference reference = newReference(uri, transforms, null, null, null);
            manifestReferences.add(reference);
        }
    }
}
Also used : PackagePartName(org.apache.poi.openxml4j.opc.PackagePartName) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) Reference(javax.xml.crypto.dsig.Reference) ArrayList(java.util.ArrayList) URISyntaxException(java.net.URISyntaxException) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) URI(java.net.URI) PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) RelationshipTransformParameterSpec(org.apache.poi.poifs.crypt.dsig.services.RelationshipTransformService.RelationshipTransformParameterSpec) Transform(javax.xml.crypto.dsig.Transform) OPCPackage(org.apache.poi.openxml4j.opc.OPCPackage) XMLSignatureException(javax.xml.crypto.dsig.XMLSignatureException) HashSet(java.util.HashSet)

Example 15 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project poi by apache.

the class ExtractorFactory method createExtractor.

/**
 * Tries to determine the actual type of file and produces a matching text-extractor for it.
 *
 * On any failure the package is reverted before the exception is rethrown.
 *
 * @param pkg An {@link OPCPackage}.
 * @return A {@link POIXMLTextExtractor} for the given file.
 * @throws IOException If an error occurs while reading the file
 * @throws OpenXML4JException If an error parsing the OpenXML file format is found.
 * @throws XmlException If an XML parsing error occurs.
 * @throws IllegalArgumentException If no matching file type could be found, the package
 *         does not have exactly one core document, or the core document part is missing.
 */
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
    try {
        // Check for the normal Office core document
        PackageRelationshipCollection core;
        core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
        // If nothing was found, try some of the other OOXML-based core types
        if (core.size() == 0) {
            // Could it be an OOXML-Strict one?
            core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
        }
        if (core.size() == 0) {
            // Could it be a visio one?
            core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
            if (core.size() == 1) {
                return new XDGFVisioExtractor(pkg);
            }
        }
        // Should just be a single core document, complain if not
        if (core.size() != 1) {
            throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
        }
        // Grab the core document part, and try to identify from that
        final PackagePart corePart = pkg.getPart(core.getRelationship(0));
        if (corePart == null) {
            // The relationship exists but its target part does not: fail with a
            // descriptive error rather than a bare NullPointerException.
            throw new IllegalArgumentException("Invalid OOXML Package received - core document part not found");
        }
        final String contentType = corePart.getContentType();
        // Is it XSSF?
        for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
            if (rel.getContentType().equals(contentType)) {
                if (getPreferEventExtractor()) {
                    return new XSSFEventBasedExcelExtractor(pkg);
                }
                return new XSSFExcelExtractor(pkg);
            }
        }
        // Is it XWPF?
        for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
            if (rel.getContentType().equals(contentType)) {
                return new XWPFWordExtractor(pkg);
            }
        }
        // Is it XSLF?
        for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
            if (rel.getContentType().equals(contentType)) {
                return new XSLFPowerPointExtractor(pkg);
            }
        }
        // special handling for SlideShow-Theme-files,
        if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
            return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
        }
        // How about xlsb?
        for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
            if (rel.getContentType().equals(contentType)) {
                return new XSSFBEventBasedExcelExtractor(pkg);
            }
        }
        throw new IllegalArgumentException("No supported documents found in the OOXML package (found " + contentType + ")");
    } catch (IOException e) {
        // ensure that we close the package again if there is an error opening it, however
        // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
        pkg.revert();
        throw e;
    } catch (OpenXML4JException e) {
        // same clean-up as above: revert instead of close() so the file is not re-written
        pkg.revert();
        throw e;
    } catch (XmlException e) {
        // same clean-up as above: revert instead of close() so the file is not re-written
        pkg.revert();
        throw e;
    } catch (RuntimeException e) {
        // same clean-up as above; this also covers the IllegalArgumentExceptions thrown in this method
        pkg.revert();
        throw e;
    }
}
Also used : XSSFRelation(org.apache.poi.xssf.usermodel.XSSFRelation) XDGFVisioExtractor(org.apache.poi.xdgf.extractor.XDGFVisioExtractor) XSSFBEventBasedExcelExtractor(org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) XSSFExcelExtractor(org.apache.poi.xssf.extractor.XSSFExcelExtractor) XWPFWordExtractor(org.apache.poi.xwpf.extractor.XWPFWordExtractor) IOException(java.io.IOException) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) XSLFSlideShow(org.apache.poi.xslf.usermodel.XSLFSlideShow) XWPFRelation(org.apache.poi.xwpf.usermodel.XWPFRelation) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XSSFEventBasedExcelExtractor(org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor) XSLFPowerPointExtractor(org.apache.poi.xslf.extractor.XSLFPowerPointExtractor) XmlException(org.apache.xmlbeans.XmlException) XSLFRelation(org.apache.poi.xslf.usermodel.XSLFRelation)

Aggregations

PackageRelationshipCollection (org.apache.poi.openxml4j.opc.PackageRelationshipCollection)29 PackagePart (org.apache.poi.openxml4j.opc.PackagePart)23 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)15 PackageRelationship (org.apache.poi.openxml4j.opc.PackageRelationship)14 IOException (java.io.IOException)8 XmlException (org.apache.xmlbeans.XmlException)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)4 OPCPackage (org.apache.poi.openxml4j.opc.OPCPackage)4 PackagePartName (org.apache.poi.openxml4j.opc.PackagePartName)4 XWPFRelation (org.apache.poi.xwpf.usermodel.XWPFRelation)4 TikaException (org.apache.tika.exception.TikaException)4 SAXException (org.xml.sax.SAXException)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 File (java.io.File)2 OutputStream (java.io.OutputStream)2 URI (java.net.URI)2 ZipException (java.util.zip.ZipException)2 XMLSignatureException (javax.xml.crypto.dsig.XMLSignatureException)2