Search in sources :

Example 16 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project poi by apache.

the class POIXMLDocumentPart method read.

/**
 * Walks the relationships of the underlying PackagePart and, for every
 * internally targeted part that is not yet present in the context, creates a
 * child POIXMLDocumentPart through the given factory. Children created by this
 * call are read recursively once all relationships of this part were scanned.
 *
 * @param factory   the factory object that creates the child POIXMLDocumentPart instances
 * @param context   context map containing the already visited nodes, keyed by their PackagePart;
 *                  this part and every newly created child are added to it
 *
 * @throws OpenXML4JException thrown when a related part can't be read
 */
protected void read(POIXMLFactory factory, Map<PackagePart, POIXMLDocumentPart> context) throws OpenXML4JException {
    final PackagePart ownPart = getPackagePart();

    // register this part (again), in case the initial caller hasn't done so
    final POIXMLDocumentPart previous = context.put(ownPart, this);
    if (previous != null && previous != this) {
        throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!");
    }

    if (!ownPart.hasRelationships()) {
        return;
    }

    final PackageRelationshipCollection relationships = packagePart.getRelationships();
    // children are collected first and read afterwards, so the scan is
    // breadth-first and parent-relations are hopefully the shallowest element
    final List<POIXMLDocumentPart> pending = new ArrayList<POIXMLDocumentPart>();

    for (PackageRelationship relationship : relationships) {
        if (relationship.getTargetMode() != TargetMode.INTERNAL) {
            continue;
        }

        final URI target = relationship.getTargetURI();
        // a target carrying a fragment (e.g. '#Sheet1!A1') is an internal
        // reference; only its path identifies the part
        final PackagePartName targetName = (target.getRawFragment() == null)
                ? PackagingURIHelper.createPartName(target)
                : PackagingURIHelper.createPartName(target.getPath());

        final PackagePart targetPart = packagePart.getPackage().getPart(targetName);
        if (targetPart == null) {
            logger.log(POILogger.ERROR, "Skipped invalid entry " + relationship.getTargetURI());
            continue;
        }

        POIXMLDocumentPart child = context.get(targetPart);
        if (child == null) {
            child = factory.createDocumentPart(this, targetPart);
            child.parent = this;
            // publish the child right away, so other children can reference it
            context.put(targetPart, child);
            pending.add(child);
        }
        addRelation(relationship, child);
    }

    for (POIXMLDocumentPart child : pending) {
        child.read(factory, context);
    }
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) PackagePartName(org.apache.poi.openxml4j.opc.PackagePartName) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) ArrayList(java.util.ArrayList) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) URI(java.net.URI)

Example 17 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project poi by apache.

the class POIXMLDocumentPart method rebase.

/**
 * When you open something like a theme, call this to re-base the XML
 * Document onto the core child of the current core document: the package
 * part of this object is replaced by the part related through the single
 * relationship of type {@code coreDocumentRel}.
 *
 * @param pkg the package to be rebased (not read by this implementation)
 *
 * @throws InvalidFormatException if there was an error in the core document relation
 * @throws IllegalStateException if there is not exactly one core document relation
 */
protected final void rebase(OPCPackage pkg) throws InvalidFormatException {
    final PackageRelationshipCollection coreRelations = packagePart.getRelationshipsByType(coreDocumentRel);
    final int found = coreRelations.size();
    if (found == 1) {
        packagePart = packagePart.getRelatedPart(coreRelations.getRelationship(0));
        return;
    }
    throw new IllegalStateException("Tried to rebase using " + coreDocumentRel + " but found " + found + " parts of the right type");
}
Also used : PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection)

Example 18 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project tika by apache.

the class AbstractOOXMLExtractor method loadLinkedRelationships.

/**
 * This is used by the SAX docx and pptx decorators to load hyperlinks and
 * other linked objects.
 * <p>
 * Hyperlink relationships are stored as id -&gt; target URI string. For every
 * relationship type listed in {@code EMBEDDED_RELATIONSHIPS} the entry is
 * id -&gt; file name derived from the target URI (the raw URI string if the
 * file name cannot be derived, the empty string if there is no target URI).
 * An {@link InvalidFormatException} raised while reading the relationships is
 * recorded on the metadata and the entries collected so far are returned.
 *
 * @param bodyPart        the part whose relationships are read
 * @param includeInternal if {@code false}, hyperlinks with an internal target mode are left out
 * @param metadata        metadata object on which an InvalidFormatException is recorded
 * @return map from relationship id to link target or file name; never {@code null}
 */
protected Map<String, String> loadLinkedRelationships(PackagePart bodyPart, boolean includeInternal, Metadata metadata) {
    final Map<String, String> result = new HashMap<>();
    try {
        // pass 1: hyperlinks, keyed by relationship id
        final PackageRelationshipCollection hyperlinks =
                bodyPart.getRelationshipsByType(XWPFRelation.HYPERLINK.getRelation());
        for (int idx = 0; idx < hyperlinks.size(); idx++) {
            final PackageRelationship link = hyperlinks.getRelationship(idx);
            if (link == null) {
                continue;
            }
            final boolean wanted = includeInternal || !TargetMode.INTERNAL.equals(link.getTargetMode());
            if (!wanted) {
                continue;
            }
            final String linkId = link.getId();
            String target = null;
            if (link.getTargetURI() != null) {
                target = link.getTargetURI().toString();
            }
            if (linkId != null && target != null) {
                result.put(linkId, target);
            }
        }

        // pass 2: embedded/linked objects, mapped to the file name of their target
        for (String relationType : EMBEDDED_RELATIONSHIPS) {
            final PackageRelationshipCollection embedded = bodyPart.getRelationshipsByType(relationType);
            for (int idx = 0; idx < embedded.size(); idx++) {
                final PackageRelationship relation = embedded.getRelationship(idx);
                if (relation == null) {
                    continue;
                }
                final String relationId = relation.getId();
                String name = null;
                if (relation.getTargetURI() != null) {
                    final String rawTarget = relation.getTargetURI().toString();
                    try {
                        name = FileHelper.getFilename(new File(rawTarget));
                    } catch (Exception e) {
                        // best effort: fall back to the raw URI string
                        name = rawTarget;
                    }
                }
                if (relationId != null) {
                    result.put(relationId, (name != null) ? name : "");
                }
            }
        }
    } catch (InvalidFormatException e) {
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
    }
    return result;
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) HashMap(java.util.HashMap) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) File(java.io.File) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) Ole10NativeException(org.apache.poi.poifs.filesystem.Ole10NativeException) TikaException(org.apache.tika.exception.TikaException) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) XmlException(org.apache.xmlbeans.XmlException) SAXException(org.xml.sax.SAXException)

Example 19 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project tika by apache.

the class OOXMLExtractorFactory method trySXWPF.

/**
 * Tries to create the event-based Word extractor for the given package.
 * <p>
 * The core document is located through the package relationship of the
 * "officeDocument" type; if the schemas.openxmlformats.org form of that type
 * yields nothing, the purl.oclc.org form is tried as well.
 *
 * @param pkg the package to inspect
 * @return a new {@link XWPFEventBasedWordExtractor} if the content type of the
 *         core part matches one of {@code XWPFWordExtractor.SUPPORTED_TYPES};
 *         {@code null} if there is no core document relationship, if the
 *         related part is missing from the package, or if the content type
 *         is not supported
 */
private static POIXMLTextExtractor trySXWPF(OPCPackage pkg) throws XmlException, OpenXML4JException, IOException {
    PackageRelationshipCollection packageRelationshipCollection = pkg.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument");
    if (packageRelationshipCollection.size() == 0) {
        packageRelationshipCollection = pkg.getRelationshipsByType("http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument");
    }
    if (packageRelationshipCollection.size() == 0) {
        return null;
    }
    PackagePart corePart = pkg.getPart(packageRelationshipCollection.getRelationship(0));
    if (corePart == null) {
        // The relationship exists but its target part could not be resolved
        // (e.g. a truncated or malformed package). Report "not handled"
        // instead of failing with a NullPointerException below.
        return null;
    }
    String targetContentType = corePart.getContentType();
    for (XWPFRelation relation : XWPFWordExtractor.SUPPORTED_TYPES) {
        if (targetContentType.equals(relation.getContentType())) {
            return new XWPFEventBasedWordExtractor(pkg);
        }
    }
    return null;
}
Also used : XWPFRelation(org.apache.poi.xwpf.usermodel.XWPFRelation) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) XWPFEventBasedWordExtractor(org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor)

Example 20 with PackageRelationshipCollection

use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project tika by apache.

the class SXSLFPowerPointExtractorDecorator method buildXHTML.

/**
 * Emits the presentation content to the given handler: first every slide
 * related to the main document, then the slide masters and finally the
 * handout masters. Format problems while resolving or handling a slide are
 * recorded as warnings on the metadata and do not stop the processing.
 *
 * @param xhtml the handler receiving the generated XHTML
 * @see XSLFPowerPointExtractor#getText()
 */
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException {
    loadCommentAuthors();

    PackageRelationshipCollection slideRelations;
    try {
        slideRelations = mainDocument.getRelationshipsByType(XSLFRelation.SLIDE.getRelation());
    } catch (InvalidFormatException e) {
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
        slideRelations = null;
    }

    // an empty collection simply yields zero iterations
    if (slideRelations != null) {
        for (int slideIdx = 0; slideIdx < slideRelations.size(); slideIdx++) {
            try {
                final PackagePart slidePart = mainDocument.getRelatedPart(slideRelations.getRelationship(slideIdx));
                handleSlidePart(slidePart, xhtml);
            } catch (InvalidFormatException | ZipException e) {
                metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
            }
        }
    }

    // slide masters: text handler wrapped in a PlaceHolderSkipper
    handleBasicRelatedParts(
            XSLFRelation.SLIDE_MASTER.getRelation(),
            "slide-master",
            mainDocument,
            new PlaceHolderSkipper(
                    new OOXMLWordAndPowerPointTextHandler(
                            new OOXMLTikaBodyPartHandler(xhtml),
                            new HashMap<String, String>())));

    // handout masters: plain text handler
    handleBasicRelatedParts(
            HANDOUT_MASTER,
            "slide-handout-master",
            mainDocument,
            new OOXMLWordAndPowerPointTextHandler(
                    new OOXMLTikaBodyPartHandler(xhtml),
                    new HashMap<String, String>()));
}
Also used : PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) HashMap(java.util.HashMap) ZipException(java.util.zip.ZipException) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException)

Aggregations

PackageRelationshipCollection (org.apache.poi.openxml4j.opc.PackageRelationshipCollection)29 PackagePart (org.apache.poi.openxml4j.opc.PackagePart)23 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)15 PackageRelationship (org.apache.poi.openxml4j.opc.PackageRelationship)14 IOException (java.io.IOException)8 XmlException (org.apache.xmlbeans.XmlException)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)4 OPCPackage (org.apache.poi.openxml4j.opc.OPCPackage)4 PackagePartName (org.apache.poi.openxml4j.opc.PackagePartName)4 XWPFRelation (org.apache.poi.xwpf.usermodel.XWPFRelation)4 TikaException (org.apache.tika.exception.TikaException)4 SAXException (org.xml.sax.SAXException)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 File (java.io.File)2 OutputStream (java.io.OutputStream)2 URI (java.net.URI)2 ZipException (java.util.zip.ZipException)2 XMLSignatureException (javax.xml.crypto.dsig.XMLSignatureException)2