Search in sources :

Example 36 with PackageRelationship

use of org.apache.poi.openxml4j.opc.PackageRelationship in project tika by apache.

the class XSSFBExcelExtractorDecorator method getMainDocumentParts.

/**
 * Collects the package parts that should be treated as the main document
 * parts of this workbook.
 * <p>
 * In Excel files, sheets have things embedded in them, and sheet drawings
 * (regular and VML) have the images. The result therefore contains, for
 * every entry of {@code sheetParts}, the sheet itself followed by the
 * internal drawing parts and then the internal VML drawing parts it refers
 * to. Parts reached through {@code RELATION_OFFICE_DOCUMENT} are appended
 * at the end.
 * <p>
 * A drawing relationship whose target part cannot be found in the package
 * is skipped instead of contributing a {@code null} entry; the same
 * situation is tolerated when drawing hyperlinks are collected (TIKA-2134).
 *
 * @return the sheet parts, their resolvable internal drawing parts and the
 *         office-document parts, in that order
 * @throws TikaException if the drawing relationships of a sheet cannot be
 *                       read or a relationship target is not a valid part name
 */
@Override
protected List<PackagePart> getMainDocumentParts() throws TikaException {
    List<PackagePart> parts = new ArrayList<>();
    // Both kinds of drawing are resolved identically; order matches the
    // original: regular drawings first, then VML drawings.
    String[] drawingRelationTypes = {
            XSSFRelation.DRAWINGS.getRelation(),
            XSSFRelation.VML_DRAWINGS.getRelation()
    };
    for (PackagePart part : sheetParts) {
        // Add the sheet
        parts.add(part);
        // If it has drawings, return those too
        try {
            for (String relationType : drawingRelationTypes) {
                for (PackageRelationship rel : part.getRelationshipsByType(relationType)) {
                    if (rel.getTargetMode() != TargetMode.INTERNAL) {
                        continue;
                    }
                    PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
                    PackagePart drawingPart = rel.getPackage().getPart(relName);
                    // Parts can go missing; do not hand a null entry to callers
                    // that iterate over the returned list.
                    if (drawingPart != null) {
                        parts.add(drawingPart);
                    }
                }
            }
        } catch (InvalidFormatException e) {
            throw new TikaException("Broken OOXML file", e);
        }
    }
    // Also include the office-document part(s); per the original comment
    // these are consumed by AbstractOOXMLExtractor.
    parts.addAll(extractor.getPackage().getPartsByRelationshipType(RELATION_OFFICE_DOCUMENT));
    return parts;
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) PackagePartName(org.apache.poi.openxml4j.opc.PackagePartName) TikaException(org.apache.tika.exception.TikaException) ArrayList(java.util.ArrayList) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException)

Example 37 with PackageRelationship

use of org.apache.poi.openxml4j.opc.PackageRelationship in project tika by apache.

the class XSSFExcelExtractorDecorator method addDrawingHyperLinks.

/**
 * Records the hyperlink relationships of every internal drawing part that
 * the given sheet refers to. Each hyperlink is stored in
 * {@code drawingHyperlinks}, keyed by relationship id, with the target URI
 * rendered as a string.
 * <p>
 * This is best effort: an {@link InvalidFormatException} ends the
 * collection quietly, keeping whatever was gathered before it.
 *
 * @param sheetPart the sheet whose drawings should be inspected
 */
protected void addDrawingHyperLinks(PackagePart sheetPart) {
    try {
        for (PackageRelationship drawingRel : sheetPart.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation())) {
            if (rel_isNotInternal(drawingRel.getTargetMode())) {
                continue;
            }
            PackagePartName drawingName = PackagingURIHelper.createPartName(drawingRel.getTargetURI());
            PackagePart drawingPart = drawingRel.getPackage().getPart(drawingName);
            if (drawingPart == null) {
                // parts can go missing, and Excel quietly ignores missing images -- TIKA-2134
                continue;
            }
            for (PackageRelationship linkRel : drawingPart.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation())) {
                drawingHyperlinks.put(linkRel.getId(), linkRel.getTargetURI().toString());
            }
        }
    } catch (InvalidFormatException ignored) {
        // Deliberately ignored: a failure while extracting hyperlinks on
        // drawings should not cause a parse failure.
    }
}

/** Tells whether a relationship target mode is anything other than internal. */
private static boolean rel_isNotInternal(TargetMode mode) {
    return mode != TargetMode.INTERNAL;
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) PackagePartName(org.apache.poi.openxml4j.opc.PackagePartName) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException)

Example 38 with PackageRelationship

use of org.apache.poi.openxml4j.opc.PackageRelationship in project tika by apache.

the class XWPFEventBasedWordExtractor method loadHyperlinkRelationships.

/**
 * Builds a lookup from relationship id to target URL for the hyperlink
 * relationships of a document body part.
 * <p>
 * Relationships that are {@code null}, or that lack an id or a target URI,
 * are left out. If the relationships cannot be read, a warning is logged
 * and the entries gathered so far (possibly none) are returned.
 *
 * @param bodyPart the part whose hyperlink relationships are read
 * @return a mutable map of relationship id to target URL; never {@code null}
 */
private Map<String, String> loadHyperlinkRelationships(PackagePart bodyPart) {
    Map<String, String> urlsById = new HashMap<>();
    try {
        PackageRelationshipCollection hyperlinkRels = bodyPart.getRelationshipsByType(XWPFRelation.HYPERLINK.getRelation());
        for (int idx = 0; idx < hyperlinkRels.size(); idx++) {
            PackageRelationship rel = hyperlinkRels.getRelationship(idx);
            if (rel == null) {
                continue;
            }
            String relId = rel.getId();
            String target = null;
            if (rel.getTargetURI() != null) {
                target = rel.getTargetURI().toString();
            }
            if (relId == null || target == null) {
                // Incomplete relationship: nothing usable to record.
                continue;
            }
            urlsById.put(relId, target);
        }
    } catch (InvalidFormatException e) {
        LOG.warn("Invalid format", e);
    }
    return urlsById;
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) HashMap(java.util.HashMap) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException)

Example 39 with PackageRelationship

use of org.apache.poi.openxml4j.opc.PackageRelationship in project tika by apache.

the class SXWPFWordExtractorDecorator method loadNumbering.

/**
 * Loads the numbering definitions reachable from the given part.
 * <p>
 * Only the first numbering relationship is considered. The result is
 * {@code null} when there is no such relationship, when the relationship
 * or its target part is missing, or when reading fails with an
 * {@link IOException} or {@link OpenXML4JException}.
 *
 * @param packagePart the part whose numbering relationship is followed
 * @return a numbering shim over the related part, or {@code null}
 */
private XWPFNumbering loadNumbering(PackagePart packagePart) {
    try {
        PackageRelationshipCollection numberingRels = packagePart.getRelationshipsByType(XWPFRelation.NUMBERING.getRelation());
        if (numberingRels.size() <= 0) {
            return null;
        }
        PackageRelationship firstRel = numberingRels.getRelationship(0);
        if (firstRel == null) {
            return null;
        }
        PackagePart target = packagePart.getRelatedPart(firstRel);
        if (target == null) {
            return null;
        }
        return new XWPFNumberingShim(target);
    } catch (IOException | OpenXML4JException ignored) {
        // Deliberately ignored: falls through to the null result below.
    }
    return null;
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) IOException(java.io.IOException) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) XWPFNumberingShim(org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFNumberingShim)

Example 40 with PackageRelationship

use of org.apache.poi.openxml4j.opc.PackageRelationship in project tika by apache.

the class SXWPFWordExtractorDecorator method loadStyles.

/**
 * Loads the style definitions reachable from the given part.
 * <p>
 * Only the first styles relationship is considered. The result is
 * {@code null} when there is no such relationship, or when the
 * relationship or its target part is missing. Failures are not caught
 * here; they propagate to the caller.
 *
 * @param packagePart the part whose styles relationship is followed
 * @return a styles shim built from the related part and the current
 *         {@code context}, or {@code null}
 */
private XWPFStylesShim loadStyles(PackagePart packagePart) throws InvalidFormatException, TikaException, IOException, SAXException {
    PackageRelationshipCollection styleRels = packagePart.getRelationshipsByType(XWPFRelation.STYLES.getRelation());
    if (styleRels.size() <= 0) {
        return null;
    }
    PackageRelationship firstRel = styleRels.getRelationship(0);
    if (firstRel == null) {
        return null;
    }
    PackagePart target = packagePart.getRelatedPart(firstRel);
    return (target == null) ? null : new XWPFStylesShim(target, context);
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) XWPFStylesShim(org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFStylesShim) PackagePart(org.apache.poi.openxml4j.opc.PackagePart)

Aggregations

PackageRelationship (org.apache.poi.openxml4j.opc.PackageRelationship)50 PackagePart (org.apache.poi.openxml4j.opc.PackagePart)28 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)21 PackageRelationshipCollection (org.apache.poi.openxml4j.opc.PackageRelationshipCollection)15 PackagePartName (org.apache.poi.openxml4j.opc.PackagePartName)13 IOException (java.io.IOException)11 POIXMLException (org.apache.poi.POIXMLException)8 OPCPackage (org.apache.poi.openxml4j.opc.OPCPackage)5 TikaException (org.apache.tika.exception.TikaException)5 ArrayList (java.util.ArrayList)4 XmlException (org.apache.xmlbeans.XmlException)4 Test (org.junit.Test)4 InputStream (java.io.InputStream)3 URI (java.net.URI)3 HashMap (java.util.HashMap)3 OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)3 SAXException (org.xml.sax.SAXException)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 File (java.io.File)2 FileNotFoundException (java.io.FileNotFoundException)2