Search in sources :

Example 1 with XWPFNumbering

use of org.apache.poi.xwpf.usermodel.XWPFNumbering in project tika by apache.

The method loadNumbering of the class XWPFEventBasedWordExtractor.

/**
 * Loads the numbering definitions referenced by the given package part.
 *
 * <p>Looks up the relationships of type {@code XWPFRelation.NUMBERING} on
 * {@code packagePart}, resolves the first one through {@code container} and wraps
 * the resulting part in an {@link XWPFNumbering}.
 *
 * @param packagePart the part whose numbering relationship is looked up
 * @return the numbering, or {@code null} when no numbering relationship exists, when the
 *         relationship or its target part cannot be resolved, or when loading fails with
 *         an {@code IOException} or {@code OpenXML4JException} (which is logged as a warning)
 */
private XWPFNumbering loadNumbering(PackagePart packagePart) {
    try {
        PackageRelationshipCollection numberingParts = packagePart.getRelationshipsByType(XWPFRelation.NUMBERING.getRelation());
        // Null-check the collection before using it, like the other relationship lookups do.
        if (numberingParts == null || numberingParts.size() == 0) {
            return null;
        }
        // Only the first numbering relationship is considered.
        PackageRelationship numberingRelationShip = numberingParts.getRelationship(0);
        if (numberingRelationShip == null) {
            return null;
        }
        PackagePart numberingPart = container.getPart(numberingRelationShip);
        if (numberingPart == null) {
            return null;
        }
        return new XWPFNumbering(numberingPart);
    } catch (IOException | OpenXML4JException e) {
        // Best effort: a missing or broken numbering part must not abort extraction.
        LOG.warn("Couldn't load numbering", e);
    }
    return null;
}
Also used : PackageRelationship(org.apache.poi.openxml4j.opc.PackageRelationship) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) XWPFNumbering(org.apache.poi.xwpf.usermodel.XWPFNumbering) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) IOException(java.io.IOException) PackagePart(org.apache.poi.openxml4j.opc.PackagePart)

Example 2 with XWPFNumbering

use of org.apache.poi.xwpf.usermodel.XWPFNumbering in project tika by apache.

The method handleDocumentPart of the class XWPFEventBasedWordExtractor.

/**
 * Processes the main document part together with the parts related to it.
 *
 * <p>Parts are passed to {@code handlePart} in this order: every header, the main
 * document itself, then footnotes, comments, footers and endnotes. All of them share
 * one {@link XWPFListManager} built from the numbering of the main document part.
 * An {@code InvalidFormatException} raised while looking up or resolving related
 * parts is logged as a warning and processing continues with the next group.
 *
 * @param documentPart the main document part
 * @param sb           the builder handed to {@code handlePart} for every part
 * @throws IOException  not caught here; presumably raised by {@code handlePart}
 * @throws SAXException not caught here; presumably raised by {@code handlePart}
 */
private void handleDocumentPart(PackagePart documentPart, StringBuilder sb) throws IOException, SAXException {
    // one list manager, backed by the main part's numbering, is shared by all parts
    XWPFListManager listManager = new XWPFListManager(loadNumbering(documentPart));

    // 1) headers
    try {
        PackageRelationshipCollection headerRels = documentPart.getRelationshipsByType(XWPFRelation.HEADER.getRelation());
        if (headerRels != null) {
            int idx = 0;
            while (idx < headerRels.size()) {
                PackagePart headerPart = documentPart.getRelatedPart(headerRels.getRelationship(idx));
                handlePart(headerPart, listManager, sb);
                idx++;
            }
        }
    } catch (InvalidFormatException e) {
        LOG.warn("Invalid format", e);
    }

    // 2) the main document body
    handlePart(documentPart, listManager, sb);

    // 3) remaining components are, for now, simply appended at the end
    XWPFRelation[] trailingRelations = {
        XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE
    };
    for (XWPFRelation relation : trailingRelations) {
        try {
            PackageRelationshipCollection related = documentPart.getRelationshipsByType(relation.getRelation());
            if (related == null) {
                continue;
            }
            for (int idx = 0; idx < related.size(); idx++) {
                PackagePart relatedPart = documentPart.getRelatedPart(related.getRelationship(idx));
                handlePart(relatedPart, listManager, sb);
            }
        } catch (InvalidFormatException e) {
            LOG.warn("Invalid format", e);
        }
    }
}
Also used : XWPFRelation(org.apache.poi.xwpf.usermodel.XWPFRelation) XWPFNumbering(org.apache.poi.xwpf.usermodel.XWPFNumbering) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) XWPFListManager(org.apache.tika.parser.microsoft.ooxml.XWPFListManager) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException)

Example 3 with XWPFNumbering

use of org.apache.poi.xwpf.usermodel.XWPFNumbering in project tika by apache.

The method handleDocumentPart of the class SXWPFWordExtractorDecorator.

/**
 * Processes the main document part together with the parts related to it.
 *
 * <p>Parts are passed to {@code handlePart} in this order: every header, the main
 * document itself, then footnotes, comments, footers and endnotes. All of them share
 * one {@link XWPFListManager} built from the numbering of the main document part and
 * the styles loaded from it. If loading the styles fails, the styles stay {@code null}.
 *
 * <p>Failures are recorded rather than propagated where the code catches them: the
 * stack trace is added to {@code metadata} under
 * {@code TikaCoreProperties.TIKA_META_EXCEPTION_WARNING} and processing continues
 * with the next group of parts.
 *
 * @param documentPart the main document part
 * @param xhtml        the handler passed to {@code handlePart} for every part
 * @throws IOException  for I/O failures other than the {@code ZipException}s caught here
 * @throws SAXException not caught here; presumably raised by {@code handlePart}
 */
private void handleDocumentPart(PackagePart documentPart, XHTMLContentHandler xhtml) throws IOException, SAXException {
    // numbering-backed list manager shared by every part handled below
    XWPFListManager listManager = new XWPFListManager(loadNumbering(documentPart));

    // styles are optional: any failure is noted in the metadata and null is used
    XWPFStylesShim styles = null;
    try {
        styles = loadStyles(documentPart);
    } catch (Exception e) {
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
    }

    // 1) headers
    try {
        PackageRelationshipCollection headerRels = documentPart.getRelationshipsByType(XWPFRelation.HEADER.getRelation());
        if (headerRels != null) {
            int idx = 0;
            while (idx < headerRels.size()) {
                PackagePart headerPart = documentPart.getRelatedPart(headerRels.getRelationship(idx));
                handlePart(headerPart, styles, listManager, xhtml);
                idx++;
            }
        }
    } catch (InvalidFormatException | ZipException e) {
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
    }

    // 2) the main document body
    try {
        handlePart(documentPart, styles, listManager, xhtml);
    } catch (ZipException e) {
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
    }

    // 3) remaining components are, for now, simply appended at the end
    XWPFRelation[] trailingRelations = {
        XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE
    };
    for (XWPFRelation relation : trailingRelations) {
        try {
            PackageRelationshipCollection related = documentPart.getRelationshipsByType(relation.getRelation());
            if (related == null) {
                continue;
            }
            for (int idx = 0; idx < related.size(); idx++) {
                PackagePart relatedPart = documentPart.getRelatedPart(related.getRelationship(idx));
                handlePart(relatedPart, styles, listManager, xhtml);
            }
        } catch (InvalidFormatException | ZipException e) {
            metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
        }
    }
}
Also used : XWPFRelation(org.apache.poi.xwpf.usermodel.XWPFRelation) XWPFNumbering(org.apache.poi.xwpf.usermodel.XWPFNumbering) PackageRelationshipCollection(org.apache.poi.openxml4j.opc.PackageRelationshipCollection) XWPFStylesShim(org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFStylesShim) ZipException(java.util.zip.ZipException) PackagePart(org.apache.poi.openxml4j.opc.PackagePart) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) TikaException(org.apache.tika.exception.TikaException) ZipException(java.util.zip.ZipException) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) IOException(java.io.IOException) XmlException(org.apache.xmlbeans.XmlException) SAXException(org.xml.sax.SAXException)

Aggregations

PackagePart (org.apache.poi.openxml4j.opc.PackagePart)3 PackageRelationshipCollection (org.apache.poi.openxml4j.opc.PackageRelationshipCollection)3 XWPFNumbering (org.apache.poi.xwpf.usermodel.XWPFNumbering)3 IOException (java.io.IOException)2 InvalidFormatException (org.apache.poi.openxml4j.exceptions.InvalidFormatException)2 OpenXML4JException (org.apache.poi.openxml4j.exceptions.OpenXML4JException)2 XWPFRelation (org.apache.poi.xwpf.usermodel.XWPFRelation)2 ZipException (java.util.zip.ZipException)1 PackageRelationship (org.apache.poi.openxml4j.opc.PackageRelationship)1 TikaException (org.apache.tika.exception.TikaException)1 XWPFListManager (org.apache.tika.parser.microsoft.ooxml.XWPFListManager)1 XWPFStylesShim (org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFStylesShim)1 XmlException (org.apache.xmlbeans.XmlException)1 SAXException (org.xml.sax.SAXException)1