use of org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFStylesShim in project tika by apache.
the class SXWPFWordExtractorDecorator method handleDocumentPart.
/**
 * Writes the main document part and the parts related to it to {@code xhtml}.
 * <p>
 * Order of processing: headers, then the main document, then footnotes,
 * comments, footers and endnotes. Styles are loaded on a best-effort basis:
 * if {@link #loadStyles(PackagePart)} throws, the stack trace is added to
 * {@code metadata} under {@code TIKA_META_EXCEPTION_WARNING} and processing
 * continues with {@code styles == null}.
 *
 * @param documentPart the main document part
 * @param xhtml handler passed through to {@code handlePart}
 * @throws IOException if thrown by a callee and not recorded as a warning here
 * @throws SAXException if thrown by a callee
 */
private void handleDocumentPart(PackagePart documentPart, XHTMLContentHandler xhtml) throws IOException, SAXException {
    //load the numbering/list manager and styles from the main document part
    XWPFNumbering numbering = loadNumbering(documentPart);
    XWPFListManager listManager = new XWPFListManager(numbering);
    XWPFStylesShim styles = null;
    try {
        styles = loadStyles(documentPart);
    } catch (Exception e) {
        //deliberately broad: a failure to load styles must not stop extraction
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
    }
    //headers
    handleRelatedParts(documentPart, XWPFRelation.HEADER, styles, listManager, xhtml);
    //main document
    try {
        handlePart(documentPart, styles, listManager, xhtml);
    } catch (ZipException e) {
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
    }
    //for now, just dump other components at end
    for (XWPFRelation rel : new XWPFRelation[] { XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE }) {
        handleRelatedParts(documentPart, rel, styles, listManager, xhtml);
    }
}

/**
 * Calls {@code handlePart} on every part that {@code documentPart} references
 * through relationships of the given type, in relationship-index order.
 * <p>
 * An {@link InvalidFormatException} or {@link ZipException} raised while
 * looking up, resolving or handling a part is recorded in {@code metadata}
 * under {@code TIKA_META_EXCEPTION_WARNING}; the remaining parts of this
 * relation type are then skipped, but the caller continues.
 *
 * @param documentPart the part whose relationships are followed
 * @param relation the relation type to follow
 * @param styles styles passed through to {@code handlePart}; may be null
 * @param listManager list manager passed through to {@code handlePart}
 * @param xhtml handler passed through to {@code handlePart}
 * @throws IOException if {@code handlePart} throws one that is not a {@link ZipException}
 * @throws SAXException if {@code handlePart} throws it
 */
private void handleRelatedParts(PackagePart documentPart, XWPFRelation relation, XWPFStylesShim styles, XWPFListManager listManager, XHTMLContentHandler xhtml) throws IOException, SAXException {
    try {
        PackageRelationshipCollection prc = documentPart.getRelationshipsByType(relation.getRelation());
        if (prc != null) {
            for (int i = 0; i < prc.size(); i++) {
                PackagePart packagePart = documentPart.getRelatedPart(prc.getRelationship(i));
                handlePart(packagePart, styles, listManager, xhtml);
            }
        }
    } catch (InvalidFormatException | ZipException e) {
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
    }
}
use of org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFStylesShim in project tika by apache.
the class SXWPFWordExtractorDecorator method loadStyles.
/**
 * Builds an {@link XWPFStylesShim} from the first styles part related to
 * {@code packagePart}.
 *
 * @param packagePart the part whose styles relationship is followed
 * @return the styles shim, or {@code null} when there is no styles
 *         relationship or the related part cannot be resolved
 * @throws InvalidFormatException if thrown while looking up or resolving the relationship
 * @throws TikaException declared for the {@code XWPFStylesShim} constructor
 * @throws IOException declared for the {@code XWPFStylesShim} constructor
 * @throws SAXException declared for the {@code XWPFStylesShim} constructor
 */
private XWPFStylesShim loadStyles(PackagePart packagePart) throws InvalidFormatException, TikaException, IOException, SAXException {
    PackageRelationshipCollection stylesParts = packagePart.getRelationshipsByType(XWPFRelation.STYLES.getRelation());
    //the other relationship lookups in this class treat the collection as
    //possibly null; do the same here instead of failing on size()
    if (stylesParts == null || stylesParts.size() == 0) {
        return null;
    }
    //only the first styles relationship is used
    PackageRelationship stylesRelationShip = stylesParts.getRelationship(0);
    if (stylesRelationShip == null) {
        return null;
    }
    PackagePart stylesPart = packagePart.getRelatedPart(stylesRelationShip);
    if (stylesPart == null) {
        return null;
    }
    return new XWPFStylesShim(stylesPart, context);
}
Aggregations