use of org.apache.poi.openxml4j.opc.PackagePart in project tika by apache.
the class XWPFEventBasedWordExtractor method loadNumbering.
/**
 * Builds the numbering definitions reachable from {@code packagePart} through its
 * first numbering relationship.
 *
 * @param packagePart part whose numbering relationships are inspected
 * @return the numbering built from the related part, or {@code null} when there is no
 *         numbering relationship, the relationship or its target part is missing, or
 *         loading fails (the failure is logged as a warning)
 */
private XWPFNumbering loadNumbering(PackagePart packagePart) {
    try {
        PackageRelationshipCollection relationships =
                packagePart.getRelationshipsByType(XWPFRelation.NUMBERING.getRelation());
        if (relationships.size() <= 0) {
            return null;
        }
        // only the first numbering relationship is consulted
        PackageRelationship firstRelationship = relationships.getRelationship(0);
        PackagePart target = (firstRelationship == null) ? null : container.getPart(firstRelationship);
        return (target == null) ? null : new XWPFNumbering(target);
    } catch (IOException | OpenXML4JException e) {
        LOG.warn("Couldn't load numbering", e);
        return null;
    }
}
use of org.apache.poi.openxml4j.opc.PackagePart in project tika by apache.
the class XWPFEventBasedWordExtractor method handleDocumentPart.
/**
 * Extracts the text of the main document part and of the parts related to it,
 * appending everything to {@code sb}.
 * <p>
 * Order of extraction: headers, then the main document, then footnotes, comments,
 * footers and endnotes.
 *
 * @param documentPart the main document part
 * @param sb           buffer passed on to {@code handlePart} for every processed part
 * @throws IOException  if handling a part fails with an I/O error
 * @throws SAXException if handling a part fails with a SAX error
 */
private void handleDocumentPart(PackagePart documentPart, StringBuilder sb) throws IOException, SAXException {
    //load the numbering and build the list manager from the main document part
    XWPFNumbering numbering = loadNumbering(documentPart);
    XWPFListManager xwpfListManager = new XWPFListManager(numbering);
    //headers
    handleRelatedPartsOfType(documentPart, XWPFRelation.HEADER, xwpfListManager, sb);
    //main document
    handlePart(documentPart, xwpfListManager, sb);
    //for now, just dump other components at end
    for (XWPFRelation rel : new XWPFRelation[] { XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE }) {
        handleRelatedPartsOfType(documentPart, rel, xwpfListManager, sb);
    }
}

/**
 * Handles every part related to {@code documentPart} through the given relation type.
 * An {@link InvalidFormatException} while resolving the relationships or a related part
 * is logged as a warning and ends processing for this relation type only.
 */
private void handleRelatedPartsOfType(PackagePart documentPart, XWPFRelation relation, XWPFListManager xwpfListManager, StringBuilder sb) throws IOException, SAXException {
    try {
        PackageRelationshipCollection prc = documentPart.getRelationshipsByType(relation.getRelation());
        if (prc == null) {
            return;
        }
        for (int i = 0; i < prc.size(); i++) {
            PackagePart relatedPart = documentPart.getRelatedPart(prc.getRelationship(i));
            handlePart(relatedPart, xwpfListManager, sb);
        }
    } catch (InvalidFormatException e) {
        LOG.warn("Invalid format", e);
    }
}
use of org.apache.poi.openxml4j.opc.PackagePart in project tika by apache.
the class OOXMLExtractorFactory method trySXSLF.
/**
 * Returns an event-based PowerPoint extractor when the package's core document part has
 * one of the content types supported for presentations (or the theme-manager content type).
 * <p>
 * The core part is located through the transitional officeDocument relationship type,
 * falling back to the strict (purl.oclc.org) one.
 *
 * @param pkg the package to inspect
 * @return an extractor for {@code pkg}, or {@code null} when the package has no
 *         officeDocument relationship, the core part cannot be resolved, or its content
 *         type is not a supported presentation type
 */
private static POIXMLTextExtractor trySXSLF(OPCPackage pkg) throws XmlException, OpenXML4JException, IOException {
    PackageRelationshipCollection packageRelationshipCollection = pkg.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument");
    if (packageRelationshipCollection.size() == 0) {
        packageRelationshipCollection = pkg.getRelationshipsByType("http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument");
    }
    if (packageRelationshipCollection.size() == 0) {
        return null;
    }
    PackagePart corePart = pkg.getPart(packageRelationshipCollection.getRelationship(0));
    if (corePart == null) {
        // getPart may not resolve the relationship target; treat it as "not a presentation"
        // instead of failing with a NullPointerException below
        return null;
    }
    String targetContentType = corePart.getContentType();
    for (XSLFRelation xslfRelation : org.apache.poi.xslf.extractor.XSLFPowerPointExtractor.SUPPORTED_TYPES) {
        if (xslfRelation.getContentType().equals(targetContentType)) {
            return new XSLFEventBasedPowerPointExtractor(pkg);
        }
    }
    if (XSLFRelation.THEME_MANAGER.getContentType().equals(targetContentType)) {
        return new XSLFEventBasedPowerPointExtractor(pkg);
    }
    return null;
}
use of org.apache.poi.openxml4j.opc.PackagePart in project tika by apache.
the class SXSLFPowerPointExtractorDecorator method handleBasicRelatedParts.
/**
 * Parses every part related to {@code parentPart} through the given relationship type
 * (comments, master, notes, etc.) and forwards the SAX events to {@code contentHandler},
 * wrapped in a single {@code div} element.
 * <p>
 * Failures to resolve or read an individual part are recorded in the metadata as
 * exception warnings and do not stop processing of the remaining parts.
 *
 * @param contentType     value passed to {@code getRelationshipsByType} to select the related parts
 * @param xhtmlClassLabel value of the {@code class} attribute on the wrapping {@code div}
 * @param parentPart      part whose related parts are processed
 * @param contentHandler  handler receiving the wrapping element and the parsed content
 * @throws SAXException if emitting the wrapping element or parsing a part fails
 */
private void handleBasicRelatedParts(String contentType, String xhtmlClassLabel, PackagePart parentPart, ContentHandler contentHandler) throws SAXException {
    PackageRelationshipCollection relationships = null;
    try {
        relationships = parentPart.getRelationshipsByType(contentType);
    } catch (InvalidFormatException e) {
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
    }
    // nothing to emit: no wrapping div is written when there are no related parts
    if (relationships == null || relationships.size() <= 0) {
        return;
    }

    AttributesImpl divAttributes = new AttributesImpl();
    divAttributes.addAttribute("", "class", "class", "CDATA", xhtmlClassLabel);
    contentHandler.startElement("", "div", "div", divAttributes);

    for (int idx = 0; idx < relationships.size(); idx++) {
        PackageRelationship relationship = relationships.getRelationship(idx);
        try {
            PackagePart relatedPart = parentPart.getRelatedPart(relationship);
            try (InputStream in = relatedPart.getInputStream()) {
                context.getSAXParser().parse(in, new OfflineContentHandler(new EmbeddedContentHandler(contentHandler)));
            }
        } catch (IOException | TikaException | InvalidFormatException e) {
            metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
        }
    }

    contentHandler.endElement("", "div", "div");
}
use of org.apache.poi.openxml4j.opc.PackagePart in project tika by apache.
the class SXWPFWordExtractorDecorator method addRelatedParts.
/**
 * Appends to {@code relatedParts} every part related to {@code documentPart} through one
 * of the relationship types in {@code MAIN_PART_RELATIONS}.
 * <p>
 * If resolving a relationship type or one of its parts throws
 * {@link InvalidFormatException}, the remaining parts of that type are skipped and
 * processing continues with the next type.
 *
 * @param documentPart part whose related parts are collected
 * @param relatedParts list that receives the related parts, in lookup order
 */
private void addRelatedParts(PackagePart documentPart, List<PackagePart> relatedParts) {
    for (String relation : MAIN_PART_RELATIONS) {
        try {
            PackageRelationshipCollection prc = documentPart.getRelationshipsByType(relation);
            if (prc != null) {
                for (int i = 0; i < prc.size(); i++) {
                    PackagePart packagePart = documentPart.getRelatedPart(prc.getRelationship(i));
                    relatedParts.add(packagePart);
                }
            }
        } catch (InvalidFormatException ignored) {
            // Deliberately swallowed: collection of related parts is best-effort, so a
            // malformed relationship only drops the rest of this relation type.
            // NOTE(review): nothing is recorded here; consider logging or adding a
            // metadata warning so the skipped parts are visible to callers.
        }
    }
}
Aggregations