use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project poi by apache.
the class POIXMLDocumentPart method read.
/**
 * Walks the relationships of the underlying PackagePart and builds a child
 * POIXMLDocumentPart for every internal relationship target, using the given factory.
 * Children created by this call are read recursively after all relations of this
 * part have been registered.
 *
 * @param factory the factory used to create the child document parts
 * @param context map of the document parts visited so far, keyed by their PackagePart
 *
 * @throws OpenXML4JException thrown when a related part can't be read
 * @throws POIXMLException if the context already maps this part's PackagePart to a different document part
 */
protected void read(POIXMLFactory factory, Map<PackagePart, POIXMLDocumentPart> context) throws OpenXML4JException {
    final PackagePart ownPart = getPackagePart();
    // register this part (again), in case the initial caller has not done so
    final POIXMLDocumentPart previous = context.put(ownPart, this);
    if (previous != null && previous != this) {
        throw new POIXMLException("Unique PackagePart-POIXMLDocumentPart relation broken!");
    }
    if (!ownPart.hasRelationships()) {
        return;
    }

    // children are only collected here and read afterwards, so the scan is
    // breadth-first and parent relations are hopefully the shallowest element
    final List<POIXMLDocumentPart> pending = new ArrayList<POIXMLDocumentPart>();
    for (PackageRelationship relation : packagePart.getRelationships()) {
        if (relation.getTargetMode() != TargetMode.INTERNAL) {
            continue;
        }

        final URI target = relation.getTargetURI();
        // targets carrying a fragment are internal references (e.g. '#Sheet1!A1');
        // for those only the path component names the part
        final PackagePartName targetName = (target.getRawFragment() == null)
                ? PackagingURIHelper.createPartName(target)
                : PackagingURIHelper.createPartName(target.getPath());

        final PackagePart targetPart = packagePart.getPackage().getPart(targetName);
        if (targetPart == null) {
            logger.log(POILogger.ERROR, "Skipped invalid entry " + relation.getTargetURI());
            continue;
        }

        POIXMLDocumentPart child = context.get(targetPart);
        if (child == null) {
            child = factory.createDocumentPart(this, targetPart);
            child.parent = this;
            // publish the child right away, so other children can reference it
            context.put(targetPart, child);
            pending.add(child);
        }
        addRelation(relation, child);
    }

    for (POIXMLDocumentPart child : pending) {
        child.read(factory, context);
    }
}
use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project poi by apache.
the class POIXMLDocumentPart method rebase.
/**
 * Re-bases this document part onto the part referenced by its single core
 * document relation. Intended for documents such as themes, where the XML
 * document should hang off the core child of the current core document.
 *
 * @param pkg the package to be rebased (this implementation does not read it; the
 *            relations are looked up on the current package part)
 *
 * @throws InvalidFormatException if there was an error in the core document relation
 * @throws IllegalStateException if the number of core document relations is not exactly one
 */
protected final void rebase(OPCPackage pkg) throws InvalidFormatException {
    final PackageRelationshipCollection coreRelations = packagePart.getRelationshipsByType(coreDocumentRel);
    final int found = coreRelations.size();
    if (found != 1) {
        throw new IllegalStateException("Tried to rebase using " + coreDocumentRel + " but found " + found + " parts of the right type");
    }
    // exactly one core relation: its target becomes the new underlying part
    packagePart = packagePart.getRelatedPart(coreRelations.getRelationship(0));
}
use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project tika by apache.
the class AbstractOOXMLExtractor method loadLinkedRelationships.
/**
 * This is used by the SAX docx and pptx decorators to load hyperlinks and
 * other linked objects.
 * <p>
 * Hyperlink relationships are stored as id -&gt; target URI string. Relationships
 * of the types listed in EMBEDDED_RELATIONSHIPS are stored as id -&gt; file name
 * derived from the target URI (empty string when no name is available).
 *
 * @param bodyPart the part whose relationships are inspected
 * @param includeInternal if false, hyperlink relationships with an internal target mode are skipped
 * @param metadata receives the exception record if the relationships cannot be read
 * @return map from relationship id to hyperlink target or embedded file name; never null
 */
protected Map<String, String> loadLinkedRelationships(PackagePart bodyPart, boolean includeInternal, Metadata metadata) {
    final Map<String, String> targetsById = new HashMap<>();
    try {
        // pass 1: hyperlinks
        final PackageRelationshipCollection hyperlinks = bodyPart.getRelationshipsByType(XWPFRelation.HYPERLINK.getRelation());
        for (int idx = 0; idx < hyperlinks.size(); idx++) {
            final PackageRelationship link = hyperlinks.getRelationship(idx);
            if (link == null || (!includeInternal && TargetMode.INTERNAL.equals(link.getTargetMode()))) {
                continue;
            }
            final String linkId = link.getId();
            String url = null;
            if (link.getTargetURI() != null) {
                url = link.getTargetURI().toString();
            }
            if (linkId != null && url != null) {
                targetsById.put(linkId, url);
            }
        }

        // pass 2: embedded objects, keyed by id and mapped to their file name
        for (String embeddedType : EMBEDDED_RELATIONSHIPS) {
            final PackageRelationshipCollection embeddedRels = bodyPart.getRelationshipsByType(embeddedType);
            for (int idx = 0; idx < embeddedRels.size(); idx++) {
                final PackageRelationship embedded = embeddedRels.getRelationship(idx);
                if (embedded == null) {
                    continue;
                }
                final String embeddedId = embedded.getId();
                String uriString = null;
                if (embedded.getTargetURI() != null) {
                    uriString = embedded.getTargetURI().toString();
                }
                String name = uriString;
                if (embedded.getTargetURI() != null) {
                    try {
                        name = FileHelper.getFilename(new File(name));
                    } catch (Exception e) {
                        // fall back to the raw URI string when no file name can be derived
                        name = uriString;
                    }
                }
                if (embeddedId != null) {
                    targetsById.put(embeddedId, (name == null) ? "" : name);
                }
            }
        }
    } catch (InvalidFormatException e) {
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
    }
    return targetsById;
}
use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project tika by apache.
the class OOXMLExtractorFactory method trySXWPF.
/**
 * Tries to create an event-based Word extractor for the given package.
 * <p>
 * The core part is located through the package's first officeDocument relationship;
 * if no relationship of the schemas.openxmlformats.org type exists, the
 * purl.oclc.org variant of the relationship type is tried instead.
 *
 * @param pkg the package to inspect
 * @return an XWPFEventBasedWordExtractor if the core part's content type matches one of
 *         XWPFWordExtractor.SUPPORTED_TYPES; null if there is no officeDocument
 *         relationship, the relationship's target part is not present in the package,
 *         or the content type is not supported
 */
private static POIXMLTextExtractor trySXWPF(OPCPackage pkg) throws XmlException, OpenXML4JException, IOException {
    PackageRelationshipCollection packageRelationshipCollection = pkg.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument");
    if (packageRelationshipCollection.size() == 0) {
        packageRelationshipCollection = pkg.getRelationshipsByType("http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument");
    }
    if (packageRelationshipCollection.size() == 0) {
        return null;
    }
    PackagePart corePart = pkg.getPart(packageRelationshipCollection.getRelationship(0));
    if (corePart == null) {
        // The relationship points at a part that is missing from the package; report
        // "not handled" like the other unsupported cases instead of failing with a
        // NullPointerException on the dereference below.
        return null;
    }
    String targetContentType = corePart.getContentType();
    for (XWPFRelation relation : XWPFWordExtractor.SUPPORTED_TYPES) {
        if (targetContentType.equals(relation.getContentType())) {
            return new XWPFEventBasedWordExtractor(pkg);
        }
    }
    return null;
}
use of org.apache.poi.openxml4j.opc.PackageRelationshipCollection in project tika by apache.
the class SXSLFPowerPointExtractorDecorator method buildXHTML.
/**
 * Writes the presentation content to the given handler: after loading the comment
 * authors, every slide related to the main document is handled, followed by the
 * slide master and handout master parts. Failures to read the slide relationships
 * or an individual slide are recorded in the metadata as exception warnings and do
 * not abort the remaining work.
 *
 * @see XSLFPowerPointExtractor#getText()
 */
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException {
    loadCommentAuthors();

    PackageRelationshipCollection slideRelations = null;
    try {
        slideRelations = mainDocument.getRelationshipsByType(XSLFRelation.SLIDE.getRelation());
    } catch (InvalidFormatException e) {
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
    }

    // an empty collection simply yields zero iterations
    if (slideRelations != null) {
        for (int slideIdx = 0; slideIdx < slideRelations.size(); slideIdx++) {
            try {
                handleSlidePart(mainDocument.getRelatedPart(slideRelations.getRelationship(slideIdx)), xhtml);
            } catch (InvalidFormatException | ZipException e) {
                metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, ExceptionUtils.getStackTrace(e));
            }
        }
    }

    // slide masters: text handler wrapped in a PlaceHolderSkipper
    final String masterRelation = XSLFRelation.SLIDE_MASTER.getRelation();
    final PlaceHolderSkipper masterHandler = new PlaceHolderSkipper(
            new OOXMLWordAndPowerPointTextHandler(new OOXMLTikaBodyPartHandler(xhtml), new HashMap<String, String>()));
    handleBasicRelatedParts(masterRelation, "slide-master", mainDocument, masterHandler);

    // handout masters: plain text handler
    final OOXMLWordAndPowerPointTextHandler handoutHandler =
            new OOXMLWordAndPowerPointTextHandler(new OOXMLTikaBodyPartHandler(xhtml), new HashMap<String, String>());
    handleBasicRelatedParts(HANDOUT_MASTER, "slide-handout-master", mainDocument, handoutHandler);
}
Aggregations