Use of org.apache.poi.openxml4j.opc.PackageRelationship in the Apache Tika project.
Class SXWPFWordExtractorDecorator, method loadNumbering.
/**
 * Looks up the numbering part related to {@code packagePart} and wraps it in
 * an {@link XWPFNumberingShim}.
 *
 * @param packagePart part whose NUMBERING relationships are inspected
 * @return a shim over the first numbering part, or {@code null} when there is
 *         no numbering relationship, the first relationship or its related
 *         part is {@code null}, or loading fails with an
 *         {@link IOException} or {@link OpenXML4JException}
 */
private XWPFNumbering loadNumbering(PackagePart packagePart) {
    try {
        PackageRelationshipCollection rels =
                packagePart.getRelationshipsByType(XWPFRelation.NUMBERING.getRelation());
        if (rels.size() <= 0) {
            return null;
        }
        // Only the first numbering relationship is ever consulted.
        PackageRelationship firstRel = rels.getRelationship(0);
        if (firstRel == null) {
            return null;
        }
        PackagePart target = packagePart.getRelatedPart(firstRel);
        if (target == null) {
            return null;
        }
        return new XWPFNumberingShim(target);
    } catch (IOException | OpenXML4JException ignored) {
        // Deliberately swallowed: a failure to load numbering is reported to
        // the caller as "no numbering" (null) rather than as an exception.
        return null;
    }
}
Use of org.apache.poi.openxml4j.opc.PackageRelationship in the Apache Tika project.
Class SXWPFWordExtractorDecorator, method loadStyles.
/**
 * Looks up the styles part related to {@code packagePart} and wraps it in an
 * {@link XWPFStylesShim} built with this decorator's {@code context}.
 *
 * @param packagePart part whose STYLES relationships are inspected
 * @return a shim over the first styles part, or {@code null} when there is no
 *         styles relationship or the first relationship or its related part
 *         is {@code null}
 */
private XWPFStylesShim loadStyles(PackagePart packagePart) throws InvalidFormatException, TikaException, IOException, SAXException {
    PackageRelationshipCollection rels =
            packagePart.getRelationshipsByType(XWPFRelation.STYLES.getRelation());
    if (rels.size() <= 0) {
        return null;
    }
    // Only the first styles relationship is ever consulted.
    PackageRelationship firstRel = rels.getRelationship(0);
    if (firstRel == null) {
        return null;
    }
    PackagePart target = packagePart.getRelatedPart(firstRel);
    if (target == null) {
        return null;
    }
    return new XWPFStylesShim(target, context);
}
Use of org.apache.poi.openxml4j.opc.PackageRelationship in the Apache Tika project.
Class AbstractOOXMLExtractor, method loadLinkedRelationships.
/**
 * This is used by the SAX docx and pptx decorators to load hyperlinks and
 * other linked objects.
 * <p>
 * Hyperlink relationships are stored as id -&gt; target URI string and are
 * only stored when both values are non-null. Relationships of every type in
 * {@code EMBEDDED_RELATIONSHIPS} are stored as id -&gt; file name, where the
 * file name is derived from the target URI (falling back to the raw URI
 * string if that derivation throws, and to the empty string if no name is
 * available).
 *
 * @param bodyPart        part whose relationships are inspected
 * @param includeInternal when {@code false}, hyperlink relationships whose
 *                        target mode is {@code TargetMode.INTERNAL} are skipped
 * @param metadata        passed to {@code EmbeddedDocumentUtil} to record an
 *                        {@link InvalidFormatException} raised while reading
 *                        relationships
 * @return map keyed by relationship id; never {@code null}, and holding
 *         whatever was collected before any {@link InvalidFormatException}
 */
protected Map<String, String> loadLinkedRelationships(PackagePart bodyPart, boolean includeInternal, Metadata metadata) {
    Map<String, String> targetsById = new HashMap<>();
    try {
        // Pass 1: hyperlinks -> target URI string.
        PackageRelationshipCollection hyperlinks =
                bodyPart.getRelationshipsByType(XWPFRelation.HYPERLINK.getRelation());
        for (int idx = 0; idx < hyperlinks.size(); idx++) {
            PackageRelationship link = hyperlinks.getRelationship(idx);
            if (link == null) {
                continue;
            }
            boolean internal = TargetMode.INTERNAL.equals(link.getTargetMode());
            if (internal && !includeInternal) {
                continue;
            }
            String linkId = link.getId();
            String target = null;
            if (link.getTargetURI() != null) {
                target = link.getTargetURI().toString();
            }
            if (linkId == null || target == null) {
                continue;
            }
            targetsById.put(linkId, target);
        }

        // Pass 2: embedded/linked objects -> file name of the target.
        for (String relType : EMBEDDED_RELATIONSHIPS) {
            PackageRelationshipCollection embedded = bodyPart.getRelationshipsByType(relType);
            for (int idx = 0; idx < embedded.size(); idx++) {
                PackageRelationship rel = embedded.getRelationship(idx);
                if (rel == null) {
                    continue;
                }
                String relId = rel.getId();
                String name = null;
                if (rel.getTargetURI() != null) {
                    String uriText = rel.getTargetURI().toString();
                    try {
                        name = FileHelper.getFilename(new File(uriText));
                    } catch (Exception e) {
                        // Could not derive a file name; keep the raw URI text.
                        name = uriText;
                    }
                }
                if (relId != null) {
                    // Entries are stored even without a target, using "" as the name.
                    targetsById.put(relId, (name != null) ? name : "");
                }
            }
        }
    } catch (InvalidFormatException e) {
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
    }
    return targetsById;
}
Use of org.apache.poi.openxml4j.opc.PackageRelationship in the Apache Tika project.
Class AbstractOOXMLExtractor, method handleThumbnail.
/**
 * Emits an empty {@code <div class="embedded">} placeholder for every
 * thumbnail relationship of the package and, if the embedded extractor
 * accepts it, passes the thumbnail stream on for embedded parsing.
 * <p>
 * The thumbnail's part name is used as the div id, the resource name, the
 * embedded relationship id and the title in the thumbnail's metadata.
 * <p>
 * This is best-effort: any exception aborts thumbnail handling without
 * being propagated to the caller.
 *
 * @param handler receives the placeholder element and the embedded content
 */
private void handleThumbnail(ContentHandler handler) {
    try {
        OPCPackage opcPackage = extractor.getPackage();
        for (PackageRelationship rel : opcPackage.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL)) {
            PackagePart tPart = opcPackage.getPart(rel);
            if (tPart == null) {
                // No part backs this relationship; skip it instead of letting
                // a NullPointerException abandon the remaining thumbnails.
                continue;
            }
            // try-with-resources so the stream is closed even when SAX
            // output or embedded parsing throws.
            try (InputStream tStream = tPart.getInputStream()) {
                Metadata thumbnailMetadata = new Metadata();
                String thumbName = tPart.getPartName().getName();
                thumbnailMetadata.set(Metadata.RESOURCE_NAME_KEY, thumbName);

                AttributesImpl attributes = new AttributesImpl();
                attributes.addAttribute(XHTML, "class", "class", "CDATA", "embedded");
                attributes.addAttribute(XHTML, "id", "id", "CDATA", thumbName);
                handler.startElement(XHTML, "div", "div", attributes);
                handler.endElement(XHTML, "div", "div");

                thumbnailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, thumbName);
                thumbnailMetadata.set(Metadata.CONTENT_TYPE, tPart.getContentType());
                thumbnailMetadata.set(TikaCoreProperties.TITLE, thumbName);

                if (embeddedExtractor.shouldParseEmbedded(thumbnailMetadata)) {
                    embeddedExtractor.parseEmbedded(TikaInputStream.get(tStream), new EmbeddedContentHandler(handler), thumbnailMetadata, false);
                }
            }
        }
    } catch (Exception ignored) {
        // Intentionally not propagated: this method declares no exceptions,
        // so a thumbnail failure must not fail the enclosing extraction.
    }
}
Aggregations