Use of org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart in the Apache POI project.
Class OPCPackage, method getParts.
/**
 * Returns every part of this package, loading the part list on first use.
 * The relationships of each part are not loaded.
 *
 * <p>On the first call, the parts reported by {@code getPartsImpl()} are
 * registered in {@code partList}. A part whose content type has a registered
 * unmarshaller is replaced by the part that unmarshaller produces. Later
 * calls reuse the already populated {@code partList}.
 *
 * <p>Note - Rule M4.1 states that there may only ever be one Core
 * Properties Part, but Office produced files will sometimes
 * have multiple! As Office ignores all but the first, we relax
 * compliance with Rule M4.1: additional core properties parts are reported
 * with a warning, and only the first unmarshalled
 * {@code PackagePropertiesPart} becomes the package properties.
 *
 * @return a new list holding all this package's parts, in the order given
 *         by {@code partList.sortedValues()}.
 * @throws InvalidFormatException if two parts share the same name [M1.12],
 *         or if registering or unmarshalling a part raises an
 *         {@code InvalidOperationException}.
 */
public ArrayList<PackagePart> getParts() throws InvalidFormatException {
    throwExceptionIfWriteOnly();

    // If the part list is null, we parse the package to retrieve all parts.
    if (partList == null) {
        /* Variables used to validate OPC compliance */

        // Check rule M4.1 -> A format consumer shall consider more than
        // one core properties relationship for a package to be an error
        // (We just log it and move on, as real files break this!)
        boolean hasCorePropertiesPart = false;
        boolean needCorePropertiesPart = true;

        PackagePart[] parts = this.getPartsImpl();
        this.partList = new PackagePartCollection();
        for (PackagePart part : parts) {
            if (partList.containsKey(part._partName)) {
                throw new InvalidFormatException("A part with the name '" + part._partName + "' already exist : Packages shall not contain equivalent " + "part names and package implementers shall neither create " + "nor recognize packages with equivalent part names. [M1.12]");
            }

            // Check OPC compliance rule M4.1
            if (part.getContentType().equals(ContentTypes.CORE_PROPERTIES_PART)) {
                if (!hasCorePropertiesPart) {
                    hasCorePropertiesPart = true;
                } else {
                    logger.log(POILogger.WARN, "OPC Compliance error [M4.1]: " + "there is more than one core properties relationship in the package! " + "POI will use only the first, but other software may reject this file.");
                }
            }

            PartUnmarshaller partUnmarshaller = partUnmarshallers.get(part._contentType);
            if (partUnmarshaller != null) {
                UnmarshallContext context = new UnmarshallContext(this, part._partName);
                try {
                    PackagePart unmarshallPart = partUnmarshaller.unmarshall(context, part.getInputStream());
                    partList.put(unmarshallPart._partName, unmarshallPart);

                    // Core properties case: use the first core properties
                    // part we come across and ignore any subsequent ones
                    if (unmarshallPart instanceof PackagePropertiesPart && hasCorePropertiesPart && needCorePropertiesPart) {
                        this.packageProperties = (PackagePropertiesPart) unmarshallPart;
                        needCorePropertiesPart = false;
                    }
                } catch (IOException ioe) {
                    // Best effort: the part is left out of the list and loading
                    // carries on with the next one. The exception is handed to
                    // the logger so the reason for the skip is not lost.
                    logger.log(POILogger.WARN, "Unmarshall operation : IOException for " + part._partName, ioe);
                } catch (InvalidOperationException invoe) {
                    throw new InvalidFormatException(invoe.getMessage(), invoe);
                }
            } else {
                // No unmarshaller for this content type: register the part as-is.
                try {
                    partList.put(part._partName, part);
                } catch (InvalidOperationException e) {
                    throw new InvalidFormatException(e.getMessage(), e);
                }
            }
        }
    }
    return new ArrayList<PackagePart>(partList.sortedValues());
}
Use of org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart in the Apache POI project.
Class PackagePropertiesMarshaller, method marshall.
/**
 * Builds the core properties XML document for the given part.
 *
 * <p>The part is stored in {@code propsPart} and a new document is stored in
 * {@code xmlDoc}. A {@code coreProperties} root element carrying the core
 * properties, DC, DC terms and XSI namespace declarations is appended, and
 * then each {@code addXxx()} helper is invoked in a fixed order.
 *
 * @param part the part to marshall; must be a {@code PackagePropertiesPart}.
 * @param out the target stream; it is not referenced in this method body.
 * @return always <code>true</code>.
 * @throws IllegalArgumentException if {@code part} is not a
 *         {@code PackagePropertiesPart}.
 * @throws OpenXML4JException declared by the method signature.
 */
@Override
public boolean marshall(PackagePart part, OutputStream out) throws OpenXML4JException {
    if (part instanceof PackagePropertiesPart) {
        propsPart = (PackagePropertiesPart) part;
    } else {
        throw new IllegalArgumentException("'part' must be a PackagePropertiesPart instance.");
    }

    // Start from an empty document with a namespaced root element
    xmlDoc = DocumentHelper.createDocument();
    Element corePropertiesRoot = xmlDoc.createElementNS(
            namespaceCoreProperties.getNamespaceURI(),
            getQName("coreProperties", namespaceCoreProperties));

    // Declare every namespace used by the property elements on the root
    DocumentHelper.addNamespaceDeclaration(corePropertiesRoot, namespaceCoreProperties);
    DocumentHelper.addNamespaceDeclaration(corePropertiesRoot, namespaceDC);
    DocumentHelper.addNamespaceDeclaration(corePropertiesRoot, namespaceDcTerms);
    DocumentHelper.addNamespaceDeclaration(corePropertiesRoot, namespaceXSI);
    xmlDoc.appendChild(corePropertiesRoot);

    // One helper per core property; the call order is kept as-is
    addCategory();
    addContentStatus();
    addContentType();
    addCreated();
    addCreator();
    addDescription();
    addIdentifier();
    addKeywords();
    addLanguage();
    addLastModifiedBy();
    addLastPrinted();
    addModified();
    addRevision();
    addSubject();
    addTitle();
    addVersion();

    return true;
}
Use of org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart in the Apache POI project.
Class TestPackageCoreProperties, method testAlternateCorePropertyTimezones.
/**
 * Checks that created/modified core properties written with non-UTC
 * timezone offsets are read as the equivalent UTC instants, and that the
 * properties can be changed using other offsets.
 *
 * <p>Both packages opened by this test are closed in {@code finally}
 * blocks, so they are released even when an assertion fails.
 */
@Test
public void testAlternateCorePropertyTimezones() throws Exception {
    InputStream is = OpenXML4JTestDataSamples.openSampleStream("OPCCompliance_CoreProperties_AlternateTimezones.docx");
    OPCPackage pkg;
    try {
        pkg = OPCPackage.open(is);
    } finally {
        // Close the sample stream even if the package fails to open
        is.close();
    }

    OPCPackage reopened = null;
    try {
        PackagePropertiesPart props = (PackagePropertiesPart) pkg.getPackageProperties();

        // We need predictable dates for testing!
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT);
        df.setTimeZone(LocaleUtil.TIMEZONE_UTC);

        // Check text properties first
        assertEquals("Lorem Ipsum", props.getTitleProperty().getValue());
        assertEquals("Apache POI", props.getCreatorProperty().getValue());

        // Created at has a +3 timezone and milliseconds
        //   2006-10-13T18:06:00.123+03:00
        // = 2006-10-13T15:06:00.123+00:00
        assertEquals("2006-10-13T15:06:00Z", props.getCreatedPropertyString());
        assertEquals("2006-10-13T15:06:00.123Z", df.format(props.getCreatedProperty().getValue()));

        // Modified at has a -13 timezone but no milliseconds
        //   2007-06-20T07:59:00-13:00
        // = 2007-06-20T20:59:00+00:00
        assertEquals("2007-06-20T20:59:00Z", props.getModifiedPropertyString());
        assertEquals("2007-06-20T20:59:00.000Z", df.format(props.getModifiedProperty().getValue()));

        // Ensure we can change them with other timezones and still read back OK
        props.setCreatedProperty("2007-06-20T20:57:00+13:00");
        props.setModifiedProperty("2007-06-20T20:59:00.123-13:00");

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        pkg.save(baos);
        reopened = OPCPackage.open(new ByteArrayInputStream(baos.toByteArray()));

        // NOTE(review): 'props' still refers to the properties of the original
        // package, so the checks below do not read anything back from the
        // re-opened copy. Re-fetching the properties from 'reopened' would
        // exercise the save/load round trip; confirm the expected values
        // before switching.

        // Check text properties first - should be unchanged
        assertEquals("Lorem Ipsum", props.getTitleProperty().getValue());
        assertEquals("Apache POI", props.getCreatorProperty().getValue());

        // Check the updated times
        //   2007-06-20T20:57:00+13:00
        // = 2007-06-20T07:57:00Z
        assertEquals("2007-06-20T07:57:00.000Z", df.format(props.getCreatedProperty().getValue()));

        //   2007-06-20T20:59:00.123-13:00
        // = 2007-06-21T09:59:00.123Z
        assertEquals("2007-06-21T09:59:00.123Z", df.format(props.getModifiedProperty().getValue()));
    } finally {
        // Release both packages, whatever happened above
        try {
            if (reopened != null) {
                reopened.close();
            }
        } finally {
            pkg.close();
        }
    }
}
Use of org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart in the Apache POI project.
Class TestPackageCoreProperties, method testEntitiesInCoreProps_56164.
/**
 * Opens a sample package whose core properties contain entities and checks
 * that the three expected root relationships are present and that the
 * creator property is read as "Stefan Kopf".
 */
@Test
public void testEntitiesInCoreProps_56164() throws Exception {
    InputStream sample = OpenXML4JTestDataSamples.openSampleStream("CorePropertiesHasEntities.ooxml");
    OPCPackage p = OPCPackage.open(sample);
    sample.close();

    // Should have 3 root relationships: document, core props, extended props
    boolean hasDocumentRel = false;
    boolean hasCorePropsRel = false;
    boolean hasExtendedPropsRel = false;
    for (PackageRelationship rel : p.getRelationships()) {
        // Each flag latches to true once a relationship of its type is seen
        hasDocumentRel |= rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_DOCUMENT);
        hasCorePropsRel |= rel.getRelationshipType().equals(PackageRelationshipTypes.CORE_PROPERTIES);
        hasExtendedPropsRel |= rel.getRelationshipType().equals(PackageRelationshipTypes.EXTENDED_PROPERTIES);
    }
    assertTrue("Core/Doc Relationship not found in " + p.getRelationships(), hasDocumentRel);
    assertTrue("Core Props Relationship not found in " + p.getRelationships(), hasCorePropsRel);
    assertTrue("Ext Props Relationship not found in " + p.getRelationships(), hasExtendedPropsRel);

    // Fetch the core properties and verify the creator
    PackagePropertiesPart coreProps = (PackagePropertiesPart) p.getPackageProperties();
    assertEquals("Stefan Kopf", coreProps.getCreatorProperty().getValue());

    p.close();
}
Use of org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart in the Apache Tika project.
Class MetadataExtractor, method extractMetadata.
/**
 * Copies the package core properties into the given metadata object.
 *
 * <p>Each value is read from the {@code PackagePropertiesPart} underlying
 * {@code properties} and handed to {@code addProperty}; the creator is
 * handed to {@code addMultiProperty} instead. Category, content status,
 * revision and version are recorded a second time under the legacy
 * {@code Metadata} keys.
 *
 * @param properties the core properties wrapper to read from.
 * @param metadata the metadata object the values are added to.
 */
private void extractMetadata(CoreProperties properties, Metadata metadata) {
    PackagePropertiesPart coreProps = properties.getUnderlyingProperties();

    // Current-style property keys
    addProperty(metadata, OfficeOpenXMLCore.CATEGORY, coreProps.getCategoryProperty());
    addProperty(metadata, OfficeOpenXMLCore.CONTENT_STATUS, coreProps.getContentStatusProperty());
    addProperty(metadata, TikaCoreProperties.CREATED, coreProps.getCreatedProperty());
    addMultiProperty(metadata, TikaCoreProperties.CREATOR, coreProps.getCreatorProperty());
    addProperty(metadata, TikaCoreProperties.DESCRIPTION, coreProps.getDescriptionProperty());
    addProperty(metadata, TikaCoreProperties.IDENTIFIER, coreProps.getIdentifierProperty());
    addProperty(metadata, TikaCoreProperties.KEYWORDS, coreProps.getKeywordsProperty());
    addProperty(metadata, TikaCoreProperties.LANGUAGE, coreProps.getLanguageProperty());
    addProperty(metadata, TikaCoreProperties.MODIFIER, coreProps.getLastModifiedByProperty());
    addProperty(metadata, TikaCoreProperties.PRINT_DATE, coreProps.getLastPrintedProperty());
    // The modified date is recorded under two keys
    addProperty(metadata, Metadata.LAST_MODIFIED, coreProps.getModifiedProperty());
    addProperty(metadata, TikaCoreProperties.MODIFIED, coreProps.getModifiedProperty());
    addProperty(metadata, OfficeOpenXMLCore.REVISION, coreProps.getRevisionProperty());
    // TODO: Move to OO subject in Tika 2.0
    addProperty(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, coreProps.getSubjectProperty());
    addProperty(metadata, TikaCoreProperties.TITLE, coreProps.getTitleProperty());
    addProperty(metadata, OfficeOpenXMLCore.VERSION, coreProps.getVersionProperty());

    // Legacy Tika-1.0 style stats
    // TODO Remove these in Tika 2.0
    addProperty(metadata, Metadata.CATEGORY, coreProps.getCategoryProperty());
    addProperty(metadata, Metadata.CONTENT_STATUS, coreProps.getContentStatusProperty());
    addProperty(metadata, Metadata.REVISION_NUMBER, coreProps.getRevisionProperty());
    addProperty(metadata, Metadata.VERSION, coreProps.getVersionProperty());
}
Aggregations