Use of org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException in project tika by apache.
From the class MetadataExtractor, method extractMetadata.
/**
 * Copies the extended OOXML properties into the supplied metadata object.
 * Each value is recorded under the current property keys and again under
 * the legacy Tika-1.0 style keys.
 *
 * @param properties extended properties whose underlying CTProperties are read
 * @param metadata   metadata object that receives the extracted values
 */
private void extractMetadata(ExtendedProperties properties, Metadata metadata) {
    final CTProperties ext = properties.getUnderlyingProperties();

    // TIKA-2055: some ooxml files carry unsigned int/long values that make
    // the getter throw. Fall back to 0 for now, because Word itself
    // converts such a value to '0' on save.
    int totalTime;
    try {
        totalTime = ext.getTotalTime();
    } catch (XmlValueOutOfRangeException ignored) {
        // deliberately swallowed for now; see the note above
        totalTime = 0;
    }

    // Application-level properties; each getter is read once, at the point
    // where its value is first needed, and reused for the legacy keys below.
    final String application = ext.getApplication();
    addProperty(metadata, OfficeOpenXMLExtended.APPLICATION, application);
    final String appVersion = ext.getAppVersion();
    addProperty(metadata, OfficeOpenXMLExtended.APP_VERSION, appVersion);
    final String company = ext.getCompany();
    addProperty(metadata, TikaCoreProperties.PUBLISHER, company);
    addProperty(metadata, OfficeOpenXMLExtended.COMPANY, company);
    final String manager = ext.getManager();
    SummaryExtractor.addMulti(metadata, OfficeOpenXMLExtended.MANAGER, manager);
    final String notes = ext.getNotes();
    addProperty(metadata, OfficeOpenXMLExtended.NOTES, notes);
    final String presentationFormat = ext.getPresentationFormat();
    addProperty(metadata, OfficeOpenXMLExtended.PRESENTATION_FORMAT, presentationFormat);
    final String template = ext.getTemplate();
    addProperty(metadata, OfficeOpenXMLExtended.TEMPLATE, template);
    addProperty(metadata, OfficeOpenXMLExtended.TOTAL_TIME, totalTime);

    // N_PAGES takes the page count when positive, otherwise the slide
    // count when that is positive; otherwise it is left unset.
    final int pages = ext.getPages();
    if (pages > 0) {
        metadata.set(PagedText.N_PAGES, pages);
    } else {
        final int slidesForPaging = ext.getSlides();
        if (slidesForPaging > 0) {
            metadata.set(PagedText.N_PAGES, slidesForPaging);
        }
    }

    // Process the document statistics
    addProperty(metadata, Office.PAGE_COUNT, pages);
    final int slides = ext.getSlides();
    addProperty(metadata, Office.SLIDE_COUNT, slides);
    final int paragraphs = ext.getParagraphs();
    addProperty(metadata, Office.PARAGRAPH_COUNT, paragraphs);
    final int lines = ext.getLines();
    addProperty(metadata, Office.LINE_COUNT, lines);
    final int words = ext.getWords();
    addProperty(metadata, Office.WORD_COUNT, words);
    final int characters = ext.getCharacters();
    addProperty(metadata, Office.CHARACTER_COUNT, characters);
    final int charactersWithSpaces = ext.getCharactersWithSpaces();
    addProperty(metadata, Office.CHARACTER_COUNT_WITH_SPACES, charactersWithSpaces);

    // Legacy Tika-1.0 style stats
    // TODO Remove these in Tika 2.0
    addProperty(metadata, Metadata.APPLICATION_NAME, application);
    addProperty(metadata, Metadata.APPLICATION_VERSION, appVersion);
    addProperty(metadata, Metadata.MANAGER, manager);
    addProperty(metadata, Metadata.NOTES, notes);
    addProperty(metadata, Metadata.PRESENTATION_FORMAT, presentationFormat);
    addProperty(metadata, Metadata.TEMPLATE, template);
    addProperty(metadata, Metadata.TOTAL_TIME, totalTime);
    addProperty(metadata, MSOffice.PAGE_COUNT, pages);
    addProperty(metadata, MSOffice.SLIDE_COUNT, slides);
    addProperty(metadata, MSOffice.PARAGRAPH_COUNT, paragraphs);
    addProperty(metadata, MSOffice.LINE_COUNT, lines);
    addProperty(metadata, MSOffice.WORD_COUNT, words);
    addProperty(metadata, MSOffice.CHARACTER_COUNT, characters);
    addProperty(metadata, MSOffice.CHARACTER_COUNT_WITH_SPACES, charactersWithSpaces);
}
Aggregations