Search in sources :

Example 1 with XmlValueOutOfRangeException

Use of org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException in the Apache Tika project.

From the class MetadataExtractor, method extractMetadata:

/**
 * Copies the extended OOXML properties into the supplied metadata object.
 *
 * <p>Values are written in this order: application/company/template details and the
 * total time, then {@code PagedText.N_PAGES} (the page count when it is positive,
 * otherwise the slide count when that is positive), then the document statistics,
 * and finally the legacy Tika-1.0 style keys.
 *
 * @param properties extended properties whose underlying {@code CTProperties} is read
 * @param metadata   destination for the extracted values
 */
private void extractMetadata(ExtendedProperties properties, Metadata metadata) {
    final CTProperties props = properties.getUnderlyingProperties();
    final int totalMinutes = readTotalTimeOrZero(props);

    // Application, company and template details
    addProperty(metadata, OfficeOpenXMLExtended.APPLICATION, props.getApplication());
    addProperty(metadata, OfficeOpenXMLExtended.APP_VERSION, props.getAppVersion());
    // The company value is recorded under both the publisher and the company key.
    addProperty(metadata, TikaCoreProperties.PUBLISHER, props.getCompany());
    addProperty(metadata, OfficeOpenXMLExtended.COMPANY, props.getCompany());
    SummaryExtractor.addMulti(metadata, OfficeOpenXMLExtended.MANAGER, props.getManager());
    addProperty(metadata, OfficeOpenXMLExtended.NOTES, props.getNotes());
    addProperty(metadata, OfficeOpenXMLExtended.PRESENTATION_FORMAT, props.getPresentationFormat());
    addProperty(metadata, OfficeOpenXMLExtended.TEMPLATE, props.getTemplate());
    addProperty(metadata, OfficeOpenXMLExtended.TOTAL_TIME, totalMinutes);

    // N_PAGES: a positive page count wins; otherwise fall back to a positive slide count.
    if (props.getPages() > 0) {
        metadata.set(PagedText.N_PAGES, props.getPages());
    } else {
        if (props.getSlides() > 0) {
            metadata.set(PagedText.N_PAGES, props.getSlides());
        }
    }

    // Document statistics
    addProperty(metadata, Office.PAGE_COUNT, props.getPages());
    addProperty(metadata, Office.SLIDE_COUNT, props.getSlides());
    addProperty(metadata, Office.PARAGRAPH_COUNT, props.getParagraphs());
    addProperty(metadata, Office.LINE_COUNT, props.getLines());
    addProperty(metadata, Office.WORD_COUNT, props.getWords());
    addProperty(metadata, Office.CHARACTER_COUNT, props.getCharacters());
    addProperty(metadata, Office.CHARACTER_COUNT_WITH_SPACES, props.getCharactersWithSpaces());

    // Legacy Tika-1.0 style keys, duplicating the values written above.
    // TODO Remove these in Tika 2.0
    addProperty(metadata, Metadata.APPLICATION_NAME, props.getApplication());
    addProperty(metadata, Metadata.APPLICATION_VERSION, props.getAppVersion());
    addProperty(metadata, Metadata.MANAGER, props.getManager());
    addProperty(metadata, Metadata.NOTES, props.getNotes());
    addProperty(metadata, Metadata.PRESENTATION_FORMAT, props.getPresentationFormat());
    addProperty(metadata, Metadata.TEMPLATE, props.getTemplate());
    addProperty(metadata, Metadata.TOTAL_TIME, totalMinutes);
    addProperty(metadata, MSOffice.PAGE_COUNT, props.getPages());
    addProperty(metadata, MSOffice.SLIDE_COUNT, props.getSlides());
    addProperty(metadata, MSOffice.PARAGRAPH_COUNT, props.getParagraphs());
    addProperty(metadata, MSOffice.LINE_COUNT, props.getLines());
    addProperty(metadata, MSOffice.WORD_COUNT, props.getWords());
    addProperty(metadata, MSOffice.CHARACTER_COUNT, props.getCharacters());
    addProperty(metadata, MSOffice.CHARACTER_COUNT_WITH_SPACES, props.getCharactersWithSpaces());
}

/**
 * Reads the total-time value, substituting {@code 0} when the read fails with an
 * {@code XmlValueOutOfRangeException}.
 *
 * <p>TIKA-2055: some OOXML files can include unsigned int/long values, which cause
 * this exception. It is caught and recorded as 0 for now because Word converts the
 * value to '0' on save.
 *
 * @param props the underlying extended properties to read from
 * @return the total time, or {@code 0} if the stored value is out of range
 */
private int readTotalTimeOrZero(CTProperties props) {
    try {
        return props.getTotalTime();
    } catch (XmlValueOutOfRangeException ignored) {
        // Deliberately swallowed for now; see TIKA-2055.
        return 0;
    }
}
Also used : CTProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties) XmlValueOutOfRangeException(org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException)

Aggregations

XmlValueOutOfRangeException (org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException): 1, CTProperties (org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties): 1