Search in sources:

Example 1 with CTProperties

use of org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties in project poi by apache.

the class TestXSLFSlideShow method testMetadataBasics.

/**
 * Checks that the slideshow opened from {@code pack} exposes both its core
 * and extended properties, and that a handful of their values match what is
 * expected for that file.
 *
 * @throws IOException if opening or closing the slideshow fails
 * @throws OpenXML4JException if the package cannot be read
 * @throws XmlException if a properties part cannot be parsed
 */
@Test
public void testMetadataBasics() throws IOException, OpenXML4JException, XmlException {
    // try-with-resources: the slideshow is closed even when an assertion
    // below fails, instead of only on the fully successful path.
    try (XSLFSlideShow xml = new XSLFSlideShow(pack)) {
        assertNotNull(xml.getProperties().getCoreProperties());
        assertNotNull(xml.getProperties().getExtendedProperties());

        // Extended properties, read through the underlying schema bean
        CTProperties props = xml.getProperties().getExtendedProperties().getUnderlyingProperties();
        assertEquals("Microsoft Office PowerPoint", props.getApplication());
        assertEquals(0, props.getCharacters());
        assertEquals(0, props.getLines());

        // Core properties: neither a title nor a subject is expected
        CoreProperties cprops = xml.getProperties().getCoreProperties();
        assertNull(cprops.getTitle());
        assertNull(cprops.getUnderlyingProperties().getSubjectProperty().getValue());
    }
}
Also used : CoreProperties(org.apache.poi.POIXMLProperties.CoreProperties) XSLFSlideShow(org.apache.poi.xslf.usermodel.XSLFSlideShow) CTProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties) Test(org.junit.Test)

Example 2 with CTProperties

use of org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties in project poi by apache.

the class TestAllExtendedProperties method testGetAllExtendedProperties.

/**
 * Opens the sample document
 * {@code TestPoiXMLDocumentCorePropertiesGetKeywords.docx} and checks each
 * getter of its extended-properties bean against the value expected for
 * that file.
 *
 * @throws IOException if the sample document cannot be opened or closed
 */
public void testGetAllExtendedProperties() throws IOException {
    // try-with-resources: the document was previously never closed, leaking
    // the underlying package on both the passing and the failing path.
    try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("TestPoiXMLDocumentCorePropertiesGetKeywords.docx")) {
        CTProperties ctProps = doc.getProperties().getExtendedProperties().getUnderlyingProperties();
        assertEquals("Microsoft Office Word", ctProps.getApplication());
        assertEquals("14.0000", ctProps.getAppVersion());
        assertEquals(57, ctProps.getCharacters());
        assertEquals(66, ctProps.getCharactersWithSpaces());
        assertEquals("", ctProps.getCompany());
        assertNull(ctProps.getDigSig());
        assertEquals(0, ctProps.getDocSecurity());
        assertNotNull(ctProps.getDomNode());

        // Heading pairs: a two-entry variant vector (label, then count)
        CTVectorVariant vec = ctProps.getHeadingPairs();
        assertEquals(2, vec.getVector().sizeOfVariantArray());
        assertEquals("Title", vec.getVector().getVariantArray(0).getLpstr());
        assertEquals(1, vec.getVector().getVariantArray(1).getI4());

        assertFalse(ctProps.isSetHiddenSlides());
        assertEquals(0, ctProps.getHiddenSlides());
        assertFalse(ctProps.isSetHLinks());
        assertNull(ctProps.getHLinks());
        assertNull(ctProps.getHyperlinkBase());
        assertTrue(ctProps.isSetHyperlinksChanged());
        assertFalse(ctProps.getHyperlinksChanged());
        assertEquals(1, ctProps.getLines());
        assertTrue(ctProps.isSetLinksUpToDate());
        assertFalse(ctProps.getLinksUpToDate());
        assertNull(ctProps.getManager());
        assertFalse(ctProps.isSetMMClips());
        assertEquals(0, ctProps.getMMClips());
        assertFalse(ctProps.isSetNotes());
        assertEquals(0, ctProps.getNotes());
        assertEquals(1, ctProps.getPages());
        assertEquals(1, ctProps.getParagraphs());
        assertNull(ctProps.getPresentationFormat());
        assertTrue(ctProps.isSetScaleCrop());
        assertFalse(ctProps.getScaleCrop());
        assertTrue(ctProps.isSetSharedDoc());
        assertFalse(ctProps.getSharedDoc());
        assertFalse(ctProps.isSetSlides());
        assertEquals(0, ctProps.getSlides());
        assertEquals("Normal.dotm", ctProps.getTemplate());

        // Titles of parts: a single-entry string vector
        CTVectorLpstr vec2 = ctProps.getTitlesOfParts();
        assertEquals(1, vec2.getVector().sizeOfLpstrArray());
        assertEquals("Example Word 2010 Document", vec2.getVector().getLpstrArray(0));

        assertEquals(3, ctProps.getTotalTime());
        assertEquals(10, ctProps.getWords());

        // Check the digital signature part.
        // It won't be there in this file, but we need to do this check
        // (and touch CTDigSigBlob) so that the appropriate parts end up
        // in the smaller ooxml schemas file.
        CTDigSigBlob blob = ctProps.getDigSig();
        assertNull(blob);
        blob = CTDigSigBlob.Factory.newInstance();
        blob.setBlob(new byte[] { 2, 6, 7, 2, 3, 4, 5, 1, 2, 3 });
    }
}
Also used : CTVectorLpstr(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTVectorLpstr) XWPFDocument(org.apache.poi.xwpf.usermodel.XWPFDocument) CTProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties) CTVectorVariant(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTVectorVariant) CTDigSigBlob(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTDigSigBlob)

Example 3 with CTProperties

use of org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties in project tika by apache.

the class MetadataExtractor method extractMetadata.

/**
 * Copies the values of the given extended properties into {@code metadata},
 * first under the current property keys and then again under the legacy
 * {@code Metadata}/{@code MSOffice} keys.
 *
 * @param properties extended properties whose underlying bean is read
 * @param metadata   target that receives the extracted values
 */
private void extractMetadata(ExtendedProperties properties, Metadata metadata) {
    CTProperties extended = properties.getUnderlyingProperties();

    // TIKA-2055: some ooxml files carry unsigned int/long values that make
    // the getter throw. Fall back to 0 for now, since Word itself converts
    // the value to '0' on save.
    int safeTotalTime = 0;
    try {
        safeTotalTime = extended.getTotalTime();
    } catch (XmlValueOutOfRangeException ignored) {
        // intentionally ignored for now; the default of 0 is kept
    }

    // Application / document level descriptors
    addProperty(metadata, OfficeOpenXMLExtended.APPLICATION, extended.getApplication());
    addProperty(metadata, OfficeOpenXMLExtended.APP_VERSION, extended.getAppVersion());
    addProperty(metadata, TikaCoreProperties.PUBLISHER, extended.getCompany());
    addProperty(metadata, OfficeOpenXMLExtended.COMPANY, extended.getCompany());
    SummaryExtractor.addMulti(metadata, OfficeOpenXMLExtended.MANAGER, extended.getManager());
    addProperty(metadata, OfficeOpenXMLExtended.NOTES, extended.getNotes());
    addProperty(metadata, OfficeOpenXMLExtended.PRESENTATION_FORMAT, extended.getPresentationFormat());
    addProperty(metadata, OfficeOpenXMLExtended.TEMPLATE, extended.getTemplate());
    addProperty(metadata, OfficeOpenXMLExtended.TOTAL_TIME, safeTotalTime);

    // N_PAGES takes the page count when positive, otherwise a positive slide count
    int pageCount = extended.getPages();
    if (pageCount > 0) {
        metadata.set(PagedText.N_PAGES, pageCount);
    } else {
        int slideCount = extended.getSlides();
        if (slideCount > 0) {
            metadata.set(PagedText.N_PAGES, slideCount);
        }
    }

    // Document statistics
    addProperty(metadata, Office.PAGE_COUNT, extended.getPages());
    addProperty(metadata, Office.SLIDE_COUNT, extended.getSlides());
    addProperty(metadata, Office.PARAGRAPH_COUNT, extended.getParagraphs());
    addProperty(metadata, Office.LINE_COUNT, extended.getLines());
    addProperty(metadata, Office.WORD_COUNT, extended.getWords());
    addProperty(metadata, Office.CHARACTER_COUNT, extended.getCharacters());
    addProperty(metadata, Office.CHARACTER_COUNT_WITH_SPACES, extended.getCharactersWithSpaces());

    // Legacy Tika-1.0 style keys
    // TODO Remove these in Tika 2.0
    addProperty(metadata, Metadata.APPLICATION_NAME, extended.getApplication());
    addProperty(metadata, Metadata.APPLICATION_VERSION, extended.getAppVersion());
    addProperty(metadata, Metadata.MANAGER, extended.getManager());
    addProperty(metadata, Metadata.NOTES, extended.getNotes());
    addProperty(metadata, Metadata.PRESENTATION_FORMAT, extended.getPresentationFormat());
    addProperty(metadata, Metadata.TEMPLATE, extended.getTemplate());
    addProperty(metadata, Metadata.TOTAL_TIME, safeTotalTime);
    addProperty(metadata, MSOffice.PAGE_COUNT, extended.getPages());
    addProperty(metadata, MSOffice.SLIDE_COUNT, extended.getSlides());
    addProperty(metadata, MSOffice.PARAGRAPH_COUNT, extended.getParagraphs());
    addProperty(metadata, MSOffice.LINE_COUNT, extended.getLines());
    addProperty(metadata, MSOffice.WORD_COUNT, extended.getWords());
    addProperty(metadata, MSOffice.CHARACTER_COUNT, extended.getCharacters());
    addProperty(metadata, MSOffice.CHARACTER_COUNT_WITH_SPACES, extended.getCharactersWithSpaces());
}
Also used : CTProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties) XmlValueOutOfRangeException(org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException)

Aggregations

CTProperties (org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties): 3, CoreProperties (org.apache.poi.POIXMLProperties.CoreProperties): 1, XSLFSlideShow (org.apache.poi.xslf.usermodel.XSLFSlideShow): 1, XWPFDocument (org.apache.poi.xwpf.usermodel.XWPFDocument): 1, XmlValueOutOfRangeException (org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException): 1, Test (org.junit.Test): 1, CTDigSigBlob (org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTDigSigBlob): 1, CTVectorLpstr (org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTVectorLpstr): 1, CTVectorVariant (org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTVectorVariant): 1