Search in sources :

Example 1 with Property

use of org.apache.tika.metadata.Property in project tika by apache.

the class SummaryExtractor method parse.

/**
 * Copies custom document properties into the metadata collection.
 * <p>
 * Every property name is prefixed with
 * {@code Metadata.USER_DEFINED_METADATA_NAME_PREFIX} before it is stored.
 * Values of type String, Date, Boolean, Long, Double and Integer are
 * converted and saved; values of any other type are skipped.
 *
 * @param customProperties the custom properties to read; when {@code null}
 *                         nothing is done
 */
private void parse(CustomProperties customProperties) {
    if (customProperties == null) {
        return;
    }
    for (String name : customProperties.nameSet()) {
        // Apply the custom prefix
        String key = Metadata.USER_DEFINED_METADATA_NAME_PREFIX + name;
        // Get, convert and save property value
        Object value = customProperties.get(name);
        if (value instanceof String) {
            set(key, (String) value);
        } else if (value instanceof Date) {
            Property prop = Property.externalDate(key);
            metadata.set(prop, (Date) value);
        } else if (value instanceof Boolean) {
            Property prop = Property.externalBoolean(key);
            metadata.set(prop, value.toString());
        } else if (value instanceof Long) {
            Property prop = Property.externalInteger(key);
            // Store the full decimal text instead of narrowing to int:
            // narrowing silently corrupts values outside the int range.
            // For values that fit in an int the stored text is unchanged.
            metadata.set(prop, value.toString());
        } else if (value instanceof Double) {
            Property prop = Property.externalReal(key);
            metadata.set(prop, (Double) value);
        } else if (value instanceof Integer) {
            Property prop = Property.externalInteger(key);
            metadata.set(prop, ((Integer) value).intValue());
        }
    }
}
Also used : Property(org.apache.tika.metadata.Property) Date(java.util.Date)

Example 2 with Property

use of org.apache.tika.metadata.Property in project tika by apache.

the class MetadataExtractor method extractMetadata.

/**
 * Walks every entry of the underlying custom-property array and stores its
 * value in the given metadata under the key {@code "custom:" + name}.
 * <p>
 * Date and filetime entries are stored as dates through an external date
 * property; strings, booleans, integers, unsigned integers, reals and
 * decimals are stored as text. Entries of any other type are skipped.
 *
 * @param properties the custom properties whose underlying entries are read
 * @param metadata   the metadata object that receives the values
 */
private void extractMetadata(CustomProperties properties, Metadata metadata) {
    org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties ctProps = properties.getUnderlyingProperties();
    for (int idx = 0; idx < ctProps.sizeOfPropertyArray(); idx++) {
        CTProperty entry = ctProps.getPropertyArray(idx);
        String text = null;
        Date timestamp = null;

        // The checks run in a fixed order; the first type that is set wins.
        if (entry.isSetLpwstr()) {
            // Strings
            text = entry.getLpwstr();
        } else if (entry.isSetLpstr()) {
            text = entry.getLpstr();
        } else if (entry.isSetDate()) {
            // Dates
            timestamp = entry.getDate().getTime();
        } else if (entry.isSetFiletime()) {
            timestamp = entry.getFiletime().getTime();
        } else if (entry.isSetBool()) {
            // Booleans
            text = Boolean.toString(entry.getBool());
        } else if (entry.isSetI1()) {
            // Integers
            text = Integer.toString(entry.getI1());
        } else if (entry.isSetI2()) {
            text = Integer.toString(entry.getI2());
        } else if (entry.isSetI4()) {
            text = Integer.toString(entry.getI4());
        } else if (entry.isSetI8()) {
            text = Long.toString(entry.getI8());
        } else if (entry.isSetInt()) {
            text = Integer.toString(entry.getInt());
        } else if (entry.isSetUi1()) {
            // Unsigned Integers
            text = Integer.toString(entry.getUi1());
        } else if (entry.isSetUi2()) {
            text = Integer.toString(entry.getUi2());
        } else if (entry.isSetUi4()) {
            text = Long.toString(entry.getUi4());
        } else if (entry.isSetUi8()) {
            text = entry.getUi8().toString();
        } else if (entry.isSetUint()) {
            text = Long.toString(entry.getUint());
        } else if (entry.isSetR4()) {
            // Reals
            text = Float.toString(entry.getR4());
        } else if (entry.isSetR8()) {
            text = Double.toString(entry.getR8());
        } else if (entry.isSetDecimal()) {
            BigDecimal decimal = entry.getDecimal();
            text = (decimal == null) ? null : decimal.toPlainString();
        } else if (entry.isSetArray()
                || entry.isSetVector()
                || entry.isSetBlob() || entry.isSetOblob()
                || entry.isSetStream() || entry.isSetOstream() || entry.isSetVstream()
                || entry.isSetStorage() || entry.isSetOstorage()) {
            // TODO Array / vector: fetch the values and output
            // TODO Blob, stream and storage types: decode, if possible
        }
        // Any other type isn't currently supported, so the property is skipped

        String propName = "custom:" + entry.getName();
        if (timestamp != null) {
            metadata.set(Property.externalDate(propName), timestamp);
        } else if (text != null) {
            metadata.set(propName, text);
        }
    }
}
Also used : CTProperty(org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty) Property(org.apache.tika.metadata.Property) Date(java.util.Date) BigDecimal(java.math.BigDecimal)

Example 3 with Property

use of org.apache.tika.metadata.Property in project tika by apache.

the class NetCDFParser method parse.

/**
 * Parses a NetCDF file: copies its global attributes into the metadata and
 * emits its dimensions and variables as XHTML lists.
 * <p>
 * The stream is accessed as a file via {@code TikaInputStream}, and that
 * file is opened by path with {@code NetcdfFile.open}.
 *
 * @param stream   the document stream to parse
 * @param handler  receives the generated XHTML SAX events
 * @param metadata receives the file type description and global attributes
 * @param context  the parse context (not used by this method)
 * @throws IOException   if closing the NetCDF file fails
 * @throws SAXException  if the content handler fails
 * @throws TikaException if an {@code IOException} occurs while reading the file
 *
 * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
 * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
 * org.apache.tika.parser.ParseContext)
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    // Only create (and later dispose) temporary resources when the caller
    // did not already hand us a TikaInputStream.
    TemporaryResources tmp = TikaInputStream.isTikaInputStream(stream) ? null : new TemporaryResources();
    TikaInputStream tis = TikaInputStream.get(stream, tmp);
    NetcdfFile ncFile = null;
    try {
        ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath());
        metadata.set("File-Type-Description", ncFile.getFileTypeDescription());
        // first parse out the set of global attributes
        for (Attribute attr : ncFile.getGlobalAttributes()) {
            Property property = resolveMetadataKey(attr.getFullName());
            if (attr.getDataType().isString()) {
                metadata.add(property, attr.getStringValue());
            } else if (attr.getDataType().isNumeric()) {
                // NOTE(review): intValue() drops any fractional part of a
                // floating-point attribute - confirm this is intended.
                int value = attr.getNumericValue().intValue();
                metadata.add(property, String.valueOf(value));
            }
        }
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.newline();
        xhtml.element("h1", "dimensions");
        xhtml.startElement("ul");
        xhtml.newline();
        for (Dimension dim : ncFile.getDimensions()) {
            xhtml.element("li", dim.getFullName() + " = " + dim.getLength());
        }
        xhtml.endElement("ul");
        xhtml.element("h1", "variables");
        xhtml.startElement("ul");
        xhtml.newline();
        for (Variable var : ncFile.getVariables()) {
            xhtml.startElement("li");
            xhtml.characters(var.getDataType() + " " + var.getNameAndDimensions());
            xhtml.newline();
            List<Attribute> attributes = var.getAttributes();
            if (!attributes.isEmpty()) {
                xhtml.startElement("ul");
                for (Attribute element : attributes) {
                    xhtml.element("li", element.toString());
                }
                xhtml.endElement("ul");
            }
            xhtml.endElement("li");
        }
        xhtml.endElement("ul");
        xhtml.endDocument();
    } catch (IOException e) {
        throw new TikaException("NetCDF parse error", e);
    } finally {
        // Nested try/finally: the temporary resources must be disposed even
        // when closing the NetCDF file throws.
        try {
            if (ncFile != null) {
                ncFile.close();
            }
        } finally {
            if (tmp != null) {
                tmp.dispose();
            }
        }
    }
}
Also used : NetcdfFile(ucar.nc2.NetcdfFile) Variable(ucar.nc2.Variable) TikaException(org.apache.tika.exception.TikaException) Attribute(ucar.nc2.Attribute) TemporaryResources(org.apache.tika.io.TemporaryResources) TikaInputStream(org.apache.tika.io.TikaInputStream) Dimension(ucar.nc2.Dimension) IOException(java.io.IOException) XHTMLContentHandler(org.apache.tika.sax.XHTMLContentHandler) Property(org.apache.tika.metadata.Property)

Example 4 with Property

use of org.apache.tika.metadata.Property in project tika by apache.

the class ExternalEmbedder method getCommandMetadataSegments.

/**
 * Constructs a collection of command line arguments responsible for setting
 * individual metadata fields based on the given <code>metadata</code>.
 * <p>
 * For every metadata name that equals the name of a property in the
 * configured metadata command arguments, one segment is produced per command
 * argument and value. Multi-valued fields are joined to the argument with
 * the append operator, single-valued fields with the assignment operator.
 *
 * @param metadata the metadata to embed
 * @return the metadata-related command line arguments; empty (never
 *         <code>null</code>) when <code>metadata</code> or its names are
 *         <code>null</code>
 */
protected List<String> getCommandMetadataSegments(Metadata metadata) {
    List<String> commandMetadataSegments = new ArrayList<String>();
    if (metadata == null || metadata.names() == null) {
        return commandMetadataSegments;
    }
    // Fetch the mapping once and walk its entries, rather than re-fetching it
    // and doing a second lookup per key on every iteration.
    java.util.Map<Property, String[]> commandArguments = getMetadataCommandArguments();
    for (String metadataName : metadata.names()) {
        for (java.util.Map.Entry<Property, String[]> mapping : commandArguments.entrySet()) {
            if (!metadataName.equals(mapping.getKey().getName())) {
                continue;
            }
            String[] metadataCommandArguments = mapping.getValue();
            if (metadataCommandArguments == null) {
                continue;
            }
            for (String metadataCommandArgument : metadataCommandArguments) {
                if (metadata.isMultiValued(metadataName)) {
                    for (String metadataValue : metadata.getValues(metadataName)) {
                        commandMetadataSegments.add(metadataCommandArgument + commandAppendOperator + quoteAssignmentValue(metadataValue));
                    }
                } else {
                    commandMetadataSegments.add(metadataCommandArgument + commandAssignmentOperator + quoteAssignmentValue(metadata.get(metadataName)));
                }
            }
        }
    }
    return commandMetadataSegments;
}

/**
 * Wraps the value in single quotes when value quoting is enabled.
 * <p>
 * NOTE(review): single quotes embedded in the value are not escaped; if
 * these segments reach a shell, confirm the values are trusted.
 *
 * @param assignmentValue the raw metadata value
 * @return the quoted value, or the value unchanged when quoting is disabled
 */
private String quoteAssignmentValue(String assignmentValue) {
    if (quoteAssignmentValues) {
        return "'" + assignmentValue + "'";
    }
    return assignmentValue;
}
Also used : ArrayList(java.util.ArrayList) Property(org.apache.tika.metadata.Property)

Example 5 with Property

use of org.apache.tika.metadata.Property in project tika by apache.

the class DWGParser method handleHeader.

/**
 * Handles one header value: stores it in the metadata when the header number
 * has a property in {@code HEADER_PROPERTIES_ENTRIES}, and always writes it
 * out as an XHTML paragraph. Null or empty values are ignored entirely.
 *
 * @param headerNumber index into {@code HEADER_PROPERTIES_ENTRIES}
 * @param value        the header text; may be null or empty
 * @param metadata     receives the value when a property exists for the header
 * @param xhtml        receives the value as a {@code p} element
 * @throws SAXException if writing the paragraph fails
 */
private void handleHeader(int headerNumber, String value, Metadata metadata, XHTMLContentHandler xhtml) throws SAXException {
    boolean hasText = value != null && value.length() != 0;
    if (hasText) {
        Property mapped = HEADER_PROPERTIES_ENTRIES[headerNumber];
        if (mapped != null) {
            metadata.set(mapped, value);
        }
        xhtml.element("p", value);
    }
}
Also used : Property(org.apache.tika.metadata.Property)

Aggregations

Property (org.apache.tika.metadata.Property): 8, IOException (java.io.IOException): 2, Date (java.util.Date): 2, TikaException (org.apache.tika.exception.TikaException): 2, TemporaryResources (org.apache.tika.io.TemporaryResources): 2, TikaInputStream (org.apache.tika.io.TikaInputStream): 2, XHTMLContentHandler (org.apache.tika.sax.XHTMLContentHandler): 2, Attribute (ucar.nc2.Attribute): 2, Dimension (ucar.nc2.Dimension): 2, NetcdfFile (ucar.nc2.NetcdfFile): 2, Variable (ucar.nc2.Variable): 2, XMPProperty (com.adobe.xmp.properties.XMPProperty): 1, File (java.io.File): 1, BigDecimal (java.math.BigDecimal): 1, ArrayList (java.util.ArrayList): 1, Enumeration (java.util.Enumeration): 1, PropertyTypeException (org.apache.tika.metadata.PropertyTypeException): 1, CTProperty (org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty): 1