use of org.apache.tika.metadata.Property in project tika by apache.
the class SummaryExtractor method parse.
/**
 * Copies every custom document property into the metadata collection.
 * <p>
 * Each property name is prefixed with
 * {@link Metadata#USER_DEFINED_METADATA_NAME_PREFIX} to form the metadata key.
 * Only {@code String}, {@code Date}, {@code Boolean}, {@code Long},
 * {@code Double} and {@code Integer} values are stored; a value of any other
 * type (including {@code null}) is skipped.
 *
 * @param customProperties the custom properties to copy; when {@code null}
 *                         nothing is done
 */
private void parse(CustomProperties customProperties) {
    if (customProperties == null) {
        return;
    }
    for (String name : customProperties.nameSet()) {
        // Apply the custom prefix
        String key = Metadata.USER_DEFINED_METADATA_NAME_PREFIX + name;
        // Get, convert and save property value
        Object value = customProperties.get(name);
        if (value instanceof String) {
            set(key, (String) value);
        } else if (value instanceof Date) {
            metadata.set(Property.externalDate(key), (Date) value);
        } else if (value instanceof Boolean) {
            metadata.set(Property.externalBoolean(key), value.toString());
        } else if (value instanceof Long) {
            // Store the full decimal representation: narrowing to int would
            // silently corrupt any value outside the 32-bit range.
            metadata.set(Property.externalInteger(key), value.toString());
        } else if (value instanceof Double) {
            metadata.set(Property.externalReal(key), (Double) value);
        } else if (value instanceof Integer) {
            metadata.set(Property.externalInteger(key), ((Integer) value).intValue());
        }
    }
}
use of org.apache.tika.metadata.Property in project tika by apache.
the class MetadataExtractor method extractMetadata.
/**
 * Walks the underlying custom property array and stores each supported
 * property in the given metadata under the key {@code "custom:" + name}.
 * <p>
 * Date and filetime properties are stored as dates through an external date
 * {@link Property}; string, boolean, integer, real and decimal properties are
 * stored as text. Array, vector, blob, stream, storage and any other property
 * types are currently ignored.
 *
 * @param properties the custom properties to read
 * @param metadata   the metadata object that receives the values
 */
private void extractMetadata(CustomProperties properties, Metadata metadata) {
    org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties ctProps = properties.getUnderlyingProperties();
    for (int idx = 0; idx < ctProps.sizeOfPropertyArray(); idx++) {
        CTProperty entry = ctProps.getPropertyArray(idx);
        String text = null;
        Date timestamp = null;

        // Exactly one branch is taken: the first "is set" check that succeeds wins.
        if (entry.isSetLpwstr()) {
            text = entry.getLpwstr();
        } else if (entry.isSetLpstr()) {
            text = entry.getLpstr();
        } else if (entry.isSetDate()) {
            timestamp = entry.getDate().getTime();
        } else if (entry.isSetFiletime()) {
            timestamp = entry.getFiletime().getTime();
        } else if (entry.isSetBool()) {
            text = Boolean.toString(entry.getBool());
        } else if (entry.isSetI1()) {
            // Signed integers
            text = Integer.toString(entry.getI1());
        } else if (entry.isSetI2()) {
            text = Integer.toString(entry.getI2());
        } else if (entry.isSetI4()) {
            text = Integer.toString(entry.getI4());
        } else if (entry.isSetI8()) {
            text = Long.toString(entry.getI8());
        } else if (entry.isSetInt()) {
            text = Integer.toString(entry.getInt());
        } else if (entry.isSetUi1()) {
            // Unsigned integers
            text = Integer.toString(entry.getUi1());
        } else if (entry.isSetUi2()) {
            text = Integer.toString(entry.getUi2());
        } else if (entry.isSetUi4()) {
            text = Long.toString(entry.getUi4());
        } else if (entry.isSetUi8()) {
            text = entry.getUi8().toString();
        } else if (entry.isSetUint()) {
            text = Long.toString(entry.getUint());
        } else if (entry.isSetR4()) {
            // Reals
            text = Float.toString(entry.getR4());
        } else if (entry.isSetR8()) {
            text = Double.toString(entry.getR8());
        } else if (entry.isSetDecimal()) {
            BigDecimal decimal = entry.getDecimal();
            text = (decimal == null) ? null : decimal.toPlainString();
        } else if (entry.isSetArray()) {
            // TODO Fetch the array values and output
        } else if (entry.isSetVector()) {
            // TODO Fetch the vector values and output
        } else if (entry.isSetBlob() || entry.isSetOblob()) {
            // TODO Decode, if possible
        } else if (entry.isSetStream() || entry.isSetOstream() || entry.isSetVstream()) {
            // TODO Decode, if possible
        } else if (entry.isSetStorage() || entry.isSetOstorage()) {
            // TODO Decode, if possible
        } else {
            // This type isn't currently supported yet, skip the property
        }

        String metadataKey = "custom:" + entry.getName();
        if (timestamp != null) {
            // Dates take precedence and are stored through a typed property
            metadata.set(Property.externalDate(metadataKey), timestamp);
            continue;
        }
        if (text != null) {
            metadata.set(metadataKey, text);
        }
    }
}
use of org.apache.tika.metadata.Property in project tika by apache.
the class NetCDFParser method parse.
/*
 * (non-Javadoc)
 *
 * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
 * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
 * org.apache.tika.parser.ParseContext)
 *
 * Opens the stream as a NetCDF file, records its global attributes as
 * metadata, and emits XHTML listing the file's dimensions and variables
 * (with each variable's attributes). An IOException raised while reading is
 * rethrown as a TikaException. The NetCDF file is always closed and, when
 * this method created the temporary resources itself, they are always
 * disposed, even if closing the file fails.
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    // Only own (and later dispose) temporary resources when the caller did not
    // already hand us a TikaInputStream.
    TemporaryResources tmp = TikaInputStream.isTikaInputStream(stream) ? null : new TemporaryResources();
    TikaInputStream tis = TikaInputStream.get(stream, tmp);
    NetcdfFile ncFile = null;
    try {
        ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath());
        metadata.set("File-Type-Description", ncFile.getFileTypeDescription());
        // first parse out the set of global attributes
        for (Attribute attr : ncFile.getGlobalAttributes()) {
            Property property = resolveMetadataKey(attr.getFullName());
            if (attr.getDataType().isString()) {
                metadata.add(property, attr.getStringValue());
            } else if (attr.getDataType().isNumeric()) {
                // NOTE(review): intValue() drops any fractional part of a
                // floating-point attribute; kept as-is to preserve existing output.
                int value = attr.getNumericValue().intValue();
                metadata.add(property, String.valueOf(value));
            }
        }
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.newline();
        // List of dimensions as "name = length"
        xhtml.element("h1", "dimensions");
        xhtml.startElement("ul");
        xhtml.newline();
        for (Dimension dim : ncFile.getDimensions()) {
            xhtml.element("li", dim.getFullName() + " = " + dim.getLength());
        }
        xhtml.endElement("ul");
        // List of variables, each with a nested list of its attributes
        xhtml.element("h1", "variables");
        xhtml.startElement("ul");
        xhtml.newline();
        for (Variable var : ncFile.getVariables()) {
            xhtml.startElement("li");
            xhtml.characters(var.getDataType() + " " + var.getNameAndDimensions());
            xhtml.newline();
            List<Attribute> attributes = var.getAttributes();
            if (!attributes.isEmpty()) {
                xhtml.startElement("ul");
                for (Attribute element : attributes) {
                    xhtml.element("li", element.toString());
                }
                xhtml.endElement("ul");
            }
            xhtml.endElement("li");
        }
        xhtml.endElement("ul");
        xhtml.endDocument();
    } catch (IOException e) {
        throw new TikaException("NetCDF parse error", e);
    } finally {
        // Nested try/finally: a failure while closing the NetCDF file must not
        // prevent the temporary resources from being disposed.
        try {
            if (ncFile != null) {
                ncFile.close();
            }
        } finally {
            if (tmp != null) {
                tmp.dispose();
            }
        }
    }
}
use of org.apache.tika.metadata.Property in project tika by apache.
the class ExternalEmbedder method getCommandMetadataSegments.
/**
 * Constructs a collection of command line arguments responsible for setting
 * individual metadata fields based on the given <code>metadata</code>.
 * <p>
 * For every metadata name that equals the name of a property in the
 * configured metadata command arguments, one segment is produced per command
 * argument and per value. Multi-valued fields use the append operator,
 * single-valued fields use the assignment operator. Values are wrapped in
 * single quotes when quoting of assignment values is enabled.
 *
 * @param metadata the metadata to embed
 * @return the metadata-related command line arguments; empty (never
 *         <code>null</code>) when there is no metadata or no command
 *         argument mapping
 */
protected List<String> getCommandMetadataSegments(Metadata metadata) {
    List<String> commandMetadataSegments = new ArrayList<String>();
    if (metadata == null) {
        return commandMetadataSegments;
    }
    String[] metadataNames = metadata.names();
    if (metadataNames == null) {
        return commandMetadataSegments;
    }
    // Fetch the mapping once instead of once per metadata name and per match.
    java.util.Map<Property, String[]> argumentsByProperty = getMetadataCommandArguments();
    if (argumentsByProperty == null) {
        return commandMetadataSegments;
    }
    for (String metadataName : metadataNames) {
        for (java.util.Map.Entry<Property, String[]> mapping : argumentsByProperty.entrySet()) {
            if (!metadataName.equals(mapping.getKey().getName())) {
                continue;
            }
            String[] metadataCommandArguments = mapping.getValue();
            if (metadataCommandArguments == null) {
                continue;
            }
            if (metadata.isMultiValued(metadataName)) {
                // Multi-valued field: one appended segment per value, per argument
                String[] metadataValues = metadata.getValues(metadataName);
                for (String metadataCommandArgument : metadataCommandArguments) {
                    for (String metadataValue : metadataValues) {
                        String assignmentValue = quoteAssignmentValues ? "'" + metadataValue + "'" : metadataValue;
                        commandMetadataSegments.add(metadataCommandArgument + commandAppendOperator + assignmentValue);
                    }
                }
            } else {
                // Single-valued field: one assignment segment per argument
                String singleValue = metadata.get(metadataName);
                String assignmentValue = quoteAssignmentValues ? "'" + singleValue + "'" : singleValue;
                for (String metadataCommandArgument : metadataCommandArguments) {
                    commandMetadataSegments.add(metadataCommandArgument + commandAssignmentOperator + assignmentValue);
                }
            }
        }
    }
    return commandMetadataSegments;
}
use of org.apache.tika.metadata.Property in project tika by apache.
the class DWGParser method handleHeader.
/**
 * Records one header value: stores it in the metadata when the header number
 * has an associated property in {@code HEADER_PROPERTIES_ENTRIES}, and always
 * writes it out as a paragraph. Null or empty values are ignored entirely.
 *
 * @param headerNumber index into {@code HEADER_PROPERTIES_ENTRIES}
 * @param value        the header text; nothing happens when null or empty
 * @param metadata     the metadata object that receives mapped headers
 * @param xhtml        the handler that receives the paragraph element
 * @throws SAXException if writing the paragraph element fails
 */
private void handleHeader(int headerNumber, String value, Metadata metadata, XHTMLContentHandler xhtml) throws SAXException {
    boolean hasText = value != null && value.length() > 0;
    if (hasText) {
        Property mappedProperty = HEADER_PROPERTIES_ENTRIES[headerNumber];
        if (mappedProperty != null) {
            metadata.set(mappedProperty, value);
        }
        xhtml.element("p", value);
    }
}
Aggregations