use of weka.core.Attribute in project dkpro-tc by dkpro.
the class WekaUtils method getInstanceIdAttributeOffset.
/**
 * Looks up the position of the instance-id attribute in a Weka data set.
 * <p>
 * Positions are counted starting at 1 over the attributes returned by
 * {@code data.enumerateAttributes()}; the first attribute whose name equals
 * {@code Constants.ID_FEATURE_NAME} wins.
 * <p>
 * NOTE(review): Weka documents {@code enumerateAttributes()} as omitting the class attribute -
 * confirm the returned position is what callers expect when a class index is set.
 *
 * @param data
 *            weka instances
 * @return the 1-based position of the id attribute, or {@code -1} if no attribute carries the
 *         id feature name
 */
public static int getInstanceIdAttributeOffset(Instances data) {
    Enumeration<Attribute> attributes = data.enumerateAttributes();
    for (int position = 1; attributes.hasMoreElements(); position++) {
        Attribute candidate = attributes.nextElement();
        if (candidate.name().equals(Constants.ID_FEATURE_NAME)) {
            return position;
        }
    }
    return -1;
}
use of weka.core.Attribute in project dkpro-tc by dkpro.
the class WekaUtils method getFeatureValues.
/**
 * Converts the features of a TC instance into an array of Weka attribute values.
 * <p>
 * The array has one slot per attribute in the store; slots that no feature of the instance maps
 * to keep the default {@code 0.0}. Numbers are stored as their double value, booleans as
 * {@code 1.0} / {@code 0.0}, and any other value as the index of its string form within the
 * nominal or string attribute. A string attribute that does not know the value yet gets it
 * added. Features whose name is not registered in the attribute store are skipped.
 *
 * @param attributeStore
 *            weka attribute store
 * @param instance
 *            tc instance
 * @return array of double values, indexed by the attribute offsets of the store
 * @throws IllegalArgumentException
 *             if a known feature has no value, if a non-numeric, non-boolean value belongs to
 *             an attribute that is neither nominal nor string, or if a nominal attribute does
 *             not define the value
 */
private static double[] getFeatureValues(AttributeStore attributeStore, Instance instance) {
    double[] featureValues = new double[attributeStore.getAttributes().size()];
    for (Feature feature : instance.getFeatures()) {
        String featureName = feature.getName();
        // Skip unseen attributes explicitly instead of catching a NullPointerException, so that
        // unrelated null-pointer bugs are no longer silently swallowed.
        if (!attributeStore.containsAttributeName(featureName)) {
            continue;
        }
        Attribute attribute = attributeStore.getAttribute(featureName);
        Object featureValue = feature.getValue();
        double attributeValue;
        if (featureValue == null) {
            throw new IllegalArgumentException("You have an instance which doesn't specify a value for the feature " + featureName);
        } else if (featureValue instanceof Number) {
            // numeric attribute
            attributeValue = ((Number) featureValue).doubleValue();
        } else if (featureValue instanceof Boolean) {
            // boolean attribute
            attributeValue = (Boolean) featureValue ? 1.0d : 0.0d;
        } else {
            // nominal or string
            if (!attribute.isNominal() && !attribute.isString()) {
                throw new IllegalArgumentException("Attribute neither nominal nor string: " + featureValue);
            }
            String stringValue = featureValue.toString();
            int valIndex = attribute.indexOfValue(stringValue);
            if (valIndex == -1) {
                if (attribute.isNominal()) {
                    throw new IllegalArgumentException("Value not defined for given nominal attribute!");
                }
                // string attributes grow on demand: register the value, then look up its index
                attribute.addStringValue(stringValue);
                valIndex = attribute.indexOfValue(stringValue);
            }
            attributeValue = valIndex;
        }
        int offset = attributeStore.getAttributeOffset(attribute.name());
        if (offset != -1) {
            featureValues[offset] = attributeValue;
        }
    }
    return featureValues;
}
use of weka.core.Attribute in project dkpro-tc by dkpro.
the class MekaDataWriter method initalConfiguration.
/**
 * Sets up the ARFF saver, the attribute store and the master data set on first use.
 * <p>
 * When a saver already exists the current master data set is returned unchanged. Otherwise the
 * feature description file in the output folder is read (tab-separated: feature name, feature
 * type and, for nominal features, the enum type), one attribute is registered per distinct
 * feature name, and the outcome attributes are placed in front of them.
 *
 * @param instances
 *            the instances about to be written; only their count is used here, as the initial
 *            capacity of the master data set
 * @return the master data set that written instances are attached to
 * @throws TextClassificationException
 *             declared for the attribute-building calls made here
 * @throws IOException
 *             if the feature description file cannot be read or the saver target cannot be set
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // An existing saver marks the configuration as already done.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    for (String descriptionLine : FileUtils.readLines(descriptionFile, "utf-8")) {
        String[] columns = descriptionLine.split("\t");
        String featureName = columns[0];
        if (attributeStore.containsAttributeName(featureName)) {
            // only the first description of a feature name is used
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(columns[1]);
        // only nominal features carry an enum type in the third column
        String enumType = featureType == FeatureType.NOMINAL ? columns[2] : null;
        attributeStore.addAttribute(featureName, WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, featureType, enumType));
    }

    // Build one attribute per outcome label.
    outcomeAttributes = createOutcomeAttributes(Arrays.asList(outcomes));
    // in Meka, class label attributes have to go on top
    for (Attribute outcomeAttribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(outcomeAttribute.name(), outcomeAttribute);
    }

    // for Meka-internal use: the relation name carries the number of label attributes
    int labelCount = outcomeAttributes.size();
    String relationName = WekaUtils.RELATION_NAME + ": -C " + labelCount + " ";
    masterInstance = new Instances(relationName, attributeStore.getAttributes(), instances.size());
    masterInstance.setClassIndex(labelCount);
    saver.setInstances(masterInstance);
    return masterInstance;
}
use of weka.core.Attribute in project dkpro-tc by dkpro.
the class MekaDataWriter method getFeatureValues.
/**
 * Converts the features of a TC instance into an array of Weka attribute values.
 * <p>
 * The array has one slot per attribute in the store; slots that no feature of the instance maps
 * to keep the default {@code 0.0}. Numbers are stored as their double value, booleans as
 * {@code 1.0} / {@code 0.0}, and any other value as the index of its string form within the
 * nominal or string attribute. A string attribute that does not know the value yet gets it
 * added. Features whose name is not registered in the attribute store are skipped.
 *
 * @param attributeStore
 *            weka attribute store
 * @param instance
 *            tc instance
 * @return array of double values, indexed by the attribute offsets of the store
 * @throws IllegalArgumentException
 *             if a known feature has no value, if a non-numeric, non-boolean value belongs to
 *             an attribute that is neither nominal nor string, or if a nominal attribute does
 *             not define the value
 */
private double[] getFeatureValues(AttributeStore attributeStore, Instance instance) {
    double[] featureValues = new double[attributeStore.getAttributes().size()];
    for (Feature feature : instance.getFeatures()) {
        String featureName = feature.getName();
        // Skip unseen attributes explicitly instead of catching a NullPointerException, so that
        // unrelated null-pointer bugs are no longer silently swallowed.
        if (!attributeStore.containsAttributeName(featureName)) {
            continue;
        }
        Attribute attribute = attributeStore.getAttribute(featureName);
        Object featureValue = feature.getValue();
        double attributeValue;
        if (featureValue == null) {
            throw new IllegalArgumentException("You have an instance which doesn't specify a value for the feature " + featureName);
        } else if (featureValue instanceof Number) {
            // numeric attribute
            attributeValue = ((Number) featureValue).doubleValue();
        } else if (featureValue instanceof Boolean) {
            // boolean attribute
            attributeValue = (Boolean) featureValue ? 1.0d : 0.0d;
        } else {
            // nominal or string
            if (!attribute.isNominal() && !attribute.isString()) {
                throw new IllegalArgumentException("Attribute neither nominal nor string: " + featureValue);
            }
            String stringValue = featureValue.toString();
            int valIndex = attribute.indexOfValue(stringValue);
            if (valIndex == -1) {
                if (attribute.isNominal()) {
                    throw new IllegalArgumentException("Value not defined for given nominal attribute!");
                }
                // string attributes grow on demand: register the value, then look up its index
                attribute.addStringValue(stringValue);
                valIndex = attribute.indexOfValue(stringValue);
            }
            attributeValue = valIndex;
        }
        int offset = attributeStore.getAttributeOffset(attribute.name());
        if (offset != -1) {
            featureValues[offset] = attributeValue;
        }
    }
    return featureValues;
}
use of weka.core.Attribute in project dkpro-tc by dkpro.
the class MekaDataWriter method writeClassifierFormat.
/**
 * Converts each given instance into a Weka instance and hands it to the incremental saver.
 * <p>
 * For every instance the feature values are encoded first, then each outcome attribute's slot
 * is set to {@code 1.0} if the instance's outcomes contain the label (the part of the attribute
 * name following {@code CLASS_ATTRIBUTE_PREFIX}) and to {@code 0.0} otherwise.
 *
 * @param instances
 *            the instances to write
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised during configuration, encoding or writing
 */
@Override
public void writeClassifierFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        Instances dataset = initalConfiguration(instances);
        for (Instance tcInstance : instances) {
            double[] row = getFeatureValues(attributeStore, tcInstance);

            // set class label values
            List<String> assignedOutcomes = tcInstance.getOutcomes();
            for (Attribute labelAttribute : outcomeAttributes) {
                String attributeName = labelAttribute.name();
                int column = attributeStore.getAttributeOffset(attributeName);
                String outcomeName = attributeName.split(CLASS_ATTRIBUTE_PREFIX)[1];
                row[column] = assignedOutcomes.contains(outcomeName) ? 1.0d : 0.0d;
            }

            weka.core.Instance wekaInstance = useSparse
                    ? new SparseInstance(1.0, row)
                    : new DenseInstance(1.0, row);
            wekaInstance.setDataset(dataset);
            if (applyWeighting) {
                wekaInstance.setWeight(tcInstance.getWeight());
            }
            saver.writeIncremental(wekaInstance);
        }
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Aggregations