Use of weka.core.Attribute in project dkpro-tc by dkpro.
The class WekaDataWriter, method initalConfiguration.
/**
 * Performs the one-time setup of the incremental ARFF saver, the attribute store and the
 * master {@link Instances} header; later calls return the header built by the first call.
 *
 * <p>The feature description file ({@code Constants.FILENAME_FEATURES_DESCRIPTION} inside
 * {@code outputFolder}) is read line by line. Each line is tab-separated: feature name,
 * feature type and, for {@code NOMINAL} features, a third column that is handed on as the
 * enum type. Blank lines are skipped.
 *
 * @param instances
 *            the instances about to be written; only their count is used here, as the
 *            initial capacity of the master header
 * @return the master {@link Instances} object that holds the attribute layout
 * @throws TextClassificationException
 *             if a description line is malformed or an attribute cannot be created
 * @throws IOException
 *             if the feature description file cannot be read or the saver target cannot be set
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // Already configured by an earlier call: reuse the existing header.
    if (saver != null) {
        return masterInstance;
    }
    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    List<String> lines = FileUtils.readLines(new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION), "utf-8");
    for (String l : lines) {
        // Tolerate empty lines instead of failing on split[1] below.
        if (l.trim().isEmpty()) {
            continue;
        }
        String[] split = l.split("\t");
        if (split.length < 2) {
            throw new TextClassificationException(new IllegalArgumentException(
                    "Malformed feature description line, expected at least a name and a type column separated by a tab: [" + l + "]"));
        }
        String featureName = split[0];
        // The first description of a feature wins; repeated names are ignored.
        if (!attributeStore.containsAttributeName(featureName)) {
            FeatureType type = FeatureType.valueOf(split[1]);
            String enumType = null;
            if (type == FeatureType.NOMINAL) {
                if (split.length < 3) {
                    throw new TextClassificationException(new IllegalArgumentException(
                            "Nominal feature [" + featureName + "] lacks the third column naming its enum type: [" + l + "]"));
                }
                enumType = split[2];
            }
            Attribute attribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, type, enumType);
            attributeStore.addAttribute(featureName, attribute);
        }
    }

    // Make sure the outcome attribute does not clash with the name of a feature attribute.
    List<String> outcomeList = Arrays.asList(outcomes);
    outcomeAttribute = createOutcomeAttribute(outcomeList, isRegression);
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);

    masterInstance = new Instances(WekaUtils.RELATION_NAME, attributeStore.getAttributes(), instances.size());
    masterInstance.setClass(outcomeAttribute);
    saver.setInstances(masterInstance);
    return masterInstance;
}
Use of weka.core.Attribute in project dkpro-tc by dkpro.
The class WekaFeatureEncoder, method getAttributeStore.
/**
 * Collects one attribute per distinct feature name found in the given instances.
 *
 * <p>Features are visited in iteration order. The first feature seen under a name decides
 * the attribute; later features with the same name are skipped.
 *
 * @param instances
 *            the instances whose features are scanned
 * @return a store holding one attribute for every distinct feature name
 * @throws TextClassificationException
 *             if converting a feature into an attribute fails
 */
public static AttributeStore getAttributeStore(Collection<Instance> instances) throws TextClassificationException {
    AttributeStore store = new AttributeStore();
    for (Instance current : instances) {
        for (Feature candidate : current.getFeatures()) {
            String featureName = candidate.getName();
            // Name already registered by an earlier feature: nothing to do.
            if (store.containsAttributeName(featureName)) {
                continue;
            }
            store.addAttribute(featureName, featureToAttribute(candidate));
        }
    }
    return store;
}
Use of weka.core.Attribute in project dkpro-tc by dkpro.
The class WekaFeatureEncoder, method featureToAttributeUsingFeatureDescription.
/**
 * Creates the attribute for a feature from its declared description rather than from a
 * concrete feature value.
 *
 * @param featureName
 *            the feature name; it is passed through {@code Utils.quote} before use
 * @param value
 *            the declared feature type
 * @param enumType
 *            fully qualified name of the enum class whose constants become the attribute
 *            values; only consulted when {@code value} is {@code NOMINAL}
 * @return the attribute matching the described feature
 * @throws TextClassificationException
 *             if the feature is nominal and {@code enumType} is missing, cannot be loaded,
 *             or does not name an enum class
 */
public static Attribute featureToAttributeUsingFeatureDescription(String featureName, FeatureType value, String enumType) throws TextClassificationException {
    String name = Utils.quote(featureName);
    Attribute attribute;
    if (value.equals(FeatureType.NUMERIC) || value.equals(FeatureType.BOOLEAN)) {
        // numeric and boolean features both become a numeric attribute
        attribute = new Attribute(name);
    } else if (value.equals(FeatureType.STRING)) {
        attribute = new Attribute(name, true);
    } else if (value.equals(FeatureType.NOMINAL)) {
        // nominal features take their values from the constants of the named enum
        if (enumType == null) {
            throw new TextClassificationException(new IllegalArgumentException(
                    "No enum type given for nominal feature [" + featureName + "]"));
        }
        Class<?> forName;
        try {
            forName = Class.forName(enumType);
        } catch (ClassNotFoundException e) {
            throw new TextClassificationException(e);
        }
        Object[] enumConstants = forName.getEnumConstants();
        // getEnumConstants() yields null when the class is not an enum type
        if (enumConstants == null) {
            throw new TextClassificationException(new IllegalArgumentException(
                    "Type [" + enumType + "] of nominal feature [" + featureName + "] is not an enum"));
        }
        ArrayList<String> attributeValues = new ArrayList<String>(enumConstants.length);
        for (Object enumConstant : enumConstants) {
            attributeValues.add(enumConstant.toString());
        }
        attribute = new Attribute(name, attributeValues);
    } else {
        // any other type: attribute built with a null value list
        // NOTE(review): presumably Weka treats this as a string attribute - confirm
        attribute = new Attribute(name, (ArrayList<String>) null);
    }
    return attribute;
}
Use of weka.core.Attribute in project dkpro-tc by dkpro.
The class WekaFeatureEncoder, method featureToAttribute.
/**
 * Creates the attribute for a feature based on the runtime type of its value.
 *
 * <ul>
 * <li>{@link Number} and {@link Boolean} values yield a numeric attribute.</li>
 * <li>{@link Enum} values yield a nominal attribute whose values are the string forms of
 * all constants of the enum.</li>
 * <li>Any other value (including {@code null}) yields an attribute built with a null value
 * list.</li>
 * </ul>
 *
 * @param feature
 *            the feature to convert; its name is passed through {@code Utils.quote}
 * @return the attribute representing the feature
 * @throws TextClassificationException
 *             declared for interface compatibility; not thrown by the current implementation
 */
public static Attribute featureToAttribute(Feature feature) throws TextClassificationException {
    String name = Utils.quote(feature.getName());
    Object value = feature.getValue();
    Attribute attribute;
    if (value instanceof Number || value instanceof Boolean) {
        // numbers and booleans are both encoded as a numeric attribute
        attribute = new Attribute(name);
    } else if (value instanceof Enum) {
        // Use the declaring class, not getClass(): for a constant with its own class body
        // getClass() is an anonymous subclass for which getEnumConstants() returns null.
        Object[] enumConstants = ((Enum<?>) value).getDeclaringClass().getEnumConstants();
        ArrayList<String> attributeValues = new ArrayList<String>(enumConstants.length);
        for (Object enumConstant : enumConstants) {
            attributeValues.add(enumConstant.toString());
        }
        attribute = new Attribute(name, attributeValues);
    } else {
        // NOTE(review): presumably Weka treats a null value list as a string attribute - confirm
        attribute = new Attribute(name, (ArrayList<String>) null);
    }
    return attribute;
}
Use of weka.core.Attribute in project dkpro-tc by dkpro.
The class WekaOutcomeIDReport, method generateSlProperties.
/**
 * Builds the id-to-outcome properties for single-label classification or regression.
 *
 * <p>Each instance in {@code predictions} contributes one entry. The key is the string value
 * of the {@code ID_FEATURE_NAME} attribute, or the next entry of {@code documentIdMap} when
 * that map is given (for classification only if {@code isUnit} is false). The value is
 * prediction, gold and a trailing constant joined by {@code SEPARATOR_CHAR}; the constant is
 * {@code -1} for classification and {@code 0} for regression.
 *
 * @param predictions
 *            instances carrying the id attribute, the gold class attribute and the
 *            prediction attribute
 * @param isRegression
 *            whether the outcome is numeric; if false, gold and prediction are mapped to
 *            label numbers
 * @param isUnit
 *            if true, classification keys always come from the id attribute
 * @param documentIdMap
 *            optional replacement ids, looked up by a running counter; may be {@code null}
 * @param labels
 *            the label names used to build the label-to-number mapping (classification only)
 * @return the populated properties
 * @throws Exception
 *             if preparing the baseline or reading the predictions fails
 */
protected Properties generateSlProperties(Instances predictions, boolean isRegression, boolean isUnit, Map<Integer, String> documentIdMap, List<String> labels) throws Exception {
    Properties props = new SortedKeyProperties();
    String[] classValues = new String[predictions.numClasses()];
    for (int i = 0; i < predictions.numClasses(); i++) {
        classValues[i] = predictions.classAttribute().value(i);
    }
    int attOffset = predictions.attribute(ID_FEATURE_NAME).index();
    prepareBaseline();

    // Resolve the attributes once. The "compatible" class attribute is absent when train
    // and test data have not been balanced; fall back to the plain class attribute then.
    Attribute goldAtt = predictions.attribute(CLASS_ATTRIBUTE_NAME + WekaUtils.COMPATIBLE_OUTCOME_CLASS);
    if (goldAtt == null) {
        goldAtt = predictions.attribute(CLASS_ATTRIBUTE_NAME);
    }
    Attribute predictionAtt = predictions.attribute(WekaTestTask.PREDICTION_CLASS_LABEL_NAME);

    // Built on first use so it is never computed for regression or empty predictions.
    Map<String, Integer> class2number = null;
    int idx = 0;
    for (Instance inst : predictions) {
        Double gold = Double.valueOf(inst.value(goldAtt));
        Double prediction = Double.valueOf(inst.value(predictionAtt));
        String stringValue = inst.stringValue(attOffset);
        if (!isRegression) {
            if (class2number == null) {
                class2number = classNamesToMapping(labels);
            }
            Integer goldAsNumber = class2number.get(classValues[gold.intValue()]);
            if (!isUnit && documentIdMap != null) {
                stringValue = documentIdMap.get(idx++);
            }
            props.setProperty(stringValue, getPrediction(prediction, class2number, predictionAtt) + SEPARATOR_CHAR + goldAsNumber + SEPARATOR_CHAR + String.valueOf(-1));
        } else {
            // the outcome is numeric
            if (documentIdMap != null) {
                stringValue = documentIdMap.get(idx++);
            }
            props.setProperty(stringValue, prediction + SEPARATOR_CHAR + gold + SEPARATOR_CHAR + String.valueOf(0));
        }
    }
    return props;
}
Aggregations