Use of org.dkpro.tc.ml.weka.util.AttributeStore in project dkpro-tc by dkpro.
Class MekaDataWriter, method initalConfiguration.
/**
 * Lazily builds the ARFF saver and the shared {@link Instances} header used for
 * multi-label (Meka) output.
 *
 * <p>On the first successful call the feature description file in {@code outputFolder}
 * is read, one attribute is registered per distinct feature name, the outcome (label)
 * attributes are placed in front of the feature attributes, and the resulting header is
 * handed to the saver. Later calls return the cached header.
 *
 * <p>The {@code saver} field doubles as the "already initialised" flag, so it is only
 * assigned once every step has succeeded. A failed attempt therefore leaves the writer
 * uninitialised and can be retried, instead of making later calls return a {@code null}
 * header.
 *
 * @param instances
 *            the instances about to be written; only {@code size()} is used, as the
 *            initial capacity of the header
 * @return the header shared by all written instances
 * @throws TextClassificationException
 *             declared by this method; presumably raised while creating attributes
 *             (verify against the helpers that are called)
 * @throws IOException
 *             if the feature description cannot be read or contains a malformed line,
 *             or if the saver rejects the target file
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    if (saver != null) {
        return masterInstance;
    }

    // Configure a local saver first; it is published to the field at the very end.
    ArffSaver arffSaver = new ArffSaver();
    arffSaver.setRetrieval(Saver.INCREMENTAL);
    arffSaver.setFile(arffTarget);
    arffSaver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    List<String> lines = FileUtils.readLines(descriptionFile, "utf-8");
    for (String line : lines) {
        if (line.trim().isEmpty()) {
            // tolerate blank lines instead of failing on the missing type column
            continue;
        }
        // expected layout: <featureName> TAB <featureType> [TAB <nominal values>]
        String[] split = line.split("\t");
        String featureName = split[0];
        if (attributeStore.containsAttributeName(featureName)) {
            // the first description of a feature name wins
            continue;
        }
        if (split.length < 2) {
            throw new IOException("Malformed line in [" + descriptionFile + "], missing feature type: [" + line + "]");
        }
        FeatureType type = FeatureType.valueOf(split[1]);
        String enumType = null;
        if (type == FeatureType.NOMINAL) {
            if (split.length < 3) {
                throw new IOException("Malformed line in [" + descriptionFile + "], nominal feature without value description: [" + line + "]");
            }
            enumType = split[2];
        }
        Attribute attribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, type, enumType);
        attributeStore.addAttribute(featureName, attribute);
    }

    // One attribute per outcome label
    List<String> outcomeList = Arrays.asList(outcomes);
    outcomeAttributes = createOutcomeAttributes(outcomeList);
    // in Meka, class label attributes have to go on top
    for (Attribute attribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(attribute.name(), attribute);
    }

    // for Meka-internal use: the relation name carries the number of label attributes ("-C n")
    masterInstance = new Instances(WekaUtils.RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    masterInstance.setClassIndex(outcomeAttributes.size());
    arffSaver.setInstances(masterInstance);

    // Publish last: from here on the guard at the top returns a fully built header.
    saver = arffSaver;
    return masterInstance;
}
Use of org.dkpro.tc.ml.weka.util.AttributeStore in project dkpro-tc by dkpro.
Class WekaDataWriter, method initalConfiguration.
/**
 * Lazily builds the ARFF saver and the shared {@link Instances} header used for
 * single-label or regression (Weka) output.
 *
 * <p>On the first successful call the feature description file in {@code outputFolder}
 * is read, one attribute is registered per distinct feature name, the outcome attribute
 * is appended after the feature attributes and set as the class attribute, and the
 * resulting header is handed to the saver. Later calls return the cached header.
 *
 * <p>The {@code saver} field doubles as the "already initialised" flag, so it is only
 * assigned once every step has succeeded. A failed attempt therefore leaves the writer
 * uninitialised and can be retried, instead of making later calls return a {@code null}
 * header.
 *
 * @param instances
 *            the instances about to be written; only {@code size()} is used, as the
 *            initial capacity of the header
 * @return the header shared by all written instances
 * @throws TextClassificationException
 *             declared by this method; presumably raised while creating attributes
 *             (verify against the helpers that are called)
 * @throws IOException
 *             if the feature description cannot be read or contains a malformed line,
 *             or if the saver rejects the target file
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    if (saver != null) {
        return masterInstance;
    }

    // Configure a local saver first; it is published to the field at the very end.
    ArffSaver arffSaver = new ArffSaver();
    arffSaver.setRetrieval(Saver.INCREMENTAL);
    arffSaver.setFile(arffTarget);
    arffSaver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    List<String> lines = FileUtils.readLines(descriptionFile, "utf-8");
    for (String line : lines) {
        if (line.trim().isEmpty()) {
            // tolerate blank lines instead of failing on the missing type column
            continue;
        }
        // expected layout: <featureName> TAB <featureType> [TAB <nominal values>]
        String[] split = line.split("\t");
        String featureName = split[0];
        if (attributeStore.containsAttributeName(featureName)) {
            // the first description of a feature name wins
            continue;
        }
        if (split.length < 2) {
            throw new IOException("Malformed line in [" + descriptionFile + "], missing feature type: [" + line + "]");
        }
        FeatureType type = FeatureType.valueOf(split[1]);
        String enumType = null;
        if (type == FeatureType.NOMINAL) {
            if (split.length < 3) {
                throw new IOException("Malformed line in [" + descriptionFile + "], nominal feature without value description: [" + line + "]");
            }
            enumType = split[2];
        }
        Attribute attribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, type, enumType);
        attributeStore.addAttribute(featureName, attribute);
    }

    // Make sure "outcome" is not the name of an attribute
    List<String> outcomeList = Arrays.asList(outcomes);
    outcomeAttribute = createOutcomeAttribute(outcomeList, isRegression);
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);

    masterInstance = new Instances(WekaUtils.RELATION_NAME, attributeStore.getAttributes(), instances.size());
    masterInstance.setClass(outcomeAttribute);
    arffSaver.setInstances(masterInstance);

    // Publish last: from here on the guard at the top returns a fully built header.
    saver = arffSaver;
    return masterInstance;
}
Use of org.dkpro.tc.ml.weka.util.AttributeStore in project dkpro-tc by dkpro.
Class WekaFeatureEncoder, method getAttributeStore.
/**
 * Builds an {@link AttributeStore} holding one attribute per distinct feature name
 * found in the given instances.
 *
 * <p>Instances and their features are visited in iteration order. The first feature
 * seen under a given name is converted with {@code featureToAttribute}; later features
 * with the same name are ignored.
 *
 * @param instances
 *            the instances whose features are collected
 * @return a new store with the collected attributes
 * @throws TextClassificationException
 *             declared by this method; presumably raised by the feature-to-attribute
 *             conversion (verify against {@code featureToAttribute})
 */
public static AttributeStore getAttributeStore(Collection<Instance> instances) throws TextClassificationException {
    AttributeStore store = new AttributeStore();
    for (Instance current : instances) {
        for (Feature candidate : current.getFeatures()) {
            String name = candidate.getName();
            if (store.containsAttributeName(name)) {
                // already registered by an earlier feature of the same name
                continue;
            }
            Attribute converted = featureToAttribute(candidate);
            store.addAttribute(name, converted);
        }
    }
    return store;
}
Aggregations