use of org.dkpro.tc.api.features.FeatureType in project dkpro-tc by dkpro.
the class FeatureMetaData method typeDescriptionToString.
/**
 * Renders the feature description as text, one line per feature.
 *
 * Features are emitted in the natural (sorted) order of their names. Each line
 * holds the feature name and its {@code FeatureType}, separated by a tab; for
 * {@code NOMINAL} features a third tab-separated column carries the value
 * stored for that name in {@code enumFeatureName}. Every line is terminated
 * by a newline character.
 *
 * @return the tab-separated description, or an empty string when no features
 *         are registered
 */
private String typeDescriptionToString() {
    // Copy the key set so sorting does not touch the backing map.
    List<String> sortedNames = new ArrayList<String>(featDesc.keySet());
    Collections.sort(sortedNames);

    StringBuilder out = new StringBuilder();
    for (String name : sortedNames) {
        FeatureType featureType = featDesc.get(name);
        out.append(name).append("\t").append(featureType.toString());
        if (featureType == FeatureType.NOMINAL) {
            // Only nominal features carry the extra enum column.
            out.append("\t").append(enumFeatureName.get(name));
        }
        out.append("\n");
    }
    return out.toString();
}
use of org.dkpro.tc.api.features.FeatureType in project dkpro-tc by dkpro.
the class MekaDataWriter method initalConfiguration.
/**
 * Sets up the incremental ARFF saver, the attribute store and the master
 * {@code Instances} header on first use.
 *
 * If {@code saver} has already been created, the previously built
 * {@code masterInstance} is returned unchanged. Otherwise the feature
 * description file in {@code outputFolder} is read; each line is split on tabs
 * into feature name, feature type and (for {@code NOMINAL} features) a third
 * column, and one attribute per not-yet-known feature name is registered. The
 * outcome attributes are then inserted at the beginning of the attribute store.
 *
 * @param instances
 *            the instances about to be written; only its size is read here and
 *            handed to the {@code Instances} constructor
 * @return the master {@code Instances} object that was also passed to the saver
 * @throws TextClassificationException
 *             declared by this method; presumably raised by one of the helper
 *             calls (not visible from here)
 * @throws IOException
 *             declared by this method; the file read and saver setup are the
 *             visible I/O steps
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // Already configured: reuse the existing header.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();

    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    List<String> descriptionLines = FileUtils.readLines(descriptionFile, "utf-8");
    for (String line : descriptionLines) {
        String[] columns = line.split("\t");
        String name = columns[0];
        if (attributeStore.containsAttributeName(name)) {
            // The first description of a feature name wins; later ones are skipped.
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(columns[1]);
        // The third column is only read for nominal features.
        String nominalColumn = (featureType == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute featureAttribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(name,
                featureType, nominalColumn);
        attributeStore.addAttribute(name, featureAttribute);
    }

    List<String> outcomeNames = Arrays.asList(outcomes);
    outcomeAttributes = createOutcomeAttributes(outcomeNames);

    // in Meka, class label attributes have to go on top
    for (Attribute labelAttribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(labelAttribute.name(), labelAttribute);
    }

    // for Meka-internal use: the relation name carries the number of outcome attributes
    String relationName = WekaUtils.RELATION_NAME + ": -C " + outcomeAttributes.size() + " ";
    masterInstance = new Instances(relationName, attributeStore.getAttributes(), instances.size());
    masterInstance.setClassIndex(outcomeAttributes.size());
    saver.setInstances(masterInstance);
    return masterInstance;
}
use of org.dkpro.tc.api.features.FeatureType in project dkpro-tc by dkpro.
the class WekaDataWriter method initalConfiguration.
/**
 * Sets up the incremental ARFF saver, the attribute store and the master
 * {@code Instances} header on first use.
 *
 * If {@code saver} has already been created, the previously built
 * {@code masterInstance} is returned unchanged. Otherwise the feature
 * description file in {@code outputFolder} is read; each line is split on tabs
 * into feature name, feature type and (for {@code NOMINAL} features) a third
 * column, and one attribute per not-yet-known feature name is registered. The
 * outcome attribute is added after the feature attributes; if a feature is
 * already registered under {@code CLASS_ATTRIBUTE_NAME}, the outcome attribute
 * is replaced by a copy whose name is prefixed with
 * {@code CLASS_ATTRIBUTE_PREFIX}.
 *
 * @param instances
 *            the instances about to be written; only its size is read here and
 *            handed to the {@code Instances} constructor
 * @return the master {@code Instances} object that was also passed to the saver
 * @throws TextClassificationException
 *             declared by this method; presumably raised by one of the helper
 *             calls (not visible from here)
 * @throws IOException
 *             declared by this method; the file read and saver setup are the
 *             visible I/O steps
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // Already configured: reuse the existing header.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();

    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    List<String> descriptionLines = FileUtils.readLines(descriptionFile, "utf-8");
    for (String line : descriptionLines) {
        String[] columns = line.split("\t");
        String name = columns[0];
        if (attributeStore.containsAttributeName(name)) {
            // The first description of a feature name wins; later ones are skipped.
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(columns[1]);
        // The third column is only read for nominal features.
        String nominalColumn = (featureType == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute featureAttribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(name,
                featureType, nominalColumn);
        attributeStore.addAttribute(name, featureAttribute);
    }

    List<String> outcomeNames = Arrays.asList(outcomes);
    outcomeAttribute = createOutcomeAttribute(outcomeNames, isRegression);

    // Avoid a name clash between the outcome attribute and a feature of the same name.
    boolean nameTaken = attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME);
    if (nameTaken) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);

    masterInstance = new Instances(WekaUtils.RELATION_NAME, attributeStore.getAttributes(), instances.size());
    masterInstance.setClass(outcomeAttribute);
    saver.setInstances(masterInstance);
    return masterInstance;
}
Aggregations