Search in sources :

Example 1 with FeatureType

use of org.dkpro.tc.api.features.FeatureType in project dkpro-tc by dkpro.

The method typeDescriptionToString of the class FeatureMetaData.

/**
 * Renders the feature descriptions held in {@code featDesc} as text.
 *
 * <p>Feature names are emitted in their natural sorted order, one per line, in the form
 * {@code name<TAB>type}. For features of type {@link FeatureType#NOMINAL} a further
 * tab-separated column carrying the {@code enumFeatureName} entry for that feature is added.
 * Every line is terminated with {@code "\n"}.
 *
 * @return the tab-separated description, or an empty string when there are no features
 */
private String typeDescriptionToString() {
    // Sort a copy of the names so the output order does not depend on the map's iteration order.
    List<String> sortedNames = new ArrayList<String>(featDesc.keySet());
    Collections.sort(sortedNames);

    StringBuilder description = new StringBuilder();
    for (String name : sortedNames) {
        FeatureType featureType = featDesc.get(name);
        description.append(name).append("\t").append(featureType.toString());
        if (FeatureType.NOMINAL == featureType) {
            // Nominal features additionally record the entry stored in enumFeatureName.
            description.append("\t").append(enumFeatureName.get(name));
        }
        description.append("\n");
    }
    return description.toString();
}
Also used : FeatureType(org.dkpro.tc.api.features.FeatureType) ArrayList(java.util.ArrayList)

Example 2 with FeatureType

use of org.dkpro.tc.api.features.FeatureType in project dkpro-tc by dkpro.

The method initalConfiguration of the class MekaDataWriter.

/**
 * Lazily sets up the ARFF saver, the attribute store and the master {@link Instances} header.
 *
 * <p>When {@code saver} has already been created, the cached {@code masterInstance} is returned
 * unchanged. Otherwise the feature description file in {@code outputFolder} is read; each line
 * is split on tabs into feature name, {@link FeatureType} name and, for
 * {@link FeatureType#NOMINAL} features, a third column passed on as the enum type. One attribute
 * is registered per distinct feature name, and the outcome attributes are then inserted at the
 * beginning of the store.
 *
 * @param instances
 *            the instances about to be written; only their count is used, as the capacity
 *            argument of the created {@link Instances} object
 * @return the (possibly cached) master {@link Instances} object
 * @throws TextClassificationException
 *             declared by the method; presumably raised by one of the helpers - TODO confirm
 * @throws IOException
 *             if the feature description file cannot be read or the saver cannot be configured
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // A non-null saver means an earlier call already did the setup.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    List<String> descriptionLines = FileUtils.readLines(descriptionFile, "utf-8");
    for (String line : descriptionLines) {
        String[] columns = line.split("\t");
        String name = columns[0];
        if (attributeStore.containsAttributeName(name)) {
            // Only the first description of a feature name is used.
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(columns[1]);
        // The third column is only read for nominal features.
        String enumType = (featureType == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute featureAttribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(name, featureType, enumType);
        attributeStore.addAttribute(name, featureAttribute);
    }

    List<String> outcomeNames = Arrays.asList(outcomes);
    outcomeAttributes = createOutcomeAttributes(outcomeNames);
    // in Meka, class label attributes have to go on top
    for (Attribute labelAttribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(labelAttribute.name(), labelAttribute);
    }

    // for Meka-internal use: the relation name carries the number of outcome attributes
    int labelCount = outcomeAttributes.size();
    String relationName = WekaUtils.RELATION_NAME + ": -C " + labelCount + " ";
    masterInstance = new Instances(relationName, attributeStore.getAttributes(), instances.size());
    masterInstance.setClassIndex(labelCount);
    saver.setInstances(masterInstance);
    return masterInstance;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instances(weka.core.Instances) FeatureType(org.dkpro.tc.api.features.FeatureType) Attribute(weka.core.Attribute) ArffSaver(weka.core.converters.ArffSaver) File(java.io.File)

Example 3 with FeatureType

use of org.dkpro.tc.api.features.FeatureType in project dkpro-tc by dkpro.

The method initalConfiguration of the class WekaDataWriter.

/**
 * Lazily sets up the ARFF saver, the attribute store and the master {@link Instances} header.
 *
 * <p>When {@code saver} has already been created, the cached {@code masterInstance} is returned
 * unchanged. Otherwise the feature description file in {@code outputFolder} is read; each line
 * is split on tabs into feature name, {@link FeatureType} name and, for
 * {@link FeatureType#NOMINAL} features, a third column passed on as the enum type. One attribute
 * is registered per distinct feature name. The outcome attribute is added after the features
 * and set as the class attribute; if a feature already uses {@code CLASS_ATTRIBUTE_NAME}, the
 * outcome attribute is copied under a name prefixed with {@code CLASS_ATTRIBUTE_PREFIX}.
 *
 * @param instances
 *            the instances about to be written; only their count is used, as the capacity
 *            argument of the created {@link Instances} object
 * @return the (possibly cached) master {@link Instances} object
 * @throws TextClassificationException
 *             declared by the method; presumably raised by one of the helpers - TODO confirm
 * @throws IOException
 *             if the feature description file cannot be read or the saver cannot be configured
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // A non-null saver means an earlier call already did the setup.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    List<String> descriptionLines = FileUtils.readLines(descriptionFile, "utf-8");
    for (String line : descriptionLines) {
        String[] columns = line.split("\t");
        String name = columns[0];
        if (attributeStore.containsAttributeName(name)) {
            // Only the first description of a feature name is used.
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(columns[1]);
        // The third column is only read for nominal features.
        String enumType = (featureType == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute featureAttribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(name, featureType, enumType);
        attributeStore.addAttribute(name, featureAttribute);
    }

    List<String> outcomeNames = Arrays.asList(outcomes);
    outcomeAttribute = createOutcomeAttribute(outcomeNames, isRegression);
    // Avoid a name clash between the outcome attribute and a feature of the same name.
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);

    masterInstance = new Instances(WekaUtils.RELATION_NAME, attributeStore.getAttributes(), instances.size());
    masterInstance.setClass(outcomeAttribute);
    saver.setInstances(masterInstance);
    return masterInstance;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instances(weka.core.Instances) FeatureType(org.dkpro.tc.api.features.FeatureType) Attribute(weka.core.Attribute) ArffSaver(weka.core.converters.ArffSaver) File(java.io.File)

Aggregations

FeatureType (org.dkpro.tc.api.features.FeatureType)3 File (java.io.File)2 AttributeStore (org.dkpro.tc.ml.weka.util.AttributeStore)2 Attribute (weka.core.Attribute)2 Instances (weka.core.Instances)2 ArffSaver (weka.core.converters.ArffSaver)2 ArrayList (java.util.ArrayList)1