Search in sources :

Example 1 with AttributeStore

use of org.dkpro.tc.ml.weka.util.AttributeStore in project dkpro-tc by dkpro.

the class MekaDataWriter method initalConfiguration.

/**
 * Prepares the incremental ARFF saver and the master {@link Instances} header the first time
 * it is called; every later call returns the already-built {@code masterInstance}.
 *
 * <p>The feature attributes are read from the feature description file in {@code outputFolder}.
 * Each line is split on tabs: column 0 is the feature name, column 1 the {@link FeatureType}
 * constant name, and (for {@code NOMINAL} features only) column 2 is handed on as the enum type.
 * The outcome attributes are then placed in front of the feature attributes.
 *
 * @param instances
 *            only its size is used, as the third argument of the {@link Instances} constructor
 * @return the master instances object that was also registered with the saver
 * @throws TextClassificationException
 *             presumably raised by the attribute-creation helpers; not thrown directly here
 * @throws IOException
 *             declared for the file-based calls (setting the saver target, reading the
 *             feature description file)
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // A non-null saver means the configuration already ran once.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    for (String descriptionLine : FileUtils.readLines(descriptionFile, "utf-8")) {
        String[] columns = descriptionLine.split("\t");
        String name = columns[0];
        // Only the first description of a feature name is registered.
        if (attributeStore.containsAttributeName(name)) {
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(columns[1]);
        // The third column is only consulted for nominal features.
        String enumType = (featureType == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute featureAttribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(name, featureType, enumType);
        attributeStore.addAttribute(name, featureAttribute);
    }

    List<String> outcomeValues = Arrays.asList(outcomes);
    outcomeAttributes = createOutcomeAttributes(outcomeValues);
    // in Meka, class label attributes have to go on top
    for (Attribute labelAttribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(labelAttribute.name(), labelAttribute);
    }

    // for Meka-internal use: the relation name carries the number of label attributes
    String relationName = WekaUtils.RELATION_NAME + ": -C " + outcomeAttributes.size() + " ";
    masterInstance = new Instances(relationName, attributeStore.getAttributes(), instances.size());
    masterInstance.setClassIndex(outcomeAttributes.size());
    saver.setInstances(masterInstance);
    return masterInstance;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instances(weka.core.Instances) FeatureType(org.dkpro.tc.api.features.FeatureType) Attribute(weka.core.Attribute) ArffSaver(weka.core.converters.ArffSaver) File(java.io.File)

Example 2 with AttributeStore

use of org.dkpro.tc.ml.weka.util.AttributeStore in project dkpro-tc by dkpro.

the class WekaDataWriter method initalConfiguration.

/**
 * Prepares the incremental ARFF saver and the master {@link Instances} header the first time
 * it is called; every later call returns the already-built {@code masterInstance}.
 *
 * <p>The feature attributes are read from the feature description file in {@code outputFolder}.
 * Each line is split on tabs: column 0 is the feature name, column 1 the {@link FeatureType}
 * constant name, and (for {@code NOMINAL} features only) column 2 is handed on as the enum type.
 * The outcome attribute is appended after the features; if a feature already uses
 * {@code CLASS_ATTRIBUTE_NAME}, the outcome attribute is copied under a prefixed name first.
 *
 * @param instances
 *            only its size is used, as the third argument of the {@link Instances} constructor
 * @return the master instances object that was also registered with the saver
 * @throws TextClassificationException
 *             presumably raised by the attribute-creation helpers; not thrown directly here
 * @throws IOException
 *             declared for the file-based calls (setting the saver target, reading the
 *             feature description file)
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // A non-null saver means the configuration already ran once.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    for (String descriptionLine : FileUtils.readLines(descriptionFile, "utf-8")) {
        String[] columns = descriptionLine.split("\t");
        String name = columns[0];
        // Only the first description of a feature name is registered.
        if (attributeStore.containsAttributeName(name)) {
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(columns[1]);
        // The third column is only consulted for nominal features.
        String enumType = (featureType == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute featureAttribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(name, featureType, enumType);
        attributeStore.addAttribute(name, featureAttribute);
    }

    List<String> outcomeValues = Arrays.asList(outcomes);
    outcomeAttribute = createOutcomeAttribute(outcomeValues, isRegression);
    // Avoid a clash between the outcome attribute and a feature of the same name.
    boolean nameTaken = attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME);
    if (nameTaken) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);

    masterInstance = new Instances(WekaUtils.RELATION_NAME, attributeStore.getAttributes(), instances.size());
    masterInstance.setClass(outcomeAttribute);
    saver.setInstances(masterInstance);
    return masterInstance;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instances(weka.core.Instances) FeatureType(org.dkpro.tc.api.features.FeatureType) Attribute(weka.core.Attribute) ArffSaver(weka.core.converters.ArffSaver) File(java.io.File)

Example 3 with AttributeStore

use of org.dkpro.tc.ml.weka.util.AttributeStore in project dkpro-tc by dkpro.

the class WekaFeatureEncoder method getAttributeStore.

/**
 * Builds an {@link AttributeStore} holding one attribute per distinct feature name found in
 * the given instances. The attribute is created from the first feature seen under a name;
 * later features with the same name are skipped.
 *
 * @param instances
 *            the instances whose features are scanned
 * @return a new store with the collected attributes
 * @throws TextClassificationException
 *             presumably raised by {@code featureToAttribute}; not thrown directly here
 */
public static AttributeStore getAttributeStore(Collection<Instance> instances) throws TextClassificationException {
    AttributeStore store = new AttributeStore();
    for (Instance current : instances) {
        for (Feature candidate : current.getFeatures()) {
            String name = candidate.getName();
            // Already registered under this name: nothing to convert.
            if (store.containsAttributeName(name)) {
                continue;
            }
            store.addAttribute(name, featureToAttribute(candidate));
        }
    }
    return store;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) Feature(org.dkpro.tc.api.features.Feature)

Aggregations

AttributeStore (org.dkpro.tc.ml.weka.util.AttributeStore): 3, Attribute (weka.core.Attribute): 3, File (java.io.File): 2, FeatureType (org.dkpro.tc.api.features.FeatureType): 2, Instances (weka.core.Instances): 2, ArffSaver (weka.core.converters.ArffSaver): 2, Feature (org.dkpro.tc.api.features.Feature): 1, Instance (org.dkpro.tc.api.features.Instance): 1