Search in sources :

Example 21 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaDataWriter method initalConfiguration.

/**
 * Lazily sets up the incremental ARFF saver, the attribute store and the master
 * {@link Instances} header. If the saver has already been created, the previously
 * built master instance is returned unchanged.
 *
 * <p>Attributes are derived from the feature-description file found in
 * {@code outputFolder}. Each line is split on tabs: column 0 is the feature name,
 * column 1 the {@link FeatureType} name and, for nominal features only, column 2 is
 * passed on as the enum type.
 *
 * @param instances
 *            the instances to be written; only their count is used here, as the
 *            initial capacity of the master instance
 * @return the master {@link Instances} object with the class attribute set
 * @throws TextClassificationException
 *             if an attribute cannot be built from its description
 * @throws IOException
 *             if the feature-description file cannot be read
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // A non-null saver marks an earlier, completed call.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    for (String descriptionLine : FileUtils.readLines(descriptionFile, "utf-8")) {
        String[] columns = descriptionLine.split("\t");
        String featureName = columns[0];
        // Only the first description of a feature name is registered.
        if (attributeStore.containsAttributeName(featureName)) {
            continue;
        }
        FeatureType type = FeatureType.valueOf(columns[1]);
        // The third column is only read for nominal features.
        String enumType = (type == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute attribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, type, enumType);
        attributeStore.addAttribute(featureName, attribute);
    }

    outcomeAttribute = createOutcomeAttribute(Arrays.asList(outcomes), isRegression);
    // The outcome attribute must not clash with a feature of the same name; rename it
    // with the class-attribute prefix if it does.
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);

    masterInstance = new Instances(WekaUtils.RELATION_NAME, attributeStore.getAttributes(), instances.size());
    masterInstance.setClass(outcomeAttribute);
    saver.setInstances(masterInstance);
    return masterInstance;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instances(weka.core.Instances) FeatureType(org.dkpro.tc.api.features.FeatureType) Attribute(weka.core.Attribute) ArffSaver(weka.core.converters.ArffSaver) File(java.io.File)

Example 22 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaFeatureEncoder method getAttributeStore.

/**
 * Collects one Weka attribute per distinct feature name found in the given instances.
 * The first feature seen under a name determines the attribute; later features with
 * the same name are skipped.
 *
 * @param instances
 *            the instances whose features are scanned
 * @return a new {@link AttributeStore} holding the collected attributes
 * @throws TextClassificationException
 *             if a feature cannot be converted into an attribute
 */
public static AttributeStore getAttributeStore(Collection<Instance> instances) throws TextClassificationException {
    AttributeStore store = new AttributeStore();
    for (Instance current : instances) {
        for (Feature feature : current.getFeatures()) {
            String featureName = feature.getName();
            if (store.containsAttributeName(featureName)) {
                // Already registered by an earlier feature with this name.
                continue;
            }
            store.addAttribute(featureName, featureToAttribute(feature));
        }
    }
    return store;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) Feature(org.dkpro.tc.api.features.Feature)

Example 23 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaFeatureEncoder method featureToAttributeUsingFeatureDescription.

/**
 * Creates a Weka attribute for a feature from its declared type instead of from a
 * concrete feature value.
 *
 * <ul>
 * <li>{@code NUMERIC} and {@code BOOLEAN} are created with the name-only constructor.</li>
 * <li>{@code STRING} is created with {@code new Attribute(name, true)}.</li>
 * <li>{@code NOMINAL} loads the enum class named by {@code enumType} and uses the
 * {@code toString()} of each enum constant as a nominal value.</li>
 * <li>Any other type is created with a {@code null} value list.</li>
 * </ul>
 *
 * @param featureName
 *            the raw feature name; it is quoted with {@code Utils.quote} before use
 * @param value
 *            the declared type of the feature
 * @param enumType
 *            fully qualified name of the enum class; only read for {@code NOMINAL}
 * @return the attribute describing the feature
 * @throws TextClassificationException
 *             if the type is {@code NOMINAL} and {@code enumType} is {@code null},
 *             cannot be loaded, or does not name an enum class
 */
public static Attribute featureToAttributeUsingFeatureDescription(String featureName, FeatureType value, String enumType) throws TextClassificationException {
    String name = Utils.quote(featureName);
    // if value is a number or a boolean then create a numeric attribute
    if (value.equals(FeatureType.NUMERIC) || value.equals(FeatureType.BOOLEAN)) {
        return new Attribute(name);
    }
    if (value.equals(FeatureType.STRING)) {
        return new Attribute(name, true);
    }
    // if value is an Enum then create a nominal attribute
    if (value.equals(FeatureType.NOMINAL)) {
        if (enumType == null) {
            // Class.forName(null) would otherwise fail with a bare NullPointerException.
            throw new TextClassificationException(new IllegalArgumentException("No enum type given for nominal feature [" + featureName + "]"));
        }
        Class<?> enumClass;
        try {
            enumClass = Class.forName(enumType);
        } catch (ClassNotFoundException e) {
            throw new TextClassificationException(e);
        }
        Object[] enumConstants = enumClass.getEnumConstants();
        if (enumConstants == null) {
            // getEnumConstants() returns null when the class is not an enum.
            throw new TextClassificationException(new IllegalArgumentException("Type [" + enumType + "] of nominal feature [" + featureName + "] is not an enum"));
        }
        ArrayList<String> attributeValues = new ArrayList<String>(enumConstants.length);
        for (Object enumConstant : enumConstants) {
            attributeValues.add(enumConstant.toString());
        }
        return new Attribute(name, attributeValues);
    }
    return new Attribute(name, (ArrayList<String>) null);
}
Also used : Attribute(weka.core.Attribute) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList)

Example 24 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaFeatureEncoder method featureToAttribute.

/**
 * Creates a Weka attribute for a feature based on the runtime type of its value.
 *
 * <ul>
 * <li>{@link Number} and {@link Boolean} values are created with the name-only
 * constructor.</li>
 * <li>{@link Enum} values yield a nominal attribute whose values are the
 * {@code toString()} of every constant of the enum.</li>
 * <li>Any other value (including {@code null}) is created with a {@code null}
 * value list.</li>
 * </ul>
 *
 * @param feature
 *            the feature to describe; its name is quoted with {@code Utils.quote}
 * @return the attribute describing the feature
 * @throws TextClassificationException
 *             declared for callers; not thrown by the code in this method
 */
public static Attribute featureToAttribute(Feature feature) throws TextClassificationException {
    String name = Utils.quote(feature.getName());
    Object value = feature.getValue();
    // if value is a number or a boolean then create a numeric attribute
    if (value instanceof Number || value instanceof Boolean) {
        return new Attribute(name);
    }
    // if value is an Enum then create a nominal attribute
    if (value instanceof Enum) {
        // getDeclaringClass() rather than getClass(): a constant with its own class body
        // is an anonymous subclass, for which getClass().getEnumConstants() returns null.
        Object[] enumConstants = ((Enum<?>) value).getDeclaringClass().getEnumConstants();
        ArrayList<String> attributeValues = new ArrayList<String>(enumConstants.length);
        for (Object enumConstant : enumConstants) {
            attributeValues.add(enumConstant.toString());
        }
        return new Attribute(name, attributeValues);
    }
    return new Attribute(name, (ArrayList<String>) null);
}
Also used : Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList)

Example 25 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaOutcomeIDReport method generateSlProperties.

/**
 * Builds the id-to-outcome properties for single-label predictions.
 *
 * <p>For every instance one property is written. The key is the string value of the
 * id attribute, or the next entry of {@code documentIdMap} (looked up by a running
 * index) when that map is given; for classification the map is only used when
 * {@code isUnit} is {@code false}. The value is
 * {@code prediction + SEPARATOR_CHAR + gold + SEPARATOR_CHAR + constant}, where the
 * constant is {@code -1} for classification and {@code 0} for regression
 * (NOTE(review): presumably a threshold placeholder - confirm).
 *
 * <p>The gold value is read from the attribute named
 * {@code CLASS_ATTRIBUTE_NAME + WekaUtils.COMPATIBLE_OUTCOME_CLASS}; if no such
 * attribute exists, {@code CLASS_ATTRIBUTE_NAME} is used instead.
 *
 * @param predictions
 *            the instances carrying id, gold and predicted values
 * @param isRegression
 *            {@code true} to write raw numeric values, {@code false} to map class
 *            labels to numbers
 * @param isUnit
 *            when {@code true}, classification ids are never taken from
 *            {@code documentIdMap}
 * @param documentIdMap
 *            optional replacement ids keyed by instance position; may be {@code null}
 * @param labels
 *            the class labels used to build the label-to-number mapping; only used
 *            for classification
 * @return the populated properties
 * @throws Exception
 *             if a helper invoked by this method fails
 */
protected Properties generateSlProperties(Instances predictions, boolean isRegression, boolean isUnit, Map<Integer, String> documentIdMap, List<String> labels) throws Exception {
    Properties props = new SortedKeyProperties();
    String[] classValues = new String[predictions.numClasses()];
    for (int i = 0; i < classValues.length; i++) {
        classValues[i] = predictions.classAttribute().value(i);
    }
    int attOffset = predictions.attribute(ID_FEATURE_NAME).index();
    prepareBaseline();

    // Resolve the attributes once; they do not change between instances.
    Attribute goldAtt = predictions.attribute(CLASS_ATTRIBUTE_NAME + WekaUtils.COMPATIBLE_OUTCOME_CLASS);
    if (goldAtt == null) {
        // if train and test data have not been balanced
        goldAtt = predictions.attribute(CLASS_ATTRIBUTE_NAME);
    }
    Attribute gsAtt = predictions.attribute(WekaTestTask.PREDICTION_CLASS_LABEL_NAME);

    // Built on first use so that it is never computed for regression or empty input.
    Map<String, Integer> class2number = null;
    int idx = 0;
    for (Instance inst : predictions) {
        Double gold = Double.valueOf(inst.value(goldAtt));
        Double prediction = Double.valueOf(inst.value(gsAtt));
        String stringValue = inst.stringValue(attOffset);
        if (!isRegression) {
            if (class2number == null) {
                class2number = classNamesToMapping(labels);
            }
            Integer goldAsNumber = class2number.get(classValues[gold.intValue()]);
            if (!isUnit && documentIdMap != null) {
                stringValue = documentIdMap.get(idx++);
            }
            props.setProperty(stringValue, getPrediction(prediction, class2number, gsAtt) + SEPARATOR_CHAR + goldAsNumber + SEPARATOR_CHAR + String.valueOf(-1));
        } else {
            // the outcome is numeric
            if (documentIdMap != null) {
                stringValue = documentIdMap.get(idx++);
            }
            props.setProperty(stringValue, prediction + SEPARATOR_CHAR + gold + SEPARATOR_CHAR + String.valueOf(0));
        }
    }
    return props;
}
Also used : SortedKeyProperties(org.dkpro.tc.ml.report.util.SortedKeyProperties) Instance(weka.core.Instance) Attribute(weka.core.Attribute) Properties(java.util.Properties) SortedKeyProperties(org.dkpro.tc.ml.report.util.SortedKeyProperties)

Aggregations

Attribute (weka.core.Attribute)28 ArrayList (java.util.ArrayList)12 Instances (weka.core.Instances)12 Feature (org.dkpro.tc.api.features.Feature)8 Instance (org.dkpro.tc.api.features.Instance)8 SparseInstance (weka.core.SparseInstance)5 Test (org.junit.Test)4 DenseInstance (weka.core.DenseInstance)4 Instance (weka.core.Instance)4 ArffSaver (weka.core.converters.ArffSaver)4 File (java.io.File)3 MultiLabelInstances (mulan.data.MultiLabelInstances)3 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)3 AttributeStore (org.dkpro.tc.ml.weka.util.AttributeStore)3 FastVector (weka.core.FastVector)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 Optional (java.util.Optional)2 FeatureType (org.dkpro.tc.api.features.FeatureType)2 Request (io.milton.http.Request)1