Search in sources :

Example 16 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaUtils method getInstanceIdAttributeOffset.

/**
 * Locates the instance-id attribute among the attributes enumerated from the given data set.
 * <p>
 * Positions are counted starting at 1, i.e. the first enumerated attribute has position 1.
 *
 * @param data
 *            weka instances whose attributes are searched
 * @return the 1-based position of the first enumerated attribute whose name equals
 *         {@code Constants.ID_FEATURE_NAME}, or -1 if no enumerated attribute has that name
 */
public static int getInstanceIdAttributeOffset(Instances data) {
    int position = 1;
    for (Enumeration<Attribute> attributes = data.enumerateAttributes(); attributes.hasMoreElements(); position++) {
        String attributeName = attributes.nextElement().name();
        if (attributeName.equals(Constants.ID_FEATURE_NAME)) {
            return position;
        }
    }
    // no enumerated attribute carries the id feature name
    return -1;
}
Also used : Attribute(weka.core.Attribute)

Example 17 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaUtils method getFeatureValues.

/**
 * Encodes the features of a TC instance as an array of Weka attribute values.
 * <p>
 * The array has one slot per attribute in the store. Each feature is encoded as follows:
 * a {@link Number} by its {@code doubleValue()}, a {@link Boolean} as 1.0 / 0.0, and any other
 * value as the index of its string form within the matching nominal or string attribute
 * (unknown values are appended to string attributes). Slots that are never written keep the
 * array default of 0.0.
 * <p>
 * A {@link NullPointerException} raised while handling a single feature is swallowed and that
 * feature is skipped; this is how features without a known attribute are ignored.
 *
 * @param attributeStore
 *            weka attribute store used to look up attributes and their offsets
 * @param instance
 *            tc instance whose features are encoded
 * @return array of double values, indexed by attribute offset
 * @throws IllegalArgumentException
 *             if a feature has no value, if a non-numeric/non-boolean value targets an
 *             attribute that is neither nominal nor string, or if a value is not defined for
 *             its nominal attribute
 */
private static double[] getFeatureValues(AttributeStore attributeStore, Instance instance) {
    final int attributeCount = attributeStore.getAttributes().size();
    final double[] encodedValues = new double[attributeCount];
    for (Feature feature : instance.getFeatures()) {
        try {
            final Attribute target = attributeStore.getAttribute(feature.getName());
            final Object rawValue = feature.getValue();
            if (rawValue == null) {
                throw new IllegalArgumentException("You have an instance which doesn't specify a value for the feature " + feature.getName());
            }
            final double encoded;
            if (rawValue instanceof Number) {
                // numeric attribute: take the value as-is
                encoded = ((Number) rawValue).doubleValue();
            } else if (rawValue instanceof Boolean) {
                // boolean attribute: true -> 1.0, false -> 0.0
                encoded = ((Boolean) rawValue) ? 1.0d : 0.0d;
            } else {
                // nominal or string attribute: encode as the index of the value
                if (!(target.isNominal() || target.isString())) {
                    throw new IllegalArgumentException("Attribute neither nominal nor string: " + rawValue);
                }
                final String text = rawValue.toString();
                int valueIndex = target.indexOfValue(text);
                if (valueIndex == -1) {
                    if (target.isNominal()) {
                        throw new IllegalArgumentException("Value not defined for given nominal attribute!");
                    }
                    // string attributes grow on demand
                    target.addStringValue(text);
                    valueIndex = target.indexOfValue(text);
                }
                encoded = valueIndex;
            }
            final int slot = attributeStore.getAttributeOffset(target.name());
            if (slot != -1) {
                encodedValues[slot] = encoded;
            }
        } catch (NullPointerException ignored) {
            // ignore unseen attributes
        }
    }
    return encodedValues;
}
Also used : Attribute(weka.core.Attribute) Feature(org.dkpro.tc.api.features.Feature)

Example 18 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class MekaDataWriter method initalConfiguration.

/**
 * Performs the one-time setup of the ARFF saver, the attribute store and the master
 * {@link Instances} object; subsequent calls return the already created master object.
 * <p>
 * Attributes are built from the feature description file in the output folder, which is read
 * as tab-separated lines: feature name, feature type and, for nominal features, a third
 * column that is passed on to the attribute encoder. Outcome attributes are then inserted at
 * the beginning of the attribute store.
 *
 * @param instances
 *            the instances about to be written; only their count is used here, as the
 *            initial capacity of the master object
 * @return the master {@link Instances} object that is registered with the saver
 * @throws TextClassificationException
 *             declared by this method; presumably raised by the attribute creation helpers
 * @throws IOException
 *             if the saver target or the feature description file cannot be accessed
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // a non-null saver means the configuration was already done
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    List<String> descriptionLines = FileUtils.readLines(descriptionFile, "utf-8");
    for (String descriptionLine : descriptionLines) {
        String[] columns = descriptionLine.split("\t");
        String featureName = columns[0];
        if (attributeStore.containsAttributeName(featureName)) {
            // first description of a feature name wins
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(columns[1]);
        // only nominal features carry a third column
        String nominalSpec = (featureType == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute created = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, featureType, nominalSpec);
        attributeStore.addAttribute(featureName, created);
    }

    List<String> outcomeNames = Arrays.asList(outcomes);
    outcomeAttributes = createOutcomeAttributes(outcomeNames);
    // in Meka, class label attributes have to go on top
    for (Attribute outcomeAttribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(outcomeAttribute.name(), outcomeAttribute);
    }

    // for Meka-internal use: the relation name carries the number of labels as "-C <n>"
    String relationName = WekaUtils.RELATION_NAME + ": -C " + outcomeAttributes.size() + " ";
    masterInstance = new Instances(relationName, attributeStore.getAttributes(), instances.size());
    masterInstance.setClassIndex(outcomeAttributes.size());
    saver.setInstances(masterInstance);
    return masterInstance;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instances(weka.core.Instances) FeatureType(org.dkpro.tc.api.features.FeatureType) Attribute(weka.core.Attribute) ArffSaver(weka.core.converters.ArffSaver) File(java.io.File)

Example 19 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class MekaDataWriter method getFeatureValues.

/**
 * Builds the array of attribute values for one TC instance.
 * <p>
 * The result has one entry per attribute in the store; entries that no feature writes to stay
 * at the array default of 0.0. Numbers are stored via {@code doubleValue()}, booleans as
 * 1.0 / 0.0, and all other values as the index of their string form in the corresponding
 * nominal or string attribute (string attributes are extended with unknown values).
 * <p>
 * If handling a feature raises a {@link NullPointerException}, that feature is skipped; this
 * is how features without a known attribute are ignored.
 *
 * @param attributeStore
 *            store providing the attributes and their offsets
 * @param instance
 *            tc instance whose features are converted
 * @return the attribute values, indexed by attribute offset
 * @throws IllegalArgumentException
 *             if a feature value is {@code null}, if a non-numeric/non-boolean value belongs
 *             to an attribute that is neither nominal nor string, or if a value is unknown to
 *             its nominal attribute
 */
private double[] getFeatureValues(AttributeStore attributeStore, Instance instance) {
    double[] row = new double[attributeStore.getAttributes().size()];
    for (Feature current : instance.getFeatures()) {
        try {
            Attribute wekaAttribute = attributeStore.getAttribute(current.getName());
            Object value = current.getValue();
            double cell;
            if (value == null) {
                throw new IllegalArgumentException("You have an instance which doesn't specify a value for the feature " + current.getName());
            } else if (value instanceof Boolean) {
                // boolean attribute
                cell = Boolean.TRUE.equals(value) ? 1.0d : 0.0d;
            } else if (value instanceof Number) {
                // numeric attribute
                cell = ((Number) value).doubleValue();
            } else {
                // nominal or string attribute
                boolean nominal = wekaAttribute.isNominal();
                if (!nominal && !wekaAttribute.isString()) {
                    throw new IllegalArgumentException("Attribute neither nominal nor string: " + value);
                }
                String label = value.toString();
                int labelIndex = wekaAttribute.indexOfValue(label);
                if (labelIndex == -1) {
                    if (nominal) {
                        throw new IllegalArgumentException("Value not defined for given nominal attribute!");
                    }
                    // unknown value of a string attribute: register it, then look it up again
                    wekaAttribute.addStringValue(label);
                    labelIndex = wekaAttribute.indexOfValue(label);
                }
                cell = labelIndex;
            }
            int position = attributeStore.getAttributeOffset(wekaAttribute.name());
            if (position != -1) {
                row[position] = cell;
            }
        } catch (NullPointerException ignored) {
            // ignore unseen attributes
        }
    }
    return row;
}
Also used : Attribute(weka.core.Attribute) Feature(org.dkpro.tc.api.features.Feature)

Example 20 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class MekaDataWriter method writeClassifierFormat.

/**
 * Writes the given TC instances incrementally through the ARFF saver.
 * <p>
 * For every instance the feature values are encoded, each outcome attribute is set to 1.0 if
 * the instance's outcomes contain the corresponding label and to 0.0 otherwise, and the
 * resulting sparse or dense Weka instance is handed to the saver. The instance weight is only
 * applied when weighting is enabled.
 *
 * @param instances
 *            the instances to write
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised during configuration, encoding or writing
 */
@Override
public void writeClassifierFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        final Instances header = initalConfiguration(instances);
        for (Instance tcInstance : instances) {
            final double[] row = getFeatureValues(attributeStore, tcInstance);

            // set class label values
            final List<String> assignedOutcomes = tcInstance.getOutcomes();
            for (Attribute labelAttribute : outcomeAttributes) {
                final String attributeName = labelAttribute.name();
                final int slot = attributeStore.getAttributeOffset(attributeName);
                // the label is the part of the attribute name after the class prefix
                final String plainLabel = attributeName.split(CLASS_ATTRIBUTE_PREFIX)[1];
                final boolean labelAssigned = assignedOutcomes.contains(plainLabel);
                row[slot] = labelAssigned ? 1.0d : 0.0d;
            }

            final weka.core.Instance wekaInstance = useSparse
                    ? new SparseInstance(1.0, row)
                    : new DenseInstance(1.0, row);
            wekaInstance.setDataset(header);

            final Double weight = tcInstance.getWeight();
            if (applyWeighting) {
                wekaInstance.setWeight(weight);
            }
            saver.writeIncremental(wekaInstance);
        }
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) IOException(java.io.IOException) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) Instances(weka.core.Instances)

Aggregations

Attribute (weka.core.Attribute)28 ArrayList (java.util.ArrayList)12 Instances (weka.core.Instances)12 Feature (org.dkpro.tc.api.features.Feature)8 Instance (org.dkpro.tc.api.features.Instance)8 SparseInstance (weka.core.SparseInstance)5 Test (org.junit.Test)4 DenseInstance (weka.core.DenseInstance)4 Instance (weka.core.Instance)4 ArffSaver (weka.core.converters.ArffSaver)4 File (java.io.File)3 MultiLabelInstances (mulan.data.MultiLabelInstances)3 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)3 AttributeStore (org.dkpro.tc.ml.weka.util.AttributeStore)3 FastVector (weka.core.FastVector)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 Optional (java.util.Optional)2 FeatureType (org.dkpro.tc.api.features.FeatureType)2 Request (io.milton.http.Request)1