Search in sources :

Example 1 with SparseInstance

use of weka.core.SparseInstance in project dkpro-tc by dkpro.

the class WekaUtils method instanceListToArffFile.

/**
 * Writes a list of TC instances to an ARFF file. Single-label case.
 * <p>
 * The feature attributes are derived from the instances, the outcome attribute is added to the
 * attribute store and set as the class attribute, and the instances are then written one by one
 * using the saver's incremental mode with compressed output enabled.
 *
 * @param outputFile
 *            the output file; missing parent directories are created
 * @param instanceList
 *            the instance list; must contain at least one instance
 * @param useDenseInstances
 *            if {@code true} dense Weka instances are written, otherwise sparse ones
 * @param isRegressionExperiment
 *            if {@code true} each outcome is parsed as a double and used as class value
 * @param useWeights
 *            if {@code true} the weight of each TC instance is copied to the Weka instance
 * @throws IllegalArgumentException
 *             if the instance list yields no outcomes
 * @throws Exception
 *             in case of error
 */
public static void instanceListToArffFile(File outputFile, List<Instance> instanceList, boolean useDenseInstances, boolean isRegressionExperiment, boolean useWeights) throws Exception {
    List<String> outcomeList = new ArrayList<>();
    for (Instance i : instanceList) {
        outcomeList.add(i.getOutcome());
    }
    // check for error conditions
    if (outcomeList.isEmpty()) {
        throw new IllegalArgumentException("List of instance outcomes is empty.");
    }
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instanceList);
    // Make sure "outcome" is not the name of an attribute
    Attribute outcomeAttribute = createOutcomeAttribute(outcomeList, isRegressionExperiment);
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);
    Instances wekaInstances = new Instances(RELATION_NAME, attributeStore.getAttributes(), instanceList.size());
    wekaInstances.setClass(outcomeAttribute);
    if (!outputFile.exists()) {
        // Create the missing parent directories. Calling mkdirs() on the file itself would
        // create a directory at the file's own path and make createNewFile() a no-op.
        File parentDir = outputFile.getAbsoluteFile().getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }
    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);
    for (int i = 0; i < instanceList.size(); i++) {
        Instance instance = instanceList.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);
        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        // the dataset must be attached before the class value can be set
        wekaInstance.setDataset(wekaInstances);
        String outcome = outcomeList.get(i);
        if (isRegressionExperiment) {
            wekaInstance.setClassValue(Double.parseDouble(outcome));
        } else {
            wekaInstance.setClassValue(outcome);
        }
        if (useWeights) {
            wekaInstance.setWeight(instance.getWeight());
        }
        saver.writeIncremental(wekaInstance);
    }
    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) ArffSaver(weka.core.converters.ArffSaver) Instances(weka.core.Instances) MultiLabelInstances(mulan.data.MultiLabelInstances)

Example 2 with SparseInstance

use of weka.core.SparseInstance in project dkpro-tc by dkpro.

the class ReplaceMissingValuesWithZeroFilter method convertInstance.

/**
 * Converts one instance and pushes the result onto the output queue.
 * <p>
 * A value counts as replaceable when it is missing, does not belong to the class attribute of
 * the input format, and its attribute is nominal or numeric. For a sparse instance replaceable
 * entries are left out of the stored values (presumably read back as zero by the sparse
 * representation); for any other instance they are set to {@code 0.0} explicitly. The weight
 * and the dataset reference of the original instance are carried over.
 *
 * @param instance
 *            the instance to convert
 */
private void convertInstance(Instance instance) {
    final Instance converted;
    if (instance instanceof SparseInstance) {
        final int storedCount = instance.numValues();
        double[] keptValues = new double[storedCount];
        int[] keptIndices = new int[storedCount];
        int kept = 0;
        for (int pos = 0; pos < storedCount; pos++) {
            boolean replaceable = instance.isMissingSparse(pos)
                    && getInputFormat().classIndex() != instance.index(pos)
                    && (instance.attributeSparse(pos).isNominal() || instance.attributeSparse(pos).isNumeric());
            if (replaceable) {
                // skipped entries are simply not stored in the new sparse instance
                continue;
            }
            keptValues[kept] = instance.valueSparse(pos);
            keptIndices[kept] = instance.index(pos);
            kept++;
        }
        if (kept < storedCount) {
            // at least one entry was skipped: shrink both arrays to the number kept
            double[] trimmedValues = new double[kept];
            int[] trimmedIndices = new int[kept];
            System.arraycopy(keptValues, 0, trimmedValues, 0, kept);
            System.arraycopy(keptIndices, 0, trimmedIndices, 0, kept);
            keptValues = trimmedValues;
            keptIndices = trimmedIndices;
        }
        converted = new SparseInstance(instance.weight(), keptValues, keptIndices, instance.numAttributes());
    } else {
        double[] denseValues = new double[getInputFormat().numAttributes()];
        for (int attIdx = 0; attIdx < instance.numAttributes(); attIdx++) {
            boolean replaceable = instance.isMissing(attIdx)
                    && getInputFormat().classIndex() != attIdx
                    && (getInputFormat().attribute(attIdx).isNominal() || getInputFormat().attribute(attIdx).isNumeric());
            denseValues[attIdx] = replaceable ? 0.0d : instance.value(attIdx);
        }
        converted = new DenseInstance(instance.weight(), denseValues);
    }
    converted.setDataset(instance.dataset());
    push(converted);
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(weka.core.Instance) DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance)

Example 3 with SparseInstance

use of weka.core.SparseInstance in project dkpro-tc by dkpro.

the class WekaUtils method instanceListToArffFileMultiLabel.

/**
 * Writes a list of TC instances to an ARFF file. Multi-label case.
 * <p>
 * One attribute is created per outcome label and placed in front of the feature attributes.
 * The number of label attributes is encoded in the relation name ({@code -C n}) and also used
 * as the class index. Each label attribute of an instance is set to {@code 1.0} if the
 * instance carries that label and to {@code 0.0} otherwise. Instances are written one by one
 * using the saver's incremental mode with compressed output enabled.
 *
 * @param outputFile
 *            the output file; missing parent directories are created
 * @param instances
 *            the instances to convert
 * @param useDenseInstances
 *            if {@code true} dense Weka instances are written, otherwise sparse ones
 * @param useWeights
 *            if {@code true} the weight of each TC instance is copied to the Weka instance
 * @throws Exception
 *             in case of errors
 */
public static void instanceListToArffFileMultiLabel(File outputFile, List<Instance> instances, boolean useDenseInstances, boolean useWeights) throws Exception {
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instances);
    List<String> outcomes = new ArrayList<>();
    for (Instance i : instances) {
        outcomes.add(i.getOutcome());
    }
    List<Attribute> outcomeAttributes = createOutcomeAttributes(new ArrayList<String>(outcomes));
    // in Meka, class label attributes have to go on top
    for (Attribute attribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(attribute.name(), attribute);
    }
    // for Meka-internal use
    Instances wekaInstances = new Instances(RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    wekaInstances.setClassIndex(outcomeAttributes.size());
    if (!outputFile.exists()) {
        // Create the missing parent directories. Calling mkdirs() on the file itself would
        // create a directory at the file's own path and make createNewFile() a no-op.
        File parentDir = outputFile.getAbsoluteFile().getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }
    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);
    for (int i = 0; i < instances.size(); i++) {
        Instance instance = instances.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);
        // set class label values
        List<String> instanceOutcome = instance.getOutcomes();
        for (Attribute label : outcomeAttributes) {
            String labelname = label.name();
            // the label is the part of the attribute name that follows the class prefix
            featureValues[attributeStore.getAttributeOffset(labelname)] = instanceOutcome.contains(labelname.split(CLASS_ATTRIBUTE_PREFIX)[1]) ? 1.0d : 0.0d;
        }
        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        wekaInstance.setDataset(wekaInstances);
        if (useWeights) {
            wekaInstance.setWeight(instance.getWeight());
        }
        saver.writeIncremental(wekaInstance);
    }
    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) ArffSaver(weka.core.converters.ArffSaver) Instances(weka.core.Instances) MultiLabelInstances(mulan.data.MultiLabelInstances)

Example 4 with SparseInstance

use of weka.core.SparseInstance in project dkpro-tc by dkpro.

the class WekaUtils method tcInstanceToMekaInstance.

/**
 * Builds a sparse Meka instance from a TC instance so that it matches the attribute layout of
 * the given training data and class labels.
 * <p>
 * Note that this method sets the class index of {@code trainingData} to the number of label
 * attributes before attaching the new instance to it.
 *
 * @param instance
 *            tc instance
 * @param trainingData
 *            training data
 * @param allClassLabels
 *            all labels
 * @return weka instance
 * @throws Exception
 *             in case of errors
 */
public static weka.core.Instance tcInstanceToMekaInstance(Instance instance, Instances trainingData, List<String> allClassLabels) throws Exception {
    AttributeStore store = new AttributeStore();
    List<Attribute> labelAttributes = createOutcomeAttributes(allClassLabels);
    final int labelCount = labelAttributes.size();
    // Meka expects the class label attributes in front of all feature attributes
    for (int idx = 0; idx < labelCount; idx++) {
        Attribute label = labelAttributes.get(idx);
        store.addAttributeAtBegin(label.name(), label);
    }
    // the remaining attributes are taken over from the training data
    final int attributeCount = trainingData.numAttributes();
    for (int idx = labelCount; idx < attributeCount; idx++) {
        Attribute feature = trainingData.attribute(idx);
        store.addAttribute(feature.name(), feature);
    }
    double[] values = getFeatureValues(store, instance);
    SparseInstance mekaInstance = new SparseInstance(1.0, values);
    trainingData.setClassIndex(labelCount);
    mekaInstance.setDataset(trainingData);
    return mekaInstance;
}
Also used : SparseInstance(weka.core.SparseInstance) Attribute(weka.core.Attribute)

Example 5 with SparseInstance

use of weka.core.SparseInstance in project dkpro-tc by dkpro.

the class WekaUtils method tcInstanceToWekaInstance.

/**
 * Builds a sparse Weka instance from a TC instance so that it matches the attribute layout of
 * the given training data and classes.
 * <p>
 * All attributes of the training data except the last one are reused; a freshly created
 * outcome attribute takes the place of the last one.
 *
 * @param instance
 *            tc instance
 * @param trainingData
 *            training data
 * @param allClasses
 *            all classes
 * @param isRegressionExperiment
 *            is regression
 * @return weka instance
 * @throws Exception
 *             in case of errors
 */
public static weka.core.Instance tcInstanceToWekaInstance(Instance instance, Instances trainingData, List<String> allClasses, boolean isRegressionExperiment) throws Exception {
    AttributeStore store = new AttributeStore();
    // the last attribute of the training data is its outcome attribute and is skipped here
    int idx = 0;
    while (idx < trainingData.numAttributes() - 1) {
        Attribute feature = trainingData.attribute(idx);
        store.addAttribute(feature.name(), feature);
        idx++;
    }
    // the outcome attribute is rebuilt from the given classes and added after the features
    Attribute outcome = createOutcomeAttribute(allClasses, isRegressionExperiment);
    store.addAttribute(outcome.name(), outcome);
    double[] values = getFeatureValues(store, instance);
    SparseInstance wekaInstance = new SparseInstance(1.0, values);
    wekaInstance.setDataset(trainingData);
    return wekaInstance;
}
Also used : SparseInstance(weka.core.SparseInstance) Attribute(weka.core.Attribute)

Aggregations

SparseInstance (weka.core.SparseInstance): 7; Attribute (weka.core.Attribute): 5; DenseInstance (weka.core.DenseInstance): 5; Instance (org.dkpro.tc.api.features.Instance): 4; Instances (weka.core.Instances): 4; IOException (java.io.IOException): 2; ArrayList (java.util.ArrayList): 2; MultiLabelInstances (mulan.data.MultiLabelInstances): 2; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 2; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 2; ArffSaver (weka.core.converters.ArffSaver): 2; Instance (weka.core.Instance): 1