Search in sources :

Example 1 with ReplaceMissingValues

use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.

the class MultiFilterTest method getConfiguredFilterVariant.

/**
 * Creates a variant configuration of the MultiFilter: a chain of two
 * filters, ReplaceMissingValues followed by Center.
 *
 * @return the configured MultiFilter
 */
public Filter getConfiguredFilterVariant() {
    MultiFilter variant = new MultiFilter();
    // order matters: element 0 is ReplaceMissingValues, element 1 is Center
    variant.setFilters(new Filter[] { new ReplaceMissingValues(), new Center() });
    return variant;
}
Also used : Center(weka.filters.unsupervised.attribute.Center) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 2 with ReplaceMissingValues

use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.

the class SGD method buildClassifier.

/**
 * Method for building the classifier.
 *
 * <p>Works on a copy of the supplied data from which instances with a
 * missing class have been removed. Depending on the options and on whether
 * any instances remain, the copy is passed through ReplaceMissingValues,
 * NominalToBinary and Normalize before the weights are allocated and
 * training is run.
 *
 * <p>A preprocessing filter that is not applied during this call is set to
 * {@code null}, so a filter configured by an earlier call does not linger
 * on this object after a rebuild.
 * NOTE(review): assumes the rest of the class treats a null filter field as
 * "step not applied" -- confirm against the prediction/update methods.
 *
 * @param data the set of training instances.
 * @throws Exception if the classifier can't be built successfully.
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    reset();
    // can classifier handle the data?
    getCapabilities().testWithFail(data);
    // work on a copy so the caller's data set is left untouched
    data = new Instances(data);
    data.deleteWithMissingClass();
    if (data.numInstances() > 0 && !m_dontReplaceMissing) {
        m_replaceMissing = new ReplaceMissingValues();
        m_replaceMissing.setInputFormat(data);
        data = Filter.useFilter(data, m_replaceMissing);
    } else {
        // step skipped: do not keep a filter from a previous build
        m_replaceMissing = null;
    }
    // check for only numeric attributes
    boolean onlyNumeric = true;
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != data.classIndex()) {
            if (!data.attribute(i).isNumeric()) {
                onlyNumeric = false;
                break;
            }
        }
    }
    if (!onlyNumeric) {
        // the supervised variant is used when instances are available,
        // the unsupervised one when there are none
        if (data.numInstances() > 0) {
            m_nominalToBinary = new weka.filters.supervised.attribute.NominalToBinary();
        } else {
            m_nominalToBinary = new weka.filters.unsupervised.attribute.NominalToBinary();
        }
        m_nominalToBinary.setInputFormat(data);
        data = Filter.useFilter(data, m_nominalToBinary);
    } else {
        // step skipped: do not keep a filter from a previous build
        m_nominalToBinary = null;
    }
    if (!m_dontNormalize && data.numInstances() > 0) {
        m_normalize = new Normalize();
        m_normalize.setInputFormat(data);
        data = Filter.useFilter(data, m_normalize);
    } else {
        // step skipped: do not keep a filter from a previous build
        m_normalize = null;
    }
    m_numInstances = data.numInstances();
    // one weight per attribute of the filtered data plus one extra slot
    m_weights = new double[data.numAttributes() + 1];
    // keep only the header (no instances) of the filtered data
    m_data = new Instances(data, 0);
    if (data.numInstances() > 0) {
        // randomize the data
        data.randomize(new Random(getSeed()));
        train(data);
    }
}
Also used : Instances(weka.core.Instances) Normalize(weka.filters.unsupervised.attribute.Normalize) Random(java.util.Random) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 3 with ReplaceMissingValues

use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.

the class SMO method buildClassifier.

/**
 * Trains the classifier on the supplied instances. Multi-class problems
 * are handled with a one-against-one wrapper: one binary SMO model is
 * built for every pair of classes.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {
    if (m_checksTurnedOff) {
        // no checks requested: data is used as is, no missing-value filter
        m_Missing = null;
    } else {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);
        // copy the data and drop instances with a missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        /* Keep only the instances whose weight is greater than 0.
           MUST be done since condition (8) of Keerthi's paper
           is made with the assertion Ci > 0 (See equation (3a). */
        Instances weighted = new Instances(insts, insts.numInstances());
        for (int idx = 0; idx < insts.numInstances(); idx++) {
            Instance candidate = insts.instance(idx);
            if (candidate.weight() > 0) {
                weighted.add(candidate);
            }
        }
        if (weighted.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = weighted;
        // replace missing attribute values
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    }

    // Nominal attributes are only looked for when numeric attributes can be
    // handled and the checks are enabled.
    boolean binarize = false;
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES) && !m_checksTurnedOff) {
        int classPos = insts.classIndex();
        for (int att = 0; att < insts.numAttributes() && !binarize; att++) {
            binarize = att != classPos && !insts.attribute(att).isNumeric();
        }
    }
    if (binarize) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_NominalToBinary);
    } else {
        m_NominalToBinary = null;
    }

    // pick the attribute filter selected by the filter type, if any
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
    } else {
        m_Filter = null;
    }
    if (m_Filter != null) {
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    // linear means: a polynomial kernel with an exponent of exactly 1
    boolean polyKernel = m_kernel instanceof PolyKernel;
    m_KernelIsLinear = polyKernel && ((PolyKernel) m_kernel).getExponent() == 1.0;

    // Generate subsets representing each class
    final int numClasses = insts.numClasses();
    final int numInsts = insts.numInstances();
    Instances[] perClass = new Instances[numClasses];
    for (int c = 0; c < numClasses; c++) {
        perClass[c] = new Instances(insts, numInsts);
    }
    for (int row = 0; row < numInsts; row++) {
        Instance current = insts.instance(row);
        perClass[(int) current.classValue()].add(current);
    }
    for (Instances group : perClass) {
        group.compactify();
    }

    // Build the binary classifiers, one per unordered pair of classes
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[numClasses][numClasses];
    for (int first = 0; first < numClasses; first++) {
        for (int second = first + 1; second < numClasses; second++) {
            BinarySMO pairModel = new BinarySMO();
            m_classifiers[first][second] = pairModel;
            pairModel.setKernel(Kernel.makeCopy(getKernel()));
            // training data of the pair: all instances of both classes
            Instances pairData = new Instances(insts, numInsts);
            Instances firstGroup = perClass[first];
            for (int k = 0; k < firstGroup.numInstances(); k++) {
                pairData.add(firstGroup.instance(k));
            }
            Instances secondGroup = perClass[second];
            for (int k = 0; k < secondGroup.numInstances(); k++) {
                pairData.add(secondGroup.instance(k));
            }
            pairData.compactify();
            pairData.randomize(rand);
            pairModel.buildClassifier(pairData, first, second, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
Also used : Normalize(weka.filters.unsupervised.attribute.Normalize) Instance(weka.core.Instance) DenseInstance(weka.core.DenseInstance) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) Instances(weka.core.Instances) Standardize(weka.filters.unsupervised.attribute.Standardize) Random(java.util.Random) PolyKernel(weka.classifiers.functions.supportVector.PolyKernel) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 4 with ReplaceMissingValues

use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.

the class SMOreg method buildClassifier.

/**
 * Trains the regression model on the supplied instances.
 *
 * <p>The data is copied, cleaned (missing class, non-positive weight),
 * passed through the preprocessing filters and then handed to the
 * optimizer. When a standardize/normalize filter is active, the
 * coefficients m_x0 and m_x1 are derived from two instances with different
 * class values, relating the filtered class values to the original ones.
 *
 * @param instances the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // copy the data and drop instances with a missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    // Keep only the instances whose weight is greater than 0.
    // MUST be done since condition (8) of Keerthi's paper
    // is made with the assertion Ci > 0 (See equation (3a).
    Instances positiveWeight = new Instances(instances, 0);
    for (int n = 0; n < instances.numInstances(); n++) {
        if (instances.instance(n).weight() > 0) {
            positiveWeight.add(instances.instance(n));
        }
    }
    if (positiveWeight.numInstances() == 0) {
        throw new Exception("No training instances left after removing " + "instance with either a weight null or a missing class!");
    }
    instances = positiveWeight;

    // remember whether every non-class attribute is numeric
    m_onlyNumeric = true;
    for (int att = 0; att < instances.numAttributes(); att++) {
        if (att != instances.classIndex() && !instances.attribute(att).isNumeric()) {
            m_onlyNumeric = false;
            break;
        }
    }

    // replace missing attribute values
    m_Missing = new ReplaceMissingValues();
    m_Missing.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_Missing);

    // binarize nominal attributes only when numeric attributes can be handled
    // and at least one non-numeric attribute is present
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES) && !m_onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_NominalToBinary);
    } else {
        m_NominalToBinary = null;
    }

    // retrieve two different class values used to determine filter transformation
    final int total = instances.numInstances();
    double y0 = instances.instance(0).classValue();
    int index = 1;
    while (index < total) {
        if (instances.instance(index).classValue() != y0) {
            break;
        }
        index++;
    }
    if (index == total) {
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = instances.instance(index).classValue();

    // choose the filter; in both cases setIgnoreClass(true) is set on it
    if (m_filterType == FILTER_STANDARDIZE) {
        Standardize standardize = new Standardize();
        standardize.setIgnoreClass(true);
        m_Filter = standardize;
    } else if (m_filterType == FILTER_NORMALIZE) {
        Normalize normalize = new Normalize();
        normalize.setIgnoreClass(true);
        m_Filter = normalize;
    } else {
        m_Filter = null;
    }

    if (m_Filter == null) {
        // no filter: identity mapping
        m_x1 = 1.0;
        m_x0 = 0.0;
    } else {
        // apply filter
        m_Filter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_Filter);
        double z0 = instances.instance(0).classValue();
        double z1 = instances.instance(index).classValue();
        // no division by zero, since y0 != y1 guaranteed => z0 != z1 ???
        m_x1 = (y0 - y1) / (z0 - z1);
        // = y1 - m_x1 * z1
        m_x0 = y0 - m_x1 * z0;
    }

    m_optimizer.setSMOReg(this);
    m_optimizer.buildClassifier(instances);
}
Also used : Instances(weka.core.Instances) Standardize(weka.filters.unsupervised.attribute.Standardize) Normalize(weka.filters.unsupervised.attribute.Normalize) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 5 with ReplaceMissingValues

use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.

the class SimpleLogistic method buildClassifier.

/**
 * Builds the logistic regression using LogitBoost.
 *
 * <p>The training data is copied, stripped of instances with a missing
 * class, run through ReplaceMissingValues and NominalToBinary, and then
 * used to build a LogisticBase model configured from this object's options.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // working copy without instances that lack a class value
    Instances train = new Instances(data);
    train.deleteWithMissingClass();

    // fill in missing attribute values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // turn nominal attributes into binary ones
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // set up the boosted model with the configured options
    LogisticBase model = new LogisticBase(m_numBoostingIterations, m_useCrossValidation, m_errorOnProbabilities);
    m_boostedModel = model;
    model.setMaxIterations(m_maxBoostingIterations);
    model.setHeuristicStop(m_heuristicStop);
    model.setWeightTrimBeta(m_weightTrimBeta);
    model.setUseAIC(m_useAIC);

    // fit it on the preprocessed data
    model.buildClassifier(train);
}
Also used : Instances(weka.core.Instances) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues) LogisticBase(weka.classifiers.trees.lmt.LogisticBase)

Aggregations

ReplaceMissingValues (weka.filters.unsupervised.attribute.ReplaceMissingValues): 22; Instances (weka.core.Instances): 19; NominalToBinary (weka.filters.unsupervised.attribute.NominalToBinary): 9; Random (java.util.Random): 8; Instance (weka.core.Instance): 7; Normalize (weka.filters.unsupervised.attribute.Normalize): 4; Standardize (weka.filters.unsupervised.attribute.Standardize): 4; NominalToBinary (weka.filters.supervised.attribute.NominalToBinary): 3; RemoveUseless (weka.filters.unsupervised.attribute.RemoveUseless): 3; IOException (java.io.IOException): 2; ArrayList (java.util.ArrayList): 2; AmbitException (net.idea.modbcum.i.exceptions.AmbitException): 2; ResourceException (org.restlet.resource.ResourceException): 2; Matrix (weka.core.matrix.Matrix): 2; LiteratureEntry (ambit2.base.data.LiteratureEntry): 1; PredictedVarsTemplate (ambit2.base.data.PredictedVarsTemplate): 1; Property (ambit2.base.data.Property): 1; PropertyAnnotation (ambit2.base.data.PropertyAnnotation): 1; PropertyAnnotations (ambit2.base.data.PropertyAnnotations): 1; Template (ambit2.base.data.Template): 1