Search in sources:

Example 1 with NominalToBinary

use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.

In the class SMO, the method buildClassifier.

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {
    if (!m_checksTurnedOff) {
        // Verify that the classifier can handle this data.
        getCapabilities().testWithFail(insts);
        // Copy the data and drop instances without a class value.
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        // Drop zero-weight instances. This MUST be done: condition (8) of
        // Keerthi's paper assumes Ci > 0 (see equation (3a)).
        Instances nonZero = new Instances(insts, insts.numInstances());
        for (int n = 0; n < insts.numInstances(); n++) {
            Instance cur = insts.instance(n);
            if (cur.weight() > 0) {
                nonZero.add(cur);
            }
        }
        if (nonZero.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = nonZero;
    }
    // Fill in missing attribute values unless checks are disabled.
    if (m_checksTurnedOff) {
        m_Missing = null;
    } else {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    }
    // Binarize nominal attributes if any are present. The scan is skipped
    // when checks are turned off, in which case no conversion happens.
    m_NominalToBinary = null;
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean hasNominal = false;
        if (!m_checksTurnedOff) {
            for (int a = 0; a < insts.numAttributes(); a++) {
                if (a != insts.classIndex() && !insts.attribute(a).isNumeric()) {
                    hasNominal = true;
                    break;
                }
            }
        }
        if (hasNominal) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        }
    }
    // Optionally standardize or normalize the attribute values.
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
    } else {
        m_Filter = null;
    }
    if (m_Filter != null) {
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    }
    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    // The kernel is linear iff it is a polynomial kernel with exponent 1.
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);
    // Partition the training data into one subset per class value.
    int numClasses = insts.numClasses();
    Instances[] byClass = new Instances[numClasses];
    for (int c = 0; c < numClasses; c++) {
        byClass[c] = new Instances(insts, insts.numInstances());
    }
    for (int n = 0; n < insts.numInstances(); n++) {
        Instance cur = insts.instance(n);
        byClass[(int) cur.classValue()].add(cur);
    }
    for (int c = 0; c < numClasses; c++) {
        byClass[c].compactify();
    }
    // Train one binary SMO for every unordered pair of classes.
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[numClasses][numClasses];
    for (int first = 0; first < numClasses; first++) {
        for (int second = first + 1; second < numClasses; second++) {
            m_classifiers[first][second] = new BinarySMO();
            m_classifiers[first][second].setKernel(Kernel.makeCopy(getKernel()));
            Instances pairData = new Instances(insts, insts.numInstances());
            for (int k = 0; k < byClass[first].numInstances(); k++) {
                pairData.add(byClass[first].instance(k));
            }
            for (int k = 0; k < byClass[second].numInstances(); k++) {
                pairData.add(byClass[second].instance(k));
            }
            pairData.compactify();
            pairData.randomize(rand);
            m_classifiers[first][second].buildClassifier(pairData, first, second, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
Also used : Normalize(weka.filters.unsupervised.attribute.Normalize) Instance(weka.core.Instance) DenseInstance(weka.core.DenseInstance) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) Instances(weka.core.Instances) Standardize(weka.filters.unsupervised.attribute.Standardize) Random(java.util.Random) PolyKernel(weka.classifiers.functions.supportVector.PolyKernel) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 2 with NominalToBinary

use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.

In the class SMOreg, the method buildClassifier.

/**
 * Method for building the classifier.
 *
 * @param instances the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
    // Verify that the classifier can handle this data.
    getCapabilities().testWithFail(instances);
    // Copy the data and drop instances without a class value.
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    // Drop zero-weight instances. This MUST be done: condition (8) of
    // Keerthi's paper assumes Ci > 0 (see equation (3a)).
    Instances kept = new Instances(instances, 0);
    for (int n = 0; n < instances.numInstances(); n++) {
        if (instances.instance(n).weight() > 0) {
            kept.add(instances.instance(n));
        }
    }
    if (kept.numInstances() == 0) {
        throw new Exception("No training instances left after removing " + "instance with either a weight null or a missing class!");
    }
    instances = kept;
    // Record whether every non-class attribute is numeric.
    m_onlyNumeric = true;
    for (int a = 0; a < instances.numAttributes(); a++) {
        if (a != instances.classIndex() && !instances.attribute(a).isNumeric()) {
            m_onlyNumeric = false;
            break;
        }
    }
    // Fill in missing attribute values.
    m_Missing = new ReplaceMissingValues();
    m_Missing.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_Missing);
    // Binarize nominal attributes when any are present.
    m_NominalToBinary = null;
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES) && !m_onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_NominalToBinary);
    }
    // Locate two instances with different class values; they are used
    // below to recover the linear transformation applied by the filter.
    double y0 = instances.instance(0).classValue();
    int index = 1;
    while (index < instances.numInstances() && instances.instance(index).classValue() == y0) {
        index++;
    }
    if (index == instances.numInstances()) {
        // A constant target cannot be handled; refuse to build the model.
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = instances.instance(index).classValue();
    // Optionally standardize or normalize (the class attribute included).
    if (m_filterType == FILTER_STANDARDIZE) {
        Standardize standardize = new Standardize();
        standardize.setIgnoreClass(true);
        m_Filter = standardize;
    } else if (m_filterType == FILTER_NORMALIZE) {
        Normalize normalize = new Normalize();
        normalize.setIgnoreClass(true);
        m_Filter = normalize;
    } else {
        m_Filter = null;
    }
    if (m_Filter != null) {
        m_Filter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_Filter);
        // Recover the affine map from filtered class values back to the
        // original scale: y = m_x1 * z + m_x0.
        double z0 = instances.instance(0).classValue();
        double z1 = instances.instance(index).classValue();
        // y0 != y1 should guarantee z0 != z1, so the division is safe.
        m_x1 = (y0 - y1) / (z0 - z1);
        // = y1 - m_x1 * z1
        m_x0 = (y0 - m_x1 * z0);
    } else {
        // No filter applied: class values are already on the original scale.
        m_x1 = 1.0;
        m_x0 = 0.0;
    }
    m_optimizer.setSMOReg(this);
    m_optimizer.buildClassifier(instances);
}
Also used : Instances(weka.core.Instances) Standardize(weka.filters.unsupervised.attribute.Standardize) Normalize(weka.filters.unsupervised.attribute.Normalize) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 3 with NominalToBinary

use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.

In the class SimpleLogistic, the method buildClassifier.

/**
 * Builds the logistic regression model using LogitBoost.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
    // Verify that the classifier can handle this data.
    getCapabilities().testWithFail(data);
    // Copy the data and drop instances without a class value.
    data = new Instances(data);
    data.deleteWithMissingClass();
    // Fill in missing attribute values.
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);
    // Binarize nominal attributes.
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);
    // Configure the underlying boosted logistic model...
    m_boostedModel = new LogisticBase(m_numBoostingIterations, m_useCrossValidation, m_errorOnProbabilities);
    m_boostedModel.setMaxIterations(m_maxBoostingIterations);
    m_boostedModel.setHeuristicStop(m_heuristicStop);
    m_boostedModel.setWeightTrimBeta(m_weightTrimBeta);
    m_boostedModel.setUseAIC(m_useAIC);
    // ...and train it on the filtered data.
    m_boostedModel.buildClassifier(data);
}
Also used : Instances(weka.core.Instances) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues) LogisticBase(weka.classifiers.trees.lmt.LogisticBase)

Example 4 with NominalToBinary

use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.

In the class VotedPerceptron, the method buildClassifier.

/**
 * Builds the ensemble of perceptrons.
 *
 * @param insts the data to train the classifier with
 * @throws Exception if something goes wrong during building
 */
public void buildClassifier(Instances insts) throws Exception {
    // Verify that the classifier can handle this data.
    getCapabilities().testWithFail(insts);
    // Copy the data and drop instances without a class value.
    insts = new Instances(insts);
    insts.deleteWithMissingClass();
    // Filter the training data: fill in missing values, then binarize
    // nominal attributes.
    m_Train = new Instances(insts);
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_NominalToBinary);
    // Shuffle the training data.
    m_Train.randomize(new Random(m_Seed));
    // Allocate storage for up to m_MaxK + 1 perceptrons.
    m_Additions = new int[m_MaxK + 1];
    m_IsAddition = new boolean[m_MaxK + 1];
    m_Weights = new int[m_MaxK + 1];
    // Train the ensemble: a correct prediction increases the current
    // perceptron's vote weight, a mistake spawns the next perceptron.
    m_K = 0;
    training: for (int pass = 0; pass < m_NumIterations; pass++) {
        for (int idx = 0; idx < m_Train.numInstances(); idx++) {
            Instance inst = m_Train.instance(idx);
            if (inst.classIsMissing()) {
                continue;
            }
            int predicted = makePrediction(m_K, inst);
            int actual = (int) inst.classValue();
            if (predicted == actual) {
                m_Weights[m_K]++;
            } else {
                m_IsAddition[m_K] = (actual == 1);
                m_Additions[m_K] = idx;
                m_K++;
                m_Weights[m_K]++;
            }
            if (m_K == m_MaxK) {
                // Capacity reached: stop training entirely.
                break training;
            }
        }
    }
}
Also used : Instances(weka.core.Instances) Random(java.util.Random) Instance(weka.core.Instance) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 5 with NominalToBinary

use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.

In the class Logistic, the method buildClassifier.

/**
 * Builds the logistic regression model: filters the training data,
 * standardizes the attribute values, then fits the coefficients by
 * numerically minimizing the negative log-likelihood, and finally
 * converts the coefficients back to the original attribute units.
 *
 * @param train the training data to be used for generating the boosted
 *          classifier.
 * @throws Exception if the classifier could not be built successfully
 */
@Override
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);
    // remove instances with missing class (on a copy, so the caller's
    // dataset is left untouched)
    train = new Instances(train);
    train.deleteWithMissingClass();
    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);
    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);
    // Transform nominal attributes into binary indicators
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);
    // Save the structure for printing the model
    m_structure = new Instances(train, 0);
    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();
    // Only K-1 class labels needed; the last class is the reference class
    int nK = m_NumClasses - 1;
    int nR = m_NumPredictors = train.numAttributes() - 1;
    int nC = train.numInstances();
    // Data values; column 0 holds the constant intercept term
    m_Data = new double[nC][nR + 1];
    // Class labels
    int[] Y = new int[nC];
    // Attribute means (weighted; used to standardize the data below)
    double[] xMean = new double[nR + 1];
    // Attribute stddev's (weighted)
    double[] xSD = new double[nR + 1];
    // Number of classes (total instance count per class label)
    double[] sY = new double[nK + 1];
    // Weights of instances
    double[] weights = new double[nC];
    // Total weights of the instances
    double totWeights = 0;
    // Optimized parameter values
    m_Par = new double[nR + 1][nK];
    if (m_Debug) {
        System.out.println("Extracting data...");
    }
    // Copy instances into m_Data, accumulating weighted sums and sums of
    // squares for the mean/stddev computation below.
    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        // Class value starts from 0
        Y[i] = (int) current.classValue();
        // Dealing with weights
        weights[i] = current.weight();
        totWeights += weights[i];
        m_Data[i][0] = 1;
        int j = 1;
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }
        // Class count
        sY[Y[i]]++;
    }
    // The stddev formula below divides by (totWeights - 1), so a total
    // weight of at most 1 (with more than one instance) cannot be handled.
    if ((totWeights <= 1) && (nC > 1)) {
        throw new Exception("Sum of weights of instances less than 1, please reweight!");
    }
    // The intercept column is never rescaled.
    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1) {
            // Weighted sample standard deviation; Math.abs guards against
            // tiny negative values from floating-point round-off.
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        } else {
            xSD[j] = 0;
        }
    }
    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++) {
            System.out.println(sY[m] + " cases have class " + m);
        }
        System.out.println("\n Variable     Avg       SD    ");
        for (int j = 1; j <= nR; j++) {
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4) + Utils.doubleToString(xSD[j], 10, 4));
        }
    }
    // Normalise input data (constant attributes, xSD == 0, are left as-is)
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }
    if (m_Debug) {
        System.out.println("\nIteration History...");
    }
    // Flattened coefficient vector: one (nR + 1)-sized segment per
    // non-reference class label.
    double[] x = new double[(nR + 1) * nK];
    // Boundary constraints, N/A here (NaN means unconstrained)
    double[][] b = new double[2][x.length];
    // Initialize
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        // Null model: intercept set from the class priors (log-odds
        // against the reference class), all other coefficients zero
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0);
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }
    OptObject oO = new OptObject();
    oO.setWeights(weights);
    oO.setClassLabels(Y);
    Optimization opt = null;
    if (m_useConjugateGradientDescent) {
        opt = new OptEngCG(oO);
    } else {
        opt = new OptEng(oO);
    }
    opt.setDebug(m_Debug);
    if (m_MaxIts == -1) {
        // Search until convergence; findArgmin returns null when it runs
        // out of iterations, in which case we resume from where it stopped
        x = opt.findArgmin(x, b);
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug) {
                System.out.println("First set of iterations finished, not enough!");
            }
            x = opt.findArgmin(x, b);
        }
        if (m_Debug) {
            System.out.println(" -------------<Converged>--------------");
        }
    } else {
        // Bounded iteration count: accept the best values found so far
        // even if the optimizer did not converge
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) {
            x = opt.getVarbValues();
        }
    }
    // Log-likelihood (the optimizer minimizes the negative log-likelihood)
    m_LL = -opt.getMinFunction();
    // Don't need data matrix anymore
    m_Data = null;
    // Convert coefficients back to non-normalized attribute units,
    // folding the mean shift into the intercept term
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}
Also used : RemoveUseless(weka.filters.unsupervised.attribute.RemoveUseless) Instance(weka.core.Instance) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) Optimization(weka.core.Optimization) ConjugateGradientOptimization(weka.core.ConjugateGradientOptimization) Instances(weka.core.Instances) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Aggregations

NominalToBinary (weka.filters.unsupervised.attribute.NominalToBinary)10 Instances (weka.core.Instances)9 ReplaceMissingValues (weka.filters.unsupervised.attribute.ReplaceMissingValues)9 Random (java.util.Random)4 Instance (weka.core.Instance)4 Standardize (weka.filters.unsupervised.attribute.Standardize)4 Normalize (weka.filters.unsupervised.attribute.Normalize)3 IOException (java.io.IOException)2 Matrix (weka.core.matrix.Matrix)2 LiteratureEntry (ambit2.base.data.LiteratureEntry)1 PredictedVarsTemplate (ambit2.base.data.PredictedVarsTemplate)1 Property (ambit2.base.data.Property)1 PropertyAnnotation (ambit2.base.data.PropertyAnnotation)1 PropertyAnnotations (ambit2.base.data.PropertyAnnotations)1 Template (ambit2.base.data.Template)1 IEvaluation (ambit2.core.data.model.IEvaluation)1 ModelQueryResults (ambit2.core.data.model.ModelQueryResults)1 EvaluationStats (ambit2.model.evaluation.EvaluationStats)1 AlgorithmURIReporter (ambit2.rest.algorithm.AlgorithmURIReporter)1 BorderLayout (java.awt.BorderLayout)1