Search in sources :

Example 1 with Distribution

use of weka.classifiers.trees.j48.Distribution in project umple by umple.

the class C45PruneableDecList method buildDecList.

/**
 * Grows this node of the partial tree from the given data only; no hold-out
 * set is involved.
 *
 * <p>The node state is reset, a local model is picked (the no-split model when
 * {@code leaf} is set, otherwise whatever {@code m_toSelectModel} selects) and,
 * if that model yields more than one subset, successors are built one at a
 * time in the order dictated by {@code chooseIndex()}. Expansion stops as soon
 * as a freshly built successor is not a leaf, every successor slot has been
 * visited, or {@code chooseIndex()} returns -1. In the last case all missing
 * successors are created as leaves; if this happens on the very first round
 * the node itself collapses into a leaf. Afterwards, if every successor is a
 * leaf, {@code pruneEnd()} is invoked.
 *
 * @param data the instances to build this node from
 * @param leaf whether this node is forced to use the no-split model
 * @throws Exception if something goes wrong
 */
@Override
public void buildDecList(Instances data, boolean leaf) throws Exception {
    // Wipe whatever state a previous build left behind.
    m_train = null;
    m_test = null;
    m_isLeaf = false;
    m_isEmpty = false;
    m_sons = null;
    indeX = 0;

    final double totalWeight = data.sumOfWeights();
    final NoSplit fallbackModel = new NoSplit(new Distribution(data));
    if (!leaf) {
        m_localModel = m_toSelectModel.selectModel(data);
    } else {
        m_localModel = fallbackModel;
    }

    // A model with at most one subset makes this node a leaf.
    if (m_localModel.numSubsets() <= 1) {
        m_isLeaf = true;
        if (Utils.eq(totalWeight, 0)) {
            m_isEmpty = true;
        }
        return;
    }

    final Instances[] subsets = m_localModel.split(data);
    // Drop the reference to the unsplit data; only the subsets are needed now.
    data = null;
    m_sons = new ClassifierDecList[m_localModel.numSubsets()];

    int rounds = 0;
    while (true) {
        rounds++;
        final int next = chooseIndex();
        if (next == -1) {
            // Nothing left to expand: fill every open slot with a leaf.
            for (int k = 0; k < m_sons.length; k++) {
                if (m_sons[k] == null) {
                    m_sons[k] = getNewDecList(subsets[k], true);
                }
            }
            if (rounds < 2) {
                // No successor was ever expanded, so turn this node into a leaf.
                m_localModel = fallbackModel;
                m_isLeaf = true;
                m_sons = null;
                if (Utils.eq(totalWeight, 0)) {
                    m_isEmpty = true;
                }
                return;
            }
            break;
        }
        m_sons[next] = getNewDecList(subsets[next], false);
        // Keep going only while slots remain and the newest successor is a leaf.
        if (rounds >= m_sons.length || !m_sons[next].m_isLeaf) {
            break;
        }
    }

    // Determine whether every successor exists and is a leaf.
    boolean allLeaves = true;
    for (int k = 0; k < m_sons.length; k++) {
        if (m_sons[k] == null || !m_sons[k].m_isLeaf) {
            allLeaves = false;
            break;
        }
    }
    if (allLeaves) {
        pruneEnd();
    }
    // m_isLeaf is read after pruneEnd() has run, as in the all-leaves case.
    if (!allLeaves || !m_isLeaf) {
        indeX = chooseLastIndex();
    }
}
Also used : Instances(weka.core.Instances) Distribution(weka.classifiers.trees.j48.Distribution) NoSplit(weka.classifiers.trees.j48.NoSplit)

Example 2 with Distribution

use of weka.classifiers.trees.j48.Distribution in project umple by umple.

the class PruneableDecList method buildDecList.

/**
 * Grows this node of the partial tree using a separate hold-out set.
 *
 * <p>The node state is reset, a local model is picked (the no-split model when
 * {@code leaf} is set, otherwise whatever {@code m_toSelectModel} selects from
 * the training and hold-out data) and the hold-out distribution under that
 * model is stored in {@code m_test}. If the model yields more than one subset,
 * successors are built one at a time in the order dictated by
 * {@code chooseIndex()}. Expansion stops as soon as a freshly built successor
 * is not a leaf, every successor slot has been visited, or
 * {@code chooseIndex()} returns -1. In the last case all missing successors
 * are created as leaves; if this happens on the very first round the node
 * itself collapses into a leaf. Afterwards, if every successor is a leaf,
 * {@code pruneEnd()} is invoked.
 *
 * @param train the training instances for this node
 * @param test the hold-out instances for this node
 * @param leaf whether this node is forced to use the no-split model
 * @throws Exception if something goes wrong
 */
public void buildDecList(Instances train, Instances test, boolean leaf) throws Exception {
    // Wipe whatever state a previous build left behind.
    m_train = null;
    m_isLeaf = false;
    m_isEmpty = false;
    m_sons = null;
    indeX = 0;

    final double totalWeight = train.sumOfWeights();
    final NoSplit fallbackModel = new NoSplit(new Distribution(train));
    if (!leaf) {
        m_localModel = m_toSelectModel.selectModel(train, test);
    } else {
        m_localModel = fallbackModel;
    }
    m_test = new Distribution(test, m_localModel);

    // A model with at most one subset makes this node a leaf.
    if (m_localModel.numSubsets() <= 1) {
        m_isLeaf = true;
        if (Utils.eq(totalWeight, 0)) {
            m_isEmpty = true;
        }
        return;
    }

    final Instances[] trainSubsets = m_localModel.split(train);
    final Instances[] testSubsets = m_localModel.split(test);
    // Drop the references to the unsplit data; only the subsets are needed now.
    train = null;
    test = null;
    m_sons = new ClassifierDecList[m_localModel.numSubsets()];

    int rounds = 0;
    while (true) {
        rounds++;
        final int next = chooseIndex();
        if (next == -1) {
            // Nothing left to expand: fill every open slot with a leaf.
            for (int k = 0; k < m_sons.length; k++) {
                if (m_sons[k] == null) {
                    m_sons[k] = getNewDecList(trainSubsets[k], testSubsets[k], true);
                }
            }
            if (rounds < 2) {
                // No successor was ever expanded, so turn this node into a leaf.
                m_localModel = fallbackModel;
                m_isLeaf = true;
                m_sons = null;
                if (Utils.eq(totalWeight, 0)) {
                    m_isEmpty = true;
                }
                return;
            }
            break;
        }
        m_sons[next] = getNewDecList(trainSubsets[next], testSubsets[next], false);
        // Keep going only while slots remain and the newest successor is a leaf.
        if (rounds >= m_sons.length || !m_sons[next].m_isLeaf) {
            break;
        }
    }

    // Determine whether every successor exists and is a leaf.
    boolean allLeaves = true;
    for (int k = 0; k < m_sons.length; k++) {
        if (m_sons[k] == null || !m_sons[k].m_isLeaf) {
            allLeaves = false;
            break;
        }
    }
    if (allLeaves) {
        pruneEnd();
    }
    // m_isLeaf is read after pruneEnd() has run, as in the all-leaves case.
    if (!allLeaves || !m_isLeaf) {
        indeX = chooseLastIndex();
    }
}
Also used : Instances(weka.core.Instances) Distribution(weka.classifiers.trees.j48.Distribution) NoSplit(weka.classifiers.trees.j48.NoSplit)

Example 3 with Distribution

use of weka.classifiers.trees.j48.Distribution in project umple by umple.

the class ResidualModelSelection method selectModel.

/**
 * Picks a residual-based split for the given dataset.
 *
 * <p>A {@code ResidualSplit} is built for every non-class attribute; among
 * those accepted by {@code checkModel(m_minNumInstances)} the one with the
 * largest entropy gain wins. The winning split is built afresh and returned
 * when its gain reaches {@code m_minInfoGain}; otherwise, or when the dataset
 * holds fewer than {@code m_minNumInstances} instances, a {@code NoSplit}
 * model over the data is returned.
 *
 * @param data the instances to split
 * @param dataZs the Z values handed on to each split's {@code buildClassifier}
 * @param dataWs the W values handed on to each split's {@code buildClassifier}
 * @return the selected split, or a {@code NoSplit} model if none qualifies
 * @throws Exception if the data has fewer than two attributes, or if building
 *         a split fails
 */
public final ClassifierSplitModel selectModel(Instances data, double[][] dataZs, double[][] dataWs) throws Exception {
    final int attributeCount = data.numAttributes();
    if (attributeCount < 2) {
        throw new Exception("Can't select Model without non-class attribute");
    }
    if (data.numInstances() < m_minNumInstances) {
        return new NoSplit(new Distribution(data));
    }

    int winner = -1;
    double winnerGain = -Double.MAX_VALUE;
    for (int att = 0; att < attributeCount; att++) {
        // The class attribute itself is never a split candidate.
        if (att == data.classIndex()) {
            continue;
        }
        ResidualSplit candidate = new ResidualSplit(att);
        candidate.buildClassifier(data, dataZs, dataWs);
        if (!candidate.checkModel(m_minNumInstances)) {
            continue;
        }
        double candidateGain = candidate.entropyGain();
        if (candidateGain > winnerGain) {
            winnerGain = candidateGain;
            winner = att;
        }
    }

    if (winnerGain >= m_minInfoGain) {
        // Rebuild the split on the winning attribute and hand it back.
        ResidualSplit chosen = new ResidualSplit(winner);
        chosen.buildClassifier(data, dataZs, dataWs);
        return chosen;
    }
    // No candidate reached the required information gain.
    return new NoSplit(new Distribution(data));
}
Also used : Distribution(weka.classifiers.trees.j48.Distribution) NoSplit(weka.classifiers.trees.j48.NoSplit)

Example 4 with Distribution

use of weka.classifiers.trees.j48.Distribution in project umple by umple.

the class ClassifierDecList method buildDecList.

/**
 * Grows this node of the partial tree from the given data only; no hold-out
 * set is involved.
 *
 * <p>The node state is reset, a local model is picked (the no-split model when
 * {@code leaf} is set, otherwise whatever {@code m_toSelectModel} selects) and,
 * if that model yields more than one subset, successors are built one at a
 * time in the order dictated by {@code chooseIndex()}. Expansion stops as soon
 * as a freshly built successor is not a leaf, every successor slot has been
 * visited, or {@code chooseIndex()} returns -1. In the last case all missing
 * successors are created as leaves; if this happens on the very first round
 * the node itself collapses into a leaf. Otherwise {@code indeX} is finally
 * set from {@code chooseLastIndex()}.
 *
 * @param data the instances to build this node from
 * @param leaf whether this node is forced to use the no-split model
 * @throws Exception if something goes wrong
 */
public void buildDecList(Instances data, boolean leaf) throws Exception {
    // Wipe whatever state a previous build left behind.
    m_train = null;
    m_test = null;
    m_isLeaf = false;
    m_isEmpty = false;
    m_sons = null;
    indeX = 0;

    final double totalWeight = data.sumOfWeights();
    final NoSplit fallbackModel = new NoSplit(new Distribution(data));
    if (!leaf) {
        m_localModel = m_toSelectModel.selectModel(data);
    } else {
        m_localModel = fallbackModel;
    }

    // A model with at most one subset makes this node a leaf.
    if (m_localModel.numSubsets() <= 1) {
        m_isLeaf = true;
        if (Utils.eq(totalWeight, 0)) {
            m_isEmpty = true;
        }
        return;
    }

    final Instances[] subsets = m_localModel.split(data);
    // Drop the reference to the unsplit data; only the subsets are needed now.
    data = null;
    m_sons = new ClassifierDecList[m_localModel.numSubsets()];

    int rounds = 0;
    while (true) {
        rounds++;
        final int next = chooseIndex();
        if (next == -1) {
            // Nothing left to expand: fill every open slot with a leaf.
            for (int k = 0; k < m_sons.length; k++) {
                if (m_sons[k] == null) {
                    m_sons[k] = getNewDecList(subsets[k], true);
                }
            }
            if (rounds < 2) {
                // No successor was ever expanded, so turn this node into a leaf.
                m_localModel = fallbackModel;
                m_isLeaf = true;
                m_sons = null;
                if (Utils.eq(totalWeight, 0)) {
                    m_isEmpty = true;
                }
                return;
            }
            break;
        }
        m_sons[next] = getNewDecList(subsets[next], false);
        // Keep going only while slots remain and the newest successor is a leaf.
        if (rounds >= m_sons.length || !m_sons[next].m_isLeaf) {
            break;
        }
    }

    // Choose rule
    indeX = chooseLastIndex();
}
Also used : Instances(weka.core.Instances) Distribution(weka.classifiers.trees.j48.Distribution) NoSplit(weka.classifiers.trees.j48.NoSplit)

Example 5 with Distribution

use of weka.classifiers.trees.j48.Distribution in project umple by umple.

the class ResidualSplit method buildClassifier.

/**
 * Builds the split on the attribute at {@code m_attIndex}.
 * Needs the Z/W values of LogitBoost for the set of instances.
 *
 * <p>A nominal attribute produces one subset per attribute value with the
 * split point fixed at 0.0; any other attribute produces two subsets after
 * {@code getSplitPoint()} has been called. The distribution of the data under
 * this split is stored in {@code m_distribution}.
 *
 * @param data the instances to build the split on
 * @param dataZs the Z values for the instances
 * @param dataWs the W values for the instances
 * @throws Exception if {@code data} contains no instances
 */
public void buildClassifier(Instances data, double[][] dataZs, double[][] dataWs) throws Exception {
    m_numClasses = data.numClasses();
    m_numInstances = data.numInstances();
    if (m_numInstances == 0) {
        throw new Exception("Can't build split on 0 instances");
    }

    // Keep the inputs around; they are stored before getSplitPoint() runs.
    m_data = data;
    m_dataZs = dataZs;
    m_dataWs = dataWs;
    m_attribute = data.attribute(m_attIndex);

    // Fix the number of subsets (and the split point for non-nominal attributes).
    if (!m_attribute.isNominal()) {
        getSplitPoint();
        m_numSubsets = 2;
    } else {
        m_splitPoint = 0.0;
        m_numSubsets = m_attribute.numValues();
    }

    // Distribution of the data under this split.
    m_distribution = new Distribution(data, this);
}
Also used : Distribution(weka.classifiers.trees.j48.Distribution)

Aggregations

Distribution (weka.classifiers.trees.j48.Distribution): 5, NoSplit (weka.classifiers.trees.j48.NoSplit): 4, Instances (weka.core.Instances): 3