Search in sources :

Example 1 with NoSplit

use of weka.classifiers.trees.j48.NoSplit in project umple by umple.

the class C45PruneableDecList method buildDecList.

/**
 * Builds the partial tree without a hold-out set.
 *
 * <p>The node state is reset first. The local model is a {@link NoSplit}
 * when {@code leaf} is set, otherwise whatever the model selector returns.
 * If that model produces more than one subset, successors are expanded one
 * at a time for as long as each newly expanded successor turns out to be a
 * leaf.
 *
 * @param data the training instances for this node
 * @param leaf whether this node has to be built as a leaf
 * @throws Exception if something goes wrong
 */
@Override
public void buildDecList(Instances data, boolean leaf) throws Exception {
    // Reset the node before anything else is computed.
    m_train = null;
    m_test = null;
    m_isLeaf = false;
    m_isEmpty = false;
    m_sons = null;
    indeX = 0;

    final double totalWeight = data.sumOfWeights();
    final NoSplit leafModel = new NoSplit(new Distribution(data));
    m_localModel = leaf ? leafModel : m_toSelectModel.selectModel(data);

    // A model with at most one subset does not split: this node is a leaf.
    if (m_localModel.numSubsets() <= 1) {
        m_isLeaf = true;
        if (Utils.eq(totalWeight, 0)) {
            m_isEmpty = true;
        }
        return;
    }

    final Instances[] subsets = m_localModel.split(data);
    // Release the local reference to the unsplit data.
    data = null;
    m_sons = new ClassifierDecList[m_localModel.numSubsets()];

    // 'round' counts the expansion attempts, starting at one.
    for (int round = 1;; round++) {
        final int chosen = chooseIndex();
        if (chosen == -1) {
            // No index was chosen: build every missing successor as a leaf.
            for (int k = 0; k < m_sons.length; k++) {
                if (m_sons[k] == null) {
                    m_sons[k] = getNewDecList(subsets[k], true);
                }
            }
            if (round < 2) {
                // Happened on the very first attempt: collapse this node.
                m_localModel = leafModel;
                m_isLeaf = true;
                m_sons = null;
                if (Utils.eq(totalWeight, 0)) {
                    m_isEmpty = true;
                }
                return;
            }
            break;
        }
        m_sons[chosen] = getNewDecList(subsets[chosen], false);
        // Stop once every slot has had an attempt or the new successor is not a leaf.
        if (round >= m_sons.length || !m_sons[chosen].m_isLeaf) {
            break;
        }
    }

    // Check if all successors are leaves
    boolean allLeaves = true;
    for (ClassifierDecList son : m_sons) {
        if (son == null || !son.m_isLeaf) {
            allLeaves = false;
            break;
        }
    }

    if (allLeaves) {
        pruneEnd();
    }
    // pruneEnd() may have turned this node into a leaf; only then is no index needed.
    if (!allLeaves || !m_isLeaf) {
        indeX = chooseLastIndex();
    }
}
Also used : Instances(weka.core.Instances) Distribution(weka.classifiers.trees.j48.Distribution) NoSplit(weka.classifiers.trees.j48.NoSplit)

Example 2 with NoSplit

use of weka.classifiers.trees.j48.NoSplit in project umple by umple.

the class PruneableDecList method pruneEnd.

/**
 * Prunes the end of the rule.
 *
 * <p>When the error of treating this node as a leaf is smaller than or equal
 * to the error of the subtree, the node is turned into a leaf: its successors
 * are dropped and its local model is replaced by a {@link NoSplit} built from
 * the current model's distribution.
 *
 * @throws Exception if one of the error computations fails
 */
protected void pruneEnd() throws Exception {
    final double treeErrors = errorsForTree();
    final double leafErrors = errorsForLeaf();
    if (!Utils.smOrEq(leafErrors, treeErrors)) {
        // The subtree is strictly better: keep it.
        return;
    }
    m_isLeaf = true;
    m_sons = null;
    m_localModel = new NoSplit(localModel().distribution());
}
Also used : NoSplit(weka.classifiers.trees.j48.NoSplit)

Example 3 with NoSplit

use of weka.classifiers.trees.j48.NoSplit in project umple by umple.

the class PruneableDecList method buildDecList.

/**
 * Builds the partial tree with a hold-out set.
 *
 * <p>The node state is reset first. The local model is a {@link NoSplit}
 * when {@code leaf} is set, otherwise whatever the model selector returns
 * for the training and test data. If that model produces more than one
 * subset, successors are expanded one at a time for as long as each newly
 * expanded successor turns out to be a leaf.
 *
 * @param train the training instances for this node
 * @param test the hold-out instances for this node
 * @param leaf whether this node has to be built as a leaf
 * @throws Exception if something goes wrong
 */
public void buildDecList(Instances train, Instances test, boolean leaf) throws Exception {
    // Reset the node before anything else is computed.
    m_train = null;
    m_isLeaf = false;
    m_isEmpty = false;
    m_sons = null;
    indeX = 0;

    final double totalWeight = train.sumOfWeights();
    final NoSplit leafModel = new NoSplit(new Distribution(train));
    m_localModel = leaf ? leafModel : m_toSelectModel.selectModel(train, test);
    m_test = new Distribution(test, m_localModel);

    // A model with at most one subset does not split: this node is a leaf.
    if (m_localModel.numSubsets() <= 1) {
        m_isLeaf = true;
        if (Utils.eq(totalWeight, 0)) {
            m_isEmpty = true;
        }
        return;
    }

    final Instances[] trainSubsets = m_localModel.split(train);
    final Instances[] testSubsets = m_localModel.split(test);
    // Release the local references to the unsplit data.
    train = null;
    test = null;
    m_sons = new ClassifierDecList[m_localModel.numSubsets()];

    // 'round' counts the expansion attempts, starting at one.
    for (int round = 1;; round++) {
        final int chosen = chooseIndex();
        if (chosen == -1) {
            // No index was chosen: build every missing successor as a leaf.
            for (int k = 0; k < m_sons.length; k++) {
                if (m_sons[k] == null) {
                    m_sons[k] = getNewDecList(trainSubsets[k], testSubsets[k], true);
                }
            }
            if (round < 2) {
                // Happened on the very first attempt: collapse this node.
                m_localModel = leafModel;
                m_isLeaf = true;
                m_sons = null;
                if (Utils.eq(totalWeight, 0)) {
                    m_isEmpty = true;
                }
                return;
            }
            break;
        }
        m_sons[chosen] = getNewDecList(trainSubsets[chosen], testSubsets[chosen], false);
        // Stop once every slot has had an attempt or the new successor is not a leaf.
        if (round >= m_sons.length || !m_sons[chosen].m_isLeaf) {
            break;
        }
    }

    // Check if all successors are leaves
    boolean allLeaves = true;
    for (ClassifierDecList son : m_sons) {
        if (son == null || !son.m_isLeaf) {
            allLeaves = false;
            break;
        }
    }

    if (allLeaves) {
        pruneEnd();
    }
    // pruneEnd() may have turned this node into a leaf; only then is no index needed.
    if (!allLeaves || !m_isLeaf) {
        indeX = chooseLastIndex();
    }
}
Also used : Instances(weka.core.Instances) Distribution(weka.classifiers.trees.j48.Distribution) NoSplit(weka.classifiers.trees.j48.NoSplit)

Example 4 with NoSplit

use of weka.classifiers.trees.j48.NoSplit in project umple by umple.

the class ResidualModelSelection method selectModel.

/**
 * Selects a split based on residuals for the given dataset.
 *
 * <p>Every non-class attribute is tried as a {@link ResidualSplit}; among the
 * candidates that pass {@code checkModel}, the one with the largest entropy
 * gain wins. A {@link NoSplit} is returned when the dataset has fewer
 * instances than the configured minimum or when the best gain stays below
 * the configured minimum information gain.
 *
 * @param data the instances to find a split for
 * @param dataZs handed unchanged to {@code ResidualSplit.buildClassifier}
 * @param dataWs handed unchanged to {@code ResidualSplit.buildClassifier}
 * @return the chosen split model, or a {@link NoSplit}
 * @throws Exception if the data has fewer than two attributes, or if building
 *         a candidate split fails
 */
public final ClassifierSplitModel selectModel(Instances data, double[][] dataZs, double[][] dataWs) throws Exception {
    final int attributeCount = data.numAttributes();
    if (attributeCount < 2) {
        throw new Exception("Can't select Model without non-class attribute");
    }
    if (data.numInstances() < m_minNumInstances) {
        return new NoSplit(new Distribution(data));
    }

    double topGain = -Double.MAX_VALUE;
    int topAttribute = -1;

    // Try a split on every attribute except the class attribute.
    for (int attr = 0; attr < attributeCount; attr++) {
        if (attr == data.classIndex()) {
            continue;
        }
        final ResidualSplit candidate = new ResidualSplit(attr);
        candidate.buildClassifier(data, dataZs, dataWs);
        if (!candidate.checkModel(m_minNumInstances)) {
            continue;
        }
        final double candidateGain = candidate.entropyGain();
        if (candidateGain > topGain) {
            topGain = candidateGain;
            topAttribute = attr;
        }
    }

    if (topGain >= m_minInfoGain) {
        // Rebuild the winning split and hand it back.
        final ResidualSplit winner = new ResidualSplit(topAttribute);
        winner.buildClassifier(data, dataZs, dataWs);
        return winner;
    }
    // No candidate reached the required information gain.
    return new NoSplit(new Distribution(data));
}
Also used : Distribution(weka.classifiers.trees.j48.Distribution) NoSplit(weka.classifiers.trees.j48.NoSplit)

Example 5 with NoSplit

use of weka.classifiers.trees.j48.NoSplit in project umple by umple.

the class C45PruneableDecList method pruneEnd.

/**
 * Prunes the end of the rule.
 *
 * <p>When the estimated error of treating this node as a leaf does not exceed
 * the estimated error of the subtree plus 0.1, the node is turned into a
 * leaf: its successors are dropped and its local model is replaced by a
 * {@link NoSplit} built from the current model's distribution.
 */
protected void pruneEnd() {
    final double treeErrors = getEstimatedErrorsForTree();
    final double leafErrors = getEstimatedErrorsForLeaf();
    // +0.1 as in C4.5
    if (!Utils.smOrEq(leafErrors, treeErrors + 0.1)) {
        return;
    }
    m_isLeaf = true;
    m_sons = null;
    m_localModel = new NoSplit(localModel().distribution());
}
Also used : NoSplit(weka.classifiers.trees.j48.NoSplit)

Aggregations

NoSplit (weka.classifiers.trees.j48.NoSplit): 6 · Distribution (weka.classifiers.trees.j48.Distribution): 4 · Instances (weka.core.Instances): 3