Search in sources :

Example 6 with ColumnSample

use of org.knime.base.node.mine.treeensemble2.sample.column.ColumnSample in project knime-core by knime.

the class TreeLearnerClassification method learnSingleTreeRecursive.

/**
 * Learns one classification tree. The root memberships are created from the row sample, the class priors of the
 * root are derived from the target column, and the tree is then grown by {@code buildTreeNode} starting at depth 0.
 *
 * @param exec monitor that is handed on to the node construction
 * @param rd random data source (not read inside this method)
 * @return the tree model wrapping the learned root node
 * @throws CanceledExecutionException declared by the node construction this method delegates to
 */
private TreeModelClassification learnSingleTreeRecursive(final ExecutionMonitor exec, final RandomData rd) throws CanceledExecutionException {
    final TreeData treeData = getData();
    final RowSample rowSample = getRowSampling();
    final TreeEnsembleLearnerConfiguration configuration = getConfig();
    final TreeTargetNominalColumnData target = (TreeTargetNominalColumnData) treeData.getTargetColumn();
    final DataMemberships rootMemberships = new RootDataMemberships(rowSample, treeData, getIndexManager());
    final ClassificationPriors rootPriors = target.getDistribution(rootMemberships, configuration);
    // starts without any forbidden column; the assertion below checks that it is empty again after the build
    final BitSet forbiddenColumns = new BitSet(treeData.getNrAttributes());
    final TreeNodeSignature rootSignature = TreeNodeSignature.ROOT_SIGNATURE;
    final ColumnSample rootColumns = getColSamplingStrategy().getColumnSampleForTreeNode(rootSignature);
    final TreeNodeClassification root = buildTreeNode(exec, 0, rootMemberships, rootColumns, rootSignature, rootPriors, forbiddenColumns);
    assert forbiddenColumns.isEmpty();
    // the root carries the always-true condition instance
    root.setTreeNodeCondition(TreeNodeTrueCondition.INSTANCE);
    return new TreeModelClassification(root);
}
Also used : TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) TreeNodeClassification(org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification) ColumnSample(org.knime.base.node.mine.treeensemble2.sample.column.ColumnSample) BitSet(java.util.BitSet) TreeData(org.knime.base.node.mine.treeensemble2.data.TreeData) RowSample(org.knime.base.node.mine.treeensemble2.sample.row.RowSample) TreeNodeSignature(org.knime.base.node.mine.treeensemble2.model.TreeNodeSignature) TreeTargetNominalColumnData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnData) ClassificationPriors(org.knime.base.node.mine.treeensemble2.data.ClassificationPriors) TreeModelClassification(org.knime.base.node.mine.treeensemble2.model.TreeModelClassification)

Example 7 with ColumnSample

use of org.knime.base.node.mine.treeensemble2.sample.column.ColumnSample in project knime-core by knime.

the class AllColumnSampleStrategyTest method testGetColumnSampleForTreeNodeTest.

/**
 * Checks {@link AllColumnSampleStrategy#getColumnSampleForTreeNode(org.knime.base.node.mine.treeensemble2.model.TreeNodeSignature)}
 * and, through the returned samples, the class {@link AllColumnSample}: for the root signature as well as for its
 * first child the sample has to contain the indices 0 to TREE_DATA_SIZE - 1 in ascending order.
 *
 * @throws Exception on any unexpected failure
 */
@Test
public void testGetColumnSampleForTreeNodeTest() throws Exception {
    final AllColumnSampleStrategy strategy = new AllColumnSampleStrategy(createTreeData());
    final TreeNodeSignatureFactory signatureFactory = createSignatureFactory();
    final TreeNodeSignature rootSignature = signatureFactory.getRootSignature();

    // sample for the root node
    final ColumnSample rootSample = strategy.getColumnSampleForTreeNode(rootSignature);
    assertEquals("Wrong number of columns in sample.", TREE_DATA_SIZE, rootSample.getNumCols());

    // expected content: the identity mapping 0, 1, ..., TREE_DATA_SIZE - 1
    final int[] expectedIndices = new int[TREE_DATA_SIZE];
    int position = 0;
    while (position < expectedIndices.length) {
        expectedIndices[position] = position;
        position++;
    }
    assertArrayEquals(expectedIndices, rootSample.getColumnIndices());

    // sample for the first child of the root
    final TreeNodeSignature childSignature = signatureFactory.getChildSignatureFor(rootSignature, (byte) 0);
    final ColumnSample childSample = strategy.getColumnSampleForTreeNode(childSignature);
    assertEquals("Wrong number of columns in sample.", TREE_DATA_SIZE, childSample.getNumCols());
    assertArrayEquals(expectedIndices, childSample.getColumnIndices());
}
Also used : TreeNodeSignature(org.knime.base.node.mine.treeensemble2.model.TreeNodeSignature) TreeNodeSignatureFactory(org.knime.base.node.mine.treeensemble2.learner.TreeNodeSignatureFactory) Test(org.junit.Test)

Example 8 with ColumnSample

use of org.knime.base.node.mine.treeensemble2.sample.column.ColumnSample in project knime-core by knime.

the class RFSubsetColumnSampleStrategyTest method testGetColumnSampleForTreeNode.

/**
 * Checks {@link RFSubsetColumnSampleStrategy#getColumnSampleForTreeNode(org.knime.base.node.mine.treeensemble2.model.TreeNodeSignature)}:
 * the samples drawn for the root and for its children 0 and 1 must each contain 5 columns, and the three samples
 * must not all be identical.
 *
 * @throws Exception on any unexpected failure
 */
@Test
public void testGetColumnSampleForTreeNode() throws Exception {
    final RFSubsetColumnSampleStrategy subsetStrategy = new RFSubsetColumnSampleStrategy(createTreeData(), RD, 5);
    final TreeNodeSignatureFactory signatureFactory = createSignatureFactory();
    final TreeNodeSignature rootSignature = signatureFactory.getRootSignature();

    final ColumnSample rootSample = subsetStrategy.getColumnSampleForTreeNode(rootSignature);
    assertEquals("Wrong number of columns in sample.", 5, rootSample.getNumCols());
    final int[] rootIndices = rootSample.getColumnIndices();

    final ColumnSample firstChildSample = subsetStrategy.getColumnSampleForTreeNode(signatureFactory.getChildSignatureFor(rootSignature, (byte) 0));
    assertEquals("Wrong number of columns in sample.", 5, firstChildSample.getNumCols());
    final int[] firstChildIndices = firstChildSample.getColumnIndices();

    final ColumnSample secondChildSample = subsetStrategy.getColumnSampleForTreeNode(signatureFactory.getChildSignatureFor(rootSignature, (byte) 1));
    assertEquals("Wrong number of columns in sample.", 5, secondChildSample.getNumCols());
    final int[] secondChildIndices = secondChildSample.getColumnIndices();

    // all three index arrays must have the same length before they are compared element-wise
    assertEquals("sample sizes differ.", rootIndices.length, firstChildIndices.length);
    assertEquals("sample sizes differ.", rootIndices.length, secondChildIndices.length);
    assertEquals("sample sizes differ.", firstChildIndices.length, secondChildIndices.length);

    // stop at the first position where the three samples disagree
    boolean allIdentical = true;
    for (int pos = 0; allIdentical && pos < rootIndices.length; pos++) {
        allIdentical = rootIndices[pos] == firstChildIndices[pos] && rootIndices[pos] == secondChildIndices[pos];
    }
    assertFalse("It is very unlikely that we get 3 times the same column sample.", allIdentical);
}
Also used : TreeNodeSignature(org.knime.base.node.mine.treeensemble2.model.TreeNodeSignature) TreeNodeSignatureFactory(org.knime.base.node.mine.treeensemble2.learner.TreeNodeSignatureFactory) Test(org.junit.Test)

Example 9 with ColumnSample

use of org.knime.base.node.mine.treeensemble2.sample.column.ColumnSample in project knime-core by knime.

the class Surrogates method learnSurrogates.

/**
 * Looks for surrogate splits among the columns of <b>colSample</b> other than the column of <b>bestSplit</b>.
 * The direction (left or right) that <b>bestSplit</b> assigns to each row serves as the new target for that search.
 *
 * @param dataMemberships provides information which rows are in the current branch
 * @param bestSplit the best split for the current node; it is always the first candidate handed on
 * @param oldData the TreeData object that contains all attributes and the target
 * @param colSample provides information which columns are to be considered as surrogates
 * @param config the configuration
 * @param rd random data source passed to the split calculation of every considered column
 * @return a SurrogateSplit that contains the conditions for both children
 */
public static SurrogateSplit learnSurrogates(final DataMemberships dataMemberships, final SplitCandidate bestSplit, final TreeData oldData, final ColumnSample colSample, final TreeEnsembleLearnerConfiguration config, final RandomData rd) {
    final TreeAttributeColumnData primaryColumn = bestSplit.getColumnData();
    final TreeNodeCondition[] primaryConditions = bestSplit.getChildConditions();

    // rows that the best split sends to the first (left) and to the second (right) child
    final BitSet leftRows = primaryColumn.updateChildMemberships(primaryConditions[0], dataMemberships);
    final BitSet rightRows = primaryColumn.updateChildMemberships(primaryConditions[1], dataMemberships);

    // restrict the calculation to the rows that the best split assigns to one of the two children
    final BitSet assignedRows = (BitSet) leftRows.clone();
    assignedRows.or(rightRows);
    final DataMemberships assignedMemberships = dataMemberships.createChildMemberships(assignedRows);

    final TreeTargetNominalColumnData directionTarget = createNewTargetColumn(leftRows, rightRows, oldData.getNrRows(), assignedMemberships);
    final ClassificationPriors directionPriors = directionTarget.getDistribution(assignedMemberships, config);

    // the best split goes first, followed by the best split of every other column that yields one
    final ArrayList<SplitCandidate> orderedCandidates = new ArrayList<SplitCandidate>();
    orderedCandidates.add(bestSplit);
    for (TreeAttributeColumnData column : colSample) {
        if (column == primaryColumn) {
            continue;
        }
        final SplitCandidate surrogate = column.calcBestSplitClassification(assignedMemberships, directionPriors, directionTarget, rd);
        if (surrogate != null) {
            orderedCandidates.add(surrogate);
        }
    }
    final SplitCandidate[] candidatesWithBestAtHead = orderedCandidates.toArray(new SplitCandidate[orderedCandidates.size()]);
    return calculateSurrogates(dataMemberships, candidatesWithBestAtHead);
}
Also used : TreeAttributeColumnData(org.knime.base.node.mine.treeensemble2.data.TreeAttributeColumnData) BitSet(java.util.BitSet) ArrayList(java.util.ArrayList) TreeNodeCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeCondition) DataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships) TreeTargetNominalColumnData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnData) ClassificationPriors(org.knime.base.node.mine.treeensemble2.data.ClassificationPriors)

Example 10 with ColumnSample

use of org.knime.base.node.mine.treeensemble2.sample.column.ColumnSample in project knime-core by knime.

the class TreeLearnerClassification method buildTreeNode.

/**
 * Recursively builds the tree node identified by <b>treeNodeSignature</b> together with all nodes below it.
 * Depending on the configured missing value handling the node is split either with surrogates (always two
 * children) or with the single best split (one child per child condition). If no split is found, a node without
 * children is returned.
 *
 * @param exec monitor that is checked for cancellation once per node
 * @param currentDepth depth of the node to build; children are built with <b>currentDepth + 1</b>
 * @param dataMemberships the rows that belong to this node
 * @param columnSample the columns that are considered for splitting this node
 * @param treeNodeSignature signature of the node to build
 * @param targetPriors class priors of the target for the rows of this node
 * @param forbiddenColumnSet attribute indices handed to the split search and to all recursive calls; in the
 *            non-surrogate case the bit of the split attribute is set while the children are built if the column
 *            cannot be split further, and cleared again afterwards
 * @return the node for the given signature, with or without children
 * @throws CanceledExecutionException declared for the cancellation check and the recursive calls
 */
private TreeNodeClassification buildTreeNode(final ExecutionMonitor exec, final int currentDepth, final DataMemberships dataMemberships, final ColumnSample columnSample, final TreeNodeSignature treeNodeSignature, final ClassificationPriors targetPriors, final BitSet forbiddenColumnSet) throws CanceledExecutionException {
    final TreeData data = getData();
    final TreeEnsembleLearnerConfiguration config = getConfig();
    // presumably throws CanceledExecutionException if the execution was canceled - confirm against ExecutionMonitor
    exec.checkCanceled();
    final boolean useSurrogates = getConfig().getMissingValueHandling() == MissingValueHandling.Surrogate;
    TreeNodeCondition[] childConditions;
    // only ever set to true in the non-surrogate branch; controls the reset of forbiddenColumnSet at the end
    boolean markAttributeAsForbidden = false;
    final TreeTargetNominalColumnData targetColumn = (TreeTargetNominalColumnData) data.getTargetColumn();
    TreeNodeClassification[] childNodes;
    // stays -1 in the surrogate branch, where it is never read
    int attributeIndex = -1;
    if (useSurrogates) {
        SplitCandidate[] candidates = findBestSplitsClassification(currentDepth, dataMemberships, columnSample, treeNodeSignature, targetPriors, forbiddenColumnSet);
        if (candidates == null) {
            // no split candidates: return a node without children
            return new TreeNodeClassification(treeNodeSignature, targetPriors, config);
        }
        // candidates[0] is used as the best split for which the surrogates are learned
        SurrogateSplit surrogateSplit = Surrogates.learnSurrogates(dataMemberships, candidates[0], data, columnSample, config, getRandomData());
        childConditions = surrogateSplit.getChildConditions();
        BitSet[] childMarkers = surrogateSplit.getChildMarkers();
        // the surrogate case always creates exactly two children
        childNodes = new TreeNodeClassification[2];
        for (int i = 0; i < 2; i++) {
            DataMemberships childMemberships = dataMemberships.createChildMemberships(childMarkers[i]);
            ClassificationPriors childTargetPriors = targetColumn.getDistribution(childMemberships, config);
            // NOTE(review): the child signature comes from the signature factory here but from
            // treeNodeSignature.createChildSignature in the non-surrogate branch - confirm both are equivalent
            TreeNodeSignature childSignature = getSignatureFactory().getChildSignatureFor(treeNodeSignature, (byte) i);
            // every child gets its own column sample
            ColumnSample childColumnSample = getColSamplingStrategy().getColumnSampleForTreeNode(childSignature);
            childNodes[i] = buildTreeNode(exec, currentDepth + 1, childMemberships, childColumnSample, childSignature, childTargetPriors, forbiddenColumnSet);
            childNodes[i].setTreeNodeCondition(childConditions[i]);
        }
    } else {
        // non-surrogate case: split on the single best candidate
        SplitCandidate bestSplit = findBestSplitClassification(currentDepth, dataMemberships, columnSample, treeNodeSignature, targetPriors, forbiddenColumnSet);
        if (bestSplit == null) {
            // no split found: return a node without children
            return new TreeNodeClassification(treeNodeSignature, targetPriors, config);
        }
        TreeAttributeColumnData splitColumn = bestSplit.getColumnData();
        attributeIndex = splitColumn.getMetaData().getAttributeIndex();
        // a column that cannot be split further is marked as forbidden while the children are built
        markAttributeAsForbidden = !bestSplit.canColumnBeSplitFurther();
        forbiddenColumnSet.set(attributeIndex, markAttributeAsForbidden);
        childConditions = bestSplit.getChildConditions();
        childNodes = new TreeNodeClassification[childConditions.length];
        // NOTE(review): the limit checked here is Short.MAX_VALUE, but the child index is cast to byte below - confirm
        if (childConditions.length > Short.MAX_VALUE) {
            throw new RuntimeException("Too many children when splitting " + "attribute " + bestSplit.getColumnData() + " (maximum supported: " + Short.MAX_VALUE + "): " + childConditions.length);
        }
        // Build child nodes
        for (int i = 0; i < childConditions.length; i++) {
            DataMemberships childMemberships = null;
            TreeNodeCondition cond = childConditions[i];
            // rows of the child are determined by the split column from the child's condition
            childMemberships = dataMemberships.createChildMemberships(splitColumn.updateChildMemberships(cond, dataMemberships));
            ClassificationPriors childTargetPriors = targetColumn.getDistribution(childMemberships, config);
            TreeNodeSignature childSignature = treeNodeSignature.createChildSignature((byte) i);
            // every child gets its own column sample
            ColumnSample childColumnSample = getColSamplingStrategy().getColumnSampleForTreeNode(childSignature);
            childNodes[i] = buildTreeNode(exec, currentDepth + 1, childMemberships, childColumnSample, childSignature, childTargetPriors, forbiddenColumnSet);
            childNodes[i].setTreeNodeCondition(cond);
        }
    }
    // clear the bit that was set for this node's split attribute now that all children have been built
    if (markAttributeAsForbidden) {
        forbiddenColumnSet.set(attributeIndex, false);
    }
    return new TreeNodeClassification(treeNodeSignature, targetPriors, childNodes, getConfig());
}
Also used : TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) TreeNodeClassification(org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification) TreeAttributeColumnData(org.knime.base.node.mine.treeensemble2.data.TreeAttributeColumnData) ColumnSample(org.knime.base.node.mine.treeensemble2.sample.column.ColumnSample) BitSet(java.util.BitSet) TreeNodeSignature(org.knime.base.node.mine.treeensemble2.model.TreeNodeSignature) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) DataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships) TreeData(org.knime.base.node.mine.treeensemble2.data.TreeData) TreeNodeCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeCondition) TreeTargetNominalColumnData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnData) ClassificationPriors(org.knime.base.node.mine.treeensemble2.data.ClassificationPriors)

Aggregations

TreeAttributeColumnData (org.knime.base.node.mine.treeensemble2.data.TreeAttributeColumnData): 8; TreeData (org.knime.base.node.mine.treeensemble2.data.TreeData): 8; TreeNodeSignature (org.knime.base.node.mine.treeensemble2.model.TreeNodeSignature): 8; TreeEnsembleLearnerConfiguration (org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration): 8; BitSet (java.util.BitSet): 5; RandomData (org.apache.commons.math.random.RandomData): 5; TreeTargetNominalColumnData (org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnData): 5; ColumnSample (org.knime.base.node.mine.treeensemble2.sample.column.ColumnSample): 5; TreeTargetNumericColumnData (org.knime.base.node.mine.treeensemble2.data.TreeTargetNumericColumnData): 4; DataMemberships (org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships): 4; RootDataMemberships (org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships): 4; Test (org.junit.Test): 3; ClassificationPriors (org.knime.base.node.mine.treeensemble2.data.ClassificationPriors): 3; TreeNodeSignatureFactory (org.knime.base.node.mine.treeensemble2.learner.TreeNodeSignatureFactory): 3; TreeNodeCondition (org.knime.base.node.mine.treeensemble2.model.TreeNodeCondition): 3; ArrayList (java.util.ArrayList): 2; Comparator (java.util.Comparator): 2; RegressionPriors (org.knime.base.node.mine.treeensemble2.data.RegressionPriors): 2; TreeNodeClassification (org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification): 2; TreeNodeRegression (org.knime.base.node.mine.treeensemble2.model.TreeNodeRegression): 2