Search in sources :

Example 51 with TreeEnsembleLearnerConfiguration

use of org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration in project knime-core by knime.

the class TreeNominalColumnDataTest method testCalcBestSplitClassificationMultiwayXGBoostMissingValueHandling.

/**
 * Checks the XGBoost missing value handling of multiway nominal splits for classification. Two scenarios are
 * covered: training data without any missing value and training data that contains a missing value.
 *
 * @throws Exception propagated from the test utilities or the code under test
 */
@Test
public void testCalcBestSplitClassificationMultiwayXGBoostMissingValueHandling() throws Exception {
    final TreeEnsembleLearnerConfiguration cfg = createConfig(false);
    cfg.setUseBinaryNominalSplits(false);
    cfg.setMissingValueHandling(MissingValueHandling.XGBoost);
    final TestDataGenerator generator = new TestDataGenerator(cfg);
    final RandomData randomData = cfg.createRandomData();
    // both scenarios expect one child per nominal value, in this order
    final String[] expectedValues = {"a", "b", "c"};

    // Scenario 1: no missing values in the training data
    TreeNominalColumnData attribute =
        generator.createNominalAttributeColumn("a, a, a, b, b, b, b, c, c", "noMissings", 0);
    TreeTargetNominalColumnData target =
        TestDataGenerator.createNominalTargetColumn("A, B, B, C, C, C, B, A, B");
    DataMemberships memberships = createMockDataMemberships(target.getNrRows());
    SplitCandidate candidate = attribute.calcBestSplitClassification(memberships,
        target.getDistribution(memberships, cfg), target, randomData);
    assertNotNull("There is a possible split.", candidate);
    assertEquals("Incorrect gain.", 0.216, candidate.getGainValue(), 1e-3);
    assertThat(candidate, instanceOf(NominalMultiwaySplitCandidate.class));
    NominalMultiwaySplitCandidate multiway = (NominalMultiwaySplitCandidate) candidate;
    assertTrue("No missing values in the column.", multiway.getMissedRows().isEmpty());
    TreeNodeNominalCondition[] children = multiway.getChildConditions();
    assertEquals("Wrong number of child conditions.", 3, children.length);
    for (int i = 0; i < expectedValues.length; i++) {
        assertEquals("Wrong value in child condition.", expectedValues[i], children[i].getValue());
    }
    // only the child for "b" (index 1) is expected to accept missing values
    for (int i = 0; i < children.length; i++) {
        final boolean acceptsMissings = children[i].acceptsMissings();
        if (i == 1) {
            assertTrue("Missing values should be sent to the majority child (i.e. b)", acceptsMissings);
        } else {
            assertFalse("Missing values should be sent to the majority child (i.e. b)", acceptsMissings);
        }
    }

    // Scenario 2: the training data contains a missing value
    attribute = generator.createNominalAttributeColumn("a, a, a, b, b, b, b, c, c, ?", "missings", 0);
    target = TestDataGenerator.createNominalTargetColumn("A, B, B, C, C, C, B, A, B, C");
    memberships = createMockDataMemberships(target.getNrRows());
    candidate = attribute.calcBestSplitClassification(memberships, target.getDistribution(memberships, cfg),
        target, randomData);
    assertNotNull("There is a possible split.", candidate);
    assertEquals("Incorrect gain.", 0.2467, candidate.getGainValue(), 1e-3);
    assertThat(candidate, instanceOf(NominalMultiwaySplitCandidate.class));
    multiway = (NominalMultiwaySplitCandidate) candidate;
    assertTrue("Split should handle missing values.", multiway.getMissedRows().isEmpty());
    children = multiway.getChildConditions();
    assertEquals("Wrong number of child conditions.", 3, children.length);
    for (int i = 0; i < expectedValues.length; i++) {
        assertEquals("Wrong value in child condition.", expectedValues[i], children[i].getValue());
    }
    // again only the child for "b" (index 1) is expected to accept missing values
    for (int i = 0; i < children.length; i++) {
        final boolean acceptsMissings = children[i].acceptsMissings();
        if (i == 1) {
            assertTrue("Missing values should be sent to b", acceptsMissings);
        } else {
            assertFalse("Missing values should be sent to b", acceptsMissings);
        }
    }
}
Also used : TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) RandomData(org.apache.commons.math.random.RandomData) TreeNodeNominalCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalCondition) NominalMultiwaySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalMultiwaySplitCandidate) NominalMultiwaySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalMultiwaySplitCandidate) NominalBinarySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalBinarySplitCandidate) SplitCandidate(org.knime.base.node.mine.treeensemble2.learner.SplitCandidate) DataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) Test(org.junit.Test)

Example 52 with TreeEnsembleLearnerConfiguration

use of org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration in project knime-core by knime.

the class TreeNominalColumnDataTest method testCalcBestSplitClassificationBinaryPCA.

/**
 * Exercises
 * {@link TreeNominalColumnData#calcBestSplitClassification(DataMemberships, ClassificationPriors, TreeTargetNominalColumnData, RandomData)}
 * with binary nominal splits. The test data has more than two classes, so the PCA based algorithm is the one
 * being used.
 *
 * @throws Exception propagated from the test utilities or the code under test
 */
@Test
public void testCalcBestSplitClassificationBinaryPCA() throws Exception {
    final TreeEnsembleLearnerConfiguration cfg = createConfig(false);
    final Pair<TreeNominalColumnData, TreeTargetNominalColumnData> testData = createPCATestData(cfg);
    final TreeNominalColumnData attribute = testData.getFirst();
    final TreeTargetNominalColumnData target = testData.getSecond();
    final TreeData tree = createTreeData(testData);
    // the expected gain below is only valid for the Gini criterion
    assertEquals(SplitCriterion.Gini, cfg.getSplitCriterion());

    // every row takes part with unit weight
    final double[] unitWeights = new double[target.getNrRows()];
    for (int row = 0; row < unitWeights.length; row++) {
        unitWeights[row] = 1.0;
    }
    final IDataIndexManager indices = new DefaultDataIndexManager(tree);
    final DataMemberships rootMemberships = new RootDataMemberships(unitWeights, tree, indices);
    final ClassificationPriors targetPriors = target.getDistribution(unitWeights, cfg);

    final SplitCandidate candidate =
        attribute.calcBestSplitClassification(rootMemberships, targetPriors, target, null);
    assertNotNull(candidate);
    assertThat(candidate, instanceOf(NominalBinarySplitCandidate.class));
    assertTrue(candidate.canColumnBeSplitFurther());
    assertEquals(0.0659, candidate.getGainValue(), 0.0001);

    final TreeNodeNominalBinaryCondition[] children =
        ((NominalBinarySplitCandidate) candidate).getChildConditions();
    assertEquals(2, children.length);
    // both children refer to the value "E"; they differ only in their set logic
    final TreeNodeNominalBinaryCondition notInChild = children[0];
    final TreeNodeNominalBinaryCondition inChild = children[1];
    assertArrayEquals(new String[] { "E" }, notInChild.getValues());
    assertArrayEquals(new String[] { "E" }, inChild.getValues());
    assertEquals(SetLogic.IS_NOT_IN, notInChild.getSetLogic());
    assertEquals(SetLogic.IS_IN, inChild.getSetLogic());
}
Also used : TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) IDataIndexManager(org.knime.base.node.mine.treeensemble2.data.memberships.IDataIndexManager) NominalMultiwaySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalMultiwaySplitCandidate) NominalBinarySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalBinarySplitCandidate) SplitCandidate(org.knime.base.node.mine.treeensemble2.learner.SplitCandidate) DefaultDataIndexManager(org.knime.base.node.mine.treeensemble2.data.memberships.DefaultDataIndexManager) DataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) TreeNodeNominalBinaryCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalBinaryCondition) NominalBinarySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalBinarySplitCandidate) Test(org.junit.Test)

Example 53 with TreeEnsembleLearnerConfiguration

use of org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration in project knime-core by knime.

the class TreeNominalColumnDataTest method testUpdateChildMemberships.

/**
 * Verifies
 * {@link TreeNominalColumnData#updateChildMemberships(org.knime.base.node.mine.treeensemble2.model.TreeNodeCondition, DataMemberships)}
 * for binary and multiway nominal conditions, each with and without acceptance of missing values.
 *
 * @throws Exception propagated from the test utilities or the code under test
 */
@Test
public void testUpdateChildMemberships() throws Exception {
    // neither regression vs. classification nor binary vs. multiway splits matter for this test
    final TreeEnsembleLearnerConfiguration cfg = createConfig(true);
    final TestDataGenerator generator = new TestDataGenerator(cfg);
    // rows 0-3 hold "A", rows 4-6 "B", rows 7-9 "C" and rows 10-11 are missing
    final TreeNominalColumnData column =
        generator.createNominalAttributeColumn("A, A, A, A, B, B, B, C, C, C, ?, ?", "test-col", 0);
    final int rowCount = 12;
    final int[] rowIndices = new int[rowCount];
    final double[] rowWeights = new double[rowIndices.length];
    for (int row = 0; row < rowIndices.length; row++) {
        rowIndices[row] = row;
        rowWeights[row] = 1.0;
    }
    final DataMemberships memberships = new MockDataColMem(rowIndices, rowIndices, rowWeights);

    // --- binary conditions; judging by the expectations, mask 2 selects "B" and mask 5 selects "A" and "C" ---

    // mask 2, first flag true, missings rejected
    TreeNodeNominalBinaryCondition binary =
        new TreeNodeNominalBinaryCondition(column.getMetaData(), BigInteger.valueOf(2), true, false);
    BitSet expectedRows = new BitSet(rowCount);
    expectedRows.set(4, 7);
    BitSet actualRows = column.updateChildMemberships(binary, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);

    // mask 2, first flag true, missings accepted
    binary = new TreeNodeNominalBinaryCondition(column.getMetaData(), BigInteger.valueOf(2), true, true);
    expectedRows = new BitSet(rowCount);
    expectedRows.set(4, 7);
    expectedRows.set(10, 12);
    actualRows = column.updateChildMemberships(binary, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);

    // mask 2, first flag false, missings rejected
    binary = new TreeNodeNominalBinaryCondition(column.getMetaData(), BigInteger.valueOf(2), false, false);
    expectedRows = new BitSet(rowCount);
    expectedRows.set(0, 4);
    expectedRows.set(7, 10);
    actualRows = column.updateChildMemberships(binary, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);

    // mask 2, first flag false, missings accepted
    binary = new TreeNodeNominalBinaryCondition(column.getMetaData(), BigInteger.valueOf(2), false, true);
    expectedRows = new BitSet(rowCount);
    expectedRows.set(0, 4);
    expectedRows.set(7, 12);
    actualRows = column.updateChildMemberships(binary, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);

    // mask 5, first flag true, missings rejected
    binary = new TreeNodeNominalBinaryCondition(column.getMetaData(), BigInteger.valueOf(5), true, false);
    expectedRows = new BitSet(rowCount);
    expectedRows.set(0, 4);
    expectedRows.set(7, 10);
    actualRows = column.updateChildMemberships(binary, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);

    // mask 5, first flag true, missings accepted
    binary = new TreeNodeNominalBinaryCondition(column.getMetaData(), BigInteger.valueOf(5), true, true);
    expectedRows = new BitSet(rowCount);
    expectedRows.set(0, 4);
    expectedRows.set(7, 12);
    actualRows = column.updateChildMemberships(binary, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);

    // --- multiway conditions; judging by the expectations, index 0 selects "A" and index 2 selects "C" ---

    // index 0, missings rejected
    TreeNodeNominalCondition multiway = new TreeNodeNominalCondition(column.getMetaData(), 0, false);
    expectedRows = new BitSet(rowCount);
    expectedRows.set(0, 4);
    actualRows = column.updateChildMemberships(multiway, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);

    // index 0, missings accepted
    multiway = new TreeNodeNominalCondition(column.getMetaData(), 0, true);
    expectedRows = new BitSet(rowCount);
    expectedRows.set(0, 4);
    expectedRows.set(10, 12);
    actualRows = column.updateChildMemberships(multiway, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);

    // index 2, missings rejected
    multiway = new TreeNodeNominalCondition(column.getMetaData(), 2, false);
    expectedRows = new BitSet(rowCount);
    expectedRows.set(7, 10);
    actualRows = column.updateChildMemberships(multiway, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);

    // index 2, missings accepted
    multiway = new TreeNodeNominalCondition(column.getMetaData(), 2, true);
    expectedRows = new BitSet(rowCount);
    expectedRows.set(7, 12);
    actualRows = column.updateChildMemberships(multiway, memberships);
    assertEquals("The produced BitSet is incorrect.", expectedRows, actualRows);
}
Also used : TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) TreeNodeNominalCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalCondition) TreeNodeNominalBinaryCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalBinaryCondition) BitSet(java.util.BitSet) DataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) Test(org.junit.Test)

Example 54 with TreeEnsembleLearnerConfiguration

use of org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration in project knime-core by knime.

the class TreeTargetNominalColumnDataTest method testGetDistribution.

/**
 * Verifies both overloads of the distribution calculation,
 * {@link TreeTargetNominalColumnData#getDistribution(DataMemberships, TreeEnsembleLearnerConfiguration)} and
 * {@link TreeTargetNominalColumnData#getDistribution(double[], TreeEnsembleLearnerConfiguration)}, for the Gini,
 * information gain and information gain ratio split criteria.
 *
 * @throws InvalidSettingsException declared by the code under test
 */
@Test
public void testGetDistribution() throws InvalidSettingsException {
    final TreeEnsembleLearnerConfiguration cfg = new TreeEnsembleLearnerConfiguration(false);
    final TestDataGenerator generator = new TestDataGenerator(cfg);
    // four rows of class "A" and three rows of class "B"
    final TreeTargetNominalColumnData targetColumn = TestDataGenerator.createNominalTargetColumn("A,A,A,B,B,B,A");
    final TreeNumericColumnData numericColumn =
        generator.createNumericAttributeColumn("1,2,3,4,5,6,7", "test-col", 0);
    final TreeData treeData =
        new TreeData(new TreeAttributeColumnData[] { numericColumn }, targetColumn, TreeType.Ordinary);
    final double[] unitWeights = new double[7];
    Arrays.fill(unitWeights, 1.0);
    final DefaultDataIndexManager indexManager = new DefaultDataIndexManager(treeData);
    final DataMemberships root = new RootDataMemberships(unitWeights, treeData, indexManager);
    final double[] expectedCounts = { 4.0, 3.0 };

    // Gini
    cfg.setSplitCriterion(SplitCriterion.Gini);
    final double giniImpurity = 0.4897959184;
    ClassificationPriors fromMemberships = targetColumn.getDistribution(root, cfg);
    assertEquals(giniImpurity, fromMemberships.getPriorImpurity(), DELTA);
    assertArrayEquals(expectedCounts, fromMemberships.getDistribution(), DELTA);
    ClassificationPriors fromWeights = targetColumn.getDistribution(unitWeights, cfg);
    assertEquals(giniImpurity, fromWeights.getPriorImpurity(), DELTA);
    assertArrayEquals(expectedCounts, fromWeights.getDistribution(), DELTA);

    // Information Gain and Information Gain Ratio are expected to share the same prior impurity
    final double entropy = 0.985228136;
    final SplitCriterion[] entropyCriteria =
        { SplitCriterion.InformationGain, SplitCriterion.InformationGainRatio };
    for (final SplitCriterion criterion : entropyCriteria) {
        cfg.setSplitCriterion(criterion);
        fromMemberships = targetColumn.getDistribution(root, cfg);
        assertEquals(entropy, fromMemberships.getPriorImpurity(), DELTA);
        assertArrayEquals(expectedCounts, fromMemberships.getDistribution(), DELTA);
        fromWeights = targetColumn.getDistribution(unitWeights, cfg);
        assertEquals(entropy, fromWeights.getPriorImpurity(), DELTA);
        assertArrayEquals(expectedCounts, fromWeights.getDistribution(), DELTA);
    }
}
Also used : TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) DataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships) DefaultDataIndexManager(org.knime.base.node.mine.treeensemble2.data.memberships.DefaultDataIndexManager) Test(org.junit.Test)

Example 55 with TreeEnsembleLearnerConfiguration

use of org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration in project knime-core by knime.

the class TreeNodeNumericConditionTest method testTestCondition.

/**
 * This method tests the
 * {@link TreeNodeNumericCondition#testCondition(org.knime.base.node.mine.treeensemble2.data.PredictorRecord)}
 * method for the operators {@code LessThanOrEqual} and {@code LargerThan}, each with and without acceptance of
 * missing values.
 *
 * @throws Exception propagated from the test utilities or the code under test
 */
@Test
public void testTestCondition() throws Exception {
    final TreeEnsembleLearnerConfiguration config = new TreeEnsembleLearnerConfiguration(false);
    final TestDataGenerator dataGen = new TestDataGenerator(config);
    final TreeNumericColumnData col = dataGen.createNumericAttributeColumn("1,2,3,4,4,5,6,7", "testCol", 0);
    final Map<String, Object> map = Maps.newHashMap();
    final String colName = col.getMetaData().getAttributeName();
    map.put(colName, 2.5);
    // The record is created only once; the test keeps changing the backing map afterwards and relies on the
    // record reflecting those changes.
    final PredictorRecord record = new PredictorRecord(map);

    // value <= 3, missing values rejected
    TreeNodeNumericCondition cond =
        new TreeNodeNumericCondition(col.getMetaData(), 3, NumericOperator.LessThanOrEqual, false);
    assertTrue("2.5 was falsely rejected.", cond.testCondition(record));
    map.clear();
    map.put(colName, 3);
    assertTrue("3 was falsely rejected.", cond.testCondition(record));
    map.clear();
    map.put(colName, 4);
    assertFalse("4 was falsely accepted.", cond.testCondition(record));
    map.clear();
    map.put(colName, PredictorRecord.NULL);
    assertFalse("Missing values were falsely accepted.", cond.testCondition(record));

    // value <= 3, missing values accepted
    cond = new TreeNodeNumericCondition(col.getMetaData(), 3, NumericOperator.LessThanOrEqual, true);
    map.clear();
    map.put(colName, 2.5);
    assertTrue("2.5 was falsely rejected.", cond.testCondition(record));
    map.clear();
    map.put(colName, 3);
    assertTrue("3 was falsely rejected.", cond.testCondition(record));
    map.clear();
    map.put(colName, 4);
    assertFalse("4 was falsely accepted.", cond.testCondition(record));
    map.clear();
    map.put(colName, PredictorRecord.NULL);
    assertTrue("Missing values were falsely rejected.", cond.testCondition(record));

    // value > 4, missing values rejected
    cond = new TreeNodeNumericCondition(col.getMetaData(), 4, NumericOperator.LargerThan, false);
    map.clear();
    map.put(colName, 2.5);
    assertFalse("2.5 was falsely accepted.", cond.testCondition(record));
    map.clear();
    map.put(colName, 3);
    assertFalse("3 was falsely accepted.", cond.testCondition(record));
    map.clear();
    map.put(colName, 4);
    assertFalse("4 was falsely accepted.", cond.testCondition(record));
    map.clear();
    map.put(colName, 4.01);
    assertTrue("4.01 was falsely rejected.", cond.testCondition(record));
    map.clear();
    map.put(colName, PredictorRecord.NULL);
    assertFalse("Missing values were falsely accepted.", cond.testCondition(record));

    // value > 4, missing values accepted
    cond = new TreeNodeNumericCondition(col.getMetaData(), 4, NumericOperator.LargerThan, true);
    map.clear();
    map.put(colName, 2.5);
    assertFalse("2.5 was falsely accepted.", cond.testCondition(record));
    map.clear();
    map.put(colName, 3);
    assertFalse("3 was falsely accepted.", cond.testCondition(record));
    // boundary value: 4 is not larger than 4, mirroring the check in the scenario without missing values
    map.clear();
    map.put(colName, 4);
    assertFalse("4 was falsely accepted.", cond.testCondition(record));
    map.clear();
    map.put(colName, 4.01);
    assertTrue("4.01 was falsely rejected.", cond.testCondition(record));
    map.clear();
    map.put(colName, PredictorRecord.NULL);
    assertTrue("Missing values were falsely rejected.", cond.testCondition(record));
}
Also used : TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) TreeNumericColumnData(org.knime.base.node.mine.treeensemble2.data.TreeNumericColumnData) TestDataGenerator(org.knime.base.node.mine.treeensemble2.data.TestDataGenerator) Test(org.junit.Test)

Aggregations

TreeEnsembleLearnerConfiguration (org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration)62 Test (org.junit.Test)29 DataMemberships (org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships)27 RootDataMemberships (org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships)26 SplitCandidate (org.knime.base.node.mine.treeensemble2.learner.SplitCandidate)19 RandomData (org.apache.commons.math.random.RandomData)17 BitSet (java.util.BitSet)16 DefaultDataIndexManager (org.knime.base.node.mine.treeensemble2.data.memberships.DefaultDataIndexManager)15 NominalBinarySplitCandidate (org.knime.base.node.mine.treeensemble2.learner.NominalBinarySplitCandidate)15 IDataIndexManager (org.knime.base.node.mine.treeensemble2.data.memberships.IDataIndexManager)13 NominalMultiwaySplitCandidate (org.knime.base.node.mine.treeensemble2.learner.NominalMultiwaySplitCandidate)13 TreeData (org.knime.base.node.mine.treeensemble2.data.TreeData)10 TreeNodeNominalBinaryCondition (org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalBinaryCondition)10 TestDataGenerator (org.knime.base.node.mine.treeensemble2.data.TestDataGenerator)9 TreeAttributeColumnData (org.knime.base.node.mine.treeensemble2.data.TreeAttributeColumnData)8 NumericSplitCandidate (org.knime.base.node.mine.treeensemble2.learner.NumericSplitCandidate)8 TreeNodeNumericCondition (org.knime.base.node.mine.treeensemble2.model.TreeNodeNumericCondition)7 NumericMissingSplitCandidate (org.knime.base.node.mine.treeensemble2.learner.NumericMissingSplitCandidate)6 TreeNodeNominalCondition (org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalCondition)6 TreeTargetNominalColumnData (org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnData)5