Search in sources :

Example 11 with DoubleArrayVectorizer

Use of DoubleArrayVectorizer in project ignite by apache.

the class LinearRegressionLSQRTrainerTest method testBigDataFit.

/** Tests {@code fit()} method on a big (100000 x 100) dataset. */
public void testBigDataFit() {
    Random rnd = new Random(0);
    Map<Integer, double[]> data = new HashMap<>();
    double[] coef = new double[100];
    double intercept = rnd.nextDouble() * 10;
    for (int i = 0; i < 100000; i++) {
        double[] x = new double[coef.length + 1];
        for (int j = 0; j < coef.length; j++) x[j] = rnd.nextDouble() * 10;
        x[coef.length] = intercept;
        data.put(i, x);
    LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer();
    LinearRegressionModel mdl =, parts, new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST));
    assertArrayEquals(coef, mdl.weights().getStorage().data(), 1e-6);
    assertEquals(intercept, mdl.intercept(), 1e-6);
Also used : DoubleArrayVectorizer, Random (java.util.Random), HashMap (java.util.HashMap), TrainerTest, Test (org.junit.Test)

Example 12 with DoubleArrayVectorizer

Use of DoubleArrayVectorizer in project ignite by apache.

the class DecisionTreeClassificationTrainerIntegrationTest method testFit.

public void testFit() {
    int size = 100;
    CacheConfiguration<Integer, double[]> trainingSetCacheCfg = new CacheConfiguration<>();
    trainingSetCacheCfg.setAffinity(new RendezvousAffinityFunction(false, 10));
    IgniteCache<Integer, double[]> data = ignite.createCache(trainingSetCacheCfg);
    Random rnd = new Random(0);
    for (int i = 0; i < size; i++) {
        double x = rnd.nextDouble() - 0.5;
        data.put(i, new double[] { x, x > 0 ? 1 : 0 });
    DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(1, 0);
    DecisionTreeModel tree =, data, new DoubleArrayVectorizer<Integer>().labeled(1));
    DecisionTreeNode decisionTreeNode = tree.getRootNode();
    assertTrue(decisionTreeNode instanceof DecisionTreeConditionalNode);
    DecisionTreeConditionalNode node = (DecisionTreeConditionalNode) decisionTreeNode;
    assertEquals(0, node.getThreshold(), 1e-3);
    assertTrue(node.getThenNode() instanceof DecisionTreeLeafNode);
    assertTrue(node.getElseNode() instanceof DecisionTreeLeafNode);
    DecisionTreeLeafNode thenNode = (DecisionTreeLeafNode) node.getThenNode();
    DecisionTreeLeafNode elseNode = (DecisionTreeLeafNode) node.getElseNode();
    assertEquals(1, thenNode.getVal(), 1e-10);
    assertEquals(0, elseNode.getVal(), 1e-10);
Also used : DoubleArrayVectorizer, Random (java.util.Random), RendezvousAffinityFunction (org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction), CacheConfiguration (org.apache.ignite.configuration.CacheConfiguration), GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest), Test (org.junit.Test)

Example 13 with DoubleArrayVectorizer

Use of DoubleArrayVectorizer in project ignite by apache.

the class GmmTrainerIntegrationTest method testFit.

public void testFit() {
    CacheConfiguration<Integer, double[]> trainingSetCacheCfg = new CacheConfiguration<>();
    trainingSetCacheCfg.setAffinity(new RendezvousAffinityFunction(false, 3));
    IgniteCache<Integer, double[]> data = ignite.createCache(trainingSetCacheCfg);
    data.put(0, new double[] { 1.0, 1.0, 1.0 });
    data.put(1, new double[] { 1.0, 2.0, 1.0 });
    data.put(2, new double[] { 2.0, 1.0, 1.0 });
    data.put(3, new double[] { -1.0, -1.0, 2.0 });
    data.put(4, new double[] { -1.0, -2.0, 2.0 });
    data.put(5, new double[] { -2.0, -1.0, 2.0 });
    GmmTrainer trainer = new GmmTrainer(2, 1).withInitialMeans(Arrays.asList(VectorUtils.of(1.0, 2.0), VectorUtils.of(-1.0, -2.0)));
    GmmModel model = CacheBasedDatasetBuilder<>(ignite, data), new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST));
    Assert.assertEquals(2, model.countOfComponents());
    Assert.assertEquals(2, model.dimension());
    Assert.assertArrayEquals(new double[] { 1.33, 1.33 }, model.distributions().get(0).mean().asArray(), 1e-2);
    Assert.assertArrayEquals(new double[] { -1.33, -1.33 }, model.distributions().get(1).mean().asArray(), 1e-2);
Also used : DoubleArrayVectorizer, RendezvousAffinityFunction (org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction), CacheConfiguration (org.apache.ignite.configuration.CacheConfiguration), GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest), Test (org.junit.Test)

Example 14 with DoubleArrayVectorizer

Use of DoubleArrayVectorizer in project ignite by apache.

the class CompoundNaiveBayesTest method testLearnsAndPredictCorrectly.

/** Tests that the trained compound model predicts the expected labels. */
public void testLearnsAndPredictCorrectly() {
    CompoundNaiveBayesTrainer trainer = new CompoundNaiveBayesTrainer().withPriorProbabilities(classProbabilities).withGaussianNaiveBayesTrainer(new GaussianNaiveBayesTrainer()).withGaussianFeatureIdsToSkip(asList(3, 4, 5, 6, 7)).withDiscreteNaiveBayesTrainer(new DiscreteNaiveBayesTrainer().setBucketThresholds(binarizedDataThresholds)).withDiscreteFeatureIdsToSkip(asList(0, 1, 2));
    CompoundNaiveBayesModel mdl = LocalDatasetBuilder<>(data, 2), new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST));
    Vector observation1 = VectorUtils.of(5.92, 165, 10, 1, 1, 0, 0, 0);
    assertEquals(LABEL_1, mdl.predict(observation1), PRECISION);
    Vector observation2 = VectorUtils.of(6, 130, 8, 1, 0, 1, 1, 0);
    assertEquals(LABEL_2, mdl.predict(observation2), PRECISION);
Also used : DoubleArrayVectorizer, GaussianNaiveBayesTrainer, DiscreteNaiveBayesTrainer, Vector, Test (org.junit.Test)

Example 15 with DoubleArrayVectorizer

Use of DoubleArrayVectorizer in project ignite by apache.

the class DiscreteNaiveBayesTest method testLearnsAndPredictCorrently.

/** Example from the book: Barber D., "Bayesian Reasoning and Machine Learning", Chapter 10. */
public void testLearnsAndPredictCorrently() {
    double english = 1.;
    double scottish = 2.;
    Map<Integer, double[]> data = new HashMap<>();
    data.put(0, new double[] { 0, 0, 1, 1, 1, english });
    data.put(1, new double[] { 1, 0, 1, 1, 0, english });
    data.put(2, new double[] { 1, 1, 0, 0, 1, english });
    data.put(3, new double[] { 1, 1, 0, 0, 0, english });
    data.put(4, new double[] { 0, 1, 0, 0, 1, english });
    data.put(5, new double[] { 0, 0, 0, 1, 0, english });
    data.put(6, new double[] { 1, 0, 0, 1, 1, scottish });
    data.put(7, new double[] { 1, 1, 0, 0, 1, scottish });
    data.put(8, new double[] { 1, 1, 1, 1, 0, scottish });
    data.put(9, new double[] { 1, 1, 0, 1, 0, scottish });
    data.put(10, new double[] { 1, 1, 0, 1, 1, scottish });
    data.put(11, new double[] { 1, 0, 1, 1, 0, scottish });
    data.put(12, new double[] { 1, 0, 1, 0, 0, scottish });
    double[][] thresholds = new double[][] { { .5 }, { .5 }, { .5 }, { .5 }, { .5 } };
    DiscreteNaiveBayesTrainer trainer = new DiscreteNaiveBayesTrainer().setBucketThresholds(thresholds);
    DiscreteNaiveBayesModel mdl = LocalDatasetBuilder<>(data, 2), new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST));
    Vector observation = VectorUtils.of(1, 0, 1, 1, 0);
    Assert.assertEquals(scottish, mdl.predict(observation), PRECISION);
Also used : DoubleArrayVectorizer, HashMap (java.util.HashMap), Vector, Test (org.junit.Test)


DoubleArrayVectorizer, Test (org.junit.Test) 23, HashMap (java.util.HashMap) 17, Vector, TrainerTest, EuclideanDistance, Ignite (org.apache.ignite.Ignite) 5, RendezvousAffinityFunction (org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction) 5, CacheConfiguration (org.apache.ignite.configuration.CacheConfiguration) 5, MeanAbsValueConvergenceCheckerFactory, KNNClassificationModel, KNNClassificationTrainer, GDBModel, GDBTrainer, VectorUtils, SimpleGDUpdateCalculator, Path (java.nio.file.Path) 3, Random (java.util.Random) 3, KNNRegressionModel, KNNRegressionTrainer