Search in sources:

Example 11 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class LinearRegressionLSQRTrainerTest method testBigDataFit.

/**
 * Checks {@code fit()} against a big (100000 x 100) dataset.
 * <p>
 * Each row consists of 100 random feature values followed by a constant label, so the fitted
 * model is expected to have all-zero weights and an intercept equal to that constant.
 */
@Test
public void testBigDataFit() {
    Random random = new Random(0);
    Map<Integer, double[]> trainingSet = new HashMap<>();

    // Left at zero on purpose: these are the weights the fitted model is compared against.
    double[] expWeights = new double[100];
    int featuresCnt = expWeights.length;

    // Drawn before any feature value so the seeded sequence is consumed in a fixed order.
    double expIntercept = random.nextDouble() * 10;

    for (int rowIdx = 0; rowIdx < 100000; rowIdx++) {
        double[] row = new double[featuresCnt + 1];

        for (int featureIdx = 0; featureIdx < featuresCnt; featureIdx++) {
            row[featureIdx] = random.nextDouble() * 10;
        }

        // The trailing coordinate is the label picked up by LabelCoordinate.LAST below.
        row[featuresCnt] = expIntercept;

        trainingSet.put(rowIdx, row);
    }

    LinearRegressionModel mdl = new LinearRegressionLSQRTrainer().fit(
        trainingSet,
        parts,
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST)
    );

    assertArrayEquals(expWeights, mdl.weights().getStorage().data(), 1e-6);
    assertEquals(expIntercept, mdl.intercept(), 1e-6);
}
Also used : DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) Random(java.util.Random) HashMap(java.util.HashMap) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 12 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class DecisionTreeClassificationTrainerIntegrationTest method testFit.

/**
 * Trains a decision tree classifier on a cache-backed dataset with a single feature whose label
 * is {@code 1} for positive feature values and {@code 0} otherwise, then checks that the tree
 * is one conditional node with a threshold near zero and two leaves predicting {@code 1}
 * and {@code 0}.
 */
@Test
public void testFit() {
    int rowsCnt = 100;

    CacheConfiguration<Integer, double[]> cacheCfg = new CacheConfiguration<>();
    cacheCfg.setName("TRAINING_SET");
    cacheCfg.setAffinity(new RendezvousAffinityFunction(false, 10));

    IgniteCache<Integer, double[]> trainingSet = ignite.createCache(cacheCfg);

    Random random = new Random(0);

    for (int key = 0; key < rowsCnt; key++) {
        // Feature values are centered around zero.
        double feature = random.nextDouble() - 0.5;

        double lb = 0;
        if (feature > 0) {
            lb = 1;
        }

        trainingSet.put(key, new double[] { feature, lb });
    }

    DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(1, 0);

    // The label is stored at coordinate 1 of every row.
    DecisionTreeModel mdl = trainer.fit(ignite, trainingSet, new DoubleArrayVectorizer<Integer>().labeled(1));

    DecisionTreeNode root = mdl.getRootNode();
    assertTrue(root instanceof DecisionTreeConditionalNode);

    DecisionTreeConditionalNode split = (DecisionTreeConditionalNode) root;
    assertEquals(0, split.getThreshold(), 1e-3);

    DecisionTreeNode thenBranch = split.getThenNode();
    DecisionTreeNode elseBranch = split.getElseNode();
    assertTrue(thenBranch instanceof DecisionTreeLeafNode);
    assertTrue(elseBranch instanceof DecisionTreeLeafNode);

    DecisionTreeLeafNode thenLeaf = (DecisionTreeLeafNode) thenBranch;
    DecisionTreeLeafNode elseLeaf = (DecisionTreeLeafNode) elseBranch;
    assertEquals(1, thenLeaf.getVal(), 1e-10);
    assertEquals(0, elseLeaf.getVal(), 1e-10);
}
Also used : DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) Random(java.util.Random) RendezvousAffinityFunction(org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction) CacheConfiguration(org.apache.ignite.configuration.CacheConfiguration) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Example 13 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class GmmTrainerIntegrationTest method testFit.

/**
 * Fits a GMM on six cache-backed rows (two features plus a trailing label each), starting from
 * the initial means (1, 2) and (-1, -2), and checks that the resulting model has two
 * two-dimensional components whose means are approximately (1.33, 1.33) and (-1.33, -1.33).
 */
@Test
public void testFit() {
    CacheConfiguration<Integer, double[]> cacheCfg = new CacheConfiguration<>();
    cacheCfg.setName("TRAINING_SET");
    cacheCfg.setAffinity(new RendezvousAffinityFunction(false, 3));

    IgniteCache<Integer, double[]> trainingSet = ignite.createCache(cacheCfg);

    // Rows are stored under keys 0..5 in the order listed; the last value of each row is the label.
    double[][] rows = {
        { 1.0, 1.0, 1.0 },
        { 1.0, 2.0, 1.0 },
        { 2.0, 1.0, 1.0 },
        { -1.0, -1.0, 2.0 },
        { -1.0, -2.0, 2.0 },
        { -2.0, -1.0, 2.0 }
    };

    for (int key = 0; key < rows.length; key++) {
        trainingSet.put(key, rows[key]);
    }

    GmmTrainer trainer = new GmmTrainer(2, 1)
        .withInitialMeans(Arrays.asList(VectorUtils.of(1.0, 2.0), VectorUtils.of(-1.0, -2.0)));

    GmmModel mdl = trainer.fit(
        new CacheBasedDatasetBuilder<>(ignite, trainingSet),
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST)
    );

    Assert.assertEquals(2, mdl.countOfComponents());
    Assert.assertEquals(2, mdl.dimension());

    double[] firstMean = mdl.distributions().get(0).mean().asArray();
    Assert.assertArrayEquals(new double[] { 1.33, 1.33 }, firstMean, 1e-2);

    double[] secondMean = mdl.distributions().get(1).mean().asArray();
    Assert.assertArrayEquals(new double[] { -1.33, -1.33 }, secondMean, 1e-2);
}
Also used : DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) RendezvousAffinityFunction(org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction) CacheConfiguration(org.apache.ignite.configuration.CacheConfiguration) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Example 14 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class CompoundNaiveBayesTest method testLearnsAndPredictCorrectly.

/**
 * Trains a compound naive Bayes model in which the Gaussian trainer skips features 3-7 and the
 * discrete trainer skips features 0-2, then checks the labels predicted for two observations.
 */
@Test
public void testLearnsAndPredictCorrectly() {
    GaussianNaiveBayesTrainer gaussianTrainer = new GaussianNaiveBayesTrainer();

    DiscreteNaiveBayesTrainer discreteTrainer = new DiscreteNaiveBayesTrainer()
        .setBucketThresholds(binarizedDataThresholds);

    CompoundNaiveBayesTrainer compoundTrainer = new CompoundNaiveBayesTrainer()
        .withPriorProbabilities(classProbabilities)
        .withGaussianNaiveBayesTrainer(gaussianTrainer)
        .withGaussianFeatureIdsToSkip(asList(3, 4, 5, 6, 7))
        .withDiscreteNaiveBayesTrainer(discreteTrainer)
        .withDiscreteFeatureIdsToSkip(asList(0, 1, 2));

    // The label is the last coordinate of every training row.
    CompoundNaiveBayesModel model = compoundTrainer.fit(
        new LocalDatasetBuilder<>(data, 2),
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST)
    );

    Vector firstObservation = VectorUtils.of(5.92, 165, 10, 1, 1, 0, 0, 0);
    assertEquals(LABEL_1, model.predict(firstObservation), PRECISION);

    Vector secondObservation = VectorUtils.of(6, 130, 8, 1, 0, 1, 1, 0);
    assertEquals(LABEL_2, model.predict(secondObservation), PRECISION);
}
Also used : DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) GaussianNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesTrainer) DiscreteNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesTrainer) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Test(org.junit.Test)

Example 15 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class DiscreteNaiveBayesTest method testLearnsAndPredictCorrently.

/**
 * Example from the book: Barber D. Bayesian reasoning and machine learning. Chapter 10.
 * <p>
 * Trains a discrete naive Bayes model on thirteen rows of five binary features labeled either
 * "english" or "scottish" and checks that the observation (1, 0, 1, 1, 0) is classified
 * as "scottish".
 */
@Test
public void testLearnsAndPredictCorrently() {
    double english = 1.;
    double scottish = 2.;

    // Five features per row followed by the class label; rows are keyed 0..12 in this order.
    double[][] rows = {
        { 0, 0, 1, 1, 1, english },
        { 1, 0, 1, 1, 0, english },
        { 1, 1, 0, 0, 1, english },
        { 1, 1, 0, 0, 0, english },
        { 0, 1, 0, 0, 1, english },
        { 0, 0, 0, 1, 0, english },
        { 1, 0, 0, 1, 1, scottish },
        { 1, 1, 0, 0, 1, scottish },
        { 1, 1, 1, 1, 0, scottish },
        { 1, 1, 0, 1, 0, scottish },
        { 1, 1, 0, 1, 1, scottish },
        { 1, 0, 1, 1, 0, scottish },
        { 1, 0, 1, 0, 0, scottish }
    };

    Map<Integer, double[]> trainingSet = new HashMap<>();
    for (int key = 0; key < rows.length; key++) {
        trainingSet.put(key, rows[key]);
    }

    // One threshold of 0.5 for each of the five features.
    double[][] bucketThresholds = new double[][] { { .5 }, { .5 }, { .5 }, { .5 }, { .5 } };

    DiscreteNaiveBayesTrainer bayesTrainer = new DiscreteNaiveBayesTrainer().setBucketThresholds(bucketThresholds);

    DiscreteNaiveBayesModel model = bayesTrainer.fit(
        new LocalDatasetBuilder<>(trainingSet, 2),
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST)
    );

    Vector sample = VectorUtils.of(1, 0, 1, 1, 0);
    Assert.assertEquals(scottish, model.predict(sample), PRECISION);
}
Also used : DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Test(org.junit.Test)

Aggregations

DoubleArrayVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer): 30; Test (org.junit.Test): 23; HashMap (java.util.HashMap): 17; Vector (org.apache.ignite.ml.math.primitives.vector.Vector): 14; TrainerTest (org.apache.ignite.ml.common.TrainerTest): 11; EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance): 10; Ignite (org.apache.ignite.Ignite): 5; RendezvousAffinityFunction (org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction): 5; CacheConfiguration (org.apache.ignite.configuration.CacheConfiguration): 5; MeanAbsValueConvergenceCheckerFactory (org.apache.ignite.ml.composition.boosting.convergence.mean.MeanAbsValueConvergenceCheckerFactory): 5; KNNClassificationModel (org.apache.ignite.ml.knn.classification.KNNClassificationModel): 5; KNNClassificationTrainer (org.apache.ignite.ml.knn.classification.KNNClassificationTrainer): 5; GDBModel (org.apache.ignite.ml.composition.boosting.GDBModel): 4; GDBTrainer (org.apache.ignite.ml.composition.boosting.GDBTrainer): 4; VectorUtils (org.apache.ignite.ml.math.primitives.vector.VectorUtils): 4; SimpleGDUpdateCalculator (org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator): 4; Path (java.nio.file.Path): 3; Random (java.util.Random): 3; KNNRegressionModel (org.apache.ignite.ml.knn.regression.KNNRegressionModel): 3; KNNRegressionTrainer (org.apache.ignite.ml.knn.regression.KNNRegressionTrainer): 3