Search in sources:

Example 21 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class KMeansTrainerTest method findOneClusters.

/**
 * A few points, one cluster, one iteration.
 *
 * <p>Fits a k-means model with a single cluster and checks that both probe vectors are assigned
 * to cluster {@code 0}. It also checks that the trainer reports a maximum of {@code 1} iteration
 * and an epsilon equal to {@code PRECISION}.
 */
@Test
public void findOneClusters() {
    KMeansTrainer trainer = createAndCheckTrainer();
    KMeansModel kMeansMdl = trainer.withAmountOfClusters(1).fit(new LocalDatasetBuilder<>(data, parts), new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST));

    // JUnit expects assertEquals(expected, actual[, delta]); keeping that order makes a
    // failure report "expected:<0.0> but was:<...>" instead of the inverse.
    Vector firstVector = new DenseVector(new double[] { 2.0, 2.0 });
    assertEquals(0.0, kMeansMdl.predict(firstVector), PRECISION);

    Vector secondVector = new DenseVector(new double[] { -2.0, -2.0 });
    assertEquals(0.0, kMeansMdl.predict(secondVector), PRECISION);

    assertEquals(1, trainer.getMaxIterations());
    assertEquals(PRECISION, trainer.getEpsilon(), PRECISION);
}
Also used : KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 22 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class GaussianNaiveBayesTest method scikitLearnExample.

/**
 * Dataset from the Gaussian NB example in the scikit-learn documentation.
 *
 * <p>Every row carries its class label in the first coordinate (see
 * {@code Vectorizer.LabelCoordinate.FIRST}); the remaining two values are the features.
 */
@Test
public void scikitLearnExample() {
    final double firstCls = 1.;
    final double secondCls = 2.;

    Map<Integer, double[]> samples = new HashMap<>();
    samples.put(0, new double[] { firstCls, -1, 1 });
    samples.put(2, new double[] { firstCls, -2, -1 });
    samples.put(3, new double[] { firstCls, -3, -2 });
    samples.put(4, new double[] { secondCls, 1, 1 });
    samples.put(5, new double[] { secondCls, 2, 1 });
    samples.put(6, new double[] { secondCls, 3, 2 });

    Vectorizer<Integer, double[], Integer, Double> labelFirst = new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
    GaussianNaiveBayesModel mdl = new GaussianNaiveBayesTrainer().fit(new LocalDatasetBuilder<>(samples, 2), labelFirst);

    // The probe point is expected to be classified into the first class.
    Vector probe = VectorUtils.of(-0.8, -1);
    Assert.assertEquals(firstCls, mdl.predict(probe), PRECISION);
}
Also used : DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Test(org.junit.Test)

Example 23 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class GaussianNaiveBayesTest method wikipediaSexClassificationDataset.

/**
 * Example data set from the Wikipedia article about Naive Bayes:
 * https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Sex_classification
 *
 * <p>Every row carries its class label in the first coordinate (see
 * {@code Vectorizer.LabelCoordinate.FIRST}), followed by three feature values.
 */
@Test
public void wikipediaSexClassificationDataset() {
    final double maleLbl = 0.;
    final double femaleLbl = 1.;

    Map<Integer, double[]> samples = new HashMap<>();
    samples.put(0, new double[] { maleLbl, 6, 180, 12 });
    samples.put(2, new double[] { maleLbl, 5.92, 190, 11 });
    samples.put(3, new double[] { maleLbl, 5.58, 170, 12 });
    samples.put(4, new double[] { maleLbl, 5.92, 165, 10 });
    samples.put(5, new double[] { femaleLbl, 5, 100, 6 });
    samples.put(6, new double[] { femaleLbl, 5.5, 150, 8 });
    samples.put(7, new double[] { femaleLbl, 5.42, 130, 7 });
    samples.put(8, new double[] { femaleLbl, 5.75, 150, 9 });

    Vectorizer<Integer, double[], Integer, Double> labelFirst = new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
    GaussianNaiveBayesModel mdl = new GaussianNaiveBayesTrainer().fit(new LocalDatasetBuilder<>(samples, 2), labelFirst);

    // The probe sample is expected to be classified with the female label.
    Vector probe = VectorUtils.of(6, 130, 8);
    Assert.assertEquals(femaleLbl, mdl.predict(probe), PRECISION);
}
Also used : DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Test(org.junit.Test)

Example 24 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class LogisticRegressionSGDTrainerTest method testUpdate.

/**
 * Checks that updating an already trained logistic regression model leaves its predictions for
 * two probe vectors unchanged (within {@code PRECISION}), both when the update runs on the same
 * dataset and when it runs on an empty one.
 */
@Test
public void testUpdate() {
    Map<Integer, double[]> cacheMock = new HashMap<>();
    for (int i = 0; i < twoLinearlySeparableClasses.length; i++) {
        cacheMock.put(i, twoLinearlySeparableClasses[i]);
    }

    LogisticRegressionSGDTrainer trainer = new LogisticRegressionSGDTrainer()
        .withUpdatesStgy(new UpdatesStrategy<>(new SimpleGDUpdateCalculator(0.2), SimpleGDParameterUpdate.SUM_LOCAL, SimpleGDParameterUpdate.AVG))
        .withMaxIterations(100000)
        .withLocIterations(100)
        .withBatchSize(10)
        .withSeed(123L);

    // A single vectorizer (label in the first coordinate) is shared by the initial fit and by
    // both updates, so all three calls are guaranteed to read the rows the same way.
    Vectorizer<Integer, double[], Integer, Double> vectorizer = new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

    LogisticRegressionModel originalMdl = trainer.fit(cacheMock, parts, vectorizer);
    LogisticRegressionModel updatedOnSameDS = trainer.update(originalMdl, cacheMock, parts, vectorizer);
    LogisticRegressionModel updatedOnEmptyDS = trainer.update(originalMdl, new HashMap<>(), parts, vectorizer);

    Vector v1 = VectorUtils.of(100, 10);
    Vector v2 = VectorUtils.of(10, 100);

    TestUtils.assertEquals(originalMdl.predict(v1), updatedOnSameDS.predict(v1), PRECISION);
    TestUtils.assertEquals(originalMdl.predict(v2), updatedOnSameDS.predict(v2), PRECISION);
    TestUtils.assertEquals(originalMdl.predict(v2), updatedOnEmptyDS.predict(v2), PRECISION);
    TestUtils.assertEquals(originalMdl.predict(v1), updatedOnEmptyDS.predict(v1), PRECISION);
}
Also used : DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) SimpleGDUpdateCalculator(org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 25 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

the class OneVsRestTrainerTest method testTrainWithTheLinearlySeparableCase.

/**
 * Trains a one-vs-rest classifier, backed by an SGD logistic regression trainer, on two linearly
 * separable sets. Verifies that every {@code toString} variant of the model is non-empty and
 * that the two probe vectors are classified as {@code 1} and {@code 0} respectively.
 */
@Test
public void testTrainWithTheLinearlySeparableCase() {
    Map<Integer, double[]> samples = new HashMap<>();
    int rowIdx = 0;
    while (rowIdx < twoLinearlySeparableClasses.length) {
        samples.put(rowIdx, twoLinearlySeparableClasses[rowIdx]);
        rowIdx++;
    }

    LogisticRegressionSGDTrainer binaryTrainer = new LogisticRegressionSGDTrainer()
        .withUpdatesStgy(new UpdatesStrategy<>(new SimpleGDUpdateCalculator(0.2), SimpleGDParameterUpdate.SUM_LOCAL, SimpleGDParameterUpdate.AVG))
        .withMaxIterations(1000)
        .withLocIterations(10)
        .withBatchSize(100)
        .withSeed(123L);

    OneVsRestTrainer<LogisticRegressionModel> trainer = new OneVsRestTrainer<>(binaryTrainer);
    MultiClassModel multiClsMdl = trainer.fit(samples, parts, new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST));

    // Every textual representation of the trained model must contain something.
    Assert.assertFalse(multiClsMdl.toString().isEmpty());
    Assert.assertFalse(multiClsMdl.toString(true).isEmpty());
    Assert.assertFalse(multiClsMdl.toString(false).isEmpty());

    TestUtils.assertEquals(1, multiClsMdl.predict(VectorUtils.of(-100, 0)), PRECISION);
    TestUtils.assertEquals(0, multiClsMdl.predict(VectorUtils.of(100, 0)), PRECISION);
}
Also used : LogisticRegressionSGDTrainer(org.apache.ignite.ml.regressions.logistic.LogisticRegressionSGDTrainer) DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) SimpleGDUpdateCalculator(org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator) LogisticRegressionModel(org.apache.ignite.ml.regressions.logistic.LogisticRegressionModel) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Aggregations

DoubleArrayVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer)30 Test (org.junit.Test)23 HashMap (java.util.HashMap)17 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)14 TrainerTest (org.apache.ignite.ml.common.TrainerTest)11 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)10 Ignite (org.apache.ignite.Ignite)5 RendezvousAffinityFunction (org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction)5 CacheConfiguration (org.apache.ignite.configuration.CacheConfiguration)5 MeanAbsValueConvergenceCheckerFactory (org.apache.ignite.ml.composition.boosting.convergence.mean.MeanAbsValueConvergenceCheckerFactory)5 KNNClassificationModel (org.apache.ignite.ml.knn.classification.KNNClassificationModel)5 KNNClassificationTrainer (org.apache.ignite.ml.knn.classification.KNNClassificationTrainer)5 GDBModel (org.apache.ignite.ml.composition.boosting.GDBModel)4 GDBTrainer (org.apache.ignite.ml.composition.boosting.GDBTrainer)4 VectorUtils (org.apache.ignite.ml.math.primitives.vector.VectorUtils)4 SimpleGDUpdateCalculator (org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator)4 Path (java.nio.file.Path)3 Random (java.util.Random)3 KNNRegressionModel (org.apache.ignite.ml.knn.regression.KNNRegressionModel)3 KNNRegressionTrainer (org.apache.ignite.ml.knn.regression.KNNRegressionTrainer)3