Use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in the Apache Ignite project.
Class LinearRegressionLSQRTrainerTest, method testBigDataFit.
/**
 * Tests {@code fit()} method on a big (100000 x 100) dataset.
 *
 * <p>Each row holds 100 random feature values in {@code [0, 10)} followed by a label that is
 * always the same randomly drawn intercept. The {@code coef} array is never written to, so it
 * stays all zeros: the test expects the trained model to report zero weights and the drawn
 * intercept, both within {@code 1e-6}.
 */
@Test
public void testBigDataFit() {
    final int rowsCnt = 100000;

    // Fixed seed keeps the generated dataset reproducible between runs.
    Random rnd = new Random(0);

    // Expected weights; intentionally left at their default value of 0.0.
    double[] coef = new double[100];
    int featuresCnt = coef.length;

    // Drawn before any feature value, so it is the first number produced by the generator.
    double intercept = rnd.nextDouble() * 10;

    Map<Integer, double[]> data = new HashMap<>();

    for (int rowIdx = 0; rowIdx < rowsCnt; rowIdx++) {
        double[] row = new double[featuresCnt + 1];

        for (int featureIdx = 0; featureIdx < featuresCnt; featureIdx++) {
            row[featureIdx] = rnd.nextDouble() * 10;
        }

        // The label goes into the last slot, matching LabelCoordinate.LAST below.
        row[featuresCnt] = intercept;

        data.put(rowIdx, row);
    }

    LinearRegressionModel mdl = new LinearRegressionLSQRTrainer().fit(
        data,
        parts,
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST)
    );

    assertArrayEquals(coef, mdl.weights().getStorage().data(), 1e-6);
    assertEquals(intercept, mdl.intercept(), 1e-6);
}
Use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in the Apache Ignite project.
Class DecisionTreeClassificationTrainerIntegrationTest, method testFit.
/**
 * Trains a decision tree on a cache-backed dataset and checks the shape of the resulting tree.
 *
 * <p>The cache is filled with 100 entries of the form {@code {x, label}}, where {@code x} is a
 * random value shifted by {@code -0.5} and {@code label} is 1 when {@code x > 0} and 0 otherwise.
 * The test expects the root to be a conditional node with a threshold close to 0 whose "then"
 * and "else" children are leaves holding 1 and 0 respectively.
 */
@Test
public void testFit() {
    final int size = 100;

    CacheConfiguration<Integer, double[]> cacheCfg = new CacheConfiguration<>();
    cacheCfg.setAffinity(new RendezvousAffinityFunction(false, 10));
    cacheCfg.setName("TRAINING_SET");

    IgniteCache<Integer, double[]> data = ignite.createCache(cacheCfg);

    // Fixed seed keeps the generated points reproducible between runs.
    Random rnd = new Random(0);

    for (int key = 0; key < size; key++) {
        double feature = rnd.nextDouble() - 0.5;

        double lb;
        if (feature > 0) {
            lb = 1;
        }
        else {
            lb = 0;
        }

        data.put(key, new double[] {feature, lb});
    }

    // Index 1 of every stored array is used as the label coordinate.
    DecisionTreeModel tree = new DecisionTreeClassificationTrainer(1, 0)
        .fit(ignite, data, new DoubleArrayVectorizer<Integer>().labeled(1));

    DecisionTreeNode root = tree.getRootNode();
    assertTrue(root instanceof DecisionTreeConditionalNode);

    DecisionTreeConditionalNode split = (DecisionTreeConditionalNode) root;
    assertEquals(0, split.getThreshold(), 1e-3);

    assertTrue(split.getThenNode() instanceof DecisionTreeLeafNode);
    assertTrue(split.getElseNode() instanceof DecisionTreeLeafNode);

    DecisionTreeLeafNode thenLeaf = (DecisionTreeLeafNode) split.getThenNode();
    DecisionTreeLeafNode elseLeaf = (DecisionTreeLeafNode) split.getElseNode();

    assertEquals(1, thenLeaf.getVal(), 1e-10);
    assertEquals(0, elseLeaf.getVal(), 1e-10);
}
Use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in the Apache Ignite project.
Class GmmTrainerIntegrationTest, method testFit.
/**
 * Trains a two-component GMM on a small cache-backed dataset and checks the fitted means.
 *
 * <p>Six three-element rows are stored in the cache; the vectorizer is configured with
 * {@code LabelCoordinate.LAST}, and the test asserts that the resulting model has dimension 2.
 * Initial means are supplied explicitly as {@code (1, 2)} and {@code (-1, -2)}. The component
 * means are expected to be approximately {@code (1.33, 1.33)} and {@code (-1.33, -1.33)}, with
 * a tolerance of {@code 1e-2}.
 */
@Test
public void testFit() {
    CacheConfiguration<Integer, double[]> cacheCfg = new CacheConfiguration<>();
    cacheCfg.setAffinity(new RendezvousAffinityFunction(false, 3));
    cacheCfg.setName("TRAINING_SET");

    IgniteCache<Integer, double[]> data = ignite.createCache(cacheCfg);

    // Row index doubles as the cache key.
    double[][] rows = {
        {1.0, 1.0, 1.0},
        {1.0, 2.0, 1.0},
        {2.0, 1.0, 1.0},
        {-1.0, -1.0, 2.0},
        {-1.0, -2.0, 2.0},
        {-2.0, -1.0, 2.0}
    };

    for (int key = 0; key < rows.length; key++) {
        data.put(key, rows[key]);
    }

    GmmTrainer trainer = new GmmTrainer(2, 1)
        .withInitialMeans(Arrays.asList(VectorUtils.of(1.0, 2.0), VectorUtils.of(-1.0, -2.0)));

    GmmModel model = trainer.fit(
        new CacheBasedDatasetBuilder<>(ignite, data),
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST)
    );

    Assert.assertEquals(2, model.countOfComponents());
    Assert.assertEquals(2, model.dimension());

    double[] expFirstMean = {1.33, 1.33};
    Assert.assertArrayEquals(expFirstMean, model.distributions().get(0).mean().asArray(), 1e-2);

    double[] expSecondMean = {-1.33, -1.33};
    Assert.assertArrayEquals(expSecondMean, model.distributions().get(1).mean().asArray(), 1e-2);
}
Use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in the Apache Ignite project.
Class CompoundNaiveBayesTest, method testLearnsAndPredictCorrectly.
/**
 * Checks that a compound naive Bayes model trained on the shared test data classifies two
 * sample observations as expected.
 *
 * <p>The compound trainer combines a Gaussian trainer, told to skip feature ids 3-7, with a
 * discrete trainer, told to skip feature ids 0-2. The training data, prior probabilities,
 * bucket thresholds, expected labels and precision all come from members declared outside
 * this method.
 */
@Test
public void testLearnsAndPredictCorrectly() {
    GaussianNaiveBayesTrainer gaussianTrainer = new GaussianNaiveBayesTrainer();

    DiscreteNaiveBayesTrainer discreteTrainer = new DiscreteNaiveBayesTrainer()
        .setBucketThresholds(binarizedDataThresholds);

    CompoundNaiveBayesTrainer trainer = new CompoundNaiveBayesTrainer()
        .withPriorProbabilities(classProbabilities)
        .withGaussianNaiveBayesTrainer(gaussianTrainer)
        .withGaussianFeatureIdsToSkip(asList(3, 4, 5, 6, 7))
        .withDiscreteNaiveBayesTrainer(discreteTrainer)
        .withDiscreteFeatureIdsToSkip(asList(0, 1, 2));

    CompoundNaiveBayesModel mdl = trainer.fit(
        new LocalDatasetBuilder<>(data, 2),
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST)
    );

    assertEquals(LABEL_1, mdl.predict(VectorUtils.of(5.92, 165, 10, 1, 1, 0, 0, 0)), PRECISION);
    assertEquals(LABEL_2, mdl.predict(VectorUtils.of(6, 130, 8, 1, 0, 1, 1, 0)), PRECISION);
}
Use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in the Apache Ignite project.
Class DiscreteNaiveBayesTest, method testLearnsAndPredictCorrently.
/**
 * Example from book Barber D. Bayesian reasoning and machine learning. Chapter 10.
 *
 * <p>The training set has 13 rows of five 0/1 features followed by a class label: six rows
 * labelled "english" (1.0) and seven labelled "scottish" (2.0). Each feature gets a single
 * bucket threshold of 0.5. The observation {@code (1, 0, 1, 1, 0)} is expected to be
 * classified as "scottish".
 */
@Test
public void testLearnsAndPredictCorrently() {
    double english = 1.;
    double scottish = 2.;

    // Row index doubles as the map key; the last element of every row is the label.
    double[][] rows = {
        {0, 0, 1, 1, 1, english},
        {1, 0, 1, 1, 0, english},
        {1, 1, 0, 0, 1, english},
        {1, 1, 0, 0, 0, english},
        {0, 1, 0, 0, 1, english},
        {0, 0, 0, 1, 0, english},
        {1, 0, 0, 1, 1, scottish},
        {1, 1, 0, 0, 1, scottish},
        {1, 1, 1, 1, 0, scottish},
        {1, 1, 0, 1, 0, scottish},
        {1, 1, 0, 1, 1, scottish},
        {1, 0, 1, 1, 0, scottish},
        {1, 0, 1, 0, 0, scottish}
    };

    Map<Integer, double[]> data = new HashMap<>();

    for (int key = 0; key < rows.length; key++) {
        data.put(key, rows[key]);
    }

    // One threshold per feature.
    double[][] thresholds = {{.5}, {.5}, {.5}, {.5}, {.5}};

    DiscreteNaiveBayesModel mdl = new DiscreteNaiveBayesTrainer()
        .setBucketThresholds(thresholds)
        .fit(
            new LocalDatasetBuilder<>(data, 2),
            new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST)
        );

    Assert.assertEquals(scottish, mdl.predict(VectorUtils.of(1, 0, 1, 1, 0)), PRECISION);
}
Aggregations