Search in sources :

Example 6 with PartitionDataBuilder

Use of org.apache.ignite.ml.dataset.PartitionDataBuilder in the Apache Ignite project.

From the class LearningEnvironmentTest, method testRandomNumbersGenerator.

/**
 * Verifies the random number generator exposed by {@link LearningEnvironment}.
 * Two properties are checked:
 * 1. Every partition is handed the generator that belongs to it.
 * 2. The generator keeps its state across compute calls (which is why compute is invoked several times).
 */
@Test
public void testRandomNumbersGenerator() {
    final int partsCnt = 10;
    final int iterCnt = 2;

    // The environment is built with MockRandom as the random dependency; the assertion at the end expects
    // the value observed for each partition to be (partition index * number of iterations).
    LearningEnvironmentBuilder envBuilder = TestUtils.testEnvBuilder().withRandomDependency(MockRandom::new);

    DatasetTrainer<IgniteModel<Object, Vector>, Void> trainer = new DatasetTrainer<IgniteModel<Object, Vector>, Void>() {
        /** {@inheritDoc} */
        @Override
        public <K, V> IgniteModel<Object, Vector> fitWithInitializedDeployingContext(DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> preprocessor) {
            // Partition data is just the partition index wrapped into a DataWrapper.
            PartitionDataBuilder<K, V, EmptyContext, TestUtils.DataWrapper<Integer>> partDataBuilder =
                (env, upstreamData, upstreamDataSize, ctx) -> TestUtils.DataWrapper.of(env.partition());

            Dataset<EmptyContext, TestUtils.DataWrapper<Integer>> dataset = datasetBuilder.build(
                envBuilder,
                new EmptyContextBuilder<>(),
                partDataBuilder,
                envBuilder.buildForTrainer());

            // Only the result of the last compute call is kept and wrapped into the returned model.
            Vector lastRes = null;
            int iter = 0;
            while (iter < iterCnt) {
                lastRes = dataset.compute(
                    // Map: a vector filled with -1 except for this partition's slot, which gets the next random int.
                    (data, locEnv) -> VectorUtils.fill(-1, partsCnt)
                        .set(locEnv.partition(), locEnv.randomNumbersGenerator().nextInt()),
                    // Reduce: combine the per-partition vectors, with -1 passed as the "empty" marker.
                    (left, right) -> zipOverridingEmpty(left, right, -1));
                iter++;
            }

            return constantModel(lastRes);
        }

        /** {@inheritDoc} */
        @Override
        public boolean isUpdateable(IgniteModel<Object, Vector> mdl) {
            return false;
        }

        /** {@inheritDoc} */
        @Override
        protected <K, V> IgniteModel<Object, Vector> updateModel(IgniteModel<Object, Vector> mdl, DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> preprocessor) {
            return null;
        }
    };

    trainer.withEnvironmentBuilder(envBuilder);

    IgniteModel<Object, Vector> trainedMdl = trainer.fit(getCacheMock(partsCnt), partsCnt, null);

    // Expected value at index p is p * iterCnt.
    Vector expected = VectorUtils.zeroes(partsCnt);
    for (int part = 0; part < partsCnt; part++) {
        expected.set(part, part * iterCnt);
    }

    Vector actual = trainedMdl.predict(null);

    assertEquals(expected, actual);
}
Also used : IntStream(java.util.stream.IntStream) TestUtils(org.apache.ignite.ml.TestUtils) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) Random(java.util.Random) DatasetTrainer(org.apache.ignite.ml.trainers.DatasetTrainer) ParallelismStrategy(org.apache.ignite.ml.environment.parallelism.ParallelismStrategy) FeatureMeta(org.apache.ignite.ml.dataset.feature.FeatureMeta) RandomForestRegressionTrainer(org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer) Map(java.util.Map) EmptyContextBuilder(org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilder) MLLogger(org.apache.ignite.ml.environment.logging.MLLogger) PartitionDataBuilder(org.apache.ignite.ml.dataset.PartitionDataBuilder) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) ConsoleLogger(org.apache.ignite.ml.environment.logging.ConsoleLogger) Test(org.junit.Test) FeaturesCountSelectionStrategies(org.apache.ignite.ml.tree.randomforest.data.FeaturesCountSelectionStrategies) IgniteModel(org.apache.ignite.ml.IgniteModel) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) Collectors(java.util.stream.Collectors) VectorUtils(org.apache.ignite.ml.math.primitives.vector.VectorUtils) Dataset(org.apache.ignite.ml.dataset.Dataset) DefaultParallelismStrategy(org.apache.ignite.ml.environment.parallelism.DefaultParallelismStrategy) TestUtils.constantModel(org.apache.ignite.ml.TestUtils.constantModel) Assert.assertEquals(org.junit.Assert.assertEquals) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) DatasetTrainer(org.apache.ignite.ml.trainers.DatasetTrainer) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) IgniteModel(org.apache.ignite.ml.IgniteModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Test(org.junit.Test)

Aggregations

PartitionDataBuilder (org.apache.ignite.ml.dataset.PartitionDataBuilder)6 DatasetBuilder (org.apache.ignite.ml.dataset.DatasetBuilder)5 Serializable (java.io.Serializable)4 Dataset (org.apache.ignite.ml.dataset.Dataset)4 EmptyContext (org.apache.ignite.ml.dataset.primitive.context.EmptyContext)4 Preprocessor (org.apache.ignite.ml.preprocessing.Preprocessor)4 Map (java.util.Map)3 Collectors (java.util.stream.Collectors)3 UpstreamEntry (org.apache.ignite.ml.dataset.UpstreamEntry)3 LearningEnvironmentBuilder (org.apache.ignite.ml.environment.LearningEnvironmentBuilder)3 IgniteFunction (org.apache.ignite.ml.math.functions.IgniteFunction)3 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)3 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 Iterator (java.util.Iterator)2 List (java.util.List)2 Random (java.util.Random)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 IntStream (java.util.stream.IntStream)2 IgniteBiPredicate (org.apache.ignite.lang.IgniteBiPredicate)2