Search in sources :

Example 1 with LearningEnvironment

use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.

the class KNNUtils method buildDataset.

/**
 * Builds a dataset of labeled vector sets from the given upstream dataset builder.
 *
 * <p>The trainer learning environment is built from {@code envBuilder} and its deploying context is initialized
 * with {@code vectorizer} before {@code datasetBuilder} is inspected, so both steps happen even when
 * {@code datasetBuilder} is {@code null}.
 *
 * @param envBuilder Learning environment builder.
 * @param datasetBuilder Dataset builder; may be {@code null}.
 * @param vectorizer Upstream vectorizer.
 * @param <K> Key type shared by the dataset builder and the vectorizer.
 * @param <V> Value type shared by the dataset builder and the vectorizer.
 * @param <C> Not referenced by the parameters or the return type of this method.
 * @return Dataset, or {@code null} when {@code datasetBuilder} is {@code null}.
 */
@Nullable
public static <K, V, C extends Serializable> Dataset<EmptyContext, LabeledVectorSet<LabeledVector>> buildDataset(LearningEnvironmentBuilder envBuilder, DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> vectorizer) {
    LearningEnvironment trainerEnv = envBuilder.buildForTrainer();
    trainerEnv.initDeployingContext(vectorizer);

    PartitionDataBuilder<K, V, EmptyContext, LabeledVectorSet<LabeledVector>> dataBuilder = new LabeledDatasetPartitionDataBuilderOnHeap<>(vectorizer);

    // Nothing to build from: report the absence of a dataset to the caller.
    if (datasetBuilder == null) {
        return null;
    }

    // The partition context carries no information, so every partition gets a fresh EmptyContext.
    return datasetBuilder.build(envBuilder, (env, upstream, upstreamSize) -> new EmptyContext(), dataBuilder, trainerEnv);
}
Also used : LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) Nullable(org.jetbrains.annotations.Nullable) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) Dataset(org.apache.ignite.ml.dataset.Dataset) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) PartitionDataBuilder(org.apache.ignite.ml.dataset.PartitionDataBuilder) LearningEnvironmentBuilder(org.apache.ignite.ml.environment.LearningEnvironmentBuilder) Serializable(java.io.Serializable) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) LabeledDatasetPartitionDataBuilderOnHeap(org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) LabeledDatasetPartitionDataBuilderOnHeap(org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) Nullable(org.jetbrains.annotations.Nullable)

Example 2 with LearningEnvironment

use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.

the class CacheBasedDataset method computeWithCtx.

/**
 * {@inheritDoc}
 */
@Override
public <R> R computeWithCtx(IgniteTriFunction<C, D, LearningEnvironment, R> map, IgniteBinaryOperator<R> reduce, R identity) {
    // Cache names are read once here, outside the per-partition closure.
    String upstreamName = upstreamCache.getName();
    String datasetName = datasetCache.getName();

    return computeForAllPartitions(part -> {
        LearningEnvironment partEnv = ComputeUtils.getLearningEnvironment(ignite, datasetId, part, envBuilder);
        C partCtx = ComputeUtils.getContext(Ignition.localIgnite(), datasetName, part);
        D partData = ComputeUtils.getData(Ignition.localIgnite(), upstreamName, filter, upstreamTransformerBuilder, datasetName, datasetId, partDataBuilder, partEnv, upstreamKeepBinary);

        // Without partition data the map function is not invoked and the context is not re-saved.
        if (partData == null) {
            return null;
        }

        R mapped = map.apply(partCtx, partData, partEnv);

        // Saves partition context after update.
        ComputeUtils.saveContext(Ignition.localIgnite(), datasetName, part, partCtx);

        return mapped;
    }, reduce, identity);
}
Also used : LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) UUID(java.util.UUID)

Example 3 with LearningEnvironment

use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.

the class DatasetFactory method create.

/**
 * Creates a new instance of distributed dataset using the specified {@code partCtxBuilder} and {@code
 * partDataBuilder}. This is the generic method that allows creating any Ignite Cache based dataset with any
 * desired partition {@code context} and {@code data}.
 *
 * <p>The trainer environment is built from a default learning environment builder and its deploying context is
 * initialized from {@code partDataBuilder}; a separately obtained default builder is passed on to
 * {@code datasetBuilder}.
 *
 * @param datasetBuilder Dataset builder.
 * @param partCtxBuilder Partition {@code context} builder.
 * @param partDataBuilder Partition {@code data} builder.
 * @param <K> Type of a key in {@code upstream} data.
 * @param <V> Type of a value in {@code upstream} data.
 * @param <C> Type of a partition {@code context}.
 * @param <D> Type of a partition {@code data}.
 * @return Dataset.
 */
public static <K, V, C extends Serializable, D extends AutoCloseable> Dataset<C, D> create(DatasetBuilder<K, V> datasetBuilder, PartitionContextBuilder<K, V, C> partCtxBuilder, PartitionDataBuilder<K, V, C, D> partDataBuilder) {
    LearningEnvironment trainerEnv = LearningEnvironmentBuilder.defaultBuilder().buildForTrainer();
    trainerEnv.deployingContext().initByClientObject(partDataBuilder);

    // Deliberately a second defaultBuilder() call: the builder used above is not reused.
    LearningEnvironmentBuilder workerEnvBuilder = LearningEnvironmentBuilder.defaultBuilder();

    return datasetBuilder.build(workerEnvBuilder, partCtxBuilder, partDataBuilder, trainerEnv);
}
Also used : LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment)

Example 4 with LearningEnvironment

use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.

the class DataStreamGeneratorTest method testAsDatasetBuilder.

/**
 * Exercises three {@code asDatasetBuilder} variants of a generator whose labels alternate between 0 and 1:
 * without a filter, with a filter keeping label 0, and with a filter keeping label 1 combined with an upstream
 * transformer. The last case is expected to yield negative labels, presumably because
 * {@code UpstreamTransformerForTest} negates them (defined outside this method; confirm there).
 */
@Test
public void testAsDatasetBuilder() throws Exception {
    AtomicInteger seq = new AtomicInteger();
    DataStreamGenerator gen = new DataStreamGenerator() {

        @Override
        public Stream<LabeledVector<Double>> labeled() {
            return Stream.generate(() -> {
                int idx = seq.getAndIncrement();
                // Label is the parity of the running index.
                double lb = (double) idx % 2;
                return new LabeledVector<>(VectorUtils.of(idx), lb);
            });
        }
    };

    int size = 100;

    // The counter is reset before each builder is created.
    seq.set(0);
    DatasetBuilder<Vector, Double> unfiltered = gen.asDatasetBuilder(size, 2);

    seq.set(0);
    DatasetBuilder<Vector, Double> zeroLabelsOnly = gen.asDatasetBuilder(size, (v, l) -> l == 0, 2);

    seq.set(0);
    DatasetBuilder<Vector, Double> oneLabelsTransformed = gen.asDatasetBuilder(size, (v, l) -> l == 1, 2, new UpstreamTransformerBuilder() {

        @Override
        public UpstreamTransformer build(LearningEnvironment env) {
            return new UpstreamTransformerForTest();
        }
    });

    checkDataset(size, unfiltered, v -> (Double) v.label() == 0 || (Double) v.label() == 1);
    checkDataset(size / 2, zeroLabelsOnly, v -> (Double) v.label() == 0);
    checkDataset(size / 2, oneLabelsTransformed, v -> (Double) v.label() < 0);
}
Also used : LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) UpstreamTransformer(org.apache.ignite.ml.dataset.UpstreamTransformer) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) UpstreamTransformerBuilder(org.apache.ignite.ml.dataset.UpstreamTransformerBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Test(org.junit.Test)

Example 5 with LearningEnvironment

use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.

the class ComputeUtils method initContext.

/**
 * Initializes partition {@code context} by loading it from a partition {@code upstream}.
 *
 * <p>For every partition the closure passed to {@code affinityCallWithRetries} builds a worker environment,
 * runs a local, partition-restricted scan query over the {@code upstream} cache, passes the entries through the
 * upstream transformer, builds the context with {@code ctxBuilder} and puts it into the dataset cache under the
 * partition index.
 *
 * @param ignite Ignite instance.
 * @param upstreamCacheName Name of an {@code upstream} cache.
 * @param transformerBuilder Upstream transformer builder.
 * @param filter Filter for {@code upstream} data.
 * @param datasetCacheName Name of the cache the built partition {@code context} is put into.
 * @param ctxBuilder Partition {@code context} builder.
 * @param envBuilder Environment builder.
 * @param retries Forwarded to {@code affinityCallWithRetries}; presumably the number of retry attempts.
 * @param interval Forwarded to {@code affinityCallWithRetries}; presumably the pause between retries.
 * @param isKeepBinary Support of binary objects.
 * @param deployingCtx Deploy context.
 * @param <K> Type of a key in {@code upstream} data.
 * @param <V> Type of a value in {@code upstream} data.
 * @param <C> Type of a partition {@code context}.
 */
public static <K, V, C extends Serializable> void initContext(Ignite ignite, String upstreamCacheName, UpstreamTransformerBuilder transformerBuilder, IgniteBiPredicate<K, V> filter, String datasetCacheName, PartitionContextBuilder<K, V, C> ctxBuilder, LearningEnvironmentBuilder envBuilder, int retries, int interval, boolean isKeepBinary, DeployingContext deployingCtx) {
    affinityCallWithRetries(ignite, Arrays.asList(datasetCacheName, upstreamCacheName), part -> {
        Ignite node = Ignition.localIgnite();
        LearningEnvironment workerEnv = envBuilder.buildForWorker(part);

        IgniteCache<K, V> upstream = node.cache(upstreamCacheName);
        if (isKeepBinary) {
            upstream = upstream.withKeepBinary();
        }

        // Scan only the local copy of this partition, applying the caller's filter.
        ScanQuery<K, V> scan = new ScanQuery<>();
        scan.setLocal(true);
        scan.setPartition(part);
        scan.setFilter(filter);

        // One transformer instance is used for counting, an independent copy for the actual pass.
        UpstreamTransformer countingTransformer = transformerBuilder.build(workerEnv);
        UpstreamTransformer streamingTransformer = Utils.copy(countingTransformer);
        long total = computeCount(upstream, scan, countingTransformer);

        C partCtx;
        try (QueryCursor<UpstreamEntry<K, V>> cur = upstream.query(scan, e -> new UpstreamEntry<>(e.getKey(), e.getValue()))) {
            Iterator<UpstreamEntry<K, V>> rawEntries = cur.iterator();
            Stream<UpstreamEntry> transformed = streamingTransformer.transform(Utils.asStream(rawEntries, total).map(x -> (UpstreamEntry) x));
            Iterator<UpstreamEntry<K, V>> typedEntries = Utils.asStream(transformed.iterator()).map(x -> (UpstreamEntry<K, V>) x).iterator();
            Iterator<UpstreamEntry<K, V>> guardedEntries = new IteratorWithConcurrentModificationChecker<>(typedEntries, total, "Cache expected to be not modified during dataset data building [partition=" + part + ']');
            partCtx = ctxBuilder.build(workerEnv, guardedEntries, total);
        }

        // Store the freshly built context under the partition index.
        IgniteCache<Integer, C> ctxCache = node.cache(datasetCacheName);
        ctxCache.put(part, partCtx);

        return part;
    }, retries, interval, deployingCtx);
}
Also used : UpstreamTransformer(org.apache.ignite.ml.dataset.UpstreamTransformer) Arrays(java.util.Arrays) DeployingContext(org.apache.ignite.ml.environment.deploy.DeployingContext) IgniteBiPredicate(org.apache.ignite.lang.IgniteBiPredicate) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Affinity(org.apache.ignite.cache.affinity.Affinity) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) IgniteCallable(org.apache.ignite.lang.IgniteCallable) PartitionContextBuilder(org.apache.ignite.ml.dataset.PartitionContextBuilder) LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) Map(java.util.Map) PartitionDataBuilder(org.apache.ignite.ml.dataset.PartitionDataBuilder) UpstreamTransformerBuilder(org.apache.ignite.ml.dataset.UpstreamTransformerBuilder) LearningEnvironmentBuilder(org.apache.ignite.ml.environment.LearningEnvironmentBuilder) UpstreamEntry(org.apache.ignite.ml.dataset.UpstreamEntry) ClusterGroup(org.apache.ignite.cluster.ClusterGroup) IgniteFuture(org.apache.ignite.lang.IgniteFuture) Iterator(java.util.Iterator) Collection(java.util.Collection) IgniteException(org.apache.ignite.IgniteException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) UUID(java.util.UUID) Ignite(org.apache.ignite.Ignite) IgniteCache(org.apache.ignite.IgniteCache) Serializable(java.io.Serializable) LockSupport(java.util.concurrent.locks.LockSupport) Stream(java.util.stream.Stream) Ignition(org.apache.ignite.Ignition) Utils(org.apache.ignite.ml.util.Utils) QueryCursor(org.apache.ignite.cache.query.QueryCursor) BitSet(java.util.BitSet) ScanQuery(org.apache.ignite.cache.query.ScanQuery) GridPeerDeployAware(org.apache.ignite.internal.util.lang.GridPeerDeployAware) ScanQuery(org.apache.ignite.cache.query.ScanQuery) LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) UpstreamTransformer(org.apache.ignite.ml.dataset.UpstreamTransformer) Ignite(org.apache.ignite.Ignite) 
UpstreamEntry(org.apache.ignite.ml.dataset.UpstreamEntry)

Aggregations

LearningEnvironment (org.apache.ignite.ml.environment.LearningEnvironment)18 LearningEnvironmentBuilder (org.apache.ignite.ml.environment.LearningEnvironmentBuilder)6 UUID (java.util.UUID)5 Serializable (java.io.Serializable)4 PartitionDataBuilder (org.apache.ignite.ml.dataset.PartitionDataBuilder)4 IgniteFunction (org.apache.ignite.ml.math.functions.IgniteFunction)4 ArrayList (java.util.ArrayList)3 Arrays (java.util.Arrays)3 Iterator (java.util.Iterator)3 Map (java.util.Map)3 Ignite (org.apache.ignite.Ignite)3 IgniteCache (org.apache.ignite.IgniteCache)3 Ignition (org.apache.ignite.Ignition)3 IgniteBiPredicate (org.apache.ignite.lang.IgniteBiPredicate)3 PartitionContextBuilder (org.apache.ignite.ml.dataset.PartitionContextBuilder)3 UpstreamTransformer (org.apache.ignite.ml.dataset.UpstreamTransformer)3 UpstreamTransformerBuilder (org.apache.ignite.ml.dataset.UpstreamTransformerBuilder)3 EmptyContext (org.apache.ignite.ml.dataset.primitive.context.EmptyContext)3 BitSet (java.util.BitSet)2 Collection (java.util.Collection)2