Search in sources:

Example 1 with UpstreamTransformerBuilder

Use of org.apache.ignite.ml.dataset.UpstreamTransformerBuilder in the Apache Ignite project.

Class DataStreamGeneratorTest, method testAsDatasetBuilder.

/**
 * Builds three dataset builders from one generator (unfiltered, filtered by label {@code 0}, and filtered by
 * label {@code 1} with an upstream transformer attached) and passes each of them to {@code checkDataset}
 * together with the expected dataset size and a predicate over the resulting labels.
 *
 * @throws Exception Declared by the test signature; propagated from the calls made in the body.
 */
@Test
public void testAsDatasetBuilder() throws Exception {
    AtomicInteger counter = new AtomicInteger();
    DataStreamGenerator gen = new DataStreamGenerator() {

        @Override
        public Stream<LabeledVector<Double>> labeled() {
            // Every generated vector carries the current counter value; the label is that value modulo 2.
            return Stream.generate(() -> {
                int cur = counter.getAndIncrement();
                return new LabeledVector<>(VectorUtils.of(cur), (double) cur % 2);
            });
        }
    };
    int size = 100;

    // The counter is reset before each builder is created so that all three start from the same state.
    counter.set(0);
    DatasetBuilder<Vector, Double> unfiltered = gen.asDatasetBuilder(size, 2);

    counter.set(0);
    DatasetBuilder<Vector, Double> zeroLabels = gen.asDatasetBuilder(size, (v, l) -> l == 0, 2);

    counter.set(0);
    UpstreamTransformerBuilder testTransformerBuilder = new UpstreamTransformerBuilder() {

        @Override
        public UpstreamTransformer build(LearningEnvironment env) {
            return new UpstreamTransformerForTest();
        }
    };
    DatasetBuilder<Vector, Double> transformedOneLabels = gen.asDatasetBuilder(size, (v, l) -> l == 1, 2, testTransformerBuilder);

    checkDataset(size, unfiltered, v -> (Double) v.label() == 0 || (Double) v.label() == 1);
    checkDataset(size / 2, zeroLabels, v -> (Double) v.label() == 0);
    // NOTE(review): the "< 0" expectation presumably relies on UpstreamTransformerForTest changing the sign
    // of the labels; that class is not visible here - confirm.
    checkDataset(size / 2, transformedOneLabels, v -> (Double) v.label() < 0);
}
Also used : LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) UpstreamTransformer(org.apache.ignite.ml.dataset.UpstreamTransformer) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) UpstreamTransformerBuilder(org.apache.ignite.ml.dataset.UpstreamTransformerBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Test(org.junit.Test)

Example 2 with UpstreamTransformerBuilder

Use of org.apache.ignite.ml.dataset.UpstreamTransformerBuilder in the Apache Ignite project.

Class ComputeUtils, method initContext.

/**
 * Builds the partition {@code context} from the partition's {@code upstream} data and stores it in the
 * dataset cache under the partition index. The work is submitted through {@code affinityCallWithRetries}
 * for the dataset and upstream caches.
 *
 * @param ignite Ignite instance.
 * @param upstreamCacheName Name of an {@code upstream} cache.
 * @param transformerBuilder Upstream transformer builder.
 * @param filter Filter for {@code upstream} data.
 * @param datasetCacheName Name of the cache the built partition {@code context} is put into.
 * @param ctxBuilder Partition {@code context} builder.
 * @param envBuilder Environment builder.
 * @param retries Forwarded unchanged to {@code affinityCallWithRetries}.
 * @param interval Forwarded unchanged to {@code affinityCallWithRetries}.
 * @param isKeepBinary If {@code true}, the upstream cache is accessed through {@code withKeepBinary()}.
 * @param deployingCtx Deploy context, forwarded to {@code affinityCallWithRetries}.
 * @param <K> Type of a key in {@code upstream} data.
 * @param <V> Type of a value in {@code upstream} data.
 * @param <C> Type of a partition {@code context}.
 */
public static <K, V, C extends Serializable> void initContext(Ignite ignite, String upstreamCacheName, UpstreamTransformerBuilder transformerBuilder, IgniteBiPredicate<K, V> filter, String datasetCacheName, PartitionContextBuilder<K, V, C> ctxBuilder, LearningEnvironmentBuilder envBuilder, int retries, int interval, boolean isKeepBinary, DeployingContext deployingCtx) {
    affinityCallWithRetries(ignite, Arrays.asList(datasetCacheName, upstreamCacheName), partIdx -> {
        Ignite node = Ignition.localIgnite();
        LearningEnvironment workerEnv = envBuilder.buildForWorker(partIdx);

        IgniteCache<K, V> upstream = node.cache(upstreamCacheName);
        if (isKeepBinary) {
            upstream = upstream.withKeepBinary();
        }

        // Local scan restricted to this partition and to the caller-supplied filter.
        ScanQuery<K, V> scan = new ScanQuery<>();
        scan.setLocal(true);
        scan.setPartition(partIdx);
        scan.setFilter(filter);

        // The copy is taken before the count is computed: one transformer instance is handed to
        // computeCount, the other one is applied to the stream that feeds the context builder.
        UpstreamTransformer countingTransformer = transformerBuilder.build(workerEnv);
        UpstreamTransformer buildingTransformer = Utils.copy(countingTransformer);
        long entryCnt = computeCount(upstream, scan, countingTransformer);

        C partCtx;
        try (QueryCursor<UpstreamEntry<K, V>> cur = upstream.query(scan, e -> new UpstreamEntry<>(e.getKey(), e.getValue()))) {
            Iterator<UpstreamEntry<K, V>> rawEntries = cur.iterator();
            Stream<UpstreamEntry> transformed = buildingTransformer.transform(Utils.asStream(rawEntries, entryCnt).map(x -> (UpstreamEntry) x));
            Iterator<UpstreamEntry<K, V>> typedEntries = Utils.asStream(transformed.iterator()).map(x -> (UpstreamEntry<K, V>) x).iterator();
            Iterator<UpstreamEntry<K, V>> checkedEntries = new IteratorWithConcurrentModificationChecker<>(typedEntries, entryCnt, "Cache expected to be not modified during dataset data building [partition=" + partIdx + ']');
            partCtx = ctxBuilder.build(workerEnv, checkedEntries, entryCnt);
        }

        IgniteCache<Integer, C> ctxCache = node.cache(datasetCacheName);
        ctxCache.put(partIdx, partCtx);
        return partIdx;
    }, retries, interval, deployingCtx);
}
Also used : UpstreamTransformer(org.apache.ignite.ml.dataset.UpstreamTransformer) Arrays(java.util.Arrays) DeployingContext(org.apache.ignite.ml.environment.deploy.DeployingContext) IgniteBiPredicate(org.apache.ignite.lang.IgniteBiPredicate) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Affinity(org.apache.ignite.cache.affinity.Affinity) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) IgniteCallable(org.apache.ignite.lang.IgniteCallable) PartitionContextBuilder(org.apache.ignite.ml.dataset.PartitionContextBuilder) LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) Map(java.util.Map) PartitionDataBuilder(org.apache.ignite.ml.dataset.PartitionDataBuilder) UpstreamTransformerBuilder(org.apache.ignite.ml.dataset.UpstreamTransformerBuilder) LearningEnvironmentBuilder(org.apache.ignite.ml.environment.LearningEnvironmentBuilder) UpstreamEntry(org.apache.ignite.ml.dataset.UpstreamEntry) ClusterGroup(org.apache.ignite.cluster.ClusterGroup) IgniteFuture(org.apache.ignite.lang.IgniteFuture) Iterator(java.util.Iterator) Collection(java.util.Collection) IgniteException(org.apache.ignite.IgniteException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) UUID(java.util.UUID) Ignite(org.apache.ignite.Ignite) IgniteCache(org.apache.ignite.IgniteCache) Serializable(java.io.Serializable) LockSupport(java.util.concurrent.locks.LockSupport) Stream(java.util.stream.Stream) Ignition(org.apache.ignite.Ignition) Utils(org.apache.ignite.ml.util.Utils) QueryCursor(org.apache.ignite.cache.query.QueryCursor) BitSet(java.util.BitSet) ScanQuery(org.apache.ignite.cache.query.ScanQuery) GridPeerDeployAware(org.apache.ignite.internal.util.lang.GridPeerDeployAware) ScanQuery(org.apache.ignite.cache.query.ScanQuery) LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) UpstreamTransformer(org.apache.ignite.ml.dataset.UpstreamTransformer) Ignite(org.apache.ignite.Ignite) 
UpstreamEntry(org.apache.ignite.ml.dataset.UpstreamEntry)

Example 3 with UpstreamTransformerBuilder

Use of org.apache.ignite.ml.dataset.UpstreamTransformerBuilder in the Apache Ignite project.

Class ComputeUtils, method getData.

/**
 * Returns partition {@code data} from the node-local storage; if it is not there yet, rebuilds it from the
 * partition {@code upstream} and the partition {@code context}. Be aware that this method should be called
 * from the node where the partition is placed.
 *
 * @param ignite Ignite instance.
 * @param upstreamCacheName Name of an {@code upstream} cache.
 * @param filter Filter for {@code upstream} data.
 * @param transformerBuilder Builder of upstream transformers.
 * @param datasetCacheName Name of a partition {@code context} cache.
 * @param datasetId Dataset ID.
 * @param partDataBuilder Partition data builder.
 * @param env Learning environment; its {@code partition()} value selects the partition to work on.
 * @param isKeepBinary If {@code true}, the upstream cache is accessed through {@code withKeepBinary()}.
 * @param <K> Type of a key in {@code upstream} data.
 * @param <V> Type of a value in {@code upstream} data.
 * @param <C> Type of a partition {@code context}.
 * @param <D> Type of a partition {@code data}.
 * @return Partition {@code data}; the rebuild step yields {@code null} when the computed upstream entry
 *      count is not positive.
 */
public static <K, V, C extends Serializable, D extends AutoCloseable> D getData(Ignite ignite, String upstreamCacheName, IgniteBiPredicate<K, V> filter, UpstreamTransformerBuilder transformerBuilder, String datasetCacheName, UUID datasetId, PartitionDataBuilder<K, V, C, D> partDataBuilder, LearningEnvironment env, boolean isKeepBinary) {
    String storageKey = String.format(DATA_STORAGE_KEY_TEMPLATE, datasetId);
    PartitionDataStorage storage = (PartitionDataStorage) ignite.cluster().nodeLocalMap().computeIfAbsent(storageKey, key -> new PartitionDataStorage());
    final int partIdx = env.partition();

    return storage.computeDataIfAbsent(partIdx, () -> {
        IgniteCache<Integer, C> ctxCache = ignite.cache(datasetCacheName);
        C partCtx = ctxCache.get(partIdx);

        IgniteCache<K, V> upstream = ignite.cache(upstreamCacheName);
        if (isKeepBinary) {
            upstream = upstream.withKeepBinary();
        }

        // Local scan restricted to this partition and to the caller-supplied filter.
        ScanQuery<K, V> scan = new ScanQuery<>();
        scan.setLocal(true);
        scan.setPartition(partIdx);
        scan.setFilter(filter);

        // The copy is taken before the count is computed: one transformer instance is handed to
        // computeCount, the other one is applied to the stream that feeds the data builder.
        UpstreamTransformer countingTransformer = transformerBuilder.build(env);
        UpstreamTransformer buildingTransformer = Utils.copy(countingTransformer);
        long entryCnt = computeCount(upstream, scan, countingTransformer);

        // Nothing to build from when no entries were counted.
        if (entryCnt <= 0) {
            return null;
        }

        try (QueryCursor<UpstreamEntry<K, V>> cur = upstream.query(scan, e -> new UpstreamEntry<>(e.getKey(), e.getValue()))) {
            Iterator<UpstreamEntry<K, V>> rawEntries = cur.iterator();
            Stream<UpstreamEntry> transformed = buildingTransformer.transform(Utils.asStream(rawEntries, entryCnt).map(x -> (UpstreamEntry) x));
            Iterator<UpstreamEntry<K, V>> typedEntries = Utils.asStream(transformed.iterator()).map(x -> (UpstreamEntry<K, V>) x).iterator();
            Iterator<UpstreamEntry<K, V>> checkedEntries = new IteratorWithConcurrentModificationChecker<>(typedEntries, entryCnt, "Cache expected to be not modified during dataset data building [partition=" + partIdx + ']');
            return partDataBuilder.build(env, checkedEntries, entryCnt, partCtx);
        }
    });
}
Also used : UpstreamTransformer(org.apache.ignite.ml.dataset.UpstreamTransformer) Arrays(java.util.Arrays) DeployingContext(org.apache.ignite.ml.environment.deploy.DeployingContext) IgniteBiPredicate(org.apache.ignite.lang.IgniteBiPredicate) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Affinity(org.apache.ignite.cache.affinity.Affinity) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) IgniteCallable(org.apache.ignite.lang.IgniteCallable) PartitionContextBuilder(org.apache.ignite.ml.dataset.PartitionContextBuilder) LearningEnvironment(org.apache.ignite.ml.environment.LearningEnvironment) Map(java.util.Map) PartitionDataBuilder(org.apache.ignite.ml.dataset.PartitionDataBuilder) UpstreamTransformerBuilder(org.apache.ignite.ml.dataset.UpstreamTransformerBuilder) LearningEnvironmentBuilder(org.apache.ignite.ml.environment.LearningEnvironmentBuilder) UpstreamEntry(org.apache.ignite.ml.dataset.UpstreamEntry) ClusterGroup(org.apache.ignite.cluster.ClusterGroup) IgniteFuture(org.apache.ignite.lang.IgniteFuture) Iterator(java.util.Iterator) Collection(java.util.Collection) IgniteException(org.apache.ignite.IgniteException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) UUID(java.util.UUID) Ignite(org.apache.ignite.Ignite) IgniteCache(org.apache.ignite.IgniteCache) Serializable(java.io.Serializable) LockSupport(java.util.concurrent.locks.LockSupport) Stream(java.util.stream.Stream) Ignition(org.apache.ignite.Ignition) Utils(org.apache.ignite.ml.util.Utils) QueryCursor(org.apache.ignite.cache.query.QueryCursor) BitSet(java.util.BitSet) ScanQuery(org.apache.ignite.cache.query.ScanQuery) GridPeerDeployAware(org.apache.ignite.internal.util.lang.GridPeerDeployAware) ScanQuery(org.apache.ignite.cache.query.ScanQuery) UpstreamTransformer(org.apache.ignite.ml.dataset.UpstreamTransformer) UpstreamEntry(org.apache.ignite.ml.dataset.UpstreamEntry)

Aggregations

UpstreamTransformer (org.apache.ignite.ml.dataset.UpstreamTransformer)3 UpstreamTransformerBuilder (org.apache.ignite.ml.dataset.UpstreamTransformerBuilder)3 LearningEnvironment (org.apache.ignite.ml.environment.LearningEnvironment)3 Serializable (java.io.Serializable)2 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 BitSet (java.util.BitSet)2 Collection (java.util.Collection)2 HashMap (java.util.HashMap)2 Iterator (java.util.Iterator)2 Map (java.util.Map)2 UUID (java.util.UUID)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 ConcurrentMap (java.util.concurrent.ConcurrentMap)2 LockSupport (java.util.concurrent.locks.LockSupport)2 Stream (java.util.stream.Stream)2 Ignite (org.apache.ignite.Ignite)2 IgniteCache (org.apache.ignite.IgniteCache)2 IgniteException (org.apache.ignite.IgniteException)2 Ignition (org.apache.ignite.Ignition)2