Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache:
the class KNNUtils, method buildDataset.
/**
 * Builds a labeled dataset on top of the given dataset builder.
 *
 * @param envBuilder Learning environment builder.
 * @param datasetBuilder Dataset builder.
 * @param vectorizer Upstream vectorizer.
 * @return Dataset, or {@code null} if {@code datasetBuilder} is {@code null}.
 */
@Nullable
public static <K, V, C extends Serializable> Dataset<EmptyContext, LabeledVectorSet<LabeledVector>> buildDataset(
    LearningEnvironmentBuilder envBuilder,
    DatasetBuilder<K, V> datasetBuilder,
    Preprocessor<K, V> vectorizer) {
    // Build the trainer-side environment and register the vectorizer for deployment.
    LearningEnvironment env = envBuilder.buildForTrainer();
    env.initDeployingContext(vectorizer);

    // Partition data builder that materializes labeled vectors on heap.
    PartitionDataBuilder<K, V, EmptyContext, LabeledVectorSet<LabeledVector>> dataBuilder =
        new LabeledDatasetPartitionDataBuilderOnHeap<>(vectorizer);

    return datasetBuilder == null
        ? null
        : datasetBuilder.build(envBuilder, (e, upstream, upstreamSize) -> new EmptyContext(), dataBuilder, env);
}
Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache:
the class CacheBasedDataset, method computeWithCtx.
/** {@inheritDoc} */
@Override
public <R> R computeWithCtx(IgniteTriFunction<C, D, LearningEnvironment, R> map, IgniteBinaryOperator<R> reduce, R identity) {
    String upstreamName = upstreamCache.getName();
    String datasetName = datasetCache.getName();

    return computeForAllPartitions(part -> {
        // Per-partition learning environment for this dataset.
        LearningEnvironment env = ComputeUtils.getLearningEnvironment(ignite, datasetId, part, envBuilder);

        Ignite locIgnite = Ignition.localIgnite();

        C ctx = ComputeUtils.getContext(locIgnite, datasetName, part);
        D data = ComputeUtils.getData(locIgnite, upstreamName, filter, upstreamTransformerBuilder, datasetName,
            datasetId, partDataBuilder, env, upstreamKeepBinary);

        // No data for this partition — nothing to compute.
        if (data == null)
            return null;

        R res = map.apply(ctx, data, env);

        // Saves partition context after update.
        ComputeUtils.saveContext(locIgnite, datasetName, part, ctx);

        return res;
    }, reduce, identity);
}
Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache:
the class DatasetFactory, method create.
/**
 * Creates a new instance of distributed dataset using the specified {@code partCtxBuilder} and {@code
 * partDataBuilder}. This is the generic methods that allows to create any Ignite Cache based datasets with any
 * desired partition {@code context} and {@code data}.
 *
 * @param datasetBuilder Dataset builder.
 * @param partCtxBuilder Partition {@code context} builder.
 * @param partDataBuilder Partition {@code data} builder.
 * @param <K> Type of a key in {@code upstream} data.
 * @param <V> Type of a value in {@code upstream} data.
 * @param <C> Type of a partition {@code context}.
 * @param <D> Type of a partition {@code data}.
 * @return Dataset.
 */
public static <K, V, C extends Serializable, D extends AutoCloseable> Dataset<C, D> create(
    DatasetBuilder<K, V> datasetBuilder,
    PartitionContextBuilder<K, V, C> partCtxBuilder,
    PartitionDataBuilder<K, V, C, D> partDataBuilder) {
    // Use a single builder instance so that the environment passed to build() is produced by
    // the same builder that build() receives (the original created two independent builders).
    LearningEnvironmentBuilder envBuilder = LearningEnvironmentBuilder.defaultBuilder();

    LearningEnvironment environment = envBuilder.buildForTrainer();

    // Register the data builder for peer deployment before building the dataset.
    environment.deployingContext().initByClientObject(partDataBuilder);

    return datasetBuilder.build(envBuilder, partCtxBuilder, partDataBuilder, environment);
}
Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache:
the class DataStreamGeneratorTest, method testAsDatasetBuilder.
/** */
@Test
public void testAsDatasetBuilder() throws Exception {
    AtomicInteger cntr = new AtomicInteger();

    // Infinite stream of vectors [i] labeled with i mod 2 (so labels alternate 0.0 / 1.0).
    DataStreamGenerator generator = new DataStreamGenerator() {
        @Override public Stream<LabeledVector<Double>> labeled() {
            return Stream.generate(() -> {
                int i = cntr.getAndIncrement();
                return new LabeledVector<>(VectorUtils.of(i), (double)i % 2);
            });
        }
    };

    int size = 100;

    cntr.set(0);
    DatasetBuilder<Vector, Double> unfiltered = generator.asDatasetBuilder(size, 2);

    cntr.set(0);
    DatasetBuilder<Vector, Double> evenLabels = generator.asDatasetBuilder(size, (v, l) -> l == 0, 2);

    // UpstreamTransformerBuilder is a functional interface — a lambda replaces the anonymous class.
    cntr.set(0);
    DatasetBuilder<Vector, Double> oddTransformed =
        generator.asDatasetBuilder(size, (v, l) -> l == 1, 2, env -> new UpstreamTransformerForTest());

    checkDataset(size, unfiltered, v -> (Double)v.label() == 0 || (Double)v.label() == 1);
    checkDataset(size / 2, evenLabels, v -> (Double)v.label() == 0);
    checkDataset(size / 2, oddTransformed, v -> (Double)v.label() < 0);
}
Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache:
the class ComputeUtils, method initContext.
/**
* Initializes partition {@code context} by loading it from a partition {@code upstream}.
* @param ignite Ignite instance.
* @param upstreamCacheName Name of an {@code upstream} cache.
* @param filter Filter for {@code upstream} data.
* @param transformerBuilder Upstream transformer builder.
* @param ctxBuilder Partition {@code context} builder.
* @param envBuilder Environment builder.
* @param isKeepBinary Support of binary objects.
* @param deployingCtx Deploy context.
* @param <K> Type of a key in {@code upstream} data.
* @param <V> Type of a value in {@code upstream} data.
* @param <C> Type of a partition {@code context}.
*/
public static <K, V, C extends Serializable> void initContext(Ignite ignite, String upstreamCacheName, UpstreamTransformerBuilder transformerBuilder, IgniteBiPredicate<K, V> filter, String datasetCacheName, PartitionContextBuilder<K, V, C> ctxBuilder, LearningEnvironmentBuilder envBuilder, int retries, int interval, boolean isKeepBinary, DeployingContext deployingCtx) {
affinityCallWithRetries(ignite, Arrays.asList(datasetCacheName, upstreamCacheName), part -> {
Ignite locIgnite = Ignition.localIgnite();
LearningEnvironment env = envBuilder.buildForWorker(part);
IgniteCache<K, V> locUpstreamCache = locIgnite.cache(upstreamCacheName);
if (isKeepBinary)
locUpstreamCache = locUpstreamCache.withKeepBinary();
ScanQuery<K, V> qry = new ScanQuery<>();
qry.setLocal(true);
qry.setPartition(part);
qry.setFilter(filter);
C ctx;
UpstreamTransformer transformer = transformerBuilder.build(env);
UpstreamTransformer transformerCp = Utils.copy(transformer);
long cnt = computeCount(locUpstreamCache, qry, transformer);
try (QueryCursor<UpstreamEntry<K, V>> cursor = locUpstreamCache.query(qry, e -> new UpstreamEntry<>(e.getKey(), e.getValue()))) {
Iterator<UpstreamEntry<K, V>> it = cursor.iterator();
Stream<UpstreamEntry> transformedStream = transformerCp.transform(Utils.asStream(it, cnt).map(x -> (UpstreamEntry) x));
it = Utils.asStream(transformedStream.iterator()).map(x -> (UpstreamEntry<K, V>) x).iterator();
Iterator<UpstreamEntry<K, V>> iter = new IteratorWithConcurrentModificationChecker<>(it, cnt, "Cache expected to be not modified during dataset data building [partition=" + part + ']');
ctx = ctxBuilder.build(env, iter, cnt);
}
IgniteCache<Integer, C> datasetCache = locIgnite.cache(datasetCacheName);
datasetCache.put(part, ctx);
return part;
}, retries, interval, deployingCtx);
}
Aggregations