Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.
Class ComputeUtils, method getData.
/**
 * Returns the partition {@code data} kept in the node-local {@code PartitionDataStorage} for the partition
 * reported by {@code env}. When the storage has no entry for that partition, the {@code data} is recovered from
 * the partition {@code context} and a local scan of the {@code upstream} cache. Be aware that this method should
 * be called from the node where partition is placed.
 *
 * @param ignite Ignite instance.
 * @param upstreamCacheName Name of an {@code upstream} cache.
 * @param filter Filter for {@code upstream} data.
 * @param transformerBuilder Builder of upstream transformers.
 * @param datasetCacheName Name of a partition {@code context} cache.
 * @param datasetId Dataset ID.
 * @param partDataBuilder Partition data builder.
 * @param env Learning environment; also supplies the partition number.
 * @param isKeepBinary When {@code true}, the {@code upstream} cache is accessed through {@code withKeepBinary()}.
 * @param <K> Type of a key in {@code upstream} data.
 * @param <V> Type of a value in {@code upstream} data.
 * @param <C> Type of a partition {@code context}.
 * @param <D> Type of a partition {@code data}.
 * @return Partition {@code data}; the recovery callback produces {@code null} when the counted number of
 *      upstream entries is not positive.
 */
public static <K, V, C extends Serializable, D extends AutoCloseable> D getData(Ignite ignite, String upstreamCacheName, IgniteBiPredicate<K, V> filter, UpstreamTransformerBuilder transformerBuilder, String datasetCacheName, UUID datasetId, PartitionDataBuilder<K, V, C, D> partDataBuilder, LearningEnvironment env, boolean isKeepBinary) {
    // One storage object per dataset is kept in the node-local map.
    String storageKey = String.format(DATA_STORAGE_KEY_TEMPLATE, datasetId);
    PartitionDataStorage storage = (PartitionDataStorage) ignite.cluster().nodeLocalMap().computeIfAbsent(storageKey, k -> new PartitionDataStorage());

    final int part = env.partition();

    return storage.computeDataIfAbsent(part, () -> {
        // Partition context is read first, exactly as the data builder will need it later.
        IgniteCache<Integer, C> ctxCache = ignite.cache(datasetCacheName);
        C partCtx = ctxCache.get(part);

        IgniteCache<K, V> upstream = ignite.cache(upstreamCacheName);
        if (isKeepBinary) {
            upstream = upstream.withKeepBinary();
        }

        // Scan is restricted to the local node and to this single partition.
        ScanQuery<K, V> scan = new ScanQuery<>();
        scan.setLocal(true);
        scan.setPartition(part);
        scan.setFilter(filter);

        // The copy is taken before counting: the original transformer is used for the count,
        // the copy is used for the actual data pass.
        UpstreamTransformer countingTransformer = transformerBuilder.build(env);
        UpstreamTransformer dataTransformer = Utils.copy(countingTransformer);

        long entriesCnt = computeCount(upstream, scan, countingTransformer);
        if (entriesCnt <= 0) {
            return null;
        }

        try (QueryCursor<UpstreamEntry<K, V>> cursor = upstream.query(scan, e -> new UpstreamEntry<>(e.getKey(), e.getValue()))) {
            Iterator<UpstreamEntry<K, V>> cursorIt = cursor.iterator();

            Stream<UpstreamEntry> rawEntries = Utils.asStream(cursorIt, entriesCnt).map(x -> (UpstreamEntry) x);
            Stream<UpstreamEntry> transformedEntries = dataTransformer.transform(rawEntries);

            Iterator<UpstreamEntry<K, V>> typedIt = Utils.asStream(transformedEntries.iterator()).map(x -> (UpstreamEntry<K, V>) x).iterator();
            Iterator<UpstreamEntry<K, V>> checkedIt = new IteratorWithConcurrentModificationChecker<>(typedIt, entriesCnt, "Cache expected to be not modified during dataset data building [partition=" + part + ']');

            // Built inside the try block so the cursor stays open while the builder consumes the iterator.
            return partDataBuilder.build(env, checkedIt, entriesCnt, partCtx);
        }
    });
}
Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.
Class LocalDataset, method computeWithCtx.
/**
 * {@inheritDoc}
 */
@Override
public <R> R computeWithCtx(IgniteTriFunction<C, D, LearningEnvironment, R> map, IgniteBinaryOperator<R> reduce, R identity) {
    R acc = identity;

    // Partitions are visited in index order; the accumulator starts from the identity value.
    for (int idx = 0; idx < ctx.size(); idx++) {
        D partData = data.get(idx);
        LearningEnvironment partEnv = envs.get(idx);

        // A partition without data contributes nothing to the result.
        if (partData == null) {
            continue;
        }

        R mapped = map.apply(ctx.get(idx), partData, partEnv);
        acc = reduce.apply(acc, mapped);
    }

    return acc;
}
Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.
Class DatasetFactory, method createSimpleDataset.
/**
 * Builds a distributed {@link SimpleDataset} from the given {@code partCtxBuilder} and {@code featureExtractor}.
 * This method fixes the partition {@code data} type to {@link SimpleDatasetData}, while leaving the type of the
 * partition {@code context} up to the caller.
 *
 * @param datasetBuilder Dataset builder.
 * @param envBuilder Learning environment builder; handed over to {@code create} unchanged.
 * @param partCtxBuilder Partition {@code context} builder.
 * @param featureExtractor Feature extractor used to extract features and build {@link SimpleDatasetData}.
 * @param <K> Type of a key in {@code upstream} data.
 * @param <V> Type of a value in {@code upstream} data.
 * @param <C> Type of a partition {@code context}.
 * @param <CO> Not referenced by the parameters, the return type or the body of this method.
 * @return Dataset.
 */
public static <K, V, C extends Serializable, CO extends Serializable> SimpleDataset<C> createSimpleDataset(DatasetBuilder<K, V> datasetBuilder, LearningEnvironmentBuilder envBuilder, PartitionContextBuilder<K, V, C> partCtxBuilder, Preprocessor<K, V> featureExtractor) {
    // The trainer-side environment comes from the default builder (not from envBuilder) and gets its
    // deploying context initialized with the feature extractor.
    LearningEnvironmentBuilder dfltBuilder = LearningEnvironmentBuilder.defaultBuilder();
    LearningEnvironment trainerEnv = dfltBuilder.buildForTrainer();
    trainerEnv.initDeployingContext(featureExtractor);

    return create(
        datasetBuilder,
        envBuilder,
        partCtxBuilder,
        new SimpleDatasetDataBuilder<>(featureExtractor),
        trainerEnv
    ).wrap(SimpleDataset::new);
}
Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.
Class DatasetFactory, method createSimpleLabeledDataset.
/**
 * Builds a distributed {@link SimpleLabeledDataset} from the given {@code partCtxBuilder} and {@code vectorizer}.
 * This method fixes the partition {@code data} type to {@link SimpleLabeledDatasetData}, while leaving the type of
 * the partition {@code context} up to the caller.
 *
 * @param datasetBuilder Dataset builder.
 * @param envBuilder Learning environment builder; handed over to {@code create} unchanged.
 * @param partCtxBuilder Partition {@code context} builder.
 * @param vectorizer Upstream vectorizer used to extract features and labels and build {@link
 * SimpleLabeledDatasetData}.
 * @param <K> Type of a key in {@code upstream} data.
 * @param <V> Type of a value in {@code upstream} data.
 * @param <C> Type of a partition {@code context}.
 * @param <CO> Not referenced by the parameters, the return type or the body of this method.
 * @return Dataset.
 */
public static <K, V, C extends Serializable, CO extends Serializable> SimpleLabeledDataset<C> createSimpleLabeledDataset(DatasetBuilder<K, V> datasetBuilder, LearningEnvironmentBuilder envBuilder, PartitionContextBuilder<K, V, C> partCtxBuilder, Preprocessor<K, V> vectorizer) {
    // The trainer-side environment comes from the default builder (not from envBuilder) and gets its
    // deploying context initialized with the vectorizer.
    LearningEnvironmentBuilder dfltBuilder = LearningEnvironmentBuilder.defaultBuilder();
    LearningEnvironment trainerEnv = dfltBuilder.buildForTrainer();
    trainerEnv.initDeployingContext(vectorizer);

    return create(
        datasetBuilder,
        envBuilder,
        partCtxBuilder,
        new SimpleLabeledDatasetDataBuilder<>(vectorizer),
        trainerEnv
    ).wrap(SimpleLabeledDataset::new);
}
Use of org.apache.ignite.ml.environment.LearningEnvironment in project ignite by apache.
Class CacheBasedDataset, method compute.
/**
 * {@inheritDoc}
 */
@Override
public <R> R compute(IgniteBiFunction<D, LearningEnvironment, R> map, IgniteBinaryOperator<R> reduce, R identity) {
    // Cache names are resolved once, before the per-partition function is handed over.
    String upstreamName = upstreamCache.getName();
    String ctxCacheName = datasetCache.getName();

    return computeForAllPartitions(part -> {
        LearningEnvironment partEnv = ComputeUtils.getLearningEnvironment(Ignition.localIgnite(), datasetId, part, envBuilder);
        D partData = ComputeUtils.getData(Ignition.localIgnite(), upstreamName, filter, upstreamTransformerBuilder, ctxCacheName, datasetId, partDataBuilder, partEnv, upstreamKeepBinary);

        // No data for this partition: hand null to the reducer instead of invoking the map function.
        if (partData == null) {
            return null;
        }

        return map.apply(partData, partEnv);
    }, reduce, identity);
}
Aggregations