Use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
The class SparseDistributedMatrixMapReducerTest, method testMapReduceWithNullValues.
/**
 * Tests that a 100x100 matrix filled with 1.0 and distributed across nodes is successfully processed via
 * {@link SparseDistributedMatrixMapReducer} even when the mapping function returns {@code null}.
 */
public void testMapReduceWithNullValues() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    SparseDistributedMatrix distributedMatrix = new SparseDistributedMatrix(100, 100);

    for (int i = 0; i < 100; i++)
        for (int j = 0; j < 100; j++)
            distributedMatrix.set(i, j, 1);

    SparseDistributedMatrixMapReducer mapReducer = new SparseDistributedMatrixMapReducer(distributedMatrix);

    double total = mapReducer.mapReduce(
        (matrix, args) -> null,
        sums -> {
            double totalSum = 0;

            for (Double partialSum : sums)
                if (partialSum != null)
                    totalSum += partialSum;

            return totalSum;
        },
        0.0);

    assertEquals(0, total, 1e-18);
}
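For contrast, here is a minimal sketch (not part of the original test) of the same mapReduce call with a mapping function that actually produces partial sums. It uses only Matrix accessors seen elsewhere in these snippets (rowSize, columnSize, get) and assumes each mapper invocation is handed a local matrix containing only that node's rows, so the reducer's sum should come out as the grid-wide total (10,000 for the 100x100 matrix of ones above).

// Hypothetical variant: each node sums its local block, the reducer adds the partial sums.
double gridSum = mapReducer.mapReduce(
    (matrix, args) -> {
        double s = 0;

        for (int i = 0; i < matrix.rowSize(); i++)
            for (int j = 0; j < matrix.columnSize(); j++)
                s += matrix.get(i, j);

        return s;
    },
    sums -> {
        double totalSum = 0;

        for (Double partialSum : sums)
            if (partialSum != null)
                totalSum += partialSum;

        return totalSum;
    },
    0.0);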
Use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
The class ColumnDecisionTreeTrainerBenchmark, method tstMNISTSparseDistributedMatrix.
/**
 * Runs the decision tree classifier on MNIST using a sparse distributed matrix as storage for the dataset.
 * To run this test, rename this method so that it starts with 'test'.
 *
 * @throws IOException In case of errors while loading the MNIST dataset.
 */
public void tstMNISTSparseDistributedMatrix() throws IOException {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    int ptsCnt = 30_000;
    int featCnt = 28 * 28;

    Properties props = loadMNISTProperties();

    Stream<DenseLocalOnHeapVector> trainingMnistStream = MnistUtils.mnist(props.getProperty(PROP_TRAINING_IMAGES),
        props.getProperty(PROP_TRAINING_LABELS), new Random(123L), ptsCnt);
    Stream<DenseLocalOnHeapVector> testMnistStream = MnistUtils.mnist(props.getProperty(PROP_TEST_IMAGES),
        props.getProperty(PROP_TEST_LABELS), new Random(123L), 10_000);

    SparseDistributedMatrix m = new SparseDistributedMatrix(ptsCnt, featCnt + 1,
        StorageConstants.COLUMN_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    SparseDistributedMatrixStorage sto = (SparseDistributedMatrixStorage)m.getStorage();

    loadVectorsIntoSparseDistributedMatrixCache(sto.cache().getName(), sto.getUUID(),
        trainingMnistStream.iterator(), featCnt + 1);

    ColumnDecisionTreeTrainer<GiniSplitCalculator.GiniData> trainer = new ColumnDecisionTreeTrainer<>(10,
        ContinuousSplitCalculators.GINI.apply(ignite), RegionCalculators.GINI, RegionCalculators.MOST_COMMON, ignite);

    X.println("Training started");

    long before = System.currentTimeMillis();

    DecisionTreeModel mdl = trainer.train(new MatrixColumnDecisionTreeTrainerInput(m, new HashMap<>()));

    X.println("Training finished in " + (System.currentTimeMillis() - before));

    IgniteTriFunction<Model<Vector, Double>, Stream<IgniteBiTuple<Vector, Double>>, Function<Double, Double>, Double> mse =
        Estimators.errorsPercentage();

    Double accuracy = mse.apply(mdl,
        testMnistStream.map(v -> new IgniteBiTuple<>(v.viewPart(0, featCnt), v.getX(featCnt))), Function.identity());

    X.println("Errors percentage: " + accuracy);

    Assert.assertEquals(0, SplitCache.getOrCreate(ignite).size());
    Assert.assertEquals(0, FeaturesCache.getOrCreate(ignite).size());
    Assert.assertEquals(0, ContextCache.getOrCreate(ignite).size());
    Assert.assertEquals(0, ProjectionsCache.getOrCreate(ignite).size());
}
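The evaluation step above relies on each MNIST vector carrying featCnt pixel values followed by the label in the trailing position, which is then split back into a (features, label) pair via viewPart and getX. A minimal sketch of that split using only the Vector methods already shown; the literal vector here is an invented 3-feature row, not MNIST data:

// Hypothetical 3-feature row: features [0.1, 0.5, 0.9], label 7.0 in the trailing slot.
DenseLocalOnHeapVector row = new DenseLocalOnHeapVector(new double[] {0.1, 0.5, 0.9, 7.0});

Vector features = row.viewPart(0, 3); // first featCnt entries
double lbl = row.getX(3);             // trailing label column

IgniteBiTuple<Vector, Double> sample = new IgniteBiTuple<>(features, lbl);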
Use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
The class ColumnDecisionTreeTrainerBenchmark, method testByGenStreamerLoad.
/** */
private void testByGenStreamerLoad(int ptsPerReg, HashMap<Integer, Integer> catsInfo,
    SplitDataGenerator<DenseLocalOnHeapVector> gen, Random rnd) {
    List<IgniteBiTuple<Integer, DenseLocalOnHeapVector>> lst = gen.points(ptsPerReg, (i, rn) -> i).collect(Collectors.toList());

    int featCnt = gen.featuresCnt();

    Collections.shuffle(lst, rnd);

    int numRegs = gen.regsCount();

    SparseDistributedMatrix m = new SparseDistributedMatrix(numRegs * ptsPerReg, featCnt + 1,
        StorageConstants.COLUMN_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    IgniteFunction<DoubleStream, Double> regCalc = s -> s.average().orElse(0.0);

    Map<Integer, List<LabeledVectorDouble>> byRegion = new HashMap<>();

    SparseDistributedMatrixStorage sto = (SparseDistributedMatrixStorage)m.getStorage();

    long before = System.currentTimeMillis();

    X.println("Batch loading started...");

    loadVectorsIntoSparseDistributedMatrixCache(sto.cache().getName(), sto.getUUID(),
        gen.points(ptsPerReg, (i, rn) -> i).map(IgniteBiTuple::get2).iterator(), featCnt + 1);

    X.println("Batch loading took " + (System.currentTimeMillis() - before) + " ms.");

    for (IgniteBiTuple<Integer, DenseLocalOnHeapVector> bt : lst) {
        byRegion.putIfAbsent(bt.get1(), new LinkedList<>());
        byRegion.get(bt.get1()).add(asLabeledVector(bt.get2().getStorage().data()));
    }

    ColumnDecisionTreeTrainer<VarianceSplitCalculator.VarianceData> trainer = new ColumnDecisionTreeTrainer<>(2,
        ContinuousSplitCalculators.VARIANCE, RegionCalculators.VARIANCE, regCalc, ignite);

    before = System.currentTimeMillis();

    DecisionTreeModel mdl = trainer.train(new MatrixColumnDecisionTreeTrainerInput(m, catsInfo));

    X.println("Training took: " + (System.currentTimeMillis() - before) + " ms.");

    byRegion.keySet().forEach(k -> {
        LabeledVectorDouble sp = byRegion.get(k).get(0);

        Tracer.showAscii(sp.features());

        X.println("Predicted value and label [pred=" + mdl.apply(sp.features()) + ", label=" + sp.doubleLabel() + "]");

        assert mdl.apply(sp.features()) == sp.doubleLabel();
    });
}
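The regCalc function above decides what value a leaf of the trained tree predicts: it collapses the labels of all samples falling into the leaf's region into a single number. A minimal illustration of that contract, independent of the trainer (the label values are invented):

// Leaf-value function as used above: mean of the labels in the region (regression-style leaf).
IgniteFunction<DoubleStream, Double> avgLeaf = s -> s.average().orElse(0.0);

// Invented labels for one region; the leaf would predict their mean, 2.0.
double leafPrediction = avgLeaf.apply(DoubleStream.of(1.0, 2.0, 3.0));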
Use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
The class KMeansDistributedClustererExample, method main.
/**
 * Executes the example.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) throws InterruptedException {
    // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure
    System.out.println(">>> K-means distributed clusterer example started.");

    // Start the Ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Create an IgniteThread; we must work with SparseDistributedMatrix inside an IgniteThread
        // because it creates an Ignite cache internally.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
            SparseDistributedMatrixExample.class.getSimpleName(), () -> {
            int ptsCnt = 10000;

            SparseDistributedMatrix points = new SparseDistributedMatrix(ptsCnt, 2,
                StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

            DatasetWithObviousStructure dataset = new DatasetWithObviousStructure(10000);

            List<Vector> massCenters = dataset.generate(points);

            EuclideanDistance dist = new EuclideanDistance();

            KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(dist, 3, 100, 1L);

            Vector[] resCenters = clusterer.cluster(points, 4).centers();

            System.out.println("Mass centers:");
            massCenters.forEach(Tracer::showAscii);

            System.out.println("Cluster centers:");
            Arrays.asList(resCenters).forEach(Tracer::showAscii);

            points.destroy();

            System.out.println("\n>>> K-means distributed clusterer example completed.");
        });

        igniteThread.start();
        igniteThread.join();
    }
}
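Once the centers come back, assigning an individual point to a cluster is a nearest-center lookup with the same distance measure. A minimal sketch of that step, continuing with the dist and resCenters variables from the example; it assumes EuclideanDistance exposes compute(Vector, Vector) as the clusterer itself uses, and the query point is invented:

// Hypothetical query point; any 2-dimensional vector works here.
Vector pt = new DenseLocalOnHeapVector(new double[] {1.0, 1.0});

int nearest = 0;
for (int i = 1; i < resCenters.length; i++)
    if (dist.compute(resCenters[i], pt) < dist.compute(resCenters[nearest], pt))
        nearest = i;

System.out.println("Query point falls into cluster " + nearest);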
Use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
The class IgniteKMeansDistributedClustererBenchmark, method test.
/** {@inheritDoc} */
@Override public boolean test(Map<Object, Object> ctx) throws Exception {
    final DataChanger.Scale scale = new DataChanger.Scale();

    // Create an IgniteThread; we must work with SparseDistributedMatrix inside an IgniteThread
    // because it creates an Ignite cache internally.
    IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
        this.getClass().getSimpleName(), new Runnable() {
        /** {@inheritDoc} */
        @Override public void run() {
            // IMPL NOTE originally taken from KMeansDistributedClustererTest
            KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(new EuclideanDistance(), 1, 1, 1L);

            double[] v1 = scale.mutate(new double[] {1959, 325100});
            double[] v2 = scale.mutate(new double[] {1960, 373200});

            SparseDistributedMatrix points = new SparseDistributedMatrix(2, 2,
                StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

            points.setRow(0, v1);
            points.setRow(1, v2);

            clusterer.cluster(points, 1);

            points.destroy();
        }
    });

    igniteThread.start();
    igniteThread.join();

    return true;
}
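The pattern both K-means snippets share is the matrix lifecycle: SparseDistributedMatrix is backed by an Ignite cache, so it has to be created and used on a thread that carries the Ignite instance name (hence the IgniteThread wrapper) and torn down explicitly. A minimal sketch of that lifecycle in isolation, using only the calls shown above; the try/finally arrangement is an illustration, not part of the benchmark:

SparseDistributedMatrix tmp = new SparseDistributedMatrix(2, 2,
    StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

try {
    tmp.setRow(0, new double[] {1.0, 2.0});
    tmp.setRow(1, new double[] {3.0, 4.0});
    // ... use the matrix ...
}
finally {
    // Without destroy() the backing cache entries outlive the matrix object.
    tmp.destroy();
}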