use of org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector in project ignite by apache.
the class ColumnDecisionTreeTrainerBenchmark method tstMNISTBiIndexedCache.
/**
 * Trains a column decision tree classifier on the MNIST training set, which is stored in a bi-indexed
 * cache, prints the training time and the errors percentage measured on the MNIST test set, and finally
 * checks that the trainer's auxiliary caches are empty.
 * <p>
 * Manual run only: rename this method so that its name starts with 'test' to run it.
 *
 * @throws IOException In case of loading MNIST dataset errors.
 */
public void tstMNISTBiIndexedCache() throws IOException {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    int trainSize = 40_000;
    int pixelsCnt = 28 * 28;

    Properties mnistProps = loadMNISTProperties();

    Stream<DenseLocalOnHeapVector> trainStream = MnistUtils.mnist(
        mnistProps.getProperty(PROP_TRAINING_IMAGES),
        mnistProps.getProperty(PROP_TRAINING_LABELS),
        new Random(123L),
        trainSize);

    Stream<DenseLocalOnHeapVector> testStream = MnistUtils.mnist(
        mnistProps.getProperty(PROP_TEST_IMAGES),
        mnistProps.getProperty(PROP_TEST_LABELS),
        new Random(123L),
        10_000);

    IgniteCache<BiIndex, Double> dataCache = createBiIndexedCache();

    // Vectors are loaded with one component more than the number of pixel features;
    // for the test stream below that extra component is read as the label.
    loadVectorsIntoBiIndexedCache(dataCache.getName(), trainStream.iterator(), pixelsCnt + 1);

    ColumnDecisionTreeTrainer<GiniSplitCalculator.GiniData> treeTrainer = new ColumnDecisionTreeTrainer<>(
        10,
        ContinuousSplitCalculators.GINI.apply(ignite),
        RegionCalculators.GINI,
        RegionCalculators.MOST_COMMON,
        ignite);

    X.println("Training started.");

    long startTs = System.currentTimeMillis();

    DecisionTreeModel mdl = treeTrainer.train(
        new BiIndexedCacheColumnDecisionTreeTrainerInput(dataCache, new HashMap<>(), trainSize, pixelsCnt));

    X.println("Training finished in " + (System.currentTimeMillis() - startTs));

    IgniteTriFunction<Model<Vector, Double>, Stream<IgniteBiTuple<Vector, Double>>, Function<Double, Double>, Double> errorsEstimator =
        Estimators.errorsPercentage();

    // Split every test vector into its feature view and its label.
    Stream<IgniteBiTuple<Vector, Double>> labeledTestStream =
        testStream.map(v -> new IgniteBiTuple<>(v.viewPart(0, pixelsCnt), v.getX(pixelsCnt)));

    Double errorsPercentage = errorsEstimator.apply(mdl, labeledTestStream, Function.identity());

    X.println("Errors percentage: " + errorsPercentage);

    // Nothing should be left in the trainer's caches once training is over.
    Assert.assertEquals(0, SplitCache.getOrCreate(ignite).size());
    Assert.assertEquals(0, FeaturesCache.getOrCreate(ignite).size());
    Assert.assertEquals(0, ContextCache.getOrCreate(ignite).size());
    Assert.assertEquals(0, ProjectionsCache.getOrCreate(ignite).size());
}
use of org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector in project ignite by apache.
the class ColumnDecisionTreeTrainerBenchmark method vecsFromRanges.
/**
 * Generates {@code ptsCnt} vectors of size {@code featCnt + 1}. The first {@code featCnt} components of
 * each vector are drawn uniformly from the half-open range {@code [from, to)} configured for the
 * corresponding feature; the last component is the value of {@code f} applied to the vector after its
 * features have been filled in.
 *
 * @param ranges Map from feature index to a two-element array {@code {from, to}} bounding that feature.
 * @param featCnt Number of features in each generated vector.
 * @param defRng Range {@code {from, to}} used for features which have no entry in {@code ranges}.
 * @param rnd Source of randomness; exactly one {@code nextDouble()} is drawn per feature per point.
 * @param ptsCnt Number of vectors to generate.
 * @param f Function computing the value stored in the last component of each vector.
 * @return Array of generated vectors.
 */
private Vector[] vecsFromRanges(Map<Integer, double[]> ranges, int featCnt, double[] defRng, Random rnd, int ptsCnt, Function<Vector, Double> f) {
    int vs = featCnt + 1;

    DenseLocalOnHeapVector[] res = new DenseLocalOnHeapVector[ptsCnt];

    for (int pt = 0; pt < ptsCnt; pt++) {
        DenseLocalOnHeapVector v = new DenseLocalOnHeapVector(vs);

        for (int i = 0; i < featCnt; i++) {
            double[] range = ranges.getOrDefault(i, defRng);

            double from = range[0];
            double to = range[1];

            // Shift by the lower bound: without the offset the value would land in [0, to - from)
            // and the configured lower bound would be ignored.
            v.setX(i, from + rnd.nextDouble() * (to - from));
        }

        v.setX(featCnt, f.apply(v));

        res[pt] = v;
    }

    return res;
}
use of org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector in project ignite by apache.
the class ColumnDecisionTreeTrainerBenchmark method tstCacheMixed.
/**
 * Builds a split data generator over 10 features, with an entry for feature 1 in the categorical
 * features info, and runs the streamer-load training check on the generated data.
 * <p>
 * Manual run only: rename this method so that its name starts with 'test' to run it.
 */
public void tstCacheMixed() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    int pointsPerRegion = 150;
    int featuresCnt = 10;

    Random random = new Random(12349L);

    // NOTE(review): presumably "feature 1 is categorical with 3 categories" - confirm against SplitDataGenerator.
    HashMap<Integer, Integer> categoricalInfo = new HashMap<>();
    categoricalInfo.put(1, 3);

    SplitDataGenerator<DenseLocalOnHeapVector> generator =
        new SplitDataGenerator<>(featuresCnt, categoricalInfo, () -> new DenseLocalOnHeapVector(featuresCnt + 1), random)
            .split(0, 1, new int[] { 0, 2 })
            .split(1, 0, -10.0)
            .split(0, 0, 0.0);

    testByGenStreamerLoad(pointsPerRegion, categoricalInfo, generator, random);
}
use of org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector in project ignite by apache.
the class ColumnDecisionTreeTrainerBenchmark method testByGenStreamerLoad.
/**
 * Loads points produced by the generator into a column-stored sparse distributed matrix, trains a
 * variance-based column decision tree on that matrix and, for the first point collected for every
 * region, prints the prediction next to the label and asserts that they are equal.
 *
 * @param ptsPerReg Number of points requested from the generator per region.
 * @param catsInfo Categorical features info handed to the trainer input.
 * @param gen Generator of the data points.
 * @param rnd Random used to shuffle the collected points.
 */
private void testByGenStreamerLoad(int ptsPerReg, HashMap<Integer, Integer> catsInfo, SplitDataGenerator<DenseLocalOnHeapVector> gen, Random rnd) {
    List<IgniteBiTuple<Integer, DenseLocalOnHeapVector>> labeledPts =
        gen.points(ptsPerReg, (i, rn) -> i).collect(Collectors.toList());

    int featuresCnt = gen.featuresCnt();

    Collections.shuffle(labeledPts, rnd);

    int regionsCnt = gen.regsCount();

    SparseDistributedMatrix mtx = new SparseDistributedMatrix(
        regionsCnt * ptsPerReg,
        featuresCnt + 1,
        StorageConstants.COLUMN_STORAGE_MODE,
        StorageConstants.RANDOM_ACCESS_MODE);

    SparseDistributedMatrixStorage storage = (SparseDistributedMatrixStorage) mtx.getStorage();

    long startTs = System.currentTimeMillis();

    X.println("Batch loading started...");

    // The matrix is filled from a fresh pass over the generator, not from the shuffled list above.
    loadVectorsIntoSparseDistributedMatrixCache(
        storage.cache().getName(),
        storage.getUUID(),
        gen.points(ptsPerReg, (i, rn) -> i).map(IgniteBiTuple::get2).iterator(),
        featuresCnt + 1);

    X.println("Batch loading took " + (System.currentTimeMillis() - startTs) + " ms.");

    // Group the shuffled points by the region index they were generated for.
    Map<Integer, List<LabeledVectorDouble>> ptsByRegion = new HashMap<>();

    for (IgniteBiTuple<Integer, DenseLocalOnHeapVector> pt : labeledPts) {
        ptsByRegion
            .computeIfAbsent(pt.get1(), region -> new LinkedList<>())
            .add(asLabeledVector(pt.get2().getStorage().data()));
    }

    // A region's value is the mean of the values in it, or 0.0 when there are none.
    IgniteFunction<DoubleStream, Double> meanCalc = vals -> vals.average().orElse(0.0);

    ColumnDecisionTreeTrainer<VarianceSplitCalculator.VarianceData> treeTrainer = new ColumnDecisionTreeTrainer<>(
        2,
        ContinuousSplitCalculators.VARIANCE,
        RegionCalculators.VARIANCE,
        meanCalc,
        ignite);

    startTs = System.currentTimeMillis();

    DecisionTreeModel mdl = treeTrainer.train(new MatrixColumnDecisionTreeTrainerInput(mtx, catsInfo));

    X.println("Training took: " + (System.currentTimeMillis() - startTs) + " ms.");

    for (List<LabeledVectorDouble> regionPts : ptsByRegion.values()) {
        LabeledVectorDouble sample = regionPts.get(0);

        Tracer.showAscii(sample.features());

        X.println("Predicted value and label [pred=" + mdl.apply(sample.features()) + ", label=" + sample.doubleLabel() + "]");

        assert mdl.apply(sample.features()) == sample.doubleLabel();
    }
}
use of org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector in project ignite by apache.
the class FuzzyCMeansLocalClustererTest method checkCentersOfTheSamePointsTwoDimensions.
/**
 * Runs FCM with two clusters on five identical two-dimensional points and checks that each coordinate
 * of each of the first two resulting centers is within 1 of the coordinates of that point.
 */
@Test
public void checkCentersOfTheSamePointsTwoDimensions() {
    FuzzyCMeansLocalClusterer fcm = new FuzzyCMeansLocalClusterer(
        new EuclideanDistance(),
        2,
        BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS,
        0.01,
        10,
        null);

    double[][] samePoints = {
        { 3.3, 10 },
        { 3.3, 10 },
        { 3.3, 10 },
        { 3.3, 10 },
        { 3.3, 10 }
    };

    DenseLocalOnHeapMatrix pointsMtx = new DenseLocalOnHeapMatrix(samePoints);

    int clustersCnt = 2;

    FuzzyCMeansModel mdl = fcm.cluster(pointsMtx, clustersCnt);

    Vector expCenter = new DenseLocalOnHeapVector(new double[] { 3.3, 10 });

    for (int c = 0; c < clustersCnt; c++) {
        Vector actualCenter = mdl.centers()[c];

        for (int dim = 0; dim < 2; dim++) {
            assertEquals(expCenter.getX(dim), actualCenter.getX(dim), 1);
        }
    }
}
Aggregations