use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class ColumnDecisionTreeTrainerBenchmark method tstF1.
/**
 * Test decision tree regression.
 * <p>
 * Generates training vectors with {@code vecsFromRanges} and {@code f1}, loads them into a column-stored
 * {@link SparseDistributedMatrix} with {@code featCnt + 1} columns, trains a
 * {@link ColumnDecisionTreeTrainer} on that matrix and prints the training time together with the MSE
 * measured on 20 generated vectors. The matrix is destroyed when the method finishes, whether or not
 * training succeeds.
 * <p>
 * To run this test rename this method so it starts from 'test'.
 */
public void tstF1() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    int ptsCnt = 10000;
    int featCnt = 100;

    // Explicit value ranges for features 0..2; defRng is passed as the range for the remaining ones.
    Map<Integer, double[]> ranges = new HashMap<>();
    ranges.put(0, new double[] { -100.0, 100.0 });
    ranges.put(1, new double[] { -100.0, 100.0 });
    ranges.put(2, new double[] { -100.0, 100.0 });
    double[] defRng = { -1.0, 1.0 };

    Vector[] trainVectors = vecsFromRanges(ranges, featCnt, defRng, new Random(123L), ptsCnt, f1);

    SparseDistributedMatrix m = new SparseDistributedMatrix(ptsCnt, featCnt + 1, StorageConstants.COLUMN_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    try {
        SparseDistributedMatrixStorage sto = (SparseDistributedMatrixStorage) m.getStorage();
        loadVectorsIntoSparseDistributedMatrixCache(sto.cache().getName(), sto.getUUID(), Arrays.stream(trainVectors).iterator(), featCnt + 1);

        // Region value is the mean of the labels in the region, 0.0 for an empty stream.
        IgniteFunction<DoubleStream, Double> regCalc = s -> s.average().orElse(0.0);
        ColumnDecisionTreeTrainer<VarianceSplitCalculator.VarianceData> trainer = new ColumnDecisionTreeTrainer<>(10, ContinuousSplitCalculators.VARIANCE, RegionCalculators.VARIANCE, regCalc, ignite);

        X.println("Training started.");
        long before = System.currentTimeMillis();
        DecisionTreeModel mdl = trainer.train(new MatrixColumnDecisionTreeTrainerInput(m, new HashMap<>()));
        X.println("Training finished in: " + (System.currentTimeMillis() - before) + " ms.");

        // NOTE(review): same seed as the training set, so these points presumably repeat training
        // points rather than being held-out data - confirm this is intended.
        Vector[] testVectors = vecsFromRanges(ranges, featCnt, defRng, new Random(123L), 20, f1);

        IgniteTriFunction<Model<Vector, Double>, Stream<IgniteBiTuple<Vector, Double>>, Function<Double, Double>, Double> mse = Estimators.MSE();

        // The first featCnt components of each vector are the features, the component at index featCnt is the label.
        Double mseVal = mse.apply(mdl, Arrays.stream(testVectors).map(v -> new IgniteBiTuple<>(v.viewPart(0, featCnt), v.getX(featCnt))), Function.identity());
        X.println("MSE: " + mseVal);
    }
    finally {
        // Release the distributed matrix even if loading, training or evaluation fails.
        m.destroy();
    }
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class GradientDescent method calculateDistributedGradient.
/**
 * Calculates gradient based on distributed matrix using {@link SparseDistributedMatrixMapReducer}.
 * <p>
 * The map step computes the loss gradient from the inputs and ground truth extracted from the matrix it
 * is given; the reduce step sums the non-null partial gradients and divides the sum by their number.
 *
 * @param data Distributed matrix
 * @param weights Point to calculate gradient
 * @return Gradient
 */
private Vector calculateDistributedGradient(SparseDistributedMatrix data, Vector weights) {
    return new SparseDistributedMatrixMapReducer(data).mapReduce(
        (part, point) -> {
            Matrix features = extractInputs(part);
            Vector labels = extractGroundTruth(part);

            return lossGradient.compute(features, labels, point);
        },
        partials -> {
            Vector sum = new DenseLocalOnHeapVector(data.columnSize());
            int used = 0;

            for (Vector partial : partials) {
                // Null partial results are left out of both the sum and the divisor.
                if (partial == null) {
                    continue;
                }

                used++;
                sum = sum.plus(partial);
            }

            // NOTE(review): when every partial gradient is null, 'used' is 0 and this divides by zero.
            return sum.divide(used);
        },
        weights);
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class KMeansDistributedClustererTestMultiNode method testClusterizationOnDatasetWithObviousStructure.
/**
 * Runs {@link KMeansDistributedClusterer} on points generated around the four vertices of a square.
 * <p>
 * The points are written into random rows of a row-stored {@link SparseDistributedMatrix}, which is then
 * clustered with {@code k = 4}. The test makes no assertions about the result; it passes when clustering
 * completes without throwing. The matrix is destroyed in all cases.
 *
 * @throws IOException Declared by the original signature; nothing in this method body throws it directly.
 */
public void testClusterizationOnDatasetWithObviousStructure() throws IOException {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    int ptsCnt = 10000;
    int squareSideLen = 10000;
    Random rnd = new Random(123456L);

    // Let centers be in the vertices of square.
    // Each key is the number of points generated around its center; the keys add up to ptsCnt.
    Map<Integer, Vector> centers = new HashMap<>();
    centers.put(100, new DenseLocalOnHeapVector(new double[] { 0.0, 0.0 }));
    centers.put(900, new DenseLocalOnHeapVector(new double[] { squareSideLen, 0.0 }));
    centers.put(3000, new DenseLocalOnHeapVector(new double[] { 0.0, squareSideLen }));
    centers.put(6000, new DenseLocalOnHeapVector(new double[] { squareSideLen, squareSideLen }));

    SparseDistributedMatrix points = new SparseDistributedMatrix(ptsCnt, 2, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    try {
        // Shuffled row indices, so that points of one center do not occupy consecutive rows.
        List<Integer> permutation = IntStream.range(0, ptsCnt).boxed().collect(Collectors.toList());
        Collections.shuffle(permutation, rnd);

        int totalCnt = 0;

        for (Map.Entry<Integer, Vector> center : centers.entrySet()) {
            int cnt = center.getKey();

            for (int i = 0; i < cnt; i++) {
                Vector pnt = new DenseLocalOnHeapVector(2).assign(center.getValue());

                // Perturb the point by a random value. The return value of map is ignored,
                // so this relies on map modifying pnt in place.
                pnt.map(val -> val + rnd.nextDouble() * squareSideLen / 100);

                points.assignRow(permutation.get(totalCnt), pnt);
                totalCnt++;
            }
        }

        EuclideanDistance dist = new EuclideanDistance();
        KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(dist, 3, 100, 1L);

        clusterer.cluster(points, 4);
    }
    finally {
        // Release the distributed matrix even if data generation or clustering fails.
        points.destroy();
    }
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class KMeansDistributedClustererTestMultiNode method testPerformClusterAnalysisDegenerate.
/**
 * Runs {@link KMeansDistributedClusterer} on a two-row, two-column matrix with {@code k = 1}.
 * <p>
 * The test makes no assertions about the result; it passes when clustering completes without throwing.
 * The matrix is destroyed in all cases.
 */
public void testPerformClusterAnalysisDegenerate() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(new EuclideanDistance(), 1, 1, 1L);

    double[] v1 = new double[] { 1959, 325100 };
    double[] v2 = new double[] { 1960, 373200 };

    SparseDistributedMatrix points = new SparseDistributedMatrix(2, 2, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    try {
        points.setRow(0, v1);
        points.setRow(1, v2);

        clusterer.cluster(points, 1);
    }
    finally {
        // Release the distributed matrix even if filling it or clustering fails.
        points.destroy();
    }
}
Aggregations