use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class SplitDataGenerator method testByGen.
/**
 * Trains a column decision tree on generated points and then applies the model to one point of every region.
 * <p>
 * Points are produced by {@code points(totalPts, ...)}, shuffled with {@code rnd} and written row by row into a
 * column-stored, random-access {@link SparseDistributedMatrix} with {@code featCnt + 1} columns. Each point is also
 * grouped by the first element of its tuple (the region key). The value returned by the model is discarded, so this
 * method only verifies that training and prediction complete without throwing.
 *
 * @param totalPts Number of points to generate; also used as the row count of the training matrix.
 * @param calc Forwarded to the {@link ColumnDecisionTreeTrainer} constructor.
 * @param catImpCalc Forwarded to the {@link ColumnDecisionTreeTrainer} constructor.
 * @param regCalc Forwarded to the {@link ColumnDecisionTreeTrainer} constructor.
 * @param ignite Ignite instance forwarded to the {@link ColumnDecisionTreeTrainer} constructor.
 * @param <D> Region info type handled by the continuous split calculator.
 */
<D extends ContinuousRegionInfo> void testByGen(int totalPts, IgniteFunction<ColumnDecisionTreeTrainerInput, ? extends ContinuousSplitCalculator<D>> calc, IgniteFunction<ColumnDecisionTreeTrainerInput, IgniteFunction<DoubleStream, Double>> catImpCalc, IgniteFunction<DoubleStream, Double> regCalc, Ignite ignite) {
    List<IgniteBiTuple<Integer, V>> shuffledPts = points(totalPts, (idx, rn) -> idx).collect(Collectors.toList());
    Collections.shuffle(shuffledPts, rnd);

    SparseDistributedMatrix trainingData = new SparseDistributedMatrix(totalPts, featCnt + 1, StorageConstants.COLUMN_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    Map<Integer, List<LabeledVectorDouble>> ptsByRegion = new HashMap<>();

    // Fill the matrix one row per point while remembering which region every point belongs to.
    int row = 0;
    for (IgniteBiTuple<Integer, V> pt : shuffledPts) {
        Integer region = pt.get1();
        V vec = pt.get2();

        ptsByRegion.computeIfAbsent(region, r -> new LinkedList<>()).add(asLabeledVector(vec.getStorage().data()));
        trainingData.setRow(row, vec.getStorage().data());
        row++;
    }

    // NOTE(review): the meaning of the literal 3 is defined by the trainer constructor (presumably max depth) - confirm.
    ColumnDecisionTreeTrainer<D> trainer = new ColumnDecisionTreeTrainer<>(3, calc, catImpCalc, regCalc, ignite);
    DecisionTreeModel mdl = trainer.train(new MatrixColumnDecisionTreeTrainerInput(trainingData, catFeaturesInfo));

    // Apply the model to the first point recorded for each region; the prediction itself is not checked.
    for (List<LabeledVectorDouble> regionPts : ptsByRegion.values()) {
        mdl.apply(regionPts.get(0).features());
    }
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class KMeansDistributedClusterer method cluster.
/**
 * Clusters the rows of {@code points} into {@code k} groups using iterations of Lloyd's algorithm.
 * <p>
 * The input is copied into a new distributed matrix and all work is done on that copy. Starting from the centers
 * returned by {@code initClusterCenters}, every iteration replaces each center reported by
 * {@code getSumsAndCounts} with its mass center (sum divided by count). Iterations stop once no replaced center
 * is farther than {@code epsilon * epsilon} from its predecessor (as measured by {@code distance}) or after
 * {@code maxIterations} iterations. The temporary copy is always destroyed, even if the computation fails.
 *
 * @param points Matrix holding the points to cluster; row 0 is used to determine the point dimension.
 * @param k Number of clusters, forwarded to {@code initClusterCenters}.
 * @return Model built from the final centers and {@code getDistanceMeasure()}.
 * @throws MathIllegalArgumentException Declared by the signature; the triggering conditions are not visible in
 *      this method.
 * @throws ConvergenceException Declared by the signature; the triggering conditions are not visible in this
 *      method.
 */
@Override
public KMeansModel cluster(SparseDistributedMatrix points, int k) throws MathIllegalArgumentException, ConvergenceException {
    SparseDistributedMatrix pointsCp = (SparseDistributedMatrix) points.like(points.rowSize(), points.columnSize());
    Vector[] centers;

    // The copy occupies distributed storage, so release it on every exit path, not only on success.
    try {
        String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();

        // TODO: IGNITE-5825, this copy is very ineffective, just for POC. Immutability of data should be guaranteed by other methods
        // such as logical locks for example.
        pointsCp.assign(points);

        centers = initClusterCenters(pointsCp, k);

        boolean converged = false;
        int iteration = 0;
        int dim = pointsCp.viewRow(0).size();
        UUID uid = pointsCp.getUUID();

        // Execute iterations of Lloyd's algorithm until converged
        while (iteration < maxIterations && !converged) {
            SumsAndCounts stats = getSumsAndCounts(centers, dim, uid, cacheName);

            // Assume convergence and revoke it as soon as one center moves too far.
            converged = true;

            for (Integer ind : stats.sums.keySet()) {
                Vector massCenter = stats.sums.get(ind).times(1.0 / stats.counts.get(ind));

                if (converged && distance(massCenter, centers[ind]) > epsilon * epsilon)
                    converged = false;

                centers[ind] = massCenter;
            }

            iteration++;
        }
    }
    finally {
        pointsCp.destroy();
    }

    return new KMeansModel(centers, getDistanceMeasure());
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class GradientDescent method getLossGradientFunction.
/**
 * Binds the given data matrix to the loss gradient, producing a function of the weights only.
 * <p>
 * A {@link SparseDistributedMatrix} stored in row mode is handled by {@code calculateDistributedGradient}. For any
 * other matrix the inputs and the ground truth are extracted once, up front, and the returned function delegates
 * to {@code lossGradient}.
 *
 * @param data Data matrix to fix inside the returned function.
 * @return Function mapping a weights vector to the gradient vector.
 */
private IgniteFunction<Vector, Vector> getLossGradientFunction(Matrix data) {
    boolean rowStoredDistributed = data instanceof SparseDistributedMatrix
        && ((SparseDistributedMatrix) data).getStorage().storageMode() == StorageConstants.ROW_STORAGE_MODE;

    if (rowStoredDistributed) {
        SparseDistributedMatrix distributed = (SparseDistributedMatrix) data;

        return w -> calculateDistributedGradient(distributed, w);
    }

    // Extraction happens here, not inside the lambda, so it is done only once per data matrix.
    Matrix features = extractInputs(data);
    Vector labels = extractGroundTruth(data);

    return w -> lossGradient.compute(features, labels, w);
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class SparseDistributedMatrixMapReducerTest method testMapReduce.
/**
 * Verifies that {@link SparseDistributedMatrixMapReducer} sums all elements of a 100x100 distributed matrix whose
 * cells are all set to "1.0": the map step sums the part of the matrix it is given, the reduce step adds up the
 * non-null partial sums, and the total must equal 10000.
 */
public void testMapReduce() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    SparseDistributedMatrix mtx = new SparseDistributedMatrix(100, 100);

    for (int row = 0; row < 100; row++) {
        for (int col = 0; col < 100; col++) {
            mtx.set(row, col, 1);
        }
    }

    SparseDistributedMatrixMapReducer reducer = new SparseDistributedMatrixMapReducer(mtx);

    double res = reducer.mapReduce(
        (part, ctx) -> {
            double acc = 0.0;

            for (int r = 0; r < part.rowSize(); r++) {
                for (int c = 0; c < part.columnSize(); c++) {
                    acc += part.get(r, c);
                }
            }

            return acc;
        },
        partials -> {
            double sum = 0;

            for (Double partial : partials) {
                // A null partial result contributes nothing to the sum.
                if (partial == null)
                    continue;

                sum += partial;
            }

            return sum;
        },
        0.0);

    assertEquals(100.0 * 100.0, res, 1e-18);
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class SparseDistributedMatrixMapReducerTest method testMapReduceWithOneEmptyNode.
/**
 * Verifies that {@link SparseDistributedMatrixMapReducer} sums all elements of a 1x100 distributed matrix whose
 * cells are all set to "1.0" and yields 100. As the test name indicates, this is meant to cover the case where not
 * all nodes contain data; null partial sums are skipped by the reduce step.
 */
public void testMapReduceWithOneEmptyNode() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    SparseDistributedMatrix mtx = new SparseDistributedMatrix(1, 100);

    for (int col = 0; col < 100; col++) {
        mtx.set(0, col, 1);
    }

    SparseDistributedMatrixMapReducer reducer = new SparseDistributedMatrixMapReducer(mtx);

    double res = reducer.mapReduce(
        (part, ctx) -> {
            double acc = 0.0;

            for (int r = 0; r < part.rowSize(); r++) {
                for (int c = 0; c < part.columnSize(); c++) {
                    acc += part.get(r, c);
                }
            }

            return acc;
        },
        partials -> {
            double sum = 0;

            for (Double partial : partials) {
                // A null partial result contributes nothing to the sum.
                if (partial == null)
                    continue;

                sum += partial;
            }

            return sum;
        },
        0.0);

    assertEquals(100.0, res, 1e-18);
}
Aggregations