Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.
From the class FuzzyCMeansDistributedClusterer, method calculateNewCenters.
/**
 * Computes updated cluster centers from the current membership matrix.
 *
 * @param points Distributed matrix holding the source points.
 * @param membershipsAndSums Per-point membership vectors plus per-center membership coefficient sums.
 * @param k Number of cluster centers.
 * @return Array of recomputed centers.
 */
private Vector[] calculateNewCenters(SparseDistributedMatrix points, MembershipsAndSums membershipsAndSums, int k) {
    String cache = ((SparseDistributedMatrixStorage)points.getStorage()).cacheName();
    UUID matrixId = points.getUUID();

    CentersArraySupplier centerSumsSupplier = new CentersArraySupplier(k, points.columnSize());

    // Fold over the point cache, accumulating the membership-weighted sum of points per center.
    Vector[] newCenters = CacheUtils.distributedFold(cache,
        (IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>, Vector[], Vector[]>)(entry, acc) -> {
            Integer pntIdx = entry.getKey().index();
            Vector pnt = MatrixUtil.localCopyOf(VectorUtils.fromMap(entry.getValue(), false));
            Vector pntMemberships = membershipsAndSums.memberships.get(pntIdx);

            for (int c = 0; c < k; c++)
                acc[c] = acc[c].plus(pnt.times(pntMemberships.getX(c)));

            return acc;
        },
        key -> key.dataStructureId().equals(matrixId),
        (lhs, rhs) -> {
            // Combine partial per-center sums from two nodes.
            for (int c = 0; c < k; c++)
                lhs[c] = lhs[c].plus(rhs[c]);

            return lhs;
        },
        centerSumsSupplier);

    // Normalize each weighted sum by the total membership mass of its center.
    for (int c = 0; c < k; c++)
        newCenters[c] = newCenters[c].divide(membershipsAndSums.membershipSums.getX(c));

    return newCenters;
}
Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.
From the class FuzzyCMeansDistributedClusterer, method calculateMembership.
/**
 * Builds the membership matrix: for each point, its membership coefficient with respect to
 * every center, together with the per-center sums of those coefficients.
 *
 * @param points Distributed matrix holding the source points.
 * @param centers Current cluster centers.
 * @return Membership matrix and per-center membership sums.
 */
private MembershipsAndSums calculateMembership(SparseDistributedMatrix points, Vector[] centers) {
    String cache = ((SparseDistributedMatrixStorage)points.getStorage()).cacheName();
    UUID matrixId = points.getUUID();

    double fuzzyMembershipCoefficient = 2 / (exponentialWeight - 1);

    MembershipsAndSumsSupplier supplier = new MembershipsAndSumsSupplier(centers.length);

    return CacheUtils.distributedFold(cache,
        (IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>, MembershipsAndSums, MembershipsAndSums>)(entry, acc) -> {
            Integer pntIdx = entry.getKey().index();
            Vector pnt = VectorUtils.fromMap(entry.getValue(), false);

            int cnt = centers.length;

            Vector dists = new DenseLocalOnHeapVector(cnt);
            Vector pntMemberships = new DenseLocalOnHeapVector(cnt);

            // Distance from this point to every center.
            for (int c = 0; c < cnt; c++)
                dists.setX(c, distance(centers[c], pnt));

            for (int c = 0; c < cnt; c++) {
                double invertedFuzzyWeight = 0.0;

                for (int j = 0; j < cnt; j++) {
                    double ratio = Math.pow(dists.getX(c) / dists.getX(j), fuzzyMembershipCoefficient);

                    // NaN arises from a 0/0 distance ratio (point coincides with centers); count it as 1.
                    invertedFuzzyWeight += Double.isNaN(ratio) ? 1.0 : ratio;
                }

                pntMemberships.setX(c, Math.pow(1.0 / invertedFuzzyWeight, exponentialWeight));
            }

            acc.memberships.put(pntIdx, pntMemberships);
            acc.membershipSums = acc.membershipSums.plus(pntMemberships);

            return acc;
        },
        key -> key.dataStructureId().equals(matrixId),
        (part1, part2) -> {
            // Merge partial membership maps and sums from two nodes.
            part1.merge(part2);

            return part1;
        },
        supplier);
}
Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.
From the class KMeansDistributedClusterer, method initClusterCenters.
/**
 * Initializes cluster centers by oversampling candidates proportionally to their squared
 * distance from the current center set, then reducing to at most {@code k} centers with a
 * local weighted k-means++.
 *
 * @param points Distributed matrix holding the source points.
 * @param k Desired number of clusters.
 * @return Array of at most {@code k} initial centers.
 */
private Vector[] initClusterCenters(SparseDistributedMatrix points, int k) {
    // Initialize empty centers and point costs.
    int ptsCnt = points.rowSize();

    String cacheName = ((SparseDistributedMatrixStorage)points.getStorage()).cacheName();

    // Initialize the first center to a random point.
    Vector sample = localCopyOf(points.viewRow(rnd.nextInt(ptsCnt)));

    List<Vector> centers = new ArrayList<>();
    List<Vector> newCenters = new ArrayList<>();
    newCenters.add(sample);
    centers.add(sample);

    // Minimal squared distance from each point to the center set built so far.
    final ConcurrentHashMap<Integer, Double> costs = new ConcurrentHashMap<>();

    // On each step, sample 2 * k points on average with probability proportional
    // to their squared distance from the centers. Note that only distances between points
    // and new centers are computed in each iteration.
    int step = 0;
    UUID uid = points.getUUID();

    while (step < initSteps) {
        // We assume here that costs can fit into memory of one node.
        ConcurrentHashMap<Integer, Double> newCosts = getNewCosts(points, newCenters, cacheName);

        // Merge costs with new costs, keeping the cheaper value per point.
        // forEach avoids the redundant per-key lookup of keySet() + get().
        newCosts.forEach((ind, cost) -> costs.merge(ind, cost, Math::min));

        // Double::doubleValue unboxes directly; Double::valueOf forced a box-unbox round trip.
        double sumCosts = costs.values().stream().mapToDouble(Double::doubleValue).sum();

        newCenters = getNewCenters(k, costs, uid, sumCosts, cacheName);
        centers.addAll(newCenters);

        step++;
    }

    List<Vector> distinctCenters = centers.stream().distinct().collect(Collectors.toList());

    if (distinctCenters.size() <= k)
        return distinctCenters.toArray(new Vector[0]);
    else {
        // Finally, we might have a set of more than k distinct candidate centers; weight each
        // candidate by the number of points in the dataset mapping to it and run a local k-means++
        // on the weighted centers to pick k of them.
        ConcurrentHashMap<Integer, Integer> centerInd2Weight = weightCenters(uid, distinctCenters, cacheName);

        List<Double> weights = new ArrayList<>(centerInd2Weight.size());

        // Candidates that attracted no points get weight 0.
        for (int i = 0; i < distinctCenters.size(); i++)
            weights.add(Double.valueOf(centerInd2Weight.getOrDefault(i, 0)));

        DenseLocalOnHeapMatrix dCenters = MatrixUtil.fromList(distinctCenters, true);

        return new KMeansLocalClusterer(getDistanceMeasure(), 30, seed).cluster(dCenters, k, weights).centers();
    }
}
Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.
From the class ColumnDecisionTreeTrainerBenchmark, method tstF1.
/**
 * Decision tree regression benchmark over function f1.
 * To run this test rename this method so it starts from 'test'.
 */
public void tstF1() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    int ptsCnt = 10000;
    int featCnt = 100;

    // Explicit value ranges for the first three features; all others use the default range.
    Map<Integer, double[]> ranges = new HashMap<>();
    ranges.put(0, new double[] {-100.0, 100.0});
    ranges.put(1, new double[] {-100.0, 100.0});
    ranges.put(2, new double[] {-100.0, 100.0});

    double[] defRng = {-1.0, 1.0};

    Vector[] trainVectors = vecsFromRanges(ranges, featCnt, defRng, new Random(123L), ptsCnt, f1);

    // One extra column holds the regression label.
    SparseDistributedMatrix trainMtx = new SparseDistributedMatrix(ptsCnt, featCnt + 1,
        StorageConstants.COLUMN_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    SparseDistributedMatrixStorage storage = (SparseDistributedMatrixStorage)trainMtx.getStorage();

    loadVectorsIntoSparseDistributedMatrixCache(storage.cache().getName(), storage.getUUID(),
        Arrays.stream(trainVectors).iterator(), featCnt + 1);

    IgniteFunction<DoubleStream, Double> regCalc = s -> s.average().orElse(0.0);

    ColumnDecisionTreeTrainer<VarianceSplitCalculator.VarianceData> trainer =
        new ColumnDecisionTreeTrainer<>(10, ContinuousSplitCalculators.VARIANCE, RegionCalculators.VARIANCE, regCalc, ignite);

    X.println("Training started.");

    long startMs = System.currentTimeMillis();

    DecisionTreeModel mdl = trainer.train(new MatrixColumnDecisionTreeTrainerInput(trainMtx, new HashMap<>()));

    X.println("Training finished in: " + (System.currentTimeMillis() - startMs) + " ms.");

    // Evaluate mean squared error on a small freshly generated sample.
    Vector[] testVectors = vecsFromRanges(ranges, featCnt, defRng, new Random(123L), 20, f1);

    IgniteTriFunction<Model<Vector, Double>, Stream<IgniteBiTuple<Vector, Double>>, Function<Double, Double>, Double> mse = Estimators.MSE();

    Double accuracy = mse.apply(mdl,
        Arrays.stream(testVectors).map(v -> new IgniteBiTuple<>(v.viewPart(0, featCnt), v.getX(featCnt))),
        Function.identity());

    X.println("MSE: " + accuracy);
}
Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.
From the class SparseDistributedMatrixMapReducer, method mapReduce.
/**
 * Applies {@code mapper} on every data node of the matrix cache to a dense local matrix
 * assembled from the rows that node owns, then folds the per-node results with {@code reducer}.
 *
 * @param mapper Function applied on each node to (local dense matrix, broadcast args).
 * @param reducer Function combining the per-node results into the final value.
 *      NOTE(review): nodes owning no keys contribute {@code null} to the collection —
 *      the reducer must tolerate null elements; confirm against callers.
 * @param args Extra argument broadcast to every node.
 * @param <R> Result type.
 * @param <T> Broadcast argument type.
 * @return Reduced result.
 */
public <R, T> R mapReduce(IgniteBiFunction<Matrix, T, R> mapper, IgniteFunction<Collection<R>, R> reducer, T args) {
Ignite ignite = Ignition.localIgnite();
SparseDistributedMatrixStorage storage = (SparseDistributedMatrixStorage) distributedMatrix.getStorage();
int colSize = distributedMatrix.columnSize();
// Broadcast the closure to every node that stores data of the matrix cache.
Collection<R> results = ignite.compute(ignite.cluster().forDataNodes(storage.cacheName())).broadcast(arguments -> {
Ignite locIgnite = Ignition.localIgnite();
Affinity<RowColMatrixKey> affinity = locIgnite.affinity(storage.cacheName());
ClusterNode locNode = locIgnite.cluster().localNode();
// Use cache affinity to find which matrix keys are mapped to this node.
Map<ClusterNode, Collection<RowColMatrixKey>> keys = affinity.mapKeysToNodes(storage.getAllKeys());
Collection<RowColMatrixKey> locKeys = keys.get(locNode);
if (locKeys != null) {
int idx = 0;
// Materialize the locally-owned rows into a dense matrix, one row per local key;
// row order follows the iteration order of locKeys.
Matrix locMatrix = new DenseLocalOnHeapMatrix(locKeys.size(), colSize);
for (RowColMatrixKey key : locKeys) {
Map<Integer, Double> row = storage.cache().get(key);
for (Map.Entry<Integer, Double> cell : row.entrySet()) locMatrix.set(idx, cell.getKey(), cell.getValue());
idx++;
}
return mapper.apply(locMatrix, arguments);
}
// No data owned by this node: contribute null to the result collection.
return null;
}, args);
return reducer.apply(results);
}
Aggregations