Use of org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey in the Apache Ignite project.
Class ColumnDecisionTreeTrainerBenchmark, method loadVectorsIntoSparseDistributedMatrixCache.
/**
 * Loads vectors into the cache backing a sparse distributed matrix, streaming them in batches.
 * <p>
 * Samples are accumulated column-wise (feature index -> (sample index -> value)) and handed to the
 * data streamer once per {@code batchSize} samples. The stream receiver merges each incoming map into
 * whatever map is already stored under the same key, so the way samples are split into batches does
 * not affect the final cache content.
 *
 * @param cacheName Name of cache where matrix is stored.
 * @param uuid UUID of matrix.
 * @param iter Iterator over vectors.
 * @param vectorSize Size of vectors.
 */
private void loadVectorsIntoSparseDistributedMatrixCache(String cacheName, UUID uuid,
    Iterator<? extends org.apache.ignite.ml.math.Vector> iter, int vectorSize) {
    try (IgniteDataStreamer<SparseMatrixKey, Map<Integer, Double>> streamer =
        Ignition.localIgnite().dataStreamer(cacheName)) {
        int sampleIdx = 0;

        streamer.allowOverwrite(true);

        // Merge the streamed (sample index -> value) map into the map already stored for the key.
        streamer.receiver(StreamTransformer.from((e, arg) -> {
            Map<Integer, Double> val = e.getValue();

            if (val == null)
                val = new Int2DoubleOpenHashMap();

            val.putAll((Map<Integer, Double>) arg[0]);

            e.setValue(val);

            return null;
        }));

        // Feature index -> (sample index -> value)
        Map<Integer, Map<Integer, Double>> batch = new HashMap<>();

        for (int i = 0; i < vectorSize; i++)
            batch.put(i, new HashMap<>());

        int batchSize = 1000;

        while (iter.hasNext()) {
            org.apache.ignite.ml.math.Vector next = iter.next();

            for (int i = 0; i < vectorSize; i++)
                batch.get(i).put(sampleIdx, next.getX(i));

            X.println("Sample index: " + sampleIdx);

            sampleIdx++;

            // sampleIdx now equals the number of samples consumed so far, so this fires exactly
            // once per full batch. Checking before the increment flushed a one-sample batch at
            // index 0 and made the trailing flush below skip the last (batchSize - 1) samples
            // whenever the total count was a multiple of batchSize.
            if (sampleIdx % batchSize == 0) {
                batch.forEach((fi, col) -> streamer.addData(new SparseMatrixKey(fi, uuid, fi), col));

                // Install fresh maps rather than clearing the old ones; presumably the streamer may
                // still reference the maps just handed over (TODO confirm).
                for (int i = 0; i < vectorSize; i++)
                    batch.put(i, new HashMap<>());
            }
        }

        // Flush the trailing partial batch, if any.
        if (sampleIdx % batchSize != 0)
            batch.forEach((fi, col) -> streamer.addData(new SparseMatrixKey(fi, uuid, fi), col));
    }
}
Use of org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey in the Apache Ignite project.
Class FuzzyCMeansDistributedClusterer, method calculateNewCenters.
/**
 * Recomputes the centers from the current membership matrix: every stored point is scaled by its
 * membership coefficient for each center, the scaled points are summed per center across the cache,
 * and each sum is divided by the corresponding total membership.
 *
 * @param points Matrix with source points.
 * @param membershipsAndSums Membership matrix and sums of membership coefficient for each center.
 * @param k The number of centers.
 * @return Array of new centers.
 */
private Vector[] calculateNewCenters(SparseDistributedMatrix points, MembershipsAndSums membershipsAndSums, int k) {
    String matrixCache = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();
    UUID matrixId = points.getUUID();
    CentersArraySupplier zeroCenters = new CentersArraySupplier(k, points.columnSize());

    // Adds one stored point, weighted by its membership in each center, to the running per-center sums.
    IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>, Vector[], Vector[]> folder =
        (entry, acc) -> {
            Integer pointIdx = entry.getKey().index();
            Vector point = MatrixUtil.localCopyOf(VectorUtils.fromMap(entry.getValue(), false));
            Vector pointMemberships = membershipsAndSums.memberships.get(pointIdx);

            for (int c = 0; c < k; c++)
                acc[c] = acc[c].plus(point.times(pointMemberships.getX(c)));

            return acc;
        };

    Vector[] centers = CacheUtils.distributedFold(
        matrixCache,
        folder,
        // Only entries belonging to this matrix take part in the fold.
        key -> key.dataStructureId().equals(matrixId),
        (left, right) -> {
            for (int c = 0; c < k; c++)
                left[c] = left[c].plus(right[c]);

            return left;
        },
        zeroCenters);

    // Normalize each weighted sum by the total membership of its center.
    for (int c = 0; c < k; c++)
        centers[c] = centers[c].divide(membershipsAndSums.membershipSums.getX(c));

    return centers;
}
Use of org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey in the Apache Ignite project.
Class FuzzyCMeansDistributedClusterer, method calculateMembership.
/**
 * Computes, for every stored point, its membership coefficient with respect to each center, and
 * accumulates the per-center sums of those coefficients.
 *
 * @param points Matrix with source points.
 * @param centers Array of current centers.
 * @return Membership matrix and sums of membership coefficients for each center.
 */
private MembershipsAndSums calculateMembership(SparseDistributedMatrix points, Vector[] centers) {
    String matrixCache = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();
    UUID matrixId = points.getUUID();
    double ratioExponent = 2 / (exponentialWeight - 1);
    MembershipsAndSumsSupplier zeroVal = new MembershipsAndSumsSupplier(centers.length);

    // Computes the membership vector of one stored point and folds it into the accumulator.
    IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>, MembershipsAndSums, MembershipsAndSums> folder =
        (entry, acc) -> {
            Integer pointIdx = entry.getKey().index();
            Vector point = VectorUtils.fromMap(entry.getValue(), false);

            int centersCnt = centers.length;
            Vector dists = new DenseLocalOnHeapVector(centersCnt);
            Vector pointMemberships = new DenseLocalOnHeapVector(centersCnt);

            // All distances must be known before any membership can be computed.
            for (int c = 0; c < centersCnt; c++)
                dists.setX(c, distance(centers[c], point));

            for (int c = 0; c < centersCnt; c++) {
                double distToCenter = dists.getX(c);
                double invertedWeight = 0.0;

                for (int other = 0; other < centersCnt; other++) {
                    double ratioPow = Math.pow(distToCenter / dists.getX(other), ratioExponent);

                    // A NaN ratio (e.g. 0 / 0) is counted as 1.
                    invertedWeight += Double.isNaN(ratioPow) ? 1.0 : ratioPow;
                }

                pointMemberships.setX(c, Math.pow(1.0 / invertedWeight, exponentialWeight));
            }

            acc.memberships.put(pointIdx, pointMemberships);
            acc.membershipSums = acc.membershipSums.plus(pointMemberships);

            return acc;
        };

    MembershipsAndSums res = CacheUtils.distributedFold(
        matrixCache,
        folder,
        // Only entries belonging to this matrix take part in the fold.
        key -> key.dataStructureId().equals(matrixId),
        (left, right) -> {
            left.merge(right);

            return left;
        },
        zeroVal);

    return res;
}
Aggregations