Use of org.apache.ignite.ml.math.Vector in the Apache Ignite project.
Class FuzzyCMeansDistributedClusterer, method cluster.
/**
 * {@inheritDoc}
 *
 * <p>Starts from the centers returned by {@code initializeCenters} and then repeatedly recomputes
 * memberships and centers until the configured stop condition is satisfied or
 * {@code cMeansMaxIterations} iterations have been performed.
 *
 * @param points Matrix with the points to cluster; checked with {@code GridArgumentCheck.notNull}.
 * @param k Number of clusters; must be at least 2.
 * @return Model built from the final centers and the {@code measure} field.
 * @throws MathIllegalArgumentException If {@code k} is less than 2.
 * @throws ConvergenceException If the stop condition was not satisfied by the time the iteration
 *      limit was reached.
 */
@Override
public FuzzyCMeansModel cluster(SparseDistributedMatrix points, int k) throws MathIllegalArgumentException, ConvergenceException {
    GridArgumentCheck.notNull(points, "points");

    if (k < 2)
        throw new MathIllegalArgumentException("The number of clusters is less than 2");

    Vector[] centers = initializeCenters(points, k);

    // No memberships exist before the first pass, so the first comparison receives null here.
    MembershipsAndSums membershipsAndSums = null;

    int iteration = 0;
    boolean finished = false;

    while (!finished && iteration < cMeansMaxIterations) {
        MembershipsAndSums newMembershipsAndSums = calculateMembership(points, centers);
        Vector[] newCenters = calculateNewCenters(points, newMembershipsAndSums, k);

        // The stop condition selects what is compared between consecutive passes:
        // the centers themselves or the memberships.
        if (stopCond == StopCondition.STABLE_CENTERS)
            finished = isFinished(centers, newCenters);
        else
            finished = isFinished(membershipsAndSums, newMembershipsAndSums);

        centers = newCenters;
        membershipsAndSums = newMembershipsAndSums;

        iteration++;
    }

    // Decide on the convergence flag, not on the iteration counter: a run whose stop condition is
    // satisfied exactly on the last permitted iteration has converged and must not be reported
    // as a failure.
    if (!finished)
        throw new ConvergenceException("Fuzzy C-Means algorithm has not converged after " + iteration + " iterations");

    return new FuzzyCMeansModel(centers, measure);
}
Use of org.apache.ignite.ml.math.Vector in the Apache Ignite project.
Class KMeansDistributedClusterer, method initClusterCenters.
/**
 * Picks the initial cluster centers.
 *
 * <p>A randomly chosen row seeds the candidate list. Then, for {@code initSteps} rounds, the
 * per-point costs are refreshed against the most recently added candidates only and a new batch
 * of candidates is drawn from those costs. If more than {@code k} distinct candidates have been
 * collected, they are weighted and reduced by a local k-means run.
 *
 * @param points Matrix whose rows are the points.
 * @param k Requested number of centers.
 * @return The distinct candidates when there are at most {@code k} of them, otherwise the centers
 *      returned by the local clusterer.
 */
private Vector[] initClusterCenters(SparseDistributedMatrix points, int k) {
    int rowsTotal = points.rowSize();
    String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();

    // Seed the search with a local copy of one random row.
    Vector seedPoint = localCopyOf(points.viewRow(rnd.nextInt(rowsTotal)));

    List<Vector> allCandidates = new ArrayList<>();
    List<Vector> latestBatch = new ArrayList<>();

    latestBatch.add(seedPoint);
    allCandidates.add(seedPoint);

    final ConcurrentHashMap<Integer, Double> bestCosts = new ConcurrentHashMap<>();
    UUID matrixId = points.getUUID();

    // Each round samples (per the original design note) about 2 * k points with probability
    // proportional to their squared distance from the centers; only distances to the latest
    // batch of centers are computed in a round.
    for (int round = 0; round < initSteps; round++) {
        // The costs are assumed to fit into the memory of a single node.
        ConcurrentHashMap<Integer, Double> batchCosts = getNewCosts(points, latestBatch, cacheName);

        // Keep, per point index, the smallest cost seen so far.
        batchCosts.forEach((idx, cost) -> bestCosts.merge(idx, cost, Math::min));

        double costTotal = bestCosts.values().stream().mapToDouble(Double::doubleValue).sum();

        latestBatch = getNewCenters(k, bestCosts, matrixId, costTotal, cacheName);
        allCandidates.addAll(latestBatch);
    }

    List<Vector> unique = allCandidates.stream().distinct().collect(Collectors.toList());

    if (unique.size() > k) {
        // Too many distinct candidates: weight each one by the number of points mapping to it
        // and let a local k-means++ run on the weighted candidates pick k of them.
        ConcurrentHashMap<Integer, Integer> weightByIdx = weightCenters(matrixId, unique, cacheName);

        List<Double> weights = new ArrayList<>(weightByIdx.size());

        for (int i = 0; i < unique.size(); i++) {
            // Candidates without an entry get weight 0.
            int w = weightByIdx.getOrDefault(i, 0);

            weights.add(Double.valueOf(w));
        }

        DenseLocalOnHeapMatrix candidateMtx = MatrixUtil.fromList(unique, true);

        KMeansLocalClusterer reducer = new KMeansLocalClusterer(getDistanceMeasure(), 30, seed);

        return reducer.cluster(candidateMtx, k, weights).centers();
    }

    return unique.toArray(new Vector[0]);
}
Use of org.apache.ignite.ml.math.Vector in the Apache Ignite project.
Class LinearRegressionQRTrainer, method train.
/**
 * {@inheritDoc}
 *
 * <p>Splits {@code data} into ground truth and inputs, QR-decomposes the inputs and solves for
 * the ground truth. Element 0 of the solution becomes the intercept; the remaining elements
 * become the weights.
 *
 * @param data Matrix from which both the ground truth and the inputs are extracted.
 * @return Linear regression model built from the solved weights and intercept.
 */
@Override
public LinearRegressionModel train(Matrix data) {
    Vector target = extractGroundTruth(data);

    QRDecomposition qr = new QRDecomposition(extractInputs(data));

    Vector solution = new QRDSolver(qr.getQ(), qr.getR()).solve(target);

    // Everything after the leading element is a weight.
    int weightsCnt = solution.size() - 1;

    return new LinearRegressionModel(solution.viewPart(1, weightsCnt), solution.get(0));
}
Use of org.apache.ignite.ml.math.Vector in the Apache Ignite project.
Class Deltas, method fit.
/**
 * Trains a linear binary SVM classification model on the data supplied by the dataset builder.
 *
 * <p>The weight vector has {@code cols + 1} elements and starts from the result of
 * {@code initializeWeightsWithZeros}. It is updated {@code getAmountOfIterations()} times; on
 * completion element 0 is passed to the model as the intercept and the rest as feature weights.
 *
 * @param datasetBuilder Dataset builder; asserted to be non-null.
 * @param featureExtractor Feature extractor.
 * @param lbExtractor Label extractor.
 * @param cols Number of columns.
 * @return Model.
 * @throws RuntimeException Wrapping any exception raised while building, using or closing the dataset.
 */
@Override
public SVMLinearBinaryClassificationModel fit(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor, int cols) {
    assert datasetBuilder != null;

    PartitionDataBuilder<K, V, SVMPartitionContext, LabeledDataset<Double, LabeledVector>> dataBuilder =
        new SVMPartitionDataBuilderOnHeap<>(featureExtractor, lbExtractor, cols);

    Vector model;

    try (Dataset<SVMPartitionContext, LabeledDataset<Double, LabeledVector>> data =
             datasetBuilder.build((upstream, upstreamSize) -> new SVMPartitionContext(), dataBuilder)) {
        // One slot more than the number of columns: the extra one holds the intercept.
        model = initializeWeightsWithZeros(cols + 1);

        int iter = 0;

        while (iter < this.getAmountOfIterations()) {
            // plus() yields a new vector, so the reference is rebound on every pass.
            model = model.plus(calculateUpdates(model, data));

            iter++;
        }
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }

    int featuresCnt = model.size() - 1;

    return new SVMLinearBinaryClassificationModel(model.viewPart(1, featuresCnt), model.get(0));
}
Use of org.apache.ignite.ml.math.Vector in the Apache Ignite project.
Class KNNClassificationTest, method testBinaryClassificationTest.
/**
 * Builds a KNN model (constructed with 3, Euclidean distance and the SIMPLE strategy) over six
 * labeled 2D points, three labeled 1.0 and three labeled 2.0, and checks the label predicted
 * for one query vector near each group.
 */
public void testBinaryClassificationTest() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    // First three rows carry label 1.0, last three carry label 2.0.
    double[][] mtx = new double[][] { { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 }, { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
    double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };

    LabeledDataset training = new LabeledDataset(mtx, lbs);

    KNNModel knnMdl = new KNNModel(3, new EuclideanDistance(), KNNStrategy.SIMPLE, training);

    // Expected value goes first so that a failure message labels the values correctly.
    Vector firstVector = new DenseLocalOnHeapVector(new double[] { 2.0, 2.0 });
    assertEquals(1.0, knnMdl.apply(firstVector));

    Vector secondVector = new DenseLocalOnHeapVector(new double[] { -2.0, -2.0 });
    assertEquals(2.0, knnMdl.apply(secondVector));
}
Aggregations