Search in sources :

Example 66 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class FuzzyCMeansDistributedClusterer method cluster.

/**
 * {@inheritDoc}
 *
 * <p>Starting from the centers produced by {@code initializeCenters}, repeatedly recomputes the
 * memberships and the centers until the configured stop condition reports completion or
 * {@code cMeansMaxIterations} iterations have been performed.
 *
 * @param points Points to cluster; must not be {@code null}.
 * @param k Number of clusters; must be at least 2.
 * @return Model built from the final centers and the configured distance measure.
 * @throws MathIllegalArgumentException If {@code k} is less than 2.
 * @throws ConvergenceException If the stop condition was still not met when the iteration limit
 *      was reached.
 */
@Override
public FuzzyCMeansModel cluster(SparseDistributedMatrix points, int k) throws MathIllegalArgumentException, ConvergenceException {
    GridArgumentCheck.notNull(points, "points");
    if (k < 2)
        throw new MathIllegalArgumentException("The number of clusters is less than 2");
    Vector[] centers = initializeCenters(points, k);
    // Stays null until the first iteration completes.
    // NOTE(review): isFinished(null, ...) is assumed to cope with that - confirm.
    MembershipsAndSums membershipsAndSums = null;
    int iteration = 0;
    boolean finished = false;
    while (!finished && iteration < cMeansMaxIterations) {
        MembershipsAndSums newMembershipsAndSums = calculateMembership(points, centers);
        Vector[] newCenters = calculateNewCenters(points, newMembershipsAndSums, k);
        if (stopCond == StopCondition.STABLE_CENTERS)
            finished = isFinished(centers, newCenters);
        else
            finished = isFinished(membershipsAndSums, newMembershipsAndSums);
        centers = newCenters;
        membershipsAndSums = newMembershipsAndSums;
        iteration++;
    }
    // Decide on the stop-condition flag rather than on the iteration counter: a run that
    // converges exactly on the last allowed iteration is a success, not a failure.
    if (!finished)
        throw new ConvergenceException("Fuzzy C-Means algorithm has not converged after " + iteration + " iterations");
    return new FuzzyCMeansModel(centers, measure);
}
Also used : MathIllegalArgumentException(org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException) ConvergenceException(org.apache.ignite.ml.math.exceptions.ConvergenceException) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Example 67 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class KMeansDistributedClusterer method initClusterCenters.

/**
 * Chooses the initial cluster centers.
 *
 * <p>One randomly picked row seeds the candidate list. Then, for {@code initSteps} rounds, the
 * per-point costs are refreshed against the most recently added candidates and a new batch of
 * candidates is drawn from them. If more than {@code k} distinct candidates remain, they are
 * weighted and reduced to {@code k} by a local k-means run.
 *
 * @param points Matrix whose rows are the points to cluster.
 * @param k Requested number of centers.
 * @return Initial centers; the array holds fewer than {@code k} entries when fewer distinct
 *      candidates were collected.
 */
private Vector[] initClusterCenters(SparseDistributedMatrix points, int k) {
    int ptsCnt = points.rowSize();
    String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();

    // Seed: a local copy of one randomly selected row.
    Vector seedPoint = localCopyOf(points.viewRow(rnd.nextInt(ptsCnt)));

    List<Vector> candidates = new ArrayList<>();
    List<Vector> latestBatch = new ArrayList<>();
    latestBatch.add(seedPoint);
    candidates.add(seedPoint);

    // Best (smallest) cost seen so far for every point index.
    final ConcurrentHashMap<Integer, Double> costs = new ConcurrentHashMap<>();
    UUID uid = points.getUUID();

    // Each round samples 2 * k points on average, with probability proportional to their squared
    // distance from the centers. Only distances to the latest batch are recomputed per round.
    for (int step = 0; step < initSteps; step++) {
        // The costs are assumed to fit into the memory of a single node.
        ConcurrentHashMap<Integer, Double> batchCosts = getNewCosts(points, latestBatch, cacheName);

        // Keep the minimum of the known cost and the freshly computed one.
        batchCosts.forEach((idx, cost) -> costs.merge(idx, cost, Math::min));

        double totalCost = costs.values().stream().mapToDouble(Double::doubleValue).sum();

        latestBatch = getNewCenters(k, costs, uid, totalCost, cacheName);
        candidates.addAll(latestBatch);
    }

    List<Vector> uniqueCandidates = candidates.stream().distinct().collect(Collectors.toList());

    if (uniqueCandidates.size() > k) {
        // More than k distinct candidates: weight each one by the number of dataset points
        // mapping to it and let a local k-means++ pick k of them.
        ConcurrentHashMap<Integer, Integer> weightByIdx = weightCenters(uid, uniqueCandidates, cacheName);

        List<Double> weights = new ArrayList<>(weightByIdx.size());
        for (int idx = 0; idx < uniqueCandidates.size(); idx++)
            weights.add(idx, weightByIdx.getOrDefault(idx, 0).doubleValue());

        DenseLocalOnHeapMatrix candidateMtx = MatrixUtil.fromList(uniqueCandidates, true);
        KMeansLocalClusterer locClusterer = new KMeansLocalClusterer(getDistanceMeasure(), 30, seed);

        return locClusterer.cluster(candidateMtx, k, weights).centers();
    }

    return uniqueCandidates.toArray(new Vector[0]);
}
Also used : SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) ArrayList(java.util.ArrayList) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) UUID(java.util.UUID) Vector(org.apache.ignite.ml.math.Vector)

Example 68 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class LinearRegressionQRTrainer method train.

/**
 * {@inheritDoc}
 *
 * <p>Splits {@code data} into ground truth and inputs, QR-decomposes the inputs and solves for the
 * regression variables. Element 0 of the solution becomes the intercept; the remaining elements
 * become the weights.
 */
@Override
public LinearRegressionModel train(Matrix data) {
    Vector labels = extractGroundTruth(data);
    Matrix features = extractInputs(data);

    QRDecomposition qr = new QRDecomposition(features);
    Vector solution = new QRDSolver(qr.getQ(), qr.getR()).solve(labels);

    // Everything after the leading element belongs to the weights.
    int weightCnt = solution.size() - 1;

    return new LinearRegressionModel(solution.viewPart(1, weightCnt), solution.get(0));
}
Also used : QRDecomposition(org.apache.ignite.ml.math.decompositions.QRDecomposition) Matrix(org.apache.ignite.ml.math.Matrix) QRDSolver(org.apache.ignite.ml.math.decompositions.QRDSolver) FunctionVector(org.apache.ignite.ml.math.impls.vector.FunctionVector) Vector(org.apache.ignite.ml.math.Vector)

Example 69 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class Deltas method fit.

/**
 * Fits a linear binary SVM classifier on the data supplied by the dataset builder.
 *
 * <p>The weight vector has {@code cols + 1} elements. Element 0 is handed to the model as the
 * intercept; the remaining elements are handed over as the feature weights.
 *
 * @param datasetBuilder   Dataset builder.
 * @param featureExtractor Feature extractor.
 * @param lbExtractor      Label extractor.
 * @param cols             Number of columns.
 * @return Model.
 */
@Override
public SVMLinearBinaryClassificationModel fit(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor, int cols) {
    assert datasetBuilder != null;

    PartitionDataBuilder<K, V, SVMPartitionContext, LabeledDataset<Double, LabeledVector>> partDataBuilder =
        new SVMPartitionDataBuilderOnHeap<>(featureExtractor, lbExtractor, cols);

    Vector weights;

    try (Dataset<SVMPartitionContext, LabeledDataset<Double, LabeledVector>> dataset =
             datasetBuilder.build((upstream, upstreamSize) -> new SVMPartitionContext(), partDataBuilder)) {
        // One extra slot in front of the feature weights holds the intercept.
        Vector current = initializeWeightsWithZeros(cols + 1);

        int iter = 0;
        while (iter < this.getAmountOfIterations()) {
            // plus() yields a new vector, so the reference is rebound on every pass.
            current = current.plus(calculateUpdates(current, dataset));
            iter++;
        }

        weights = current;
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }

    Vector featureWeights = weights.viewPart(1, weights.size() - 1);
    double intercept = weights.get(0);

    return new SVMLinearBinaryClassificationModel(featureWeights, intercept);
}
Also used : LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Example 70 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class KNNClassificationTest method testBinaryClassificationTest.

/**
 * Checks binary classification with a 3-neighbour KNN model: a query at (2, 2) must receive
 * label 1.0 and a query at (-2, -2) must receive label 2.0.
 */
public void testBinaryClassificationTest() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    // Three training points with positive coordinates, then three with negative coordinates.
    double[][] features = {
        { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 },
        { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 }
    };
    // Labels in row order: 1.0 for the first three rows, 2.0 for the last three.
    double[] labels = { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };

    LabeledDataset trainingSet = new LabeledDataset(features, labels);
    KNNModel mdl = new KNNModel(3, new EuclideanDistance(), KNNStrategy.SIMPLE, trainingSet);

    Vector positiveQuery = new DenseLocalOnHeapVector(new double[] { 2.0, 2.0 });
    assertEquals(mdl.apply(positiveQuery), 1.0);

    Vector negativeQuery = new DenseLocalOnHeapVector(new double[] { -2.0, -2.0 });
    assertEquals(mdl.apply(negativeQuery), 2.0);
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KNNModel(org.apache.ignite.ml.knn.models.KNNModel) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Aggregations

Vector (org.apache.ignite.ml.math.Vector)116 DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)62 Test (org.junit.Test)29 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)20 DenseLocalOnHeapMatrix (org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix)20 Matrix (org.apache.ignite.ml.math.Matrix)19 SparseDistributedMatrix (org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix)17 Random (java.util.Random)12 ArrayList (java.util.ArrayList)11 DistanceMeasure (org.apache.ignite.ml.math.distances.DistanceMeasure)10 Arrays (java.util.Arrays)9 Ignite (org.apache.ignite.Ignite)9 SparseDistributedMatrixStorage (org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage)9 LabeledDataset (org.apache.ignite.ml.structures.LabeledDataset)9 UUID (java.util.UUID)8 Collections (java.util.Collections)7 List (java.util.List)7 MathIllegalArgumentException (org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException)7 DenseLocalOffHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOffHeapVector)7 HashMap (java.util.HashMap)6