Search in sources :

Example 11 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class SplitDataGenerator method testByGen.

/**
 * Generates {@code totalPts} points, writes them in shuffled order into a column-storage
 * {@link SparseDistributedMatrix}, trains a {@link ColumnDecisionTreeTrainer} on that matrix and finally applies
 * the trained model to the first sample collected for every region key.
 *
 * @param totalPts Number of points to generate; also the row count of the training matrix.
 * @param calc Factory passed to the trainer as its continuous split calculator provider.
 * @param catImpCalc Factory passed to the trainer as its third constructor argument
 *      (presumably the impurity calculator for categorical features - confirm against the trainer).
 * @param regCalc Function passed to the trainer as its fourth constructor argument
 *      (presumably computes the value of a region - confirm against the trainer).
 * @param ignite Ignite instance passed to the trainer.
 * @param <D> Type of the continuous region info used by the split calculator.
 */
<D extends ContinuousRegionInfo> void testByGen(int totalPts, IgniteFunction<ColumnDecisionTreeTrainerInput, ? extends ContinuousSplitCalculator<D>> calc, IgniteFunction<ColumnDecisionTreeTrainerInput, IgniteFunction<DoubleStream, Double>> catImpCalc, IgniteFunction<DoubleStream, Double> regCalc, Ignite ignite) {
    List<IgniteBiTuple<Integer, V>> shuffledPts = points(totalPts, (i, rn) -> i).collect(Collectors.toList());
    Collections.shuffle(shuffledPts, rnd);

    // One matrix row per generated point; featCnt + 1 columns.
    SparseDistributedMatrix trainingMtx = new SparseDistributedMatrix(totalPts, featCnt + 1, StorageConstants.COLUMN_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    Map<Integer, List<LabeledVectorDouble>> samplesByRegion = new HashMap<>();

    int rowIdx = 0;
    for (IgniteBiTuple<Integer, V> pnt : shuffledPts) {
        // Group the labeled sample under the tuple's first component (the region key).
        samplesByRegion.computeIfAbsent(pnt.get1(), region -> new LinkedList<>())
            .add(asLabeledVector(pnt.get2().getStorage().data()));

        trainingMtx.setRow(rowIdx, pnt.get2().getStorage().data());
        rowIdx++;
    }

    // NOTE(review): the literal 3 is the trainer's first constructor argument (presumably max depth) - confirm.
    ColumnDecisionTreeTrainer<D> trainer = new ColumnDecisionTreeTrainer<>(3, calc, catImpCalc, regCalc, ignite);
    DecisionTreeModel mdl = trainer.train(new MatrixColumnDecisionTreeTrainerInput(trainingMtx, catFeaturesInfo));

    // Only exercises the model on one representative per region; the result is not checked here.
    for (List<LabeledVectorDouble> regionSamples : samplesByRegion.values()) {
        mdl.apply(regionSamples.get(0).features());
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DecisionTreeModel(org.apache.ignite.ml.trees.models.DecisionTreeModel) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) BiFunction(java.util.function.BiFunction) ColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainerInput) HashMap(java.util.HashMap) Random(java.util.Random) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Function(java.util.function.Function) Supplier(java.util.function.Supplier) Vector(org.apache.ignite.ml.math.Vector) Map(java.util.Map) LinkedList(java.util.LinkedList) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) MatrixColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.MatrixColumnDecisionTreeTrainerInput) LabeledVectorDouble(org.apache.ignite.ml.structures.LabeledVectorDouble) Ignite(org.apache.ignite.Ignite) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) DoubleStream(java.util.stream.DoubleStream) IgniteBiTuple(org.apache.ignite.lang.IgniteBiTuple) List(java.util.List) Stream(java.util.stream.Stream) MathIllegalArgumentException(org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException) Utils(org.apache.ignite.ml.util.Utils) ContinuousSplitCalculator(org.apache.ignite.ml.trees.ContinuousSplitCalculator) BitSet(java.util.BitSet) StorageConstants(org.apache.ignite.ml.math.StorageConstants) ContinuousRegionInfo(org.apache.ignite.ml.trees.ContinuousRegionInfo) Collections(java.util.Collections) ColumnDecisionTreeTrainer(org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainer) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) IgniteBiTuple(org.apache.ignite.lang.IgniteBiTuple) HashMap(java.util.HashMap) 
MatrixColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.MatrixColumnDecisionTreeTrainerInput) DecisionTreeModel(org.apache.ignite.ml.trees.models.DecisionTreeModel) LinkedList(java.util.LinkedList) List(java.util.List) ColumnDecisionTreeTrainer(org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainer)

Example 12 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class KMeansDistributedClusterer method cluster.

/**
 * Clusters the rows of {@code points} into {@code k} clusters by running iterations of Lloyd's algorithm on a
 * temporary copy of the matrix.
 * <p>
 * Iteration stops when either {@code maxIterations} is reached or no center moved by more than
 * {@code epsilon * epsilon} (as measured by {@code distance}) during the last iteration. The temporary copy is
 * destroyed before this method returns, including when clustering fails with an exception.
 *
 * @param points Matrix whose rows are the points to cluster; it is copied, not modified, by this method.
 * @param k Number of cluster centers to initialize.
 * @return Model built from the final centers and this clusterer's distance measure.
 * @throws MathIllegalArgumentException Declared by the overridden method; not thrown directly in this body.
 * @throws ConvergenceException Declared by the overridden method; not thrown directly in this body.
 */
@Override
public KMeansModel cluster(SparseDistributedMatrix points, int k) throws MathIllegalArgumentException, ConvergenceException {
    SparseDistributedMatrix pointsCp = (SparseDistributedMatrix) points.like(points.rowSize(), points.columnSize());
    Vector[] centers;

    // Everything that touches the temporary copy runs under try/finally so the copy is destroyed even when
    // initialization or an iteration throws; previously a failure left the copy behind.
    try {
        String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();
        // TODO: IGNITE-5825, this copy is very ineffective, just for POC. Immutability of data should be guaranteed by other methods
        // such as logical locks for example.
        pointsCp.assign(points);
        centers = initClusterCenters(pointsCp, k);
        boolean converged = false;
        int iteration = 0;
        int dim = pointsCp.viewRow(0).size();
        UUID uid = pointsCp.getUUID();
        // Execute iterations of Lloyd's algorithm until converged
        while (iteration < maxIterations && !converged) {
            SumsAndCounts stats = getSumsAndCounts(centers, dim, uid, cacheName);
            converged = true;
            for (Integer ind : stats.sums.keySet()) {
                // New center is the mean of the points accumulated for this index.
                Vector massCenter = stats.sums.get(ind).times(1.0 / stats.counts.get(ind));
                // Once one center has moved too far there is no need to measure the remaining ones.
                if (converged && distance(massCenter, centers[ind]) > epsilon * epsilon)
                    converged = false;
                centers[ind] = massCenter;
            }
            iteration++;
        }
    } finally {
        pointsCp.destroy();
    }
    return new KMeansModel(centers, getDistanceMeasure());
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) UUID(java.util.UUID) Vector(org.apache.ignite.ml.math.Vector)

Example 13 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class GradientDescent method getLossGradientFunction.

/**
 * Curries the loss gradient function by fixing the data matrix, leaving a function of the weights only.
 * <p>
 * A {@link SparseDistributedMatrix} stored in row mode is handled by {@code calculateDistributedGradient};
 * any other matrix is split into inputs and ground truth up front and handled by {@code lossGradient}.
 *
 * @param data Data matrix to bind into the returned function.
 * @return Function mapping a weights vector to the gradient vector.
 */
private IgniteFunction<Vector, Vector> getLossGradientFunction(Matrix data) {
    boolean rowStoredDistributed = data instanceof SparseDistributedMatrix
        && ((SparseDistributedMatrix) data).getStorage().storageMode() == StorageConstants.ROW_STORAGE_MODE;

    if (rowStoredDistributed) {
        SparseDistributedMatrix distributed = (SparseDistributedMatrix) data;

        return w -> calculateDistributedGradient(distributed, w);
    }

    // Fallback path: extract both parts once so the returned function does not repeat the extraction.
    Matrix features = extractInputs(data);
    Vector labels = extractGroundTruth(data);

    return w -> lossGradient.compute(features, labels, w);
}
Also used : FunctionVector(org.apache.ignite.ml.math.impls.vector.FunctionVector) SparseDistributedMatrixMapReducer(org.apache.ignite.ml.optimization.util.SparseDistributedMatrixMapReducer) Vector(org.apache.ignite.ml.math.Vector) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Matrix(org.apache.ignite.ml.math.Matrix) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) StorageConstants(org.apache.ignite.ml.math.StorageConstants) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Matrix(org.apache.ignite.ml.math.Matrix) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) FunctionVector(org.apache.ignite.ml.math.impls.vector.FunctionVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Example 14 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class SparseDistributedMatrixMapReducerTest method testMapReduce.

/**
 * Verifies that a 100x100 matrix filled with "1.0" and distributed across nodes is processed successfully by
 * {@link SparseDistributedMatrixMapReducer}: the map step sums the elements of each part, the reduce step adds
 * the partial sums, and the total must equal the number of elements.
 */
public void testMapReduce() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    SparseDistributedMatrix mtx = new SparseDistributedMatrix(100, 100);

    for (int row = 0; row < 100; row++) {
        for (int col = 0; col < 100; col++) {
            mtx.set(row, col, 1);
        }
    }

    SparseDistributedMatrixMapReducer reducer = new SparseDistributedMatrixMapReducer(mtx);

    double res = reducer.mapReduce((part, args) -> {
        // Map step: sum of all elements of the matrix part handed to this call.
        double acc = 0.0;

        for (int r = 0; r < part.rowSize(); r++) {
            for (int c = 0; c < part.columnSize(); c++) {
                acc += part.get(r, c);
            }
        }

        return acc;
    }, partials -> {
        // Reduce step: add up the partial sums, skipping null entries.
        double sum = 0;

        for (Double partial : partials) {
            if (partial == null) {
                continue;
            }

            sum += partial;
        }

        return sum;
    }, 0.0);

    assertEquals(100.0 * 100.0, res, 1e-18);
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix)

Example 15 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class SparseDistributedMatrixMapReducerTest method testMapReduceWithOneEmptyNode.

/**
 * Verifies that a 1x100 matrix filled with "1.0" and distributed across nodes is processed successfully by
 * {@link SparseDistributedMatrixMapReducer} even when not all nodes contain data: the map step sums the elements
 * of each part, the reduce step adds the non-null partial sums, and the total must equal the number of elements.
 */
public void testMapReduceWithOneEmptyNode() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    SparseDistributedMatrix singleRowMtx = new SparseDistributedMatrix(1, 100);

    for (int col = 0; col < 100; col++) {
        singleRowMtx.set(0, col, 1);
    }

    SparseDistributedMatrixMapReducer reducer = new SparseDistributedMatrixMapReducer(singleRowMtx);

    double res = reducer.mapReduce((part, args) -> {
        // Map step: sum of all elements of the matrix part handed to this call.
        double acc = 0.0;

        for (int r = 0; r < part.rowSize(); r++) {
            for (int c = 0; c < part.columnSize(); c++) {
                acc += part.get(r, c);
            }
        }

        return acc;
    }, partials -> {
        // Reduce step: the null check skips entries that carry no partial sum.
        double sum = 0;

        for (Double partial : partials) {
            if (partial == null) {
                continue;
            }

            sum += partial;
        }

        return sum;
    }, 0.0);

    assertEquals(100.0, res, 1e-18);
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix)

Aggregations

SparseDistributedMatrix (org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix)29 Vector (org.apache.ignite.ml.math.Vector)18 DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)14 Random (java.util.Random)11 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)9 Ignite (org.apache.ignite.Ignite)8 IgniteThread (org.apache.ignite.thread.IgniteThread)8 HashMap (java.util.HashMap)7 List (java.util.List)7 Map (java.util.Map)7 Collectors (java.util.stream.Collectors)7 StorageConstants (org.apache.ignite.ml.math.StorageConstants)7 UUID (java.util.UUID)6 DistanceMeasure (org.apache.ignite.ml.math.distances.DistanceMeasure)6 IgniteFunction (org.apache.ignite.ml.math.functions.IgniteFunction)6 SparseDistributedMatrixStorage (org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage)6 Collections (java.util.Collections)5 LinkedList (java.util.LinkedList)5 DoubleStream (java.util.stream.DoubleStream)5 IgniteUtils (org.apache.ignite.internal.util.IgniteUtils)5