Search in sources :

Example 6 with SparseDistributedMatrixStorage

Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.

Method calculateNewCenters of the class FuzzyCMeansDistributedClusterer.

/**
 * Recomputes the cluster centers from the current membership coefficients.
 * <p>
 * For every cache entry whose key belongs to {@code points} (matched by the matrix UUID), the stored point is
 * scaled by its membership coefficient for each center and added to that center's running sum. The per-center
 * sums are then divided by the corresponding total membership.
 *
 * @param points Matrix with source points.
 * @param membershipsAndSums Membership vectors per point index and the sums of membership coefficients per center.
 * @param k The number of centers.
 * @return Array of new centers.
 */
private Vector[] calculateNewCenters(SparseDistributedMatrix points, MembershipsAndSums membershipsAndSums, int k) {
    UUID matrixId = points.getUUID();
    String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();
    CentersArraySupplier zeroCenters = new CentersArraySupplier(k, points.columnSize());

    // Adds one point, weighted by each of its membership coefficients, to the per-center sums.
    IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>, Vector[], Vector[]> folder =
        (entry, sums) -> {
            Integer rowIdx = entry.getKey().index();
            Vector point = MatrixUtil.localCopyOf(VectorUtils.fromMap(entry.getValue(), false));
            Vector weights = membershipsAndSums.memberships.get(rowIdx);

            for (int c = 0; c < k; c++) {
                sums[c] = sums[c].plus(point.times(weights.getX(c)));
            }

            return sums;
        };

    Vector[] result = CacheUtils.distributedFold(
        cacheName,
        folder,
        key -> key.dataStructureId().equals(matrixId),
        (left, right) -> {
            // Combine two partial results center by center.
            for (int c = 0; c < k; c++) {
                left[c] = left[c].plus(right[c]);
            }

            return left;
        },
        zeroCenters);

    // Normalize every weighted sum by the total membership of its center.
    for (int c = 0; c < k; c++) {
        result[c] = result[c].divide(membershipsAndSums.membershipSums.getX(c));
    }

    return result;
}
Also used : IgniteSupplier(org.apache.ignite.ml.math.functions.IgniteSupplier) GridArgumentCheck(org.apache.ignite.internal.util.GridArgumentCheck) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Functions(org.apache.ignite.ml.math.functions.Functions) Random(java.util.Random) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) UUID(java.util.UUID) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Collectors(java.util.stream.Collectors) CacheUtils(org.apache.ignite.ml.math.distributed.CacheUtils) MatrixUtil(org.apache.ignite.ml.math.util.MatrixUtil) SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) ArrayList(java.util.ArrayList) ConvergenceException(org.apache.ignite.ml.math.exceptions.ConvergenceException) List(java.util.List) SparseMatrixKey(org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey) Vector(org.apache.ignite.ml.math.Vector) MathIllegalArgumentException(org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException) IgniteBiFunction(org.apache.ignite.ml.math.functions.IgniteBiFunction) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Map(java.util.Map) Cache(javax.cache.Cache) VectorUtils(org.apache.ignite.ml.math.VectorUtils) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) UUID(java.util.UUID) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Example 7 with SparseDistributedMatrixStorage

Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.

Method calculateMembership of the class FuzzyCMeansDistributedClusterer.

/**
 * Computes the membership coefficient of every point with respect to every center.
 * <p>
 * Only cache entries whose key belongs to {@code points} (matched by the matrix UUID) are processed. For each
 * such point the membership vector is stored under the point index and also added to the per-center sums.
 *
 * @param points Matrix with source points.
 * @param centers Array of current centers.
 * @return Membership matrix and sums of membership coefficients for each center.
 */
private MembershipsAndSums calculateMembership(SparseDistributedMatrix points, Vector[] centers) {
    UUID matrixId = points.getUUID();
    String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();
    MembershipsAndSumsSupplier zeroVal = new MembershipsAndSumsSupplier(centers.length);
    double distRatioPow = 2 / (exponentialWeight - 1);

    IgniteBiFunction<Cache.Entry<SparseMatrixKey, ConcurrentHashMap<Integer, Double>>, MembershipsAndSums, MembershipsAndSums> folder =
        (entry, acc) -> {
            int centersCnt = centers.length;
            Integer rowIdx = entry.getKey().index();
            Vector point = VectorUtils.fromMap(entry.getValue(), false);

            // Distances from this point to every center, computed once up front.
            double[] dists = new double[centersCnt];

            for (int c = 0; c < centersCnt; c++) {
                dists[c] = distance(centers[c], point);
            }

            Vector pointMemberships = new DenseLocalOnHeapVector(centersCnt);

            for (int c = 0; c < centersCnt; c++) {
                double invertedFuzzyWeight = 0.0;

                for (int other = 0; other < centersCnt; other++) {
                    double ratio = Math.pow(dists[c] / dists[other], distRatioPow);

                    // A NaN ratio (e.g. 0 / 0) is counted as 1.0.
                    invertedFuzzyWeight += Double.isNaN(ratio) ? 1.0 : ratio;
                }

                pointMemberships.setX(c, Math.pow(1.0 / invertedFuzzyWeight, exponentialWeight));
            }

            acc.memberships.put(rowIdx, pointMemberships);
            acc.membershipSums = acc.membershipSums.plus(pointMemberships);

            return acc;
        };

    return CacheUtils.distributedFold(
        cacheName,
        folder,
        key -> key.dataStructureId().equals(matrixId),
        (left, right) -> {
            left.merge(right);

            return left;
        },
        zeroVal);
}
Also used : IgniteSupplier(org.apache.ignite.ml.math.functions.IgniteSupplier) GridArgumentCheck(org.apache.ignite.internal.util.GridArgumentCheck) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Functions(org.apache.ignite.ml.math.functions.Functions) Random(java.util.Random) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) UUID(java.util.UUID) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Collectors(java.util.stream.Collectors) CacheUtils(org.apache.ignite.ml.math.distributed.CacheUtils) MatrixUtil(org.apache.ignite.ml.math.util.MatrixUtil) SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) ArrayList(java.util.ArrayList) ConvergenceException(org.apache.ignite.ml.math.exceptions.ConvergenceException) List(java.util.List) SparseMatrixKey(org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey) Vector(org.apache.ignite.ml.math.Vector) MathIllegalArgumentException(org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException) IgniteBiFunction(org.apache.ignite.ml.math.functions.IgniteBiFunction) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Map(java.util.Map) Cache(javax.cache.Cache) VectorUtils(org.apache.ignite.ml.math.VectorUtils) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) UUID(java.util.UUID) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Example 8 with SparseDistributedMatrixStorage

Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.

Method initClusterCenters of the class KMeansDistributedClusterer.

/**
 * Picks the initial cluster centers.
 * <p>
 * Starts from one randomly chosen row, then runs {@code initSteps} rounds in which point costs are refreshed
 * against the most recently added centers and a new batch of candidate centers is sampled. If more than
 * {@code k} distinct candidates remain, they are weighted and reduced to {@code k} by a local k-means run.
 *
 * @param points Matrix with source points.
 * @param k Requested number of centers.
 * @return Array of initial centers; it may hold fewer than {@code k} entries when fewer distinct candidates
 *      were found.
 */
private Vector[] initClusterCenters(SparseDistributedMatrix points, int k) {
    int rowsCnt = points.rowSize();
    String cacheName = ((SparseDistributedMatrixStorage) points.getStorage()).cacheName();

    // The first center is a random point of the data set.
    Vector firstCenter = localCopyOf(points.viewRow(rnd.nextInt(rowsCnt)));

    List<Vector> candidates = new ArrayList<>();
    candidates.add(firstCenter);

    List<Vector> latestCenters = new ArrayList<>();
    latestCenters.add(firstCenter);

    final ConcurrentHashMap<Integer, Double> costs = new ConcurrentHashMap<>();
    UUID matrixId = points.getUUID();

    // On each step, sample 2 * k points on average with probability proportional
    // to their squared distance from the centers. Only distances between points
    // and the centers added on the previous step are computed in each iteration.
    for (int step = 0; step < initSteps; step++) {
        // We assume here that costs can fit into memory of one node.
        ConcurrentHashMap<Integer, Double> stepCosts = getNewCosts(points, latestCenters, cacheName);

        // Keep the smaller of the known cost and the freshly computed one for every point.
        stepCosts.forEach((ptIdx, cost) -> costs.merge(ptIdx, cost, Math::min));

        double totalCost = costs.values().stream().mapToDouble(Double::doubleValue).sum();

        latestCenters = getNewCenters(k, costs, matrixId, totalCost, cacheName);
        candidates.addAll(latestCenters);
    }

    List<Vector> distinctCenters = candidates.stream().distinct().collect(Collectors.toList());

    if (distinctCenters.size() <= k) {
        return distinctCenters.toArray(new Vector[0]);
    }

    // More than k distinct candidates: weight each candidate by the number of points in the dataset
    // mapping to it and run a local k-means++ on the weighted centers to pick k of them.
    ConcurrentHashMap<Integer, Integer> weightByCenterIdx = weightCenters(matrixId, distinctCenters, cacheName);
    List<Double> weights = new ArrayList<>(weightByCenterIdx.size());

    for (int c = 0; c < distinctCenters.size(); c++) {
        // Candidates without an entry in the weight map get weight 0.
        weights.add(weightByCenterIdx.getOrDefault(c, 0).doubleValue());
    }

    DenseLocalOnHeapMatrix candidateMatrix = MatrixUtil.fromList(distinctCenters, true);
    KMeansLocalClusterer locClusterer = new KMeansLocalClusterer(getDistanceMeasure(), 30, seed);

    return locClusterer.cluster(candidateMatrix, k, weights).centers();
}
Also used : SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) ArrayList(java.util.ArrayList) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) UUID(java.util.UUID) Vector(org.apache.ignite.ml.math.Vector)

Example 9 with SparseDistributedMatrixStorage

Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.

Method tstF1 of the class ColumnDecisionTreeTrainerBenchmark.

/**
 * Benchmark of decision tree regression: trains a column-based tree on generated vectors loaded into a
 * sparse distributed matrix, prints the training time, then prints the mean squared error on a small
 * generated test set.
 * To run this test rename this method so it starts from 'test'.
 */
public void tstF1() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    int ptsCnt = 10000;
    int featCnt = 100;
    // One extra column: the value read back at index featCnt is used as the label below.
    int colCnt = featCnt + 1;

    // The first three features get a wide range; the rest fall back to the default range.
    Map<Integer, double[]> ranges = new HashMap<>();

    for (int feat = 0; feat < 3; feat++) {
        ranges.put(feat, new double[] { -100.0, 100.0 });
    }

    double[] defRng = { -1.0, 1.0 };

    Vector[] trainVectors = vecsFromRanges(ranges, featCnt, defRng, new Random(123L), ptsCnt, f1);

    SparseDistributedMatrix m = new SparseDistributedMatrix(ptsCnt, colCnt,
        StorageConstants.COLUMN_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    SparseDistributedMatrixStorage sto = (SparseDistributedMatrixStorage) m.getStorage();

    loadVectorsIntoSparseDistributedMatrixCache(sto.cache().getName(), sto.getUUID(),
        Arrays.stream(trainVectors).iterator(), colCnt);

    IgniteFunction<DoubleStream, Double> regCalc = s -> s.average().orElse(0.0);
    ColumnDecisionTreeTrainer<VarianceSplitCalculator.VarianceData> trainer = new ColumnDecisionTreeTrainer<>(
        10, ContinuousSplitCalculators.VARIANCE, RegionCalculators.VARIANCE, regCalc, ignite);

    X.println("Training started.");

    long startTs = System.currentTimeMillis();
    DecisionTreeModel mdl = trainer.train(new MatrixColumnDecisionTreeTrainerInput(m, new HashMap<>()));

    X.println("Training finished in: " + (System.currentTimeMillis() - startTs) + " ms.");

    Vector[] testVectors = vecsFromRanges(ranges, featCnt, defRng, new Random(123L), 20, f1);

    IgniteTriFunction<Model<Vector, Double>, Stream<IgniteBiTuple<Vector, Double>>, Function<Double, Double>, Double> mse =
        Estimators.MSE();

    // Split every test vector into (features, label) before handing it to the estimator.
    Stream<IgniteBiTuple<Vector, Double>> labeledTestData = Arrays.stream(testVectors)
        .map(v -> new IgniteBiTuple<>(v.viewPart(0, featCnt), v.getX(featCnt)));

    Double mseVal = mse.apply(mdl, labeledTestData, Function.identity());

    X.println("MSE: " + mseVal);
}
Also used : CacheAtomicityMode(org.apache.ignite.cache.CacheAtomicityMode) Arrays(java.util.Arrays) FeaturesCache(org.apache.ignite.ml.trees.trainers.columnbased.caches.FeaturesCache) IgniteTestResources(org.apache.ignite.testframework.junits.IgniteTestResources) Random(java.util.Random) BiIndex(org.apache.ignite.ml.trees.trainers.columnbased.BiIndex) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) VarianceSplitCalculator(org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.VarianceSplitCalculator) Vector(org.apache.ignite.ml.math.Vector) Estimators(org.apache.ignite.ml.estimators.Estimators) Map(java.util.Map) X(org.apache.ignite.internal.util.typedef.X) Level(org.apache.log4j.Level) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) MatrixColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.MatrixColumnDecisionTreeTrainerInput) LabeledVectorDouble(org.apache.ignite.ml.structures.LabeledVectorDouble) BaseDecisionTreeTest(org.apache.ignite.ml.trees.BaseDecisionTreeTest) IgniteTriFunction(org.apache.ignite.ml.math.functions.IgniteTriFunction) ProjectionsCache(org.apache.ignite.ml.trees.trainers.columnbased.caches.ProjectionsCache) UUID(java.util.UUID) StreamTransformer(org.apache.ignite.stream.StreamTransformer) Collectors(java.util.stream.Collectors) IgniteCache(org.apache.ignite.IgniteCache) ContextCache(org.apache.ignite.ml.trees.trainers.columnbased.caches.ContextCache) DoubleStream(java.util.stream.DoubleStream) IgniteBiTuple(org.apache.ignite.lang.IgniteBiTuple) List(java.util.List) IgniteConfiguration(org.apache.ignite.configuration.IgniteConfiguration) Stream(java.util.stream.Stream) SparseMatrixKey(org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey) SplitCache(org.apache.ignite.ml.trees.trainers.columnbased.caches.SplitCache) 
RegionCalculators(org.apache.ignite.ml.trees.trainers.columnbased.regcalcs.RegionCalculators) IntStream(java.util.stream.IntStream) DecisionTreeModel(org.apache.ignite.ml.trees.models.DecisionTreeModel) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Model(org.apache.ignite.ml.Model) HashMap(java.util.HashMap) Function(java.util.function.Function) GiniSplitCalculator(org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.GiniSplitCalculator) BiIndexedCacheColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.BiIndexedCacheColumnDecisionTreeTrainerInput) CacheWriteSynchronizationMode(org.apache.ignite.cache.CacheWriteSynchronizationMode) IgniteUtils(org.apache.ignite.internal.util.IgniteUtils) MnistUtils(org.apache.ignite.ml.util.MnistUtils) LinkedList(java.util.LinkedList) Properties(java.util.Properties) Iterator(java.util.Iterator) ContinuousSplitCalculators(org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.ContinuousSplitCalculators) IOException(java.io.IOException) SplitDataGenerator(org.apache.ignite.ml.trees.SplitDataGenerator) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap) Ignition(org.apache.ignite.Ignition) CacheConfiguration(org.apache.ignite.configuration.CacheConfiguration) IgniteDataStreamer(org.apache.ignite.IgniteDataStreamer) Tracer(org.apache.ignite.ml.math.Tracer) StorageConstants(org.apache.ignite.ml.math.StorageConstants) Assert(org.junit.Assert) Collections(java.util.Collections) ColumnDecisionTreeTrainer(org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainer) GridCacheProcessor(org.apache.ignite.internal.processors.cache.GridCacheProcessor) InputStream(java.io.InputStream) CacheMode(org.apache.ignite.cache.CacheMode) HashMap(java.util.HashMap) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap) MatrixColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.MatrixColumnDecisionTreeTrainerInput) 
IgniteBiTuple(org.apache.ignite.lang.IgniteBiTuple) DecisionTreeModel(org.apache.ignite.ml.trees.models.DecisionTreeModel) IgniteTriFunction(org.apache.ignite.ml.math.functions.IgniteTriFunction) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Function(java.util.function.Function) Random(java.util.Random) DoubleStream(java.util.stream.DoubleStream) Stream(java.util.stream.Stream) IntStream(java.util.stream.IntStream) InputStream(java.io.InputStream) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) ColumnDecisionTreeTrainer(org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainer) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) LabeledVectorDouble(org.apache.ignite.ml.structures.LabeledVectorDouble) DecisionTreeModel(org.apache.ignite.ml.trees.models.DecisionTreeModel) Model(org.apache.ignite.ml.Model) DoubleStream(java.util.stream.DoubleStream)

Example 10 with SparseDistributedMatrixStorage

Use of org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage in the Apache Ignite project.

Method mapReduce of the class SparseDistributedMatrixMapReducer.

/**
 * Runs {@code mapper} on every data node of the matrix cache and combines the outcomes with {@code reducer}.
 * <p>
 * On each node the keys that affinity maps to the local node are collected, the cached entries for those keys
 * are copied into a local dense matrix (one matrix row per key), and {@code mapper} is applied to that matrix
 * together with {@code args}. A node to which no keys are mapped contributes {@code null}, so {@code reducer}
 * may receive {@code null} elements.
 *
 * @param mapper Function applied to the local part of the matrix and the arguments.
 * @param reducer Function combining the results collected from all nodes.
 * @param args Arguments passed to every {@code mapper} call.
 * @param <R> Result type.
 * @param <T> Arguments type.
 * @return Value produced by {@code reducer}.
 */
public <R, T> R mapReduce(IgniteBiFunction<Matrix, T, R> mapper, IgniteFunction<Collection<R>, R> reducer, T args) {
    SparseDistributedMatrixStorage storage = (SparseDistributedMatrixStorage) distributedMatrix.getStorage();
    int colSize = distributedMatrix.columnSize();
    Ignite ignite = Ignition.localIgnite();

    Collection<R> results = ignite.compute(ignite.cluster().forDataNodes(storage.cacheName())).broadcast(mapperArgs -> {
        Ignite locIgnite = Ignition.localIgnite();
        Affinity<RowColMatrixKey> affinity = locIgnite.affinity(storage.cacheName());

        Collection<RowColMatrixKey> locKeys = affinity.mapKeysToNodes(storage.getAllKeys())
            .get(locIgnite.cluster().localNode());

        // Nothing is mapped to this node.
        if (locKeys == null) {
            return null;
        }

        Matrix locMatrix = new DenseLocalOnHeapMatrix(locKeys.size(), colSize);
        int rowIdx = 0;

        for (RowColMatrixKey key : locKeys) {
            Map<Integer, Double> cells = storage.cache().get(key);

            for (Map.Entry<Integer, Double> cell : cells.entrySet()) {
                locMatrix.set(rowIdx, cell.getKey(), cell.getValue());
            }

            rowIdx++;
        }

        return mapper.apply(locMatrix, mapperArgs);
    }, args);

    return reducer.apply(results);
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Matrix(org.apache.ignite.ml.math.Matrix) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) RowColMatrixKey(org.apache.ignite.ml.math.distributed.keys.RowColMatrixKey) Collection(java.util.Collection) Ignite(org.apache.ignite.Ignite) Map(java.util.Map)

Aggregations

SparseDistributedMatrixStorage (org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage)11 Vector (org.apache.ignite.ml.math.Vector)9 UUID (java.util.UUID)8 SparseDistributedMatrix (org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix)7 Map (java.util.Map)6 DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)6 List (java.util.List)5 Random (java.util.Random)5 Collectors (java.util.stream.Collectors)5 SparseMatrixKey (org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey)5 DenseLocalOnHeapMatrix (org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix)4 Int2DoubleOpenHashMap (it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap)3 IOException (java.io.IOException)3 InputStream (java.io.InputStream)3 ArrayList (java.util.ArrayList)3 Arrays (java.util.Arrays)3 Collections (java.util.Collections)3 HashMap (java.util.HashMap)3 Iterator (java.util.Iterator)3 LinkedList (java.util.LinkedList)3