Search in sources :

Example 26 with DenseVector

use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.

the class KMeansTrainer method initClusterCentersRandomly.

/**
 * Picks {@code k} initial cluster centers at random from the rows of the dataset.
 *
 * <p>Each partition contributes up to {@code k} candidate rows: {@code k} randomly indexed rows
 * when the partition holds more than {@code k} rows, otherwise all of its rows. The candidates
 * from all partitions are concatenated, shuffled, and {@code k} of them are drawn without
 * replacement; their feature vectors become the centers.
 *
 * @param dataset The dataset to pick up random centers.
 * @param k Amount of clusters.
 * @return K cluster centers.
 * @throws RuntimeException If fewer than {@code k} candidate vectors were collected.
 */
private Vector[] initClusterCentersRandomly(Dataset<EmptyContext, LabeledVectorSet<LabeledVector>> dataset, int k) {
    // Allocated as Vector[] so that storing a non-dense feature vector cannot fail with ArrayStoreException.
    Vector[] initCenters = new Vector[k];
    // Gets k or less vectors from each partition.
    List<LabeledVector> rndPnts = dataset.compute(data -> {
        List<LabeledVector> rndPnt = new ArrayList<>();
        if (data.rowSize() != 0) {
            if (data.rowSize() > k) {
                // If it's enough rows in partition to pick k vectors.
                final Random random = environment.randomNumbersGenerator();
                // Indices already picked in this partition. It has to live across iterations,
                // otherwise the duplicate check below never sees the earlier picks.
                Set<Integer> uniqueIndices = new HashSet<>();
                for (int i = 0; i < k; i++) {
                    int nextIdx = random.nextInt(data.rowSize());
                    // Bounds the retry loop so that it is guaranteed to terminate.
                    int maxRandomSearch = k;
                    int cntr = 0;
                    // Repeat nextIdx generation if it was picked earlier. After maxRandomSearch
                    // unsuccessful retries the last generated index is accepted as is.
                    while (uniqueIndices.contains(nextIdx) && cntr < maxRandomSearch) {
                        nextIdx = random.nextInt(data.rowSize());
                        cntr++;
                    }
                    uniqueIndices.add(nextIdx);
                    rndPnt.add(data.getRow(nextIdx));
                }
            } else {
                // If it's not enough vectors to pick k vectors, take every row of the partition.
                for (int i = 0; i < data.rowSize(); i++)
                    rndPnt.add(data.getRow(i));
            }
        }
        return rndPnt;
    }, (a, b) -> {
        // Null-tolerant concatenation of the per-partition candidate lists.
        if (a == null)
            return b == null ? new ArrayList<>() : b;
        if (b == null)
            return a;
        return Stream.concat(a.stream(), b.stream()).collect(Collectors.toList());
    });
    // A missing result is treated the same way as an insufficient number of candidates.
    if (rndPnts == null || rndPnts.size() < k)
        throw new RuntimeException("The KMeans Trainer required more than " + k + " vectors to find " + k + " clusters");
    // Shuffle them.
    Collections.shuffle(rndPnts);
    // Pick k vectors randomly, without replacement.
    for (int i = 0; i < k; i++) {
        int pickedIdx = environment.randomNumbersGenerator().nextInt(rndPnts.size());
        // Index-based removal takes out exactly the chosen element and avoids an equals-based search.
        initCenters[i] = rndPnts.remove(pickedIdx).features();
    }
    return initCenters;
}
Also used : Random(java.util.Random) ArrayList(java.util.ArrayList) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) HashSet(java.util.HashSet)

Example 27 with DenseVector

use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.

the class KMeansTrainer method updateModel.

/**
 * {@inheritDoc}
 */
@Override
protected <K, V> KMeansModel updateModel(KMeansModel mdl, DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> preprocessor) {
    assert datasetBuilder != null;
    PartitionDataBuilder<K, V, EmptyContext, LabeledVectorSet<LabeledVector>> partDataBuilder = new LabeledDatasetPartitionDataBuilderOnHeap<>(preprocessor);
    Vector[] centers;
    try (Dataset<EmptyContext, LabeledVectorSet<LabeledVector>> dataset = datasetBuilder.build(envBuilder, (env, upstream, upstreamSize) -> new EmptyContext(), partDataBuilder, learningEnvironment())) {
        // Reduce the per-partition column counts: the right-hand value wins when present,
        // then the left-hand one, and 0 when both are missing.
        final Integer cols = dataset.compute(org.apache.ignite.ml.structures.Dataset::colSize, (left, right) -> {
            if (right != null)
                return right;
            return left == null ? 0 : left;
        });
        if (cols == null)
            return getLastTrainedModelOrThrowEmptyDatasetException(mdl);
        // Start from the centers of the given model when there is one, otherwise pick random ones.
        centers = Optional.ofNullable(mdl).map(KMeansModel::centers).orElseGet(() -> initClusterCentersRandomly(dataset, k));
        boolean converged = false;
        for (int iteration = 0; iteration < maxIterations && !converged; iteration++) {
            Vector[] newCentroids = new DenseVector[k];
            TotalCostAndCounts totalRes = calcDataForNewCentroids(centers, dataset, cols);
            // Assume convergence until some center is found to have moved too far.
            converged = true;
            for (Map.Entry<Integer, Vector> entry : totalRes.sums.entrySet()) {
                Integer clusterIdx = entry.getKey();
                // New center is the accumulated sum scaled by the inverse of the cluster's count.
                Vector massCenter = entry.getValue().times(1.0 / totalRes.counts.get(clusterIdx));
                // The distance is evaluated only while no moved center has been seen yet.
                if (converged && distance.compute(massCenter, centers[clusterIdx]) > epsilon * epsilon)
                    converged = false;
                newCentroids[clusterIdx] = massCenter;
            }
            // Centers without a recomputed value keep their previous position.
            for (int i = 0; i < centers.length; i++) {
                Vector updated = newCentroids[i];
                if (updated != null)
                    centers[i] = updated;
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return new KMeansModel(centers, distance);
}
Also used : EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) LabeledDatasetPartitionDataBuilderOnHeap(org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap) Dataset(org.apache.ignite.ml.dataset.Dataset) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Example 28 with DenseVector

use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.

the class SparkModelParser method readSVMCoefficients.

/**
 * Reads the SVM coefficients from a parquet group into a dense vector.
 *
 * <p>The coefficients are taken from the nested group at field 3 of the group at field 0 of
 * {@code g}; one value is read per repetition of that nested group's field 0.
 *
 * @param g Coefficient group.
 * @return Vector of coefficients.
 */
private static Vector readSVMCoefficients(SimpleGroup g) {
    final Group coeffGroup = g.getGroup(0, 0).getGroup(3, 0);
    final int amountOfCoefficients = coeffGroup.getFieldRepetitionCount(0);
    final Vector res = new DenseVector(amountOfCoefficients);
    // Copy every repeated entry into the vector position with the same index.
    for (int idx = 0; idx < amountOfCoefficients; idx++)
        res.set(idx, coeffGroup.getGroup(0, idx).getDouble(0, 0));
    return res;
}
Also used : Group(org.apache.parquet.example.data.Group) SimpleGroup(org.apache.parquet.example.data.simple.SimpleGroup) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Example 29 with DenseVector

use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.

the class LogisticRegressionModelTest method testPredictOnAnObservationWithWrongCardinality.

/**
 * A model built on two weights is asked to predict on a one-element observation;
 * the test expects {@link CardinalityException} to be thrown.
 */
@Test(expected = CardinalityException.class)
public void testPredictOnAnObservationWithWrongCardinality() {
    final double[] twoWeights = { 2.0, 3.0 };
    final double[] singleFeature = { 1.0 };
    Vector weights = new DenseVector(twoWeights);
    LogisticRegressionModel mdl = new LogisticRegressionModel(weights, 1.0);
    Vector observation = new DenseVector(singleFeature);
    // The observation size (1) differs from the weights size (2).
    mdl.predict(observation);
}
Also used : Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) Test(org.junit.Test)

Example 30 with DenseVector

use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.

the class SVMModelTest method testPredictOnAnObservationWithWrongCardinality.

/**
 * A linear SVM model built on two weights is asked to predict on a one-element observation;
 * the test expects {@link CardinalityException} to be thrown.
 */
@Test(expected = CardinalityException.class)
public void testPredictOnAnObservationWithWrongCardinality() {
    final double[] twoWeights = { 2.0, 3.0 };
    final double[] singleFeature = { 1.0 };
    Vector weights = new DenseVector(twoWeights);
    SVMLinearClassificationModel mdl = new SVMLinearClassificationModel(weights, 1.0);
    Vector observation = new DenseVector(singleFeature);
    // The observation size (1) differs from the weights size (2).
    mdl.predict(observation);
}
Also used : Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) Test(org.junit.Test)

Aggregations

DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)101 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)59 Test (org.junit.Test)59 Serializable (java.io.Serializable)16 SparseVector (org.apache.ignite.ml.math.primitives.vector.impl.SparseVector)14 HashMap (java.util.HashMap)13 DenseMatrix (org.apache.ignite.ml.math.primitives.matrix.impl.DenseMatrix)13 DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer)10 LabeledVector (org.apache.ignite.ml.structures.LabeledVector)10 RendezvousAffinityFunction (org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction)9 CacheConfiguration (org.apache.ignite.configuration.CacheConfiguration)9 HashSet (java.util.HashSet)7 TrainerTest (org.apache.ignite.ml.common.TrainerTest)7 KMeansModel (org.apache.ignite.ml.clustering.kmeans.KMeansModel)5 LocalDatasetBuilder (org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder)5 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)5 IgniteDifferentiableVectorToDoubleFunction (org.apache.ignite.ml.math.functions.IgniteDifferentiableVectorToDoubleFunction)5 MLPArchitecture (org.apache.ignite.ml.nn.architecture.MLPArchitecture)5 OneHotEncoderPreprocessor (org.apache.ignite.ml.preprocessing.encoding.onehotencoder.OneHotEncoderPreprocessor)4 Random (java.util.Random)3