Search in sources:

Example 86 with Vector

Use of org.apache.ignite.ml.math.primitives.vector.Vector in the Apache Ignite project.

The updateModel method of the GmmTrainer class.

/**
 * Repeatedly re-estimates the mixture from the dataset, starting from the given model.
 * <p>
 * Each pass aggregates cluster probabilities and means, computes covariance matrices from them,
 * builds a new {@link GmmModel} and refreshes the likelihood via
 * {@code GmmPartitionData.updatePcxiAndComputeLikelihood}. The loop ends when
 * {@code isConverged(previous, next)} returns {@code true}, when the number of completed passes
 * exceeds {@code maxCountOfIterations}, or when a {@link SingularMatrixException} or
 * {@link IllegalArgumentException} is thrown while building or applying the new model
 * (the failure is logged and the last model that was assigned is kept).
 *
 * @param dataset Dataset.
 * @param model Model to start from.
 * @return Result holding the last assigned model and the last computed likelihood value
 *      ({@code Double.NEGATIVE_INFINITY} if none was computed).
 */
@NotNull
private UpdateResult updateModel(Dataset<EmptyContext, GmmPartitionData> dataset, GmmModel model) {
    GmmModel current = model;
    double likelihood = Double.NEGATIVE_INFINITY;
    int passes = 0;
    boolean stop = false;

    do {
        MeanWithClusterProbAggregator.AggregatedStats aggregated =
            MeanWithClusterProbAggregator.aggreateStats(dataset, countOfComponents);
        Vector weights = aggregated.clusterProbabilities();
        Vector[] means = aggregated.means().toArray(new Vector[countOfComponents]);

        // Sanity checks: one mean per component, same dimensionality as the initial means.
        A.ensure(means.length == current.countOfComponents(), "newMeans.size() == count of components");
        A.ensure(means[0].size() == initialMeans[0].size(), "newMeans[0].size() == initialMeans[0].size()");

        List<Matrix> covariances = CovarianceMatricesAggregator.computeCovariances(dataset, weights, means);

        try {
            List<MultivariateGaussianDistribution> distributions = buildComponents(means, covariances);
            GmmModel candidate = new GmmModel(weights, distributions);

            passes++;
            stop = isConverged(current, candidate) || passes > maxCountOfIterations;

            // The candidate is accepted before the likelihood is refreshed, so it is kept
            // even if the likelihood update below fails.
            current = candidate;
            likelihood = GmmPartitionData.updatePcxiAndComputeLikelihood(dataset, weights, distributions);
        }
        catch (SingularMatrixException | IllegalArgumentException e) {
            String msg = "Cannot construct non-singular covariance matrix by data. "
                + "Try to select other initial means or other model trainer. Iterations will stop.";
            environment.logger().log(MLLogger.VerboseLevel.HIGH, msg);
            stop = true;
        }
    }
    while (!stop);

    return new UpdateResult(current, likelihood);
}
Also used : Matrix(org.apache.ignite.ml.math.primitives.matrix.Matrix) MultivariateGaussianDistribution(org.apache.ignite.ml.math.stat.MultivariateGaussianDistribution) SingularMatrixException(org.apache.ignite.ml.math.exceptions.math.SingularMatrixException) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) NotNull(org.jetbrains.annotations.NotNull)

Example 87 with Vector

Use of org.apache.ignite.ml.math.primitives.vector.Vector in the Apache Ignite project.

The readSVMCoefficients method of the SparkModelParser class.

/**
 * Reads the coefficient vector from a parquet group.
 * <p>
 * The coefficients are taken from the nested group at field 0 / index 0, then field 3 / index 0.
 * Every repetition of field 0 in that group contributes one double value (its field 0, index 0).
 *
 * @param g Coefficient group.
 * @return Dense vector with one entry per repetition, in repetition order.
 */
private static Vector readSVMCoefficients(SimpleGroup g) {
    Group values = g.getGroup(0, 0).getGroup(3, 0);
    int size = values.getFieldRepetitionCount(0);
    Vector res = new DenseVector(size);

    int idx = 0;
    while (idx < size) {
        res.set(idx, values.getGroup(0, idx).getDouble(0, 0));
        idx++;
    }

    return res;
}
Also used : Group(org.apache.parquet.example.data.Group) SimpleGroup(org.apache.parquet.example.data.simple.SimpleGroup) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Example 88 with Vector

Use of org.apache.ignite.ml.math.primitives.vector.Vector in the Apache Ignite project.

The testEvaluatorWithFilter method of the RegressionEvaluatorTest class.

/**
 * Trains a 3-NN regression model and evaluates it with RSS using the two filters of a 50/50 split.
 * <p>
 * Note that the model is fitted with {@code split.getTestFilter()} and scored with
 * {@code split.getTrainFilter()}; the expected value below is tied to that arrangement and to the
 * fixed random seed.
 */
@Test
public void testEvaluatorWithFilter() {
    // One row per observation; the vectorizer below takes the first coordinate as the label.
    double[][] rows = {
        {60323, 83.0, 234289, 2356, 1590, 107608, 1947},
        {61122, 88.5, 259426, 2325, 1456, 108632, 1948},
        {60171, 88.2, 258054, 3682, 1616, 109773, 1949},
        {61187, 89.5, 284599, 3351, 1650, 110929, 1950},
        {63221, 96.2, 328975, 2099, 3099, 112075, 1951},
        {63639, 98.1, 346999, 1932, 3594, 113270, 1952},
        {64989, 99.0, 365385, 1870, 3547, 115094, 1953},
        {63761, 100.0, 363112, 3578, 3350, 116219, 1954},
        {66019, 101.2, 397469, 2904, 3048, 117388, 1955},
        {68169, 108.4, 442769, 2936, 2798, 120445, 1957},
        {66513, 110.8, 444546, 4681, 2637, 121950, 1958},
        {68655, 112.6, 482704, 3813, 2552, 123366, 1959},
        {69564, 114.2, 502601, 3931, 2514, 125368, 1960},
        {69331, 115.7, 518173, 4806, 2572, 127852, 1961},
        {70551, 116.9, 554894, 4007, 2827, 130081, 1962}
    };

    Map<Integer, Vector> data = new HashMap<>();
    for (int key = 0; key < rows.length; key++) {
        data.put(key, VectorUtils.of(rows[key]));
    }

    KNNRegressionTrainer trainer = new KNNRegressionTrainer()
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance());

    TrainTestSplit<Integer, Vector> split =
        new TrainTestDatasetSplitter<Integer, Vector>(new SHA256UniformMapper<>(new Random(0)))
            .split(0.5);

    Vectorizer<Integer, Vector, Integer, Double> vectorizer =
        new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

    KNNRegressionModel model = trainer.fit(data, split.getTestFilter(), parts, vectorizer);

    double rss = Evaluator.evaluate(
        new LocalDatasetBuilder<>(data, split.getTrainFilter(), parts),
        model,
        vectorizer,
        new Rss()
    ).getSingle();

    assertEquals(4800164.444444457, rss, 1e-4);
}
Also used : KNNRegressionTrainer(org.apache.ignite.ml.knn.regression.KNNRegressionTrainer) SHA256UniformMapper(org.apache.ignite.ml.selection.split.mapper.SHA256UniformMapper) HashMap(java.util.HashMap) KNNRegressionModel(org.apache.ignite.ml.knn.regression.KNNRegressionModel) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) Rss(org.apache.ignite.ml.selection.scoring.metric.regression.Rss) Random(java.util.Random) LocalDatasetBuilder(org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 89 with Vector

Use of org.apache.ignite.ml.math.primitives.vector.Vector in the Apache Ignite project.

The testCalculation method of the BinaryClassificationMetricsTest class.

/**
 * Checks accuracy, precision, recall and F-measure reported by
 * {@code Evaluator.evaluateBinaryClassification} for three hand-written models
 * (XOR, AND, OR) scored against the XOR truth table.
 * <p>
 * The confusion counts in the comments below treat label {@code 1.} as the positive class,
 * which is what the expected values are consistent with.
 */
@Test
public void testCalculation() {
    // XOR truth table: feature vector -> expected label.
    // A plain HashMap is used instead of double-brace initialization, which would create
    // a needless anonymous HashMap subclass.
    Map<Vector, Double> xorset = new HashMap<>();
    xorset.put(VectorUtils.of(0., 0.), 0.);
    xorset.put(VectorUtils.of(0., 1.), 1.);
    xorset.put(VectorUtils.of(1., 0.), 1.);
    xorset.put(VectorUtils.of(1., 1.), 0.);

    // Predicts 0 when both coordinates are (almost) equal, 1 otherwise.
    IgniteModel<Vector, Double> xorFunction = v -> Math.abs(v.get(0) - v.get(1)) < 0.01 ? 0. : 1.;

    // Predicts 1 only when both coordinates are (almost) equal and the first one is positive.
    IgniteModel<Vector, Double> andFunction =
        v -> Math.abs(v.get(0) - v.get(1)) < 0.01 && v.get(0) > 0 ? 1. : 0.;

    // Predicts 1 when at least one coordinate is positive.
    IgniteModel<Vector, Double> orFunction = v -> v.get(0) > 0 || v.get(1) > 0 ? 1. : 0.;

    // XOR model reproduces the truth table exactly: every metric is 1.
    EvaluationResult xorResult = Evaluator.evaluateBinaryClassification(xorset, xorFunction, Vector::labeled);
    assertEquals(1., xorResult.get(MetricName.ACCURACY), 0.01);
    assertEquals(1., xorResult.get(MetricName.PRECISION), 0.01);
    assertEquals(1., xorResult.get(MetricName.RECALL), 0.01);
    assertEquals(1., xorResult.get(MetricName.F_MEASURE), 0.01);

    // AND model: only (0, 0) is classified correctly; TP = 0, FP = 1, FN = 2, TN = 1.
    EvaluationResult andResult = Evaluator.evaluateBinaryClassification(xorset, andFunction, Vector::labeled);
    assertEquals(0.25, andResult.get(MetricName.ACCURACY), 0.01);
    // There is no TP.
    assertEquals(0., andResult.get(MetricName.PRECISION), 0.01);
    // There is no TP.
    assertEquals(0., andResult.get(MetricName.RECALL), 0.01);
    // There is no TP and the denominator is zero, hence NaN.
    assertEquals(Double.NaN, andResult.get(MetricName.F_MEASURE), 0.01);

    // OR model: only (1, 1) is misclassified; TP = 2, FP = 1, FN = 0, TN = 1.
    EvaluationResult orResult = Evaluator.evaluateBinaryClassification(xorset, orFunction, Vector::labeled);
    assertEquals(0.75, orResult.get(MetricName.ACCURACY), 0.01);
    // 2 TP out of 3 positive predictions.
    assertEquals(0.66, orResult.get(MetricName.PRECISION), 0.01);
    // Both positive samples are found.
    assertEquals(1., orResult.get(MetricName.RECALL), 0.01);
    // Harmonic mean of precision 2/3 and recall 1.
    assertEquals(0.8, orResult.get(MetricName.F_MEASURE), 0.01);
}
Also used : VectorUtils(org.apache.ignite.ml.math.primitives.vector.VectorUtils) Evaluator(org.apache.ignite.ml.selection.scoring.evaluator.Evaluator) MetricName(org.apache.ignite.ml.selection.scoring.metric.MetricName) Map(java.util.Map) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) EvaluationResult(org.apache.ignite.ml.selection.scoring.evaluator.EvaluationResult) HashMap(java.util.HashMap) Test(org.junit.Test) IgniteModel(org.apache.ignite.ml.IgniteModel) Assert.assertEquals(org.junit.Assert.assertEquals) HashMap(java.util.HashMap) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) EvaluationResult(org.apache.ignite.ml.selection.scoring.evaluator.EvaluationResult) Test(org.junit.Test)

Example 90 with Vector

Use of org.apache.ignite.ml.math.primitives.vector.Vector in the Apache Ignite project.

The testEvaluatorWithoutFilter method of the BinaryClassificationEvaluatorTest class.

/**
 * Trains a 3-NN classifier on the whole {@code twoLinearlySeparableClasses} sample and checks
 * the accuracy obtained when the same data is used for evaluation (no filter applied).
 */
@Test
public void testEvaluatorWithoutFilter() {
    // Local stand-in for a cache: row index -> row converted to a vector.
    Map<Integer, Vector> samples = new HashMap<>();
    for (int row = 0; row < twoLinearlySeparableClasses.length; row++) {
        samples.put(row, VectorUtils.of(twoLinearlySeparableClasses[row]));
    }

    KNNClassificationTrainer knnTrainer = new KNNClassificationTrainer().withK(3);

    // The first coordinate of every vector is used as the label.
    Vectorizer<Integer, Vector, Integer, Double> vectorizer =
        new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

    KNNClassificationModel model = knnTrainer.fit(samples, parts, vectorizer);

    double accuracy = Evaluator.evaluate(samples, model, vectorizer, MetricName.ACCURACY);

    assertEquals(0.9919839679358717, accuracy, 1e-12);
}
Also used : HashMap(java.util.HashMap) KNNClassificationTrainer(org.apache.ignite.ml.knn.classification.KNNClassificationTrainer) KNNClassificationModel(org.apache.ignite.ml.knn.classification.KNNClassificationModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Aggregations

Vector (org.apache.ignite.ml.math.primitives.vector.Vector)265 DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)95 Test (org.junit.Test)94 Ignite (org.apache.ignite.Ignite)78 LabeledVector (org.apache.ignite.ml.structures.LabeledVector)49 HashMap (java.util.HashMap)39 SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)38 DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer)26 FileNotFoundException (java.io.FileNotFoundException)22 TrainerTest (org.apache.ignite.ml.common.TrainerTest)22 DecisionTreeClassificationTrainer (org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer)21 DecisionTreeModel (org.apache.ignite.ml.tree.DecisionTreeModel)21 Serializable (java.io.Serializable)19 IgniteCache (org.apache.ignite.IgniteCache)18 EncoderTrainer (org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer)16 Cache (javax.cache.Cache)15 DoubleArrayVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer)15 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)14 ArrayList (java.util.ArrayList)12 ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition)12