Search in sources :

Example 21 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class OneVsRestClassificationExample method main.

/**
 * Trains a One-vs-Rest SVM multi-class model on the {@code GLASS_IDENTIFICATION} sandbox dataset twice
 * (once on the raw vectors, once through a min-max scaling preprocessor), then scans the cache and prints
 * every prediction next to its ground truth, followed by error counts, accuracy and a 3x3 confusion matrix
 * for each model.
 *
 * @param args Command line arguments; not read by this method.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset is loaded.
 */
public static void main(String[] args) throws IOException {
    final String tableBorder = ">>> ----------------------------------------------------------------";

    System.out.println();
    System.out.println(">>> One-vs-Rest SVM Multi-class classification model over cached dataset usage example started.");

    // The Ignite node lives for the duration of the try-with-resources block.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;

        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.GLASS_IDENTIFICATION);

            OneVsRestTrainer<SVMLinearClassificationModel> trainer = new OneVsRestTrainer<>(
                new SVMLinearClassificationTrainer()
                    .withAmountOfIterations(20)
                    .withAmountOfLocIterations(50)
                    .withLambda(0.2)
                    .withSeed(1234L));

            // Model fitted on the raw vectors; coordinate 0 of each vector is the label.
            MultiClassModel<SVMLinearClassificationModel> mdl =
                trainer.fit(ignite, dataCache, new DummyVectorizer<Integer>().labeled(0));

            System.out.println(">>> One-vs-Rest SVM Multi-class model");
            System.out.println(mdl.toString());

            // Same trainer, but the data goes through a min-max scaling preprocessor first.
            Preprocessor<Integer, Vector> preprocessor = new MinMaxScalerTrainer<Integer, Vector>()
                .fit(ignite, dataCache, new DummyVectorizer<Integer>().labeled(0));

            MultiClassModel<SVMLinearClassificationModel> mdlWithScaling = trainer.fit(ignite, dataCache, preprocessor);

            System.out.println(">>> One-vs-Rest SVM Multi-class model with MinMaxScaling");
            System.out.println(mdlWithScaling.toString());

            System.out.println(tableBorder);
            System.out.println(">>> | Prediction\t| Prediction with MinMaxScaling\t| Ground Truth\t|");
            System.out.println(tableBorder);

            int plainErrs = 0;
            int scaledErrs = 0;
            int total = 0;

            // Confusion matrices, indexed as [predicted][actual].
            // See https://en.wikipedia.org/wiki/Confusion_matrix
            int[][] plainMtx = new int[3][3];
            int[][] scaledMtx = new int[3][3];

            try (QueryCursor<Cache.Entry<Integer, Vector>> rows = dataCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> row : rows) {
                    Vector labeled = row.getValue();
                    Vector features = labeled.copyOfRange(1, labeled.size());

                    double truth = labeled.get(0);
                    double plainPred = mdl.predict(features);
                    double scaledPred = mdlWithScaling.predict(features);

                    total++;

                    int truthIdx = confusionIdx(truth);

                    // Statistics for the model trained on raw data.
                    if (!Precision.equals(truth, plainPred, Precision.EPSILON))
                        plainErrs++;

                    plainMtx[confusionIdx(plainPred)][truthIdx]++;

                    // Statistics for the model trained with min-max scaling.
                    if (!Precision.equals(truth, scaledPred, Precision.EPSILON))
                        scaledErrs++;

                    scaledMtx[confusionIdx(scaledPred)][truthIdx]++;

                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t\t\t\t\t| %.4f\t\t|\n", plainPred, scaledPred, truth);
                }

                double plainAccuracy = 1 - plainErrs / (double) total;
                double scaledAccuracy = 1 - scaledErrs / (double) total;

                System.out.println(tableBorder);

                System.out.println("\n>>> -----------------One-vs-Rest SVM model-------------");
                System.out.println("\n>>> Absolute amount of errors " + plainErrs);
                System.out.println("\n>>> Accuracy " + plainAccuracy);
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(plainMtx));

                System.out.println("\n>>> -----------------One-vs-Rest SVM model with MinMaxScaling-------------");
                System.out.println("\n>>> Absolute amount of errors " + scaledErrs);
                System.out.println("\n>>> Accuracy " + scaledAccuracy);
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(scaledMtx));

                System.out.println(">>> One-vs-Rest SVM model over cache based dataset usage example completed.");
            }
        } finally {
            // The cache reference stays null when filling it failed, so there is nothing to destroy then.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}

/**
 * Maps a label (or prediction) to its row/column in the 3x3 confusion matrix: the value is truncated to
 * {@code int}; 1 goes to index 0, 3 goes to index 1 and every other value goes to index 2.
 *
 * @param lb Label or predicted value.
 * @return Index in the range {@code [0, 2]}.
 */
private static int confusionIdx(double lb) {
    int cls = (int) lb;

    if (cls == 1)
        return 0;

    if (cls == 3)
        return 1;

    return 2;
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) OneVsRestTrainer(org.apache.ignite.ml.multiclass.OneVsRestTrainer) MinMaxScalerTrainer(org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) SVMLinearClassificationTrainer(org.apache.ignite.ml.svm.SVMLinearClassificationTrainer) Ignite(org.apache.ignite.Ignite) SVMLinearClassificationModel(org.apache.ignite.ml.svm.SVMLinearClassificationModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) IgniteCache(org.apache.ignite.IgniteCache) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) Cache(javax.cache.Cache)

Example 22 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class CompoundNaiveBayesExample method main.

/**
 * Run example: trains a compound (Gaussian + discrete) naive Bayes model on the {@code MIXED_DATASET}
 * sandbox dataset and prints the model together with its accuracy evaluated on the same cache.
 *
 * <p>The data cache is destroyed and standard output is flushed even when training or evaluation fails.
 *
 * @param args Command line arguments; not read by this method.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset is loaded.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Compound Naive Bayes classification model over partitioned dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MIXED_DATASET);
            double[] priorProbabilities = new double[] { .5, .5 };
            // Five single-threshold buckets; presumably one per discrete feature (ids 3-7) - verify against the dataset.
            double[][] thresholds = new double[][] { { .5 }, { .5 }, { .5 }, { .5 }, { .5 } };
            System.out.println(">>> Create new naive Bayes classification trainer object.");
            // The Gaussian trainer skips feature ids 3-7 and the discrete trainer skips ids 0-2,
            // so each feature is handled by exactly one of the two underlying trainers.
            CompoundNaiveBayesTrainer trainer = new CompoundNaiveBayesTrainer()
                .withPriorProbabilities(priorProbabilities)
                .withGaussianNaiveBayesTrainer(new GaussianNaiveBayesTrainer())
                .withGaussianFeatureIdsToSkip(asList(3, 4, 5, 6, 7))
                .withDiscreteNaiveBayesTrainer(new DiscreteNaiveBayesTrainer().setBucketThresholds(thresholds))
                .withDiscreteFeatureIdsToSkip(asList(0, 1, 2));
            System.out.println(">>> Perform the training to get the model.");
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            CompoundNaiveBayesModel mdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Compound Naive Bayes model: " + mdl);
            double accuracy = Evaluator.evaluate(dataCache, mdl, vectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy " + accuracy);
            System.out.println(">>> Compound Naive bayes model over partitioned dataset usage example completed.");
        } finally {
            // dataCache is still null when filling the cache failed; destroy it only if it was created.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) DiscreteNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesTrainer) CompoundNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.compound.CompoundNaiveBayesTrainer) CompoundNaiveBayesModel(org.apache.ignite.ml.naivebayes.compound.CompoundNaiveBayesModel) GaussianNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 23 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class GaussianNaiveBayesTrainerExample method main.

/**
 * Run example: trains a Gaussian naive Bayes model on the {@code TWO_CLASSED_IRIS} sandbox dataset and
 * prints the model together with its accuracy evaluated on the same cache.
 *
 * <p>The data cache is destroyed (when it was created) and standard output is flushed even on failure.
 *
 * @param args Command line arguments; not read by this method.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset is loaded.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Naive Bayes classification model over partitioned dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);
            System.out.println(">>> Create new naive Bayes classification trainer object.");
            GaussianNaiveBayesTrainer trainer = new GaussianNaiveBayesTrainer();
            System.out.println(">>> Perform the training to get the model.");
            // The label is read from the first coordinate of every cached vector.
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            GaussianNaiveBayesModel mdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Naive Bayes model: " + mdl);
            double accuracy = Evaluator.evaluate(dataCache, mdl, vectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy " + accuracy);
            System.out.println(">>> Naive bayes model over partitioned dataset usage example completed.");
        } finally {
            // If fillCacheWith threw, dataCache is still null; an unguarded destroy() would raise a
            // NullPointerException here and hide the original failure.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) GaussianNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesTrainer) Ignite(org.apache.ignite.Ignite) GaussianNaiveBayesModel(org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 24 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class IrisClassificationExample method main.

/**
 * Fits a weighted k-nearest-neighbours classifier (k = 3, Euclidean distance) on the train part of the
 * {@code IRIS} sandbox dataset and prints the accuracy measured on the test part.
 *
 * @param args Command line arguments; not read by this method.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset is loaded.
 */
public static void main(String[] args) throws IOException {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;

        try {
            System.out.println(">>> Fill dataset cache.");

            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.IRIS);

            // The learning environment is given a fixed RNG seed (0).
            KNNClassificationTrainer knnTrainer = ((KNNClassificationTrainer) new KNNClassificationTrainer()
                .withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder().withRNGSeed(0)))
                .withK(3)
                .withDistanceMeasure(new EuclideanDistance())
                .withWeighted(true);

            // Cache values are already vectors; FIRST means the label is stored at the first coordinate.
            Vectorizer<Integer, Vector, Integer, Double> labelFirstVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            // Train/test split with 60/40 proportion.
            TrainTestSplit<Integer, Vector> trainTestSplit =
                new TrainTestDatasetSplitter<Integer, Vector>().split(0.6);

            System.out.println(">>> Start traininig.");

            KNNClassificationModel knnMdl =
                knnTrainer.fit(ignite, dataCache, trainTestSplit.getTrainFilter(), labelFirstVectorizer);

            System.out.println(">>> Perform scoring.");

            double acc = Evaluator.evaluate(
                dataCache,
                trainTestSplit.getTestFilter(),
                knnMdl,
                labelFirstVectorizer,
                new Accuracy<>());

            System.out.println(">> Model accuracy: " + acc);
        } finally {
            // Nothing to destroy when the cache could not be filled.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) KNNClassificationTrainer(org.apache.ignite.ml.knn.classification.KNNClassificationTrainer) KNNClassificationModel(org.apache.ignite.ml.knn.classification.KNNClassificationModel) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 25 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class TrainTestDatasetSplitterExample method main.

/**
 * Splits the {@code MORTALITY_DATA} sandbox dataset with a 0.75 train ratio, fits a linear regression
 * (LSQR) model on the train part and prints a prediction / ground-truth table for the test part.
 *
 * @param args Command line arguments; not read by this method.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset is loaded.
 */
public static void main(String[] args) throws IOException {
    final String tableBorder = ">>> ---------------------------------";

    System.out.println();
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");

    // The Ignite node lives for the duration of the try-with-resources block.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;

        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);

            System.out.println(">>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer lsqrTrainer = new LinearRegressionLSQRTrainer();

            System.out.println(">>> Create new training dataset splitter object.");
            TrainTestSplit<Integer, Vector> trainTestSplit =
                new TrainTestDatasetSplitter<Integer, Vector>().split(0.75);

            System.out.println(">>> Perform the training to get the model.");

            // The label is read from the first coordinate of every cached vector.
            Vectorizer<Integer, Vector, Integer, Double> labelFirstVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            LinearRegressionModel regressionMdl =
                lsqrTrainer.fit(ignite, dataCache, trainTestSplit.getTrainFilter(), labelFirstVectorizer);

            System.out.println(">>> Linear regression model: " + regressionMdl);

            System.out.println(tableBorder);
            System.out.println(">>> | Prediction\t| Ground Truth\t|");
            System.out.println(tableBorder);

            // Only entries accepted by the test filter are scanned.
            ScanQuery<Integer, Vector> testQry = new ScanQuery<>();
            testQry.setFilter(trainTestSplit.getTestFilter());

            try (QueryCursor<Cache.Entry<Integer, Vector>> rows = dataCache.query(testQry)) {
                for (Cache.Entry<Integer, Vector> row : rows) {
                    Vector labeled = row.getValue();

                    // Coordinate 0 is the label, the remaining coordinates are the features.
                    double truth = labeled.get(0);
                    Vector features = labeled.copyOfRange(1, labeled.size());

                    double predicted = regressionMdl.predict(features);

                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", predicted, truth);
                }
            }

            System.out.println(tableBorder);
            System.out.println(">>> Linear regression model over cache based dataset usage example completed.");
        } finally {
            // Nothing to destroy when the cache could not be filled.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) LinearRegressionModel(org.apache.ignite.ml.regressions.linear.LinearRegressionModel) ScanQuery(org.apache.ignite.cache.query.ScanQuery) LinearRegressionLSQRTrainer(org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) IgniteCache(org.apache.ignite.IgniteCache) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) Cache(javax.cache.Cache)

Aggregations

SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)41 Ignite (org.apache.ignite.Ignite)38 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)34 Path (java.nio.file.Path)9 IgniteCache (org.apache.ignite.IgniteCache)7 LinearRegressionModel (org.apache.ignite.ml.regressions.linear.LinearRegressionModel)7 Cache (javax.cache.Cache)6 LinearRegressionLSQRTrainer (org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer)6 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)5 DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer)5 DecisionTreeClassificationTrainer (org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer)5 FileNotFoundException (java.io.FileNotFoundException)4 FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta)4 GaussianNaiveBayesTrainer (org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesTrainer)4 LogisticRegressionSGDTrainer (org.apache.ignite.ml.regressions.logistic.LogisticRegressionSGDTrainer)4 KMeansModel (org.apache.ignite.ml.clustering.kmeans.KMeansModel)3 KMeansTrainer (org.apache.ignite.ml.clustering.kmeans.KMeansTrainer)3 ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition)3 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)3 DiscreteNaiveBayesTrainer (org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesTrainer)3