Search in sources:

Example 36 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

The method main of the class DiscreteNaiveBayesExportImportExample.

/**
 * Entry point of the example: trains a discrete naive Bayes model on a cache-backed dataset,
 * writes the model to a temporary JSON file, reads it back and prints the accuracy of both
 * the original and the re-imported model.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Propagated from the file operations performed by the example.
 */
public static void main(String[] args) throws IOException {
    System.out.println(">>> Discrete naive Bayes classification model over partitioned dataset usage example started.");

    // Bring up the Ignite node; it is closed automatically when the block exits.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Both resources are declared outside the inner try so the finally block can release them.
        IgniteCache<Integer, Vector> cache = null;
        Path mdlFile = null;

        try {
            cache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.ENGLISH_VS_SCOTTISH);

            System.out.println(">>> Create new Discrete naive Bayes classification trainer object.");

            // Five bucket-threshold arrays, each holding the single threshold 0.5.
            double[][] bucketThresholds = new double[][] {{.5}, {.5}, {.5}, {.5}, {.5}};
            DiscreteNaiveBayesTrainer nbTrainer = new DiscreteNaiveBayesTrainer();
            nbTrainer = nbTrainer.setBucketThresholds(bucketThresholds);

            System.out.println("\n>>> Perform the training to get the model.");

            // Same vectorizer instance is used for training and for both evaluations.
            Vectorizer<Integer, Vector, Integer, Double> labeledVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            DiscreteNaiveBayesModel trainedMdl = nbTrainer.fit(ignite, cache, labeledVectorizer);
            System.out.println("\n>>> Exported Discrete Naive Bayes model: " + trainedMdl.toString(true));

            double exportedAccuracy = Evaluator.evaluate(cache, trainedMdl, labeledVectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy for exported Discrete Naive Bayes model:" + exportedAccuracy);

            // Round-trip the model through a temporary JSON file.
            mdlFile = Files.createTempFile(null, null);
            trainedMdl.toJSON(mdlFile);

            DiscreteNaiveBayesModel restoredMdl = DiscreteNaiveBayesModel.fromJSON(mdlFile);
            System.out.println("\n>>> Imported Discrete Naive Bayes model: " + restoredMdl.toString(true));

            double importedAccuracy = Evaluator.evaluate(cache, restoredMdl, labeledVectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy for imported Discrete Naive Bayes model:" + importedAccuracy);

            System.out.println("\n>>> Discrete Naive bayes model over partitioned dataset usage example completed.");
        }
        finally {
            // Release the cache first, then remove the temporary model file.
            if (cache != null) {
                cache.destroy();
            }

            if (mdlFile != null) {
                Files.deleteIfExists(mdlFile);
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : Path(java.nio.file.Path) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) DiscreteNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DiscreteNaiveBayesModel(org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesModel)

Example 37 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

The method main of the class LinearRegressionExportImportExample.

/**
 * Run example.
 * <p>
 * Trains a linear regression model on a cache-backed dataset, exports the model to a temporary JSON file,
 * imports it back and prints the RMSE of both the exported (in-memory) and the imported model.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Propagated from the file operations performed by the example.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Declared outside the inner try so that the finally block can clean them up.
        IgniteCache<Integer, Vector> dataCache = null;
        Path jsonMdlPath = null;

        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);

            System.out.println("\n>>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer();

            System.out.println("\n>>> Perform the training to get the model.");

            // One vectorizer shared by training and both evaluations, so all three see the data identically.
            Vectorizer<Integer, Vector, Integer, Double> vectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            LinearRegressionModel mdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println("\n>>> Exported LinearRegression model: " + mdl);

            double rmse = Evaluator.evaluate(dataCache, mdl, vectorizer, MetricName.RMSE);
            System.out.println("\n>>> RMSE for exported LinearRegression model: " + rmse);

            // Round-trip the model through a temporary JSON file.
            jsonMdlPath = Files.createTempFile(null, null);
            mdl.toJSON(jsonMdlPath);

            LinearRegressionModel modelImportedFromJSON = LinearRegressionModel.fromJSON(jsonMdlPath);
            System.out.println("\n>>> Imported LinearRegression model: " + modelImportedFromJSON);

            // Evaluate the model that was read back from JSON, not the in-memory one; otherwise the
            // printed value says nothing about whether the export/import round trip preserved the model.
            rmse = Evaluator.evaluate(dataCache, modelImportedFromJSON, vectorizer, MetricName.RMSE);
            System.out.println("\n>>> RMSE for imported LinearRegression model: " + rmse);

            System.out.println("\n>>> Linear regression model over cache based dataset usage example completed.");
        }
        finally {
            if (dataCache != null) {
                dataCache.destroy();
            }

            if (jsonMdlPath != null) {
                Files.deleteIfExists(jsonMdlPath);
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : Path(java.nio.file.Path) LinearRegressionLSQRTrainer(org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) LinearRegressionModel(org.apache.ignite.ml.regressions.linear.LinearRegressionModel) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 38 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

The method main of the class LogisticRegressionExportImportExample.

/**
 * Entry point of the example: trains a logistic regression model with an SGD trainer on a
 * cache-backed dataset, writes the model to a temporary JSON file, reads it back and prints
 * the accuracy of both the original and the re-imported model.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Propagated from the file operations performed by the example.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Logistic regression model over partitioned dataset usage example started.");

    // Bring up the Ignite node; it is closed automatically when the block exits.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println("\n>>> Ignite grid started.");

        // Both resources are declared outside the inner try so the finally block can release them.
        IgniteCache<Integer, Vector> cache = null;
        Path mdlFile = null;

        try {
            cache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);

            System.out.println("\n>>> Create new logistic regression trainer object.");

            // Trainer configuration, one setting per line; the fixed seed is passed explicitly.
            LogisticRegressionSGDTrainer sgdTrainer = new LogisticRegressionSGDTrainer()
                .withUpdatesStgy(new UpdatesStrategy<>(
                    new SimpleGDUpdateCalculator(0.2),
                    SimpleGDParameterUpdate.SUM_LOCAL,
                    SimpleGDParameterUpdate.AVG))
                .withMaxIterations(100000)
                .withLocIterations(100)
                .withBatchSize(10)
                .withSeed(123L);

            System.out.println("\n>>> Perform the training to get the model.");

            // Same vectorizer instance is used for training and for both evaluations.
            Vectorizer<Integer, Vector, Integer, Double> labeledVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            LogisticRegressionModel trainedMdl = sgdTrainer.fit(ignite, cache, labeledVectorizer);
            System.out.println("\n>>> Exported logistic regression model: " + trainedMdl);

            double exportedAccuracy = Evaluator.evaluate(cache, trainedMdl, labeledVectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy for exported logistic regression model " + exportedAccuracy);

            // Round-trip the model through a temporary JSON file.
            mdlFile = Files.createTempFile(null, null);
            trainedMdl.toJSON(mdlFile);

            LogisticRegressionModel restoredMdl = LogisticRegressionModel.fromJSON(mdlFile);
            System.out.println("\n>>> Imported logistic regression model: " + restoredMdl);

            double importedAccuracy = Evaluator.evaluate(cache, restoredMdl, labeledVectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy for imported logistic regression model " + importedAccuracy);

            System.out.println("\n>>> Logistic regression model over partitioned dataset usage example completed.");
        }
        finally {
            // Release the cache first, then remove the temporary model file.
            if (cache != null) {
                cache.destroy();
            }

            if (mdlFile != null) {
                Files.deleteIfExists(mdlFile);
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : Path(java.nio.file.Path) LogisticRegressionSGDTrainer(org.apache.ignite.ml.regressions.logistic.LogisticRegressionSGDTrainer) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) LogisticRegressionModel(org.apache.ignite.ml.regressions.logistic.LogisticRegressionModel) SimpleGDUpdateCalculator(org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 39 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

The method main of the class CustomersClusterizationExample.

/**
 * Entry point of the example: for each cluster count from 1 to 9 trains a KMeans model on the
 * train part of the dataset and prints the mean entropy computed on the test part.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; presumably raised while the dataset is loaded.
 */
public static void main(String[] args) throws IOException {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Declared outside the inner try so the finally block can destroy it.
        IgniteCache<Integer, Vector> cache = null;

        try {
            System.out.println(">>> Fill dataset cache.");
            cache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.WHOLESALE_CUSTOMERS);

            System.out.println(">>> Start training and scoring.");

            for (int k = 1; k < 10; k++) {
                KMeansTrainer kMeansTrainer = new KMeansTrainer()
                    .withAmountOfClusters(k)
                    .withDistance(new EuclideanDistance())
                    .withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder().withRNGSeed(0))
                    .withMaxIterations(50);

                // The cache stores Vector values; FIRST means the label sits at the first coordinate.
                Vectorizer<Integer, Vector, Integer, Double> labeledVectorizer =
                    new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

                // 80/20 split of the dataset into train and test samples.
                TrainTestSplit<Integer, Vector> trainTestSplit =
                    new TrainTestDatasetSplitter<Integer, Vector>().split(0.8);

                KMeansModel kMeansMdl =
                    kMeansTrainer.fit(ignite, cache, trainTestSplit.getTrainFilter(), labeledVectorizer);

                double meanEntropy =
                    computeMeanEntropy(cache, trainTestSplit.getTestFilter(), labeledVectorizer, kMeansMdl);

                String report = String.format(">> Clusters mean entropy [%d clusters]: %.2f", k, meanEntropy);
                System.out.println(report);
            }
        }
        finally {
            if (cache != null) {
                cache.destroy();
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector)

Example 40 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

The method main of the class KMeansClusterizationExample.

/**
 * Entry point of the example: trains a KMeans model with the default trainer settings on a
 * cache-backed dataset, prints the first two centroids and then, for every cache entry, the
 * predicted cluster next to the label stored at the first coordinate of the vector.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; presumably raised while the dataset is loaded.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> KMeans clustering algorithm over cached dataset usage example started.");

    // Bring up the Ignite node; it is closed automatically when the block exits.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Declared outside the inner try so the finally block can destroy it.
        IgniteCache<Integer, Vector> cache = null;

        try {
            cache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);

            Vectorizer<Integer, Vector, Integer, Double> labeledVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            KMeansTrainer kMeansTrainer = new KMeansTrainer();
            KMeansModel kMeansMdl = kMeansTrainer.fit(ignite, cache, labeledVectorizer);

            System.out.println(">>> KMeans centroids");
            Tracer.showAscii(kMeansMdl.centers()[0]);
            Tracer.showAscii(kMeansMdl.centers()[1]);
            System.out.println(">>>");

            System.out.println(">>> --------------------------------------------");
            System.out.println(">>> | Predicted cluster\t| Erased class label\t|");
            System.out.println(">>> --------------------------------------------");

            // Scan the whole cache; the cursor is closed by try-with-resources.
            try (QueryCursor<Cache.Entry<Integer, Vector>> cursor = cache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> entry : cursor) {
                    Vector row = entry.getValue();

                    // Coordinate 0 holds the label; the remaining coordinates are the features.
                    double label = row.get(0);
                    Vector features = row.copyOfRange(1, row.size());

                    double cluster = kMeansMdl.predict(features);

                    System.out.printf(">>> | %.4f\t\t\t| %.4f\t\t|\n", cluster, label);
                }

                System.out.println(">>> ---------------------------------");
                System.out.println(">>> KMeans clustering algorithm over cached dataset usage example completed.");
            }
        }
        finally {
            if (cache != null) {
                cache.destroy();
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) IgniteCache(org.apache.ignite.IgniteCache) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) Cache(javax.cache.Cache)

Aggregations

SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)41 Ignite (org.apache.ignite.Ignite)38 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)34 Path (java.nio.file.Path)9 IgniteCache (org.apache.ignite.IgniteCache)7 LinearRegressionModel (org.apache.ignite.ml.regressions.linear.LinearRegressionModel)7 Cache (javax.cache.Cache)6 LinearRegressionLSQRTrainer (org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer)6 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)5 DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer)5 DecisionTreeClassificationTrainer (org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer)5 FileNotFoundException (java.io.FileNotFoundException)4 FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta)4 GaussianNaiveBayesTrainer (org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesTrainer)4 LogisticRegressionSGDTrainer (org.apache.ignite.ml.regressions.logistic.LogisticRegressionSGDTrainer)4 KMeansModel (org.apache.ignite.ml.clustering.kmeans.KMeansModel)3 KMeansTrainer (org.apache.ignite.ml.clustering.kmeans.KMeansTrainer)3 ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition)3 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)3 DiscreteNaiveBayesTrainer (org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesTrainer)3