Search in sources :

Example 31 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class RegressionMetricExample method main.

/**
 * Entry point of the kNN regression metric example.
 *
 * <p>Starts an Ignite node from {@code examples/config/example-ignite.xml}, fills a cache with the
 * {@code CLEARED_MACHINES} sandbox dataset, fits a weighted kNN regression model (k = 5, Manhattan
 * distance, ball-tree spatial index) and prints the MAE metric computed over the same cache.
 * The cache is destroyed before the node is closed.
 *
 * @param args Command line arguments; not read by this method.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset is
 *      loaded (the throwing call is not identifiable from this snippet alone).
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> kNN regression over cached dataset usage example started.");

    // The node is a try-with-resources resource, so it is closed when this block exits.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Stays null if filling the cache fails, so the cleanup below can tell whether there is anything to destroy.
        IgniteCache<Integer, Vector> machinesCache = null;

        try {
            SandboxMLCache sandbox = new SandboxMLCache(ignite);
            machinesCache = sandbox.fillCacheWith(MLSandboxDatasets.CLEARED_MACHINES);

            KNNRegressionTrainer knnTrainer = new KNNRegressionTrainer()
                .withK(5)
                .withDistanceMeasure(new ManhattanDistance())
                .withIdxType(SpatialIndexType.BALL_TREE)
                .withWeighted(true);

            // The first coordinate of every cached vector is treated as the label.
            Vectorizer<Integer, Vector, Integer, Double> firstAsLbl =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            KNNRegressionModel mdl = knnTrainer.fit(ignite, machinesCache, firstAsLbl);

            // The metric is evaluated on the same cache the model was fitted on.
            double maeVal = Evaluator.evaluate(machinesCache, mdl, firstAsLbl, MetricName.MAE);

            System.out.println("\n>>> Mae " + maeVal);
        } finally {
            if (machinesCache != null) {
                machinesCache.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) KNNRegressionTrainer(org.apache.ignite.ml.knn.regression.KNNRegressionTrainer) Ignite(org.apache.ignite.Ignite) KNNRegressionModel(org.apache.ignite.ml.knn.regression.KNNRegressionModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) ManhattanDistance(org.apache.ignite.ml.math.distances.ManhattanDistance)

Example 32 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class RandomForestClassificationExample method main.

/**
 * Entry point of the random forest multi-class classification example.
 *
 * <p>Starts an Ignite node from {@code examples/config/example-ignite.xml}, fills a cache with the
 * {@code WINE_RECOGNITION} sandbox dataset, trains a random forest classifier on it and then scans
 * the same cache to count mispredictions and print the resulting accuracy. The cache is destroyed
 * before the node is closed.
 *
 * @param args Command line arguments; not read by this method.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset is
 *      loaded (the throwing call is not identifiable from this snippet alone).
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Random Forest multi-class classification algorithm over cached dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // Stays null if filling the cache fails, so the cleanup below can tell whether there is anything to destroy.
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.WINE_RECOGNITION);
            // One feature descriptor per non-label coordinate. The vector stored under key 1 is used as a
            // sample of the row width (assumes the filled cache contains key 1); the label occupies one
            // coordinate, hence "size() - 1". The range element itself is the feature id, so no external
            // mutable counter is needed inside the lambda.
            RandomForestClassifierTrainer classifier = new RandomForestClassifierTrainer(
                IntStream.range(0, dataCache.get(1).size() - 1)
                    .mapToObj(featureIdx -> new FeatureMeta("", featureIdx, false))
                    .collect(Collectors.toList()))
                .withAmountOfTrees(101)
                .withFeaturesCountSelectionStrgy(FeaturesCountSelectionStrategies.ONE_THIRD)
                .withMaxDepth(4)
                .withMinImpurityDelta(0.)
                .withSubSampleSize(0.3)
                .withSeed(0);
            System.out.println(">>> Configured trainer: " + classifier.getClass().getSimpleName());
            // The first coordinate of every cached vector is treated as the label.
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            ModelsComposition randomForestMdl = classifier.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Trained model: " + randomForestMdl.toString(true));
            int amountOfErrors = 0;
            int totalAmount = 0;
            // The cursor is a try-with-resources resource, so it is closed once the scan is finished.
            try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> observation : observations) {
                    Vector val = observation.getValue();
                    // Coordinate 0 is the label; the remaining coordinates are the model inputs.
                    Vector inputs = val.copyOfRange(1, val.size());
                    double groundTruth = val.get(0);
                    double prediction = randomForestMdl.predict(inputs);
                    totalAmount++;
                    // Labels are doubles, so they are compared with a tolerance instead of "==".
                    if (!Precision.equals(groundTruth, prediction, Precision.EPSILON))
                        amountOfErrors++;
                }
                System.out.println("\n>>> Evaluated model on " + totalAmount + " data points.");
                System.out.println("\n>>> Absolute amount of errors " + amountOfErrors);
                System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double) totalAmount));
                System.out.println(">>> Random Forest multi-class classification algorithm over cached dataset usage example completed.");
            }
        } finally {
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) ModelsComposition(org.apache.ignite.ml.composition.ModelsComposition) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FeatureMeta(org.apache.ignite.ml.dataset.feature.FeatureMeta) RandomForestClassifierTrainer(org.apache.ignite.ml.tree.randomforest.RandomForestClassifierTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) IgniteCache(org.apache.ignite.IgniteCache) Cache(javax.cache.Cache)

Example 33 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class EncoderExampleWithNormalization method main.

/**
 * Entry point of the "encoder with normalization" example.
 *
 * <p>Starts an Ignite node from {@code examples/config/example-ignite.xml}, fills an object cache
 * with the {@code MUSHROOMS} sandbox dataset, string-encodes features 0, 1 and 2, chains a
 * normalization preprocessor on top of the encoder, trains a decision tree classifier on the
 * preprocessed data and prints its accuracy and test error computed over the same cache.
 * The cache is destroyed before the node is closed, whether or not the run succeeded.
 *
 * <p>A {@link FileNotFoundException} is caught and its stack trace printed rather than propagated,
 * because the signature declares no checked exceptions.
 *
 * @param args Command line arguments; not read by this method.
 */
public static void main(String[] args) {
    System.out.println();
    System.out.println(">>> Train Decision Tree model on mushrooms.csv dataset.");
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        // Stays null if filling the cache fails, so the cleanup below can tell whether there is anything to destroy.
        IgniteCache<Integer, Object[]> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillObjectCacheWithDoubleLabels(MLSandboxDatasets.MUSHROOMS);
            // Array elements 1, 2 and 3 are the features; element 0 is the label.
            final Vectorizer<Integer, Object[], Integer, Object> vectorizer = new ObjectArrayVectorizer<Integer>(1, 2, 3).labeled(0);
            // Defines first preprocessor that string-encodes the three extracted features.
            Preprocessor<Integer, Object[]> encoderPreprocessor = new EncoderTrainer<Integer, Object[]>().withEncoderType(EncoderType.STRING_ENCODER).withEncodedFeature(0).withEncodedFeature(1).withEncodedFeature(2).fit(ignite, dataCache, vectorizer);
            // Defines second preprocessor that normalizes features.
            Preprocessor<Integer, Object[]> normalizer = new NormalizationTrainer<Integer, Object[]>().withP(1).fit(ignite, dataCache, encoderPreprocessor);
            DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(5, 0);
            // Train decision tree model.
            DecisionTreeModel mdl = trainer.fit(ignite, dataCache, normalizer);
            System.out.println("\n>>> Trained model: " + mdl);
            // Accuracy is evaluated on the same cache the model was trained on.
            double accuracy = Evaluator.evaluate(dataCache, mdl, normalizer, new Accuracy<>());
            System.out.println("\n>>> Accuracy " + accuracy);
            System.out.println("\n>>> Test Error " + (1 - accuracy));
            // Completion message; the previous text was a leftover from another example and claimed the run had just "started".
            System.out.println(">>> Encoder with normalization example completed.");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } finally {
            // Release the cache like the sibling examples do, so it does not linger after the run.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) DecisionTreeModel(org.apache.ignite.ml.tree.DecisionTreeModel) FileNotFoundException(java.io.FileNotFoundException) NormalizationTrainer(org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer) DecisionTreeClassificationTrainer(org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer) Ignite(org.apache.ignite.Ignite)

Example 34 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class DiscreteNaiveBayesTrainerExample method main.

/**
 * Entry point of the discrete naive Bayes classification example.
 *
 * <p>Starts an Ignite node from {@code examples/config/example-ignite.xml}, fills a cache with the
 * {@code ENGLISH_VS_SCOTTISH} sandbox dataset, trains a discrete naive Bayes model with a single
 * bucket threshold of 0.5 for each of five features and prints the accuracy computed over the
 * same cache. The cache is destroyed before the node is closed.
 *
 * @param args Command line arguments; not read by this method.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset is
 *      loaded (the throwing call is not identifiable from this snippet alone).
 */
public static void main(String[] args) throws IOException {
    System.out.println(">>> Discrete naive Bayes classification model over partitioned dataset usage example started.");

    // The node is a try-with-resources resource, so it is closed when this block exits.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Stays null if filling the cache fails, so the cleanup below can tell whether there is anything to destroy.
        IgniteCache<Integer, Vector> samples = null;

        try {
            SandboxMLCache sandbox = new SandboxMLCache(ignite);
            samples = sandbox.fillCacheWith(MLSandboxDatasets.ENGLISH_VS_SCOTTISH);

            // One row per feature, each holding a single threshold.
            double[][] bucketBounds = {
                {0.5},
                {0.5},
                {0.5},
                {0.5},
                {0.5}
            };

            System.out.println(">>> Create new Discrete naive Bayes classification trainer object.");
            DiscreteNaiveBayesTrainer nbTrainer = new DiscreteNaiveBayesTrainer()
                .setBucketThresholds(bucketBounds);

            System.out.println(">>> Perform the training to get the model.");
            // The first coordinate of every cached vector is treated as the label.
            Vectorizer<Integer, Vector, Integer, Double> firstAsLbl =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            DiscreteNaiveBayesModel nbMdl = nbTrainer.fit(ignite, samples, firstAsLbl);
            System.out.println(">>> Discrete Naive Bayes model: " + nbMdl);

            // Accuracy is evaluated on the same cache the model was trained on.
            double acc = Evaluator.evaluate(samples, nbMdl, firstAsLbl, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy " + acc);

            System.out.println(">>> Discrete Naive bayes model over partitioned dataset usage example completed.");
        } finally {
            if (samples != null) {
                samples.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) DiscreteNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DiscreteNaiveBayesModel(org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesModel)

Example 35 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class EvaluatorExample method main.

/**
 * Entry point of the evaluator example.
 *
 * <p>Starts an Ignite node from {@code examples/config/example-ignite.xml}, fills a cache with the
 * {@code TWO_CLASSED_IRIS} sandbox dataset, trains a linear SVM classifier with default settings
 * and prints the result of the binary-classification evaluation over the same cache.
 * The cache is destroyed before the node is closed.
 *
 * @param args Command line arguments; not read by this method.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset is
 *      loaded (the throwing call is not identifiable from this snippet alone).
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Evaluation of SVM binary classification algorithm over cached dataset usage example started.");

    // The node is a try-with-resources resource, so it is closed when this block exits.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Stays null if filling the cache fails, so the cleanup below can tell whether there is anything to destroy.
        IgniteCache<Integer, Vector> irisCache = null;

        try {
            SandboxMLCache sandbox = new SandboxMLCache(ignite);
            irisCache = sandbox.fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);

            SVMLinearClassificationTrainer svmTrainer = new SVMLinearClassificationTrainer();

            // The first coordinate of every cached vector is treated as the label.
            Vectorizer<Integer, Vector, Integer, Double> firstAsLbl =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            SVMLinearClassificationModel svmMdl = svmTrainer.fit(ignite, irisCache, firstAsLbl);

            // The evaluation result is printed as-is, relying on its string representation.
            System.out.println(Evaluator.evaluateBinaryClassification(irisCache, svmMdl, firstAsLbl));
        } finally {
            if (irisCache != null) {
                irisCache.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) Ignite(org.apache.ignite.Ignite) SVMLinearClassificationModel(org.apache.ignite.ml.svm.SVMLinearClassificationModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) SVMLinearClassificationTrainer(org.apache.ignite.ml.svm.SVMLinearClassificationTrainer)

Aggregations

SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)41 Ignite (org.apache.ignite.Ignite)38 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)34 Path (java.nio.file.Path)9 IgniteCache (org.apache.ignite.IgniteCache)7 LinearRegressionModel (org.apache.ignite.ml.regressions.linear.LinearRegressionModel)7 Cache (javax.cache.Cache)6 LinearRegressionLSQRTrainer (org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer)6 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)5 DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer)5 DecisionTreeClassificationTrainer (org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer)5 FileNotFoundException (java.io.FileNotFoundException)4 FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta)4 GaussianNaiveBayesTrainer (org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesTrainer)4 LogisticRegressionSGDTrainer (org.apache.ignite.ml.regressions.logistic.LogisticRegressionSGDTrainer)4 KMeansModel (org.apache.ignite.ml.clustering.kmeans.KMeansModel)3 KMeansTrainer (org.apache.ignite.ml.clustering.kmeans.KMeansTrainer)3 ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition)3 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)3 DiscreteNaiveBayesTrainer (org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesTrainer)3