Search in sources :

Example 26 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class RandomForestRegressionExample method main.

/**
 * Runs the example: starts an Ignite node, fills a cache with the Boston house prices sandbox dataset,
 * trains a random forest regression model on it and prints the mean squared error and mean absolute
 * error measured over the same cached data.
 *
 * @param args Command line arguments; not used.
 * @throws IOException Declared by the example; presumably raised when the sandbox dataset cannot be read.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Random Forest regression algorithm over cached dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.BOSTON_HOUSE_PRICES);

            // Every vector keeps the label at index 0, so the feature count is the vector size minus one.
            int featuresCnt = dataCache.get(1).size() - 1;

            // The stream index itself is the feature id; no external mutable counter is needed,
            // which keeps the lambda stateless.
            RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(
                IntStream.range(0, featuresCnt)
                    .mapToObj(featureId -> new FeatureMeta("", featureId, false))
                    .collect(Collectors.toList()))
                .withAmountOfTrees(101)
                .withFeaturesCountSelectionStrgy(FeaturesCountSelectionStrategies.ONE_THIRD)
                .withMaxDepth(4)
                .withMinImpurityDelta(0.)
                .withSubSampleSize(0.3)
                .withSeed(0);

            trainer.withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder()
                .withParallelismStrategyTypeDependency(ParallelismStrategy.ON_DEFAULT_POOL)
                .withLoggingFactoryDependency(ConsoleLogger.Factory.LOW));

            System.out.println(">>> Configured trainer: " + trainer.getClass().getSimpleName());

            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            ModelsComposition randomForestMdl = trainer.fit(ignite, dataCache, vectorizer);

            System.out.println(">>> Trained model: " + randomForestMdl.toString(true));

            double mse = 0.0;
            double mae = 0.0;
            int totalAmount = 0;

            // Scan the whole cache and accumulate squared and absolute prediction errors.
            try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> observation : observations) {
                    Vector val = observation.getValue();
                    // Index 0 is the ground truth; the remaining coordinates are the model inputs.
                    Vector inputs = val.copyOfRange(1, val.size());
                    double groundTruth = val.get(0);
                    double prediction = randomForestMdl.predict(inputs);
                    double err = prediction - groundTruth;

                    mse += Math.pow(err, 2.0);
                    mae += Math.abs(err);
                    totalAmount++;
                }

                System.out.println("\n>>> Evaluated model on " + totalAmount + " data points.");

                mse /= totalAmount;
                System.out.println("\n>>> Mean squared error (MSE) " + mse);

                mae /= totalAmount;
                System.out.println("\n>>> Mean absolute error (MAE) " + mae);

                System.out.println(">>> Random Forest regression algorithm over cached dataset usage example completed.");
            }
        } finally {
            // Remove the temporary cache even when training or evaluation fails.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) ModelsComposition(org.apache.ignite.ml.composition.ModelsComposition) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FeatureMeta(org.apache.ignite.ml.dataset.feature.FeatureMeta) RandomForestRegressionTrainer(org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) IgniteCache(org.apache.ignite.IgniteCache) Cache(javax.cache.Cache)

Example 27 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class MovieLensSQLExample method loadMovieLensDataset.

/**
 * Loads MovieLens dataset into cache.
 *
 * Each dataset line is split on commas; the first three fields are read as user id, movie id and rating
 * and inserted into the {@code ratings} table together with a sequential rating id starting from 0.
 *
 * @param ignite Ignite instance.
 * @param cache Cache used to execute the SQL insert statements.
 * @param cnt Maximum number of rating points to be loaded; nothing is loaded when it is not positive.
 * @throws IOException If dataset not found.
 */
private static void loadMovieLensDataset(Ignite ignite, IgniteCache<?, ?> cache, int cnt) throws IOException {
    // The limit is checked after each insert, so a non-positive limit would never match and
    // the whole dataset would be loaded. Treat it as "load nothing" instead.
    if (cnt <= 0)
        return;

    SqlFieldsQuery qry = new SqlFieldsQuery("insert into ratings (rating_id, movie_id, user_id, rating) values (?, ?, ?, ?)");
    int seq = 0;
    for (String s : new SandboxMLCache(ignite).loadDataset(MLSandboxDatasets.MOVIELENS)) {
        String[] line = s.split(",");
        // Parse straight to primitives; valueOf would box and immediately unbox.
        int userId = Integer.parseInt(line[0]);
        int movieId = Integer.parseInt(line[1]);
        double rating = Double.parseDouble(line[2]);
        qry.setArgs(seq++, movieId, userId, rating);
        cache.query(qry);
        // Stop once the requested number of rating points has been inserted.
        if (seq == cnt)
            break;
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) SqlFieldsQuery(org.apache.ignite.cache.query.SqlFieldsQuery)

Example 28 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class LinearRegressionLSQRTrainerExample method main.

/**
 * Runs the example: starts an Ignite node, fills a cache with the mortality sandbox dataset, trains a
 * linear regression model with the LSQR trainer and prints the RMSE evaluated over the same cache.
 *
 * @param args Command line arguments; not used.
 * @throws IOException Declared by the example; presumably raised when the sandbox dataset cannot be read.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);
            System.out.println(">>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer();
            System.out.println(">>> Perform the training to get the model.");
            // This object is used to extract features and vectors from upstream entities which are
            // essentially tuples of the form (key, value) (in our case (Integer, Vector)).
            // Key part of tuple in our example is ignored.
            // Label is extracted from 0th entry of the value (which is a Vector)
            // and features are all remaining vector part. Alternatively we could use
            // DatasetTrainer#fit(Ignite, IgniteCache, IgniteBiFunction, IgniteBiFunction) method call
            // where there is a separate lambda for extracting label from (key, value) and a separate lambda for
            // extracting features.
            // The same vectorizer is shared by training and evaluation so both read the data identically.
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            LinearRegressionModel mdl = trainer.fit(ignite, dataCache, vectorizer);
            double rmse = Evaluator.evaluate(dataCache, mdl, vectorizer, MetricName.RMSE);
            System.out.println("\n>>> Rmse = " + rmse);
            System.out.println(">>> Linear regression model over cache based dataset usage example completed.");
        } finally {
            // Remove the temporary cache even when training or evaluation fails.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : LinearRegressionLSQRTrainer(org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) LinearRegressionModel(org.apache.ignite.ml.regressions.linear.LinearRegressionModel) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 29 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class LinearRegressionSGDTrainerExample method main.

/**
 * Runs the example: starts an Ignite node, fills a cache with the mortality sandbox dataset, trains a
 * linear regression model with the SGD trainer (RProp update strategy) and prints the resulting model
 * and the RMSE evaluated over the same cache.
 *
 * @param args Command line arguments; not used.
 * @throws IOException Declared by the example; presumably raised when the sandbox dataset cannot be read.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    // The start banner now names the same example as the completion banner below: the data
    // comes from a cache based dataset, not from a sparse distributed matrix.
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);
            System.out.println(">>> Create new linear regression trainer object.");
            // NOTE(review): the numeric arguments look like max iterations, batch size, local iterations
            // and seed - confirm against the LinearRegressionSGDTrainer constructor.
            LinearRegressionSGDTrainer<?> trainer = new LinearRegressionSGDTrainer<>(new UpdatesStrategy<>(new RPropUpdateCalculator(), RPropParameterUpdate.SUM_LOCAL, RPropParameterUpdate.AVG), 100000, 10, 100, 123L);
            System.out.println(">>> Perform the training to get the model.");
            // One vectorizer (label taken from the first coordinate) is shared by training and evaluation.
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            LinearRegressionModel mdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Linear regression model: " + mdl);
            double rmse = Evaluator.evaluate(dataCache, mdl, vectorizer, MetricName.RMSE);
            System.out.println("\n>>> Rmse = " + rmse);
            System.out.println(">>> ---------------------------------");
            System.out.println(">>> Linear regression model over cache based dataset usage example completed.");
        } finally {
            // Remove the temporary cache even when training or evaluation fails.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) LinearRegressionModel(org.apache.ignite.ml.regressions.linear.LinearRegressionModel) RPropUpdateCalculator(org.apache.ignite.ml.optimization.updatecalculators.RPropUpdateCalculator) LinearRegressionSGDTrainer(org.apache.ignite.ml.regressions.linear.LinearRegressionSGDTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 30 with SandboxMLCache

use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.

the class BaggedLogisticRegressionSGDTrainerExample method main.

/**
 * Entry point of the example: brings up an Ignite node, loads the two-class Iris sandbox dataset into a
 * cache, wraps a logistic regression SGD trainer into a bagged trainer and prints the accuracy of the
 * fitted ensemble measured over the same cache.
 *
 * @param args Command line arguments; not used.
 * @throws IOException Declared by the example; presumably raised when the sandbox dataset cannot be read.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Logistic regression model over partitioned dataset usage example started.");

    // Bring up the Ignite node; it is closed automatically when the block exits.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> irisCache = null;

        try {
            irisCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);

            System.out.println(">>> Create new logistic regression trainer object.");

            // Base learner: logistic regression trained by SGD with a simple gradient descent update rule.
            LogisticRegressionSGDTrainer sgdTrainer = new LogisticRegressionSGDTrainer()
                .withUpdatesStgy(new UpdatesStrategy<>(
                    new SimpleGDUpdateCalculator(0.2),
                    SimpleGDParameterUpdate.SUM_LOCAL,
                    SimpleGDParameterUpdate.AVG))
                .withMaxIterations(100)
                .withLocIterations(10)
                .withBatchSize(10)
                .withSeed(123L);

            System.out.println(">>> Perform the training to get the model.");

            // Ensemble wrapper around the base learner; member predictions are combined by majority vote.
            BaggedTrainer<Double> ensembleTrainer = TrainerTransformers
                .makeBagged(sgdTrainer, 10, 0.6, 4, 3, new OnMajorityPredictionsAggregator())
                .withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder().withRNGSeed(1));

            System.out.println(">>> Perform evaluation of the model.");

            // The label is read from the first vector coordinate; the same extractor feeds fit and evaluate.
            Vectorizer<Integer, Vector, Integer, Double> labelFirst =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            double acc = Evaluator.evaluate(
                irisCache,
                ensembleTrainer.fit(ignite, irisCache, labelFirst),
                labelFirst,
                MetricName.ACCURACY);

            System.out.println(">>> ---------------------------------");
            System.out.println("\n>>> Accuracy " + acc);
            System.out.println(">>> Bagged logistic regression model over partitioned dataset usage example completed.");
        } finally {
            // Drop the temporary cache regardless of how the example body finished.
            if (irisCache != null)
                irisCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : LogisticRegressionSGDTrainer(org.apache.ignite.ml.regressions.logistic.LogisticRegressionSGDTrainer) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) OnMajorityPredictionsAggregator(org.apache.ignite.ml.composition.predictionsaggregator.OnMajorityPredictionsAggregator) SimpleGDUpdateCalculator(org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Aggregations

SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache): 41; Ignite (org.apache.ignite.Ignite): 38; Vector (org.apache.ignite.ml.math.primitives.vector.Vector): 34; Path (java.nio.file.Path): 9; IgniteCache (org.apache.ignite.IgniteCache): 7; LinearRegressionModel (org.apache.ignite.ml.regressions.linear.LinearRegressionModel): 7; Cache (javax.cache.Cache): 6; LinearRegressionLSQRTrainer (org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer): 6; AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 5; DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer): 5; DecisionTreeClassificationTrainer (org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer): 5; FileNotFoundException (java.io.FileNotFoundException): 4; FeatureMeta (org.apache.ignite.ml.dataset.feature.FeatureMeta): 4; GaussianNaiveBayesTrainer (org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesTrainer): 4; LogisticRegressionSGDTrainer (org.apache.ignite.ml.regressions.logistic.LogisticRegressionSGDTrainer): 4; KMeansModel (org.apache.ignite.ml.clustering.kmeans.KMeansModel): 3; KMeansTrainer (org.apache.ignite.ml.clustering.kmeans.KMeansTrainer): 3; ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition): 3; EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance): 3; DiscreteNaiveBayesTrainer (org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesTrainer): 3