Search in sources :

Example 1 with MinMaxScalerTrainer

use of org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer in project ignite by apache.

the class TrainingWithCustomPreprocessorsExample method main.

/**
 * Runs the example: fills a cache with the Boston house prices dataset, wraps an imputing
 * preprocessor into a hand-written one, trains a pipeline on top of it and prints the R^2 score.
 *
 * @param args Command line arguments.
 * @throws Exception Exception.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        IgniteCache<Integer, Vector> trainingSet = null;

        try {
            trainingSet = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.BOSTON_HOUSE_PRICES);

            // The label is taken from the first coordinate of every cached vector.
            Vectorizer<Integer, Vector, Integer, Double> labelFirstVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            Preprocessor<Integer, Vector> imputer =
                new ImputerTrainer<Integer, Vector>().fit(ignite, trainingSet, labelFirstVectorizer);

            // Custom preprocessor declared in place as a lambda: it post-processes the imputer output.
            Preprocessor<Integer, Vector> customPreprocessor = (key, val) -> {
                LabeledVector imputed = imputer.apply(key, val);
                double raw = imputed.features().get(5);

                // Feature #5 is replaced by its natural logarithm; non-positive values map to -1.
                double replaced;
                if (raw > 0) {
                    replaced = Math.log(raw);
                }
                else {
                    replaced = -1;
                }

                Vector patched = imputed.features().set(5, replaced);
                return patched.labeled(imputed.label());
            };

            Vectorizer9000 customVectorizer = new Vectorizer9000(customPreprocessor);

            // Assemble the pipeline step by step, then fit it on the cache.
            Pipeline<Integer, Vector, Integer, Double> pipeline = new Pipeline<Integer, Vector, Integer, Double>()
                .addVectorizer(customVectorizer)
                .addPreprocessingTrainer(new MinMaxScalerTrainer<Integer, Vector>())
                .addPreprocessingTrainer(new NormalizationTrainer<Integer, Vector>().withP(1))
                .addPreprocessingTrainer(getCustomTrainer())
                .addTrainer(new DecisionTreeClassificationTrainer(5, 0));

            PipelineMdl<Integer, Vector> mdl = pipeline.fit(ignite, trainingSet);

            System.out.println(">>> Perform scoring.");
            double r2 = Evaluator.evaluate(trainingSet, mdl, mdl.getPreprocessor(), MetricName.R2);
            System.out.println(">>> R^2 score: " + r2);
        }
        finally {
            // The cache is only assigned once it has been filled, so guard against an early failure.
            if (trainingSet != null) {
                trainingSet.destroy();
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : PipelineMdl(org.apache.ignite.ml.pipeline.PipelineMdl) Evaluator(org.apache.ignite.ml.selection.scoring.evaluator.Evaluator) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) Ignite(org.apache.ignite.Ignite) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) PreprocessingTrainer(org.apache.ignite.ml.preprocessing.PreprocessingTrainer) IgniteCache(org.apache.ignite.IgniteCache) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Ignition(org.apache.ignite.Ignition) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) MLSandboxDatasets(org.apache.ignite.examples.ml.util.MLSandboxDatasets) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) VectorUtils(org.apache.ignite.ml.math.primitives.vector.VectorUtils) MetricName(org.apache.ignite.ml.selection.scoring.metric.MetricName) ImputerTrainer(org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer) DecisionTreeClassificationTrainer(org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer) NormalizationTrainer(org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer) Pipeline(org.apache.ignite.ml.pipeline.Pipeline) MinMaxScalerTrainer(org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer) LearningEnvironmentBuilder(org.apache.ignite.ml.environment.LearningEnvironmentBuilder) Vectorizer(org.apache.ignite.ml.dataset.feature.extractor.Vectorizer) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Pipeline(org.apache.ignite.ml.pipeline.Pipeline) DecisionTreeClassificationTrainer(org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector)

Example 2 with MinMaxScalerTrainer

use of org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer in project ignite by apache.

the class LinearRegressionLSQRTrainerWithMinMaxScalerExample method main.

/**
 * Runs the example: fits a MinMaxScaler preprocessor on the cached mortality dataset, trains a
 * linear regression (LSQR) model on the preprocessed data and prints the model and its RMSE.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example's signature.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Linear regression model with Min Max Scaling preprocessor over cached dataset usage example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> cache = null;

        try {
            cache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);

            // The label is taken from the first coordinate of every cached vector.
            Vectorizer<Integer, Vector, Integer, Double> labelFirstVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            System.out.println(">>> Create new MinMaxScaler trainer object.");
            MinMaxScalerTrainer<Integer, Vector> scalerTrainer = new MinMaxScalerTrainer<>();

            System.out.println(">>> Perform the training to get the MinMaxScaler preprocessor.");
            Preprocessor<Integer, Vector> scaler = scalerTrainer.fit(ignite, cache, labelFirstVectorizer);

            System.out.println(">>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer lsqrTrainer = new LinearRegressionLSQRTrainer();

            System.out.println(">>> Perform the training to get the model.");
            // TODO: IGNITE-11581
            LinearRegressionModel model = lsqrTrainer.fit(ignite, cache, scaler);

            System.out.println(">>> Linear regression model: " + model);

            // The same preprocessor used for training is passed to the evaluator.
            double rmse = Evaluator.evaluate(cache, model, scaler, MetricName.RMSE);

            System.out.println("\n>>> Rmse = " + rmse);
            System.out.println(">>> ---------------------------------");
            System.out.println(">>> Linear regression model with MinMaxScaler preprocessor over cache based dataset usage example completed.");
        }
        finally {
            // The cache is only assigned once it has been filled, so guard against an early failure.
            if (cache != null) {
                cache.destroy();
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) MinMaxScalerTrainer(org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer) LinearRegressionModel(org.apache.ignite.ml.regressions.linear.LinearRegressionModel) LinearRegressionLSQRTrainer(org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 3 with MinMaxScalerTrainer

use of org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer in project ignite by apache.

the class OneVsRestClassificationExample method main.

/**
 * Runs the example: trains a One-vs-Rest SVM model twice on the cached glass identification
 * dataset (once on the raw vectors, once through a MinMaxScaler preprocessor), then scans the
 * cache and prints per-row predictions, error counts, accuracy and a 3x3 confusion matrix for
 * each of the two models.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example's signature.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> One-vs-Rest SVM Multi-class classification model over cached dataset usage example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> cache = null;

        try {
            cache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.GLASS_IDENTIFICATION);

            SVMLinearClassificationTrainer binaryTrainer = new SVMLinearClassificationTrainer()
                .withAmountOfIterations(20)
                .withAmountOfLocIterations(50)
                .withLambda(0.2)
                .withSeed(1234L);

            OneVsRestTrainer<SVMLinearClassificationModel> trainer = new OneVsRestTrainer<>(binaryTrainer);

            // Model trained directly on the vectors, label at coordinate 0.
            MultiClassModel<SVMLinearClassificationModel> plainMdl =
                trainer.fit(ignite, cache, new DummyVectorizer<Integer>().labeled(0));

            System.out.println(">>> One-vs-Rest SVM Multi-class model");
            System.out.println(plainMdl.toString());

            // Model trained through a MinMaxScaler preprocessor fitted on the same cache.
            MinMaxScalerTrainer<Integer, Vector> scalerTrainer = new MinMaxScalerTrainer<>();
            Preprocessor<Integer, Vector> scaler =
                scalerTrainer.fit(ignite, cache, new DummyVectorizer<Integer>().labeled(0));

            MultiClassModel<SVMLinearClassificationModel> scaledMdl = trainer.fit(ignite, cache, scaler);

            System.out.println(">>> One-vs-Rest SVM Multi-class model with MinMaxScaling");
            System.out.println(scaledMdl.toString());

            System.out.println(">>> ----------------------------------------------------------------");
            System.out.println(">>> | Prediction\t| Prediction with MinMaxScaling\t| Ground Truth\t|");
            System.out.println(">>> ----------------------------------------------------------------");

            int plainErrors = 0;
            int scaledErrors = 0;
            int rows = 0;

            // Build confusion matrix. See https://en.wikipedia.org/wiki/Confusion_matrix
            // Indexed as [predicted][actual]; label 1 -> 0, label 3 -> 1, any other label -> 2.
            int[][] plainConfusion = new int[3][3];
            int[][] scaledConfusion = new int[3][3];

            try (QueryCursor<Cache.Entry<Integer, Vector>> cursor = cache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> entry : cursor) {
                    Vector row = entry.getValue();

                    // Coordinate 0 holds the label, the remaining coordinates are the features.
                    double groundTruth = row.get(0);
                    Vector features = row.copyOfRange(1, row.size());

                    double plainPrediction = plainMdl.predict(features);
                    double scaledPrediction = scaledMdl.predict(features);

                    rows++;

                    int truthCls = (int) groundTruth;
                    int truthIdx = truthCls == 1 ? 0 : (truthCls == 3 ? 1 : 2);

                    // Collect data for model
                    if (!Precision.equals(groundTruth, plainPrediction, Precision.EPSILON)) {
                        plainErrors++;
                    }

                    int plainCls = (int) plainPrediction;
                    int plainIdx = plainCls == 1 ? 0 : (plainCls == 3 ? 1 : 2);
                    plainConfusion[plainIdx][truthIdx]++;

                    // Collect data for model with min-max scaling
                    if (!Precision.equals(groundTruth, scaledPrediction, Precision.EPSILON)) {
                        scaledErrors++;
                    }

                    int scaledCls = (int) scaledPrediction;
                    int scaledIdx = scaledCls == 1 ? 0 : (scaledCls == 3 ? 1 : 2);
                    scaledConfusion[scaledIdx][truthIdx]++;

                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t\t\t\t\t| %.4f\t\t|\n", plainPrediction, scaledPrediction, groundTruth);
                }

                System.out.println(">>> ----------------------------------------------------------------");

                System.out.println("\n>>> -----------------One-vs-Rest SVM model-------------");
                System.out.println("\n>>> Absolute amount of errors " + plainErrors);
                System.out.println("\n>>> Accuracy " + (1 - plainErrors / (double) rows));
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(plainConfusion));

                System.out.println("\n>>> -----------------One-vs-Rest SVM model with MinMaxScaling-------------");
                System.out.println("\n>>> Absolute amount of errors " + scaledErrors);
                System.out.println("\n>>> Accuracy " + (1 - scaledErrors / (double) rows));
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(scaledConfusion));

                System.out.println(">>> One-vs-Rest SVM model over cache based dataset usage example completed.");
            }
        }
        finally {
            // The cache is only assigned once it has been filled, so guard against an early failure.
            if (cache != null) {
                cache.destroy();
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) OneVsRestTrainer(org.apache.ignite.ml.multiclass.OneVsRestTrainer) MinMaxScalerTrainer(org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) SVMLinearClassificationTrainer(org.apache.ignite.ml.svm.SVMLinearClassificationTrainer) Ignite(org.apache.ignite.Ignite) SVMLinearClassificationModel(org.apache.ignite.ml.svm.SVMLinearClassificationModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) IgniteCache(org.apache.ignite.IgniteCache) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) Cache(javax.cache.Cache)

Example 4 with MinMaxScalerTrainer

use of org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer in project ignite by apache.

the class Step_5_Scaling_with_Pipeline method main.

/**
 * Runs the example: reads the passengers cache, trains a pipeline (string encoding, imputing,
 * min-max scaling, normalization, decision tree classifier) and prints the trained model, its
 * accuracy and the test error.
 *
 * <p>The cache obtained from {@code TitanicUtils.readPassengers} is destroyed in a
 * {@code finally} block once the example is done, whether it succeeded or failed.
 *
 * @param args Command line arguments (not used).
 */
public static void main(String[] args) {
    System.out.println();
    System.out.println(">>> Tutorial step 5 (scaling) via Pipeline example started.");

    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        // Declared outside the inner try so that the finally block can clean it up.
        IgniteCache<Integer, Vector> dataCache = null;

        try {
            dataCache = TitanicUtils.readPassengers(ignite);

            // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare".
            final Vectorizer<Integer, Vector, Integer, Double> vectorizer =
                new DummyVectorizer<Integer>(0, 3, 4, 5, 6, 8, 10).labeled(1);

            PipelineMdl<Integer, Vector> mdl = new Pipeline<Integer, Vector, Integer, Double>()
                .addVectorizer(vectorizer)
                .addPreprocessingTrainer(new EncoderTrainer<Integer, Vector>()
                    .withEncoderType(EncoderType.STRING_ENCODER)
                    .withEncodedFeature(1)
                    .withEncodedFeature(6))
                .addPreprocessingTrainer(new ImputerTrainer<Integer, Vector>())
                .addPreprocessingTrainer(new MinMaxScalerTrainer<Integer, Vector>())
                .addPreprocessingTrainer(new NormalizationTrainer<Integer, Vector>().withP(1))
                .addTrainer(new DecisionTreeClassificationTrainer(5, 0))
                .fit(ignite, dataCache);

            System.out.println("\n>>> Trained model: " + mdl);

            double accuracy = Evaluator.evaluate(dataCache, mdl, mdl.getPreprocessor(), new Accuracy<>());

            System.out.println("\n>>> Accuracy " + accuracy);
            System.out.println("\n>>> Test Error " + (1 - accuracy));

            System.out.println(">>> Tutorial step 5 (scaling) via Pipeline example completed.");
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        finally {
            // dataCache stays null if reading the passengers failed before the assignment.
            if (dataCache != null) {
                dataCache.destroy();
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : MinMaxScalerTrainer(org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer) FileNotFoundException(java.io.FileNotFoundException) NormalizationTrainer(org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer) DecisionTreeClassificationTrainer(org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 5 with MinMaxScalerTrainer

use of org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer in project ignite by apache.

the class Step_8_CV_with_Param_Grid_and_pipeline method main.

/**
 * Runs the example: reads the passengers cache, builds a pipeline (imputing, min-max scaling,
 * decision tree classifier) and tunes the tree's {@code maxDeep} and
 * {@code minImpurityDecrease} hyper-parameters with 3-fold cross-validation over a parameter
 * grid, restricted to the train part of a 0.75 train/test split. The best parameters and the
 * whole scoring board are printed.
 *
 * <p>The cache obtained from {@code TitanicUtils.readPassengers} is destroyed in a
 * {@code finally} block once the example is done, whether it succeeded or failed.
 *
 * @param args Command line arguments (not used).
 */
public static void main(String[] args) {
    System.out.println();
    System.out.println(">>> Tutorial step 8 (cross-validation with param grid and pipeline) example started.");

    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        // Declared outside the inner try so that the finally block can clean it up.
        IgniteCache<Integer, Vector> dataCache = null;

        try {
            dataCache = TitanicUtils.readPassengers(ignite);

            // Extracts "pclass", "sibsp", "parch", "age", "fare".
            final Vectorizer<Integer, Vector, Integer, Double> vectorizer =
                new DummyVectorizer<Integer>(0, 4, 5, 6, 8).labeled(1);

            TrainTestSplit<Integer, Vector> split = new TrainTestDatasetSplitter<Integer, Vector>().split(0.75);

            DecisionTreeClassificationTrainer trainer = new DecisionTreeClassificationTrainer(5, 0);

            Pipeline<Integer, Vector, Integer, Double> pipeline = new Pipeline<Integer, Vector, Integer, Double>()
                .addVectorizer(vectorizer)
                .addPreprocessingTrainer(new ImputerTrainer<Integer, Vector>())
                .addPreprocessingTrainer(new MinMaxScalerTrainer<Integer, Vector>())
                .addTrainer(trainer);

            // Tune hyper-parameters with K-fold Cross-Validation on the split training set.
            CrossValidation<DecisionTreeModel, Integer, Vector> scoreCalculator = new CrossValidation<>();

            // The grid setters are bound to the very trainer instance that sits in the pipeline.
            ParamGrid paramGrid = new ParamGrid()
                .addHyperParam("maxDeep", trainer::withMaxDeep, new Double[] { 1.0, 2.0, 3.0, 4.0, 5.0, 10.0 })
                .addHyperParam("minImpurityDecrease", trainer::withMinImpurityDecrease, new Double[] { 0.0, 0.25, 0.5 });

            scoreCalculator
                .withIgnite(ignite)
                .withUpstreamCache(dataCache)
                .withPipeline(pipeline)
                .withMetric(MetricName.ACCURACY)
                .withFilter(split.getTrainFilter())
                .withAmountOfFolds(3)
                .withParamGrid(paramGrid);

            CrossValidationResult crossValidationRes = scoreCalculator.tuneHyperParameters();

            System.out.println("Train with maxDeep: " + crossValidationRes.getBest("maxDeep") + " and minImpurityDecrease: " + crossValidationRes.getBest("minImpurityDecrease"));

            System.out.println(crossValidationRes);

            System.out.println("Best score: " + Arrays.toString(crossValidationRes.getBestScore()));
            System.out.println("Best hyper params: " + crossValidationRes.getBestHyperParams());
            System.out.println("Best average score: " + crossValidationRes.getBestAvgScore());

            crossValidationRes.getScoringBoard().forEach((hyperParams, score) -> System.out.println("Score " + Arrays.toString(score) + " for hyper params " + hyperParams));

            System.out.println(">>> Tutorial step 8 (cross-validation with param grid and pipeline) example completed.");
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        finally {
            // dataCache stays null if reading the passengers failed before the assignment.
            if (dataCache != null) {
                dataCache.destroy();
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : MinMaxScalerTrainer(org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer) DecisionTreeModel(org.apache.ignite.ml.tree.DecisionTreeModel) FileNotFoundException(java.io.FileNotFoundException) Pipeline(org.apache.ignite.ml.pipeline.Pipeline) ParamGrid(org.apache.ignite.ml.selection.paramgrid.ParamGrid) DecisionTreeClassificationTrainer(org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer) Ignite(org.apache.ignite.Ignite) CrossValidation(org.apache.ignite.ml.selection.cv.CrossValidation) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) CrossValidationResult(org.apache.ignite.ml.selection.cv.CrossValidationResult)

Aggregations

Ignite (org.apache.ignite.Ignite)5 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)5 MinMaxScalerTrainer (org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer)5 SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)3 DecisionTreeClassificationTrainer (org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer)3 FileNotFoundException (java.io.FileNotFoundException)2 IgniteCache (org.apache.ignite.IgniteCache)2 DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer)2 Pipeline (org.apache.ignite.ml.pipeline.Pipeline)2 NormalizationTrainer (org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer)2 Cache (javax.cache.Cache)1 Ignition (org.apache.ignite.Ignition)1 MLSandboxDatasets (org.apache.ignite.examples.ml.util.MLSandboxDatasets)1 DatasetBuilder (org.apache.ignite.ml.dataset.DatasetBuilder)1 Vectorizer (org.apache.ignite.ml.dataset.feature.extractor.Vectorizer)1 LearningEnvironmentBuilder (org.apache.ignite.ml.environment.LearningEnvironmentBuilder)1 VectorUtils (org.apache.ignite.ml.math.primitives.vector.VectorUtils)1 OneVsRestTrainer (org.apache.ignite.ml.multiclass.OneVsRestTrainer)1 PipelineMdl (org.apache.ignite.ml.pipeline.PipelineMdl)1 PreprocessingTrainer (org.apache.ignite.ml.preprocessing.PreprocessingTrainer)1