Search in sources :

Example 1 with Vector

use of org.apache.ignite.ml.math.primitives.vector.Vector in project ignite by apache.

the class CacheBasedDatasetExample method main.

/**
 * Run example.
 * <p>
 * Starts an Ignite node, obtains a cache from {@code createCache}, builds a {@code SimpleDataset} on top of it
 * and prints its description through {@code DatasetHelper}. The cache is destroyed before the node stops.
 *
 * @param args Command line arguments (not used by this method).
 * @throws Exception If the example fails.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Cache Based Dataset example started.");
        IgniteCache<Integer, Vector> persons = null;
        try {
            persons = createCache(ignite);
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(1, 2);
            // Creates a cache based simple dataset containing features and providing standard dataset API.
            try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, vectorizer)) {
                new DatasetHelper(dataset).describe();
            }
            System.out.println(">>> Cache Based Dataset example completed.");
        } finally {
            // createCache may throw before 'persons' is assigned; without this guard the resulting
            // NullPointerException would replace the original failure.
            if (persons != null)
                persons.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DatasetHelper(org.apache.ignite.examples.ml.util.DatasetHelper)

Example 2 with Vector

use of org.apache.ignite.ml.math.primitives.vector.Vector in project ignite by apache.

the class TrainingWithCustomPreprocessorsExample method main.

/**
 * Run example.
 * <p>
 * Loads the Boston house prices sandbox dataset into a cache, fits an imputing preprocessor, wraps it in a custom
 * lambda preprocessor, trains a pipeline on top of it and prints the R^2 score of the resulting model.
 *
 * @param args Command line arguments.
 * @throws Exception Exception.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        IgniteCache<Integer, Vector> housePrices = null;
        try {
            housePrices = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.BOSTON_HOUSE_PRICES);

            Vectorizer<Integer, Vector, Integer, Double> labelFirstVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            Preprocessor<Integer, Vector> imputer =
                new ImputerTrainer<Integer, Vector>().fit(ignite, housePrices, labelFirstVectorizer);

            // Custom preprocessor defined in place: the feature at index 5 is replaced by its natural
            // logarithm when it is positive and by -1 otherwise.
            Preprocessor<Integer, Vector> logPreprocessor = (key, row) -> {
                LabeledVector imputed = imputer.apply(key, row);
                double current = imputed.features().get(5);

                double replacement;
                if (current > 0) {
                    replacement = Math.log(current);
                } else {
                    replacement = -1;
                }

                return imputed.features().set(5, replacement).labeled(imputed.label());
            };

            Vectorizer9000 pipelineVectorizer = new Vectorizer9000(logPreprocessor);

            PipelineMdl<Integer, Vector> pipelineModel = new Pipeline<Integer, Vector, Integer, Double>()
                .addVectorizer(pipelineVectorizer)
                .addPreprocessingTrainer(new MinMaxScalerTrainer<Integer, Vector>())
                .addPreprocessingTrainer(new NormalizationTrainer<Integer, Vector>().withP(1))
                .addPreprocessingTrainer(getCustomTrainer())
                .addTrainer(new DecisionTreeClassificationTrainer(5, 0))
                .fit(ignite, housePrices);

            System.out.println(">>> Perform scoring.");
            double r2 = Evaluator.evaluate(housePrices, pipelineModel, pipelineModel.getPreprocessor(), MetricName.R2);
            System.out.println(">>> R^2 score: " + r2);
        } finally {
            // The cache reference stays null when filling the cache failed.
            if (housePrices != null) {
                housePrices.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
Also used : PipelineMdl(org.apache.ignite.ml.pipeline.PipelineMdl) Evaluator(org.apache.ignite.ml.selection.scoring.evaluator.Evaluator) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) Ignite(org.apache.ignite.Ignite) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) PreprocessingTrainer(org.apache.ignite.ml.preprocessing.PreprocessingTrainer) IgniteCache(org.apache.ignite.IgniteCache) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Ignition(org.apache.ignite.Ignition) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) MLSandboxDatasets(org.apache.ignite.examples.ml.util.MLSandboxDatasets) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) VectorUtils(org.apache.ignite.ml.math.primitives.vector.VectorUtils) MetricName(org.apache.ignite.ml.selection.scoring.metric.MetricName) ImputerTrainer(org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer) DecisionTreeClassificationTrainer(org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer) NormalizationTrainer(org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer) Pipeline(org.apache.ignite.ml.pipeline.Pipeline) MinMaxScalerTrainer(org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer) LearningEnvironmentBuilder(org.apache.ignite.ml.environment.LearningEnvironmentBuilder) Vectorizer(org.apache.ignite.ml.dataset.feature.extractor.Vectorizer)

Example 3 with Vector

use of org.apache.ignite.ml.math.primitives.vector.Vector in project ignite by apache.

the class IgniteModelDistributedInferenceExample method main.

/**
 * Run example.
 * <p>
 * Trains a linear regression model on the mortality sandbox dataset, builds a distributed inference model from it
 * and prints the prediction next to the ground truth for every entry of the cache.
 *
 * @param args Command line arguments.
 * @throws IOException Declared by the example; see the calls in the method body.
 * @throws ExecutionException If waiting for a prediction result fails.
 * @throws InterruptedException If waiting for a prediction result is interrupted.
 */
public static void main(String... args) throws IOException, ExecutionException, InterruptedException {
    System.out.println();
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> mortalityCache = null;
        try {
            mortalityCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);

            System.out.println(">>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer lsqrTrainer = new LinearRegressionLSQRTrainer();

            System.out.println(">>> Perform the training to get the model.");
            LinearRegressionModel regressionModel = lsqrTrainer.fit(
                ignite,
                mortalityCache,
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST)
            );
            System.out.println(">>> Linear regression model: " + regressionModel);

            System.out.println(">>> Preparing model reader and model parser.");
            ModelReader modelReader = new InMemoryModelReader(regressionModel);
            ModelParser<Vector, Double, ?> modelParser = new IgniteModelParser<>();

            try (Model<Vector, Future<Double>> inferenceModel =
                     new IgniteDistributedModelBuilder(ignite, 4, 4).build(modelReader, modelParser)) {
                System.out.println(">>> Inference model is ready.");
                System.out.println(">>> ---------------------------------");
                System.out.println(">>> | Prediction\t| Ground Truth\t|");
                System.out.println(">>> ---------------------------------");

                try (QueryCursor<Cache.Entry<Integer, Vector>> cursor = mortalityCache.query(new ScanQuery<>())) {
                    for (Cache.Entry<Integer, Vector> entry : cursor) {
                        Vector row = entry.getValue();

                        // Coordinate 0 is treated as the ground truth; the remaining coordinates are the model input.
                        Vector features = row.copyOfRange(1, row.size());
                        double expected = row.get(0);
                        double predicted = inferenceModel.predict(features).get();

                        System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", predicted, expected);
                    }
                }
            }

            System.out.println(">>> ---------------------------------");
            System.out.println(">>> Linear regression model over cache based dataset usage example completed.");
        } finally {
            // The cache reference stays null when filling the cache failed.
            if (mortalityCache != null) {
                mortalityCache.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) LinearRegressionModel(org.apache.ignite.ml.regressions.linear.LinearRegressionModel) IgniteModelParser(org.apache.ignite.ml.inference.parser.IgniteModelParser) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) InMemoryModelReader(org.apache.ignite.ml.inference.reader.InMemoryModelReader) LinearRegressionLSQRTrainer(org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer) ModelReader(org.apache.ignite.ml.inference.reader.ModelReader) Future(java.util.concurrent.Future) Ignite(org.apache.ignite.Ignite) IgniteDistributedModelBuilder(org.apache.ignite.ml.inference.builder.IgniteDistributedModelBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) IgniteCache(org.apache.ignite.IgniteCache) Cache(javax.cache.Cache)

Example 4 with Vector

use of org.apache.ignite.ml.math.primitives.vector.Vector in project ignite by apache.

the class CompoundNaiveBayesExportImportExample method main.

/**
 * Run example.
 * <p>
 * Trains a compound naive Bayes model on the mixed sandbox dataset, writes it to a temporary JSON file, reads it
 * back and prints the model and its accuracy before and after the round trip.
 *
 * @param args Command line arguments.
 * @throws IOException If creating or deleting the temporary file fails.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Compound Naive Bayes classification model over partitioned dataset usage example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> mixedCache = null;
        Path modelFile = null;
        try {
            mixedCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MIXED_DATASET);

            double[] priors = {.5, .5};
            double[][] bucketThresholds = {{.5}, {.5}, {.5}, {.5}, {.5}};

            System.out.println("\n>>> Create new naive Bayes classification trainer object.");
            CompoundNaiveBayesTrainer bayesTrainer = new CompoundNaiveBayesTrainer()
                .withPriorProbabilities(priors)
                .withGaussianNaiveBayesTrainer(new GaussianNaiveBayesTrainer())
                .withGaussianFeatureIdsToSkip(asList(3, 4, 5, 6, 7))
                .withDiscreteNaiveBayesTrainer(new DiscreteNaiveBayesTrainer().setBucketThresholds(bucketThresholds))
                .withDiscreteFeatureIdsToSkip(asList(0, 1, 2));

            System.out.println("\n>>> Perform the training to get the model.");
            Vectorizer<Integer, Vector, Integer, Double> labelFirstVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            CompoundNaiveBayesModel trainedModel = bayesTrainer.fit(ignite, mixedCache, labelFirstVectorizer);
            System.out.println("\n>>> Exported Compound Naive Bayes model: " + trainedModel.toString(true));

            double exportedAccuracy =
                Evaluator.evaluate(mixedCache, trainedModel, labelFirstVectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy for exported Compound Naive Bayes model:" + exportedAccuracy);

            // Round trip through a temporary JSON file.
            modelFile = Files.createTempFile(null, null);
            trainedModel.toJSON(modelFile);
            CompoundNaiveBayesModel restoredModel = CompoundNaiveBayesModel.fromJSON(modelFile);
            System.out.println("\n>>> Imported Compound Naive Bayes model: " + restoredModel.toString(true));

            double importedAccuracy =
                Evaluator.evaluate(mixedCache, restoredModel, labelFirstVectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy for imported Compound Naive Bayes model:" + importedAccuracy);

            System.out.println("\n>>> Compound Naive Bayes model over partitioned dataset usage example completed.");
        } finally {
            // Either reference may still be null if the example failed early.
            if (mixedCache != null) {
                mixedCache.destroy();
            }
            if (modelFile != null) {
                Files.deleteIfExists(modelFile);
            }
        }
    } finally {
        System.out.flush();
    }
}
Also used : Path(java.nio.file.Path) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) DiscreteNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.discrete.DiscreteNaiveBayesTrainer) CompoundNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.compound.CompoundNaiveBayesTrainer) CompoundNaiveBayesModel(org.apache.ignite.ml.naivebayes.compound.CompoundNaiveBayesModel) GaussianNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 5 with Vector

use of org.apache.ignite.ml.math.primitives.vector.Vector in project ignite by apache.

the class GaussianNaiveBayesExportImportExample method main.

/**
 * Run example.
 * <p>
 * Trains a Gaussian naive Bayes model on the two-classed iris sandbox dataset, writes it to a temporary JSON file,
 * reads it back and prints the model and its accuracy before and after the round trip.
 *
 * @param args Command line arguments.
 * @throws IOException If creating or deleting the temporary file fails.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Naive Bayes classification model over partitioned dataset usage example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> irisCache = null;
        Path modelFile = null;
        try {
            irisCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);

            System.out.println(">>> Create new Gaussian Naive Bayes classification trainer object.");
            GaussianNaiveBayesTrainer bayesTrainer = new GaussianNaiveBayesTrainer();

            System.out.println("\n>>> Perform the training to get the model.");
            Vectorizer<Integer, Vector, Integer, Double> labelFirstVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            GaussianNaiveBayesModel trainedModel = bayesTrainer.fit(ignite, irisCache, labelFirstVectorizer);
            System.out.println("\n>>> Exported Gaussian Naive Bayes model: " + trainedModel.toString(true));

            double exportedAccuracy =
                Evaluator.evaluate(irisCache, trainedModel, labelFirstVectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy for exported Gaussian Naive Bayes model:" + exportedAccuracy);

            // Round trip through a temporary JSON file.
            modelFile = Files.createTempFile(null, null);
            trainedModel.toJSON(modelFile);
            GaussianNaiveBayesModel restoredModel = GaussianNaiveBayesModel.fromJSON(modelFile);
            System.out.println("\n>>> Imported Gaussian Naive Bayes model: " + restoredModel.toString(true));

            double importedAccuracy =
                Evaluator.evaluate(irisCache, restoredModel, labelFirstVectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy for imported Gaussian Naive Bayes model:" + importedAccuracy);

            System.out.println("\n>>> Gaussian Naive bayes model over partitioned dataset usage example completed.");
        } finally {
            // Either reference may still be null if the example failed early.
            if (irisCache != null) {
                irisCache.destroy();
            }
            if (modelFile != null) {
                Files.deleteIfExists(modelFile);
            }
        }
    } finally {
        System.out.flush();
    }
}
Also used : Path(java.nio.file.Path) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) GaussianNaiveBayesTrainer(org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesTrainer) Ignite(org.apache.ignite.Ignite) GaussianNaiveBayesModel(org.apache.ignite.ml.naivebayes.gaussian.GaussianNaiveBayesModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Aggregations

Vector (org.apache.ignite.ml.math.primitives.vector.Vector)265 DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)95 Test (org.junit.Test)94 Ignite (org.apache.ignite.Ignite)78 LabeledVector (org.apache.ignite.ml.structures.LabeledVector)49 HashMap (java.util.HashMap)39 SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)38 DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer)26 FileNotFoundException (java.io.FileNotFoundException)22 TrainerTest (org.apache.ignite.ml.common.TrainerTest)22 DecisionTreeClassificationTrainer (org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer)21 DecisionTreeModel (org.apache.ignite.ml.tree.DecisionTreeModel)21 Serializable (java.io.Serializable)19 IgniteCache (org.apache.ignite.IgniteCache)18 EncoderTrainer (org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer)16 Cache (javax.cache.Cache)15 DoubleArrayVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer)15 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)14 ArrayList (java.util.ArrayList)12 ModelsComposition (org.apache.ignite.ml.composition.ModelsComposition)12