Search in sources :

Example 1 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class CacheBasedDatasetExample method main.

/**
 * Run example.
 *
 * @param args Command line arguments (not used).
 * @throws Exception If the example fails.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Cache Based Dataset example started.");
        IgniteCache<Integer, Vector> persons = null;
        try {
            persons = createCache(ignite);
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(1, 2);
            // Creates a cache based simple dataset containing features and providing standard dataset API.
            try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, vectorizer)) {
                new DatasetHelper(dataset).describe();
            }
            System.out.println(">>> Cache Based Dataset example completed.");
        } finally {
            // createCache may throw before the assignment happens; without this guard the
            // resulting NullPointerException would replace the original failure.
            if (persons != null)
                persons.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DatasetHelper(org.apache.ignite.examples.ml.util.DatasetHelper)

Example 2 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class TrainingWithCustomPreprocessorsExample method main.

/**
 * Entry point of the example: fills a cache with the Boston house prices sandbox dataset,
 * fits a pipeline that includes a lambda-defined preprocessor and prints the R^2 score.
 *
 * @param args Command line arguments.
 * @throws Exception Exception.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        IgniteCache<Integer, Vector> trainingSet = null;
        try {
            trainingSet = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.BOSTON_HOUSE_PRICES);

            Vectorizer<Integer, Vector, Integer, Double> labeledVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            Preprocessor<Integer, Vector> imputer =
                new ImputerTrainer<Integer, Vector>().fit(ignite, trainingSet, labeledVectorizer);

            // Custom preprocessor declared in place: runs the imputer, then replaces the feature
            // at index 5 with its natural logarithm when it is positive and with -1 otherwise.
            Preprocessor<Integer, Vector> logTransform = (key, val) -> {
                LabeledVector imputed = imputer.apply(key, val);
                double fifth = imputed.features().get(5);

                double replacement;
                if (fifth > 0)
                    replacement = Math.log(fifth);
                else
                    replacement = -1;

                Vector patched = imputed.features().set(5, replacement);
                return patched.labeled(imputed.label());
            };

            Vectorizer9000 customVectorizer = new Vectorizer9000(logTransform);

            PipelineMdl<Integer, Vector> mdl = new Pipeline<Integer, Vector, Integer, Double>()
                .addVectorizer(customVectorizer)
                .addPreprocessingTrainer(new MinMaxScalerTrainer<Integer, Vector>())
                .addPreprocessingTrainer(new NormalizationTrainer<Integer, Vector>().withP(1))
                .addPreprocessingTrainer(getCustomTrainer())
                .addTrainer(new DecisionTreeClassificationTrainer(5, 0))
                .fit(ignite, trainingSet);

            System.out.println(">>> Perform scoring.");

            double r2 = Evaluator.evaluate(trainingSet, mdl, mdl.getPreprocessor(), MetricName.R2);

            System.out.println(">>> R^2 score: " + r2);
        } finally {
            // The cache is only destroyed if it was actually created.
            if (trainingSet != null)
                trainingSet.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : PipelineMdl(org.apache.ignite.ml.pipeline.PipelineMdl) Evaluator(org.apache.ignite.ml.selection.scoring.evaluator.Evaluator) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) Ignite(org.apache.ignite.Ignite) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) PreprocessingTrainer(org.apache.ignite.ml.preprocessing.PreprocessingTrainer) IgniteCache(org.apache.ignite.IgniteCache) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Ignition(org.apache.ignite.Ignition) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) MLSandboxDatasets(org.apache.ignite.examples.ml.util.MLSandboxDatasets) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) VectorUtils(org.apache.ignite.ml.math.primitives.vector.VectorUtils) MetricName(org.apache.ignite.ml.selection.scoring.metric.MetricName) ImputerTrainer(org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer) DecisionTreeClassificationTrainer(org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer) NormalizationTrainer(org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer) Pipeline(org.apache.ignite.ml.pipeline.Pipeline) MinMaxScalerTrainer(org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer) LearningEnvironmentBuilder(org.apache.ignite.ml.environment.LearningEnvironmentBuilder) Vectorizer(org.apache.ignite.ml.dataset.feature.extractor.Vectorizer) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Pipeline(org.apache.ignite.ml.pipeline.Pipeline) DecisionTreeClassificationTrainer(org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector)

Example 3 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class IgniteModelDistributedInferenceExample method main.

/**
 * Run example: trains a linear regression model on the mortality sandbox dataset, wraps it
 * into a distributed inference model and prints the prediction next to the ground truth for
 * every cache entry.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; see the called APIs for the exact source.
 * @throws ExecutionException If waiting on a prediction future fails.
 * @throws InterruptedException If the thread is interrupted while waiting on a prediction future.
 */
public static void main(String... args) throws IOException, ExecutionException, InterruptedException {
    System.out.println();
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);

            System.out.println(">>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer lsqrTrainer = new LinearRegressionLSQRTrainer();

            System.out.println(">>> Perform the training to get the model.");
            LinearRegressionModel mdl = lsqrTrainer.fit(
                ignite,
                dataCache,
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST)
            );
            System.out.println(">>> Linear regression model: " + mdl);

            System.out.println(">>> Preparing model reader and model parser.");
            ModelReader mdlReader = new InMemoryModelReader(mdl);
            ModelParser<Vector, Double, ?> mdlParser = new IgniteModelParser<>();

            try (Model<Vector, Future<Double>> infMdl =
                     new IgniteDistributedModelBuilder(ignite, 4, 4).build(mdlReader, mdlParser)) {
                System.out.println(">>> Inference model is ready.");
                System.out.println(">>> ---------------------------------");
                System.out.println(">>> | Prediction\t| Ground Truth\t|");
                System.out.println(">>> ---------------------------------");

                try (QueryCursor<Cache.Entry<Integer, Vector>> cursor = dataCache.query(new ScanQuery<>())) {
                    for (Cache.Entry<Integer, Vector> entry : cursor) {
                        Vector row = entry.getValue();

                        // Coordinate 0 is read as the ground truth; the rest is passed to the model.
                        double groundTruth = row.get(0);
                        Vector inputs = row.copyOfRange(1, row.size());

                        Future<Double> pending = infMdl.predict(inputs);
                        double prediction = pending.get();

                        System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth);
                    }
                }
            }

            System.out.println(">>> ---------------------------------");
            System.out.println(">>> Linear regression model over cache based dataset usage example completed.");
        } finally {
            // The cache is only destroyed if it was actually created.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) LinearRegressionModel(org.apache.ignite.ml.regressions.linear.LinearRegressionModel) IgniteModelParser(org.apache.ignite.ml.inference.parser.IgniteModelParser) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) InMemoryModelReader(org.apache.ignite.ml.inference.reader.InMemoryModelReader) LinearRegressionLSQRTrainer(org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer) InMemoryModelReader(org.apache.ignite.ml.inference.reader.InMemoryModelReader) ModelReader(org.apache.ignite.ml.inference.reader.ModelReader) Future(java.util.concurrent.Future) Ignite(org.apache.ignite.Ignite) IgniteDistributedModelBuilder(org.apache.ignite.ml.inference.builder.IgniteDistributedModelBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) IgniteCache(org.apache.ignite.IgniteCache) SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) Cache(javax.cache.Cache)

Example 4 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class AlgorithmSpecificDatasetExample method main.

/**
 * Run example.
 *
 * @param args Command line arguments (not used).
 * @throws Exception If the example fails.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Algorithm Specific Dataset example started.");
        IgniteCache<Integer, Vector> persons = null;
        try {
            persons = createCache(ignite);
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(1);
            // Wraps the scalar label into a single-element array.
            IgniteFunction<LabeledVector<Double>, LabeledVector<double[]>> func = lv -> new LabeledVector<>(lv.features(), new double[] { lv.label() });
            // NOTE: This class is part of Developer API and all lambdas should be loaded on server manually.
            Preprocessor<Integer, Vector> preprocessor = new PatchedPreprocessor<>(func, vectorizer);
            // Creates a algorithm specific dataset to perform linear regression. Here we define the way features and
            // labels are extracted, and partition data and context are created.
            SimpleLabeledDatasetDataBuilder<Integer, Vector, AlgorithmSpecificPartitionContext> builder = new SimpleLabeledDatasetDataBuilder<>(preprocessor);
            IgniteBiFunction<SimpleLabeledDatasetData, AlgorithmSpecificPartitionContext, SimpleLabeledDatasetData> builderFun = (data, ctx) -> {
                double[] features = data.getFeatures();
                int rows = data.getRows();
                // Makes a copy of features to supplement it by columns with values equal to 1.0.
                double[] a = new double[features.length + rows];
                Arrays.fill(a, 1.0);
                System.arraycopy(features, 0, a, rows, features.length);
                return new SimpleLabeledDatasetData(a, data.getLabels(), rows);
            };
            try (AlgorithmSpecificDataset dataset = DatasetFactory.create(ignite, persons, (env, upstream, upstreamSize) -> new AlgorithmSpecificPartitionContext(), builder.andThen(builderFun)).wrap(AlgorithmSpecificDataset::new)) {
                // Trains linear regression model using gradient descent.
                double[] linearRegressionMdl = new double[2];
                // The cache is not modified while training, so the step is computed once
                // instead of asking the cache for its size on every coordinate update.
                double step = 0.1 / persons.size();
                for (int i = 0; i < 1000; i++) {
                    double[] gradient = dataset.gradient(linearRegressionMdl);
                    // Stop as soon as the Euclidean norm of the gradient is small enough.
                    if (BLAS.getInstance().dnrm2(gradient.length, gradient, 1) < 1e-4)
                        break;
                    for (int j = 0; j < gradient.length; j++) {
                        linearRegressionMdl[j] -= step * gradient[j];
                    }
                }
                System.out.println("Linear Regression Model: " + Arrays.toString(linearRegressionMdl));
            }
            System.out.println(">>> Algorithm Specific Dataset example completed.");
        } finally {
            // createCache may throw before the assignment happens; without this guard the
            // resulting NullPointerException would replace the original failure.
            if (persons != null)
                persons.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : Arrays(java.util.Arrays) BLAS(com.github.fommil.netlib.BLAS) SimpleLabeledDatasetData(org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) SimpleLabeledDatasetDataBuilder(org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetDataBuilder) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) Ignite(org.apache.ignite.Ignite) IgniteCache(org.apache.ignite.IgniteCache) RendezvousAffinityFunction(org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Serializable(java.io.Serializable) Ignition(org.apache.ignite.Ignition) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DatasetFactory(org.apache.ignite.ml.dataset.DatasetFactory) IgniteBiFunction(org.apache.ignite.ml.math.functions.IgniteBiFunction) CacheConfiguration(org.apache.ignite.configuration.CacheConfiguration) Dataset(org.apache.ignite.ml.dataset.Dataset) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) DatasetWrapper(org.apache.ignite.ml.dataset.primitive.DatasetWrapper) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) Vectorizer(org.apache.ignite.ml.dataset.feature.extractor.Vectorizer) SimpleLabeledDatasetData(org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData) SimpleLabeledDatasetDataBuilder(org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetDataBuilder) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Ignite(org.apache.ignite.Ignite) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) 
LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Example 5 with DummyVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer in project ignite by apache.

the class MinMaxScalerExample method main.

/**
 * Run example.
 *
 * @param args Command line arguments (not used).
 * @throws Exception If the example fails.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> MinMax preprocessing example started.");
        IgniteCache<Integer, Vector> data = null;
        try {
            data = createCache(ignite);
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(1, 2);
            // Fits a MinMax scaling preprocessor on top of the vectorizer output.
            Preprocessor<Integer, Vector> preprocessor = new MinMaxScalerTrainer<Integer, Vector>().fit(ignite, data, vectorizer);
            // Creates a cache based simple dataset containing features and providing standard dataset API.
            try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, data, preprocessor)) {
                new DatasetHelper(dataset).describe();
            }
            System.out.println(">>> MinMax preprocessing example completed.");
        } finally {
            // createCache may throw before the assignment happens; without this guard the
            // resulting NullPointerException would replace the original failure.
            if (data != null)
                data.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DatasetHelper(org.apache.ignite.examples.ml.util.DatasetHelper)

Aggregations

DummyVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer)23 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)23 DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)18 Ignite (org.apache.ignite.Ignite)13 HashMap (java.util.HashMap)10 Test (org.junit.Test)10 DatasetHelper (org.apache.ignite.examples.ml.util.DatasetHelper)7 HashSet (java.util.HashSet)6 SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)5 Serializable (java.io.Serializable)4 IgniteCache (org.apache.ignite.IgniteCache)4 OneHotEncoderPreprocessor (org.apache.ignite.ml.preprocessing.encoding.onehotencoder.OneHotEncoderPreprocessor)4 TrainerTest (org.apache.ignite.ml.common.TrainerTest)3 LocalDatasetBuilder (org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder)3 Cache (javax.cache.Cache)2 Ignition (org.apache.ignite.Ignition)2 Vectorizer (org.apache.ignite.ml.dataset.feature.extractor.Vectorizer)2 UnknownCategorialValueException (org.apache.ignite.ml.math.exceptions.preprocessing.UnknownCategorialValueException)2 Preprocessor (org.apache.ignite.ml.preprocessing.Preprocessor)2 LinearRegressionLSQRTrainer (org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer)2