Search in sources :

Example 1 with PatchedPreprocessor

use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.

The class AlgorithmSpecificDatasetExample, method main.

/**
 * Run example.
 *
 * @param args Command line arguments (unused).
 * @throws Exception If the Ignite node fails to start or the dataset cannot be closed.
 */
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Algorithm Specific Dataset example started.");
        IgniteCache<Integer, Vector> persons = null;
        try {
            persons = createCache(ignite);
            // Extracts the single feature stored at coordinate 1 of each cached vector.
            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(1);
            // Re-packs the scalar label into a one-element array, the form SimpleLabeledDatasetData expects.
            IgniteFunction<LabeledVector<Double>, LabeledVector<double[]>> func = lv -> new LabeledVector<>(lv.features(), new double[] { lv.label() });
            // NOTE: This class is part of Developer API and all lambdas should be loaded on server manually.
            Preprocessor<Integer, Vector> preprocessor = new PatchedPreprocessor<>(func, vectorizer);
            // Creates an algorithm-specific dataset to perform linear regression. Here we define the way features and
            // labels are extracted, and partition data and context are created.
            SimpleLabeledDatasetDataBuilder<Integer, Vector, AlgorithmSpecificPartitionContext> builder = new SimpleLabeledDatasetDataBuilder<>(preprocessor);
            IgniteBiFunction<SimpleLabeledDatasetData, AlgorithmSpecificPartitionContext, SimpleLabeledDatasetData> builderFun = (data, ctx) -> {
                double[] features = data.getFeatures();
                int rows = data.getRows();
                // Makes a copy of features to supplement it by a column with values equal to 1.0 (the intercept
                // column): the first 'rows' cells stay 1.0 after the fill, the original features follow.
                double[] a = new double[features.length + rows];
                Arrays.fill(a, 1.0);
                System.arraycopy(features, 0, a, rows, features.length);
                return new SimpleLabeledDatasetData(a, data.getLabels(), rows);
            };
            try (AlgorithmSpecificDataset dataset = DatasetFactory.create(ignite, persons, (env, upstream, upstreamSize) -> new AlgorithmSpecificPartitionContext(), builder.andThen(builderFun)).wrap(AlgorithmSpecificDataset::new)) {
                // Trains linear regression model using gradient descent.
                double[] linearRegressionMdl = new double[2];
                for (int i = 0; i < 1000; i++) {
                    double[] gradient = dataset.gradient(linearRegressionMdl);
                    // Stops early once the Euclidean norm of the gradient indicates convergence.
                    if (BLAS.getInstance().dnrm2(gradient.length, gradient, 1) < 1e-4)
                        break;
                    for (int j = 0; j < gradient.length; j++) linearRegressionMdl[j] -= 0.1 / persons.size() * gradient[j];
                }
                System.out.println("Linear Regression Model: " + Arrays.toString(linearRegressionMdl));
            }
            System.out.println(">>> Algorithm Specific Dataset example completed.");
        } finally {
            // Guard against NPE: createCache(ignite) may throw before 'persons' is assigned.
            if (persons != null)
                persons.destroy();
        }
    } finally {
        System.out.flush();
    }
}
Also used : Arrays(java.util.Arrays) BLAS(com.github.fommil.netlib.BLAS) SimpleLabeledDatasetData(org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) SimpleLabeledDatasetDataBuilder(org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetDataBuilder) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) Ignite(org.apache.ignite.Ignite) IgniteCache(org.apache.ignite.IgniteCache) RendezvousAffinityFunction(org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) Serializable(java.io.Serializable) Ignition(org.apache.ignite.Ignition) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DatasetFactory(org.apache.ignite.ml.dataset.DatasetFactory) IgniteBiFunction(org.apache.ignite.ml.math.functions.IgniteBiFunction) CacheConfiguration(org.apache.ignite.configuration.CacheConfiguration) Dataset(org.apache.ignite.ml.dataset.Dataset) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) DatasetWrapper(org.apache.ignite.ml.dataset.primitive.DatasetWrapper) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) Vectorizer(org.apache.ignite.ml.dataset.feature.extractor.Vectorizer) SimpleLabeledDatasetData(org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData) SimpleLabeledDatasetDataBuilder(org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetDataBuilder) DummyVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Ignite(org.apache.ignite.Ignite) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) 
LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Example 2 with PatchedPreprocessor

use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.

The class LinearRegressionSGDTrainer, method updateModel.

/**
 * {@inheritDoc}
 */
@Override
protected <K, V> LinearRegressionModel updateModel(LinearRegressionModel mdl, DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> extractor) {
    assert updatesStgy != null;
    // Derives the network shape from the data: a single linear output neuron on top of 'cols' inputs.
    IgniteFunction<Dataset<EmptyContext, SimpleLabeledDatasetData>, MLPArchitecture> archSupplier = dataset -> {
        // Number of feature columns, computed per partition and merged (empty partitions yield null).
        int cols = dataset.compute(part -> {
            if (part.getFeatures() == null)
                return null;
            return part.getFeatures().length / part.getRows();
        }, (left, right) -> {
            if (left == null)
                return right == null ? 0 : right;
            return right == null ? left : right;
        });
        return new MLPArchitecture(cols).withAddedLayer(1, true, Activators.LINEAR);
    };
    MLPTrainer<?> trainer = new MLPTrainer<>(archSupplier, LossFunctions.MSE, updatesStgy, maxIterations, batchSize, locIterations, seed);
    // Re-packs the scalar label into a one-element array, the form the MLP trainer consumes.
    IgniteFunction<LabeledVector<Double>, LabeledVector<double[]>> lbPatcher = lv -> new LabeledVector<>(lv.features(), new double[] { lv.label() });
    PatchedPreprocessor<K, V, Double, double[]> patchedPreprocessor = new PatchedPreprocessor<>(lbPatcher, extractor);
    // Continues training from the previous model state when it can be restored and updated;
    // falls back to fitting from scratch if either step yields null (mirrors the Optional chain semantics).
    MultilayerPerceptron mlp = null;
    if (mdl != null) {
        MultilayerPerceptron restored = restoreMLPState(mdl);
        if (restored != null)
            mlp = trainer.update(restored, datasetBuilder, patchedPreprocessor);
    }
    if (mlp == null)
        mlp = trainer.fit(datasetBuilder, patchedPreprocessor);
    // The last perceptron parameter is the bias term; the rest are the regression weights.
    double[] p = mlp.parameters().getStorage().data();
    return new LinearRegressionModel(new DenseVector(Arrays.copyOf(p, p.length - 1)), p[p.length - 1]);
}
Also used : Arrays(java.util.Arrays) Activators(org.apache.ignite.ml.nn.Activators) UpdatesStrategy(org.apache.ignite.ml.nn.UpdatesStrategy) SimpleLabeledDatasetData(org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) MLPArchitecture(org.apache.ignite.ml.nn.architecture.MLPArchitecture) Serializable(java.io.Serializable) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Dataset(org.apache.ignite.ml.dataset.Dataset) SingleLabelDatasetTrainer(org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer) Optional(java.util.Optional) LossFunctions(org.apache.ignite.ml.optimization.LossFunctions) MultilayerPerceptron(org.apache.ignite.ml.nn.MultilayerPerceptron) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) NotNull(org.jetbrains.annotations.NotNull) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) MLPTrainer(org.apache.ignite.ml.nn.MLPTrainer) MLPArchitecture(org.apache.ignite.ml.nn.architecture.MLPArchitecture) Dataset(org.apache.ignite.ml.dataset.Dataset) MLPTrainer(org.apache.ignite.ml.nn.MLPTrainer) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) MultilayerPerceptron(org.apache.ignite.ml.nn.MultilayerPerceptron) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Example 3 with PatchedPreprocessor

use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.

The class LinearRegressionLSQRTrainer, method updateModel.

/**
 * {@inheritDoc}
 */
@Override
protected <K, V> LinearRegressionModel updateModel(LinearRegressionModel mdl, DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> extractor) {
    LSQRResult res;
    // Patches every labeled vector through extendLabeledVector before the dataset is built.
    PatchedPreprocessor<K, V, Double, double[]> patchedPreprocessor = new PatchedPreprocessor<>(LinearRegressionLSQRTrainer::extendLabeledVector, extractor);
    try (LSQROnHeap<K, V> lsqr = new LSQROnHeap<>(datasetBuilder, envBuilder, new SimpleLabeledDatasetDataBuilder<>(patchedPreprocessor), learningEnvironment())) {
        double[] x0 = null;
        if (mdl != null) {
            // Warm start: seed the solver with [previous weights..., intercept].
            int seedSize = mdl.weights().size() + 1;
            Vector seed = mdl.weights().like(seedSize);
            mdl.weights().nonZeroes().forEach(coord -> seed.set(coord.index(), coord.get()));
            seed.set(seed.size() - 1, mdl.intercept());
            x0 = seed.asArray();
        }
        res = lsqr.solve(0, 1e-12, 1e-12, 1e8, -1, false, x0);
        if (res == null)
            return getLastTrainedModelOrThrowEmptyDatasetException(mdl);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // The last component of the solution is the intercept; the rest are the weights.
    double[] x = res.getX();
    return new LinearRegressionModel(new DenseVector(Arrays.copyOfRange(x, 0, x.length - 1)), x[x.length - 1]);
}
Also used : LSQROnHeap(org.apache.ignite.ml.math.isolve.lsqr.LSQROnHeap) LSQRResult(org.apache.ignite.ml.math.isolve.lsqr.LSQRResult) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Example 4 with PatchedPreprocessor

use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.

The class LogisticRegressionSGDTrainer, method updateModel.

/**
 * {@inheritDoc}
 */
@Override
protected <K, V> LogisticRegressionModel updateModel(LogisticRegressionModel mdl, DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> extractor) {
    // Derives the network shape from the data: a single sigmoid output neuron on top of 'cols' inputs.
    IgniteFunction<Dataset<EmptyContext, SimpleLabeledDatasetData>, MLPArchitecture> archSupplier = dataset -> {
        // Number of feature columns, computed per partition; null marks an empty partition.
        Integer cols = dataset.compute(
            part -> part.getFeatures() == null ? null : part.getFeatures().length / part.getRows(),
            (left, right) -> left == null ? right : left);
        if (cols == null)
            throw new IllegalStateException("Cannot train on empty dataset");
        return new MLPArchitecture(cols).withAddedLayer(1, true, Activators.SIGMOID);
    };
    MLPTrainer<?> trainer = new MLPTrainer<>(archSupplier, LossFunctions.L2, updatesStgy, maxIterations, batchSize, locIterations, seed).withEnvironmentBuilder(envBuilder);
    // Re-packs the scalar label into a one-element array, the form the MLP trainer consumes.
    IgniteFunction<LabeledVector<Double>, LabeledVector<double[]>> lbPatcher = lv -> new LabeledVector<>(lv.features(), new double[] { lv.label() });
    PatchedPreprocessor<K, V, Double, double[]> patchedPreprocessor = new PatchedPreprocessor<>(lbPatcher, extractor);
    // Continues training of the previous model when present, otherwise fits from scratch.
    MultilayerPerceptron mlp = mdl == null
        ? trainer.fit(datasetBuilder, patchedPreprocessor)
        : trainer.update(restoreMLPState(mdl), datasetBuilder, patchedPreprocessor);
    // The last perceptron parameter is the bias term; the rest are the regression weights.
    double[] params = mlp.parameters().getStorage().data();
    return new LogisticRegressionModel(new DenseVector(Arrays.copyOf(params, params.length - 1)), params[params.length - 1]);
}
Also used : SimpleGDUpdateCalculator(org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator) Arrays(java.util.Arrays) Activators(org.apache.ignite.ml.nn.Activators) SimpleGDParameterUpdate(org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate) UpdatesStrategy(org.apache.ignite.ml.nn.UpdatesStrategy) SimpleLabeledDatasetData(org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) MLPArchitecture(org.apache.ignite.ml.nn.architecture.MLPArchitecture) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Dataset(org.apache.ignite.ml.dataset.Dataset) SingleLabelDatasetTrainer(org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer) LossFunctions(org.apache.ignite.ml.optimization.LossFunctions) MultilayerPerceptron(org.apache.ignite.ml.nn.MultilayerPerceptron) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) NotNull(org.jetbrains.annotations.NotNull) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) MLPTrainer(org.apache.ignite.ml.nn.MLPTrainer) MLPArchitecture(org.apache.ignite.ml.nn.architecture.MLPArchitecture) Dataset(org.apache.ignite.ml.dataset.Dataset) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) MultilayerPerceptron(org.apache.ignite.ml.nn.MultilayerPerceptron) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Example 5 with PatchedPreprocessor

use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.

The class Deltas, method updateModel.

/**
 * {@inheritDoc}
 */
@Override
protected <K, V> SVMLinearClassificationModel updateModel(SVMLinearClassificationModel mdl, DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> preprocessor) {
    assert datasetBuilder != null;
    // Remaps zero labels to -1.0 (SVM convention); every other label passes through unchanged.
    IgniteFunction<LabeledVector<Double>, LabeledVector<Double>> lbPatcher = lv -> {
        Double lb = lv.label();
        return new LabeledVector<>(lv.features(), lb == 0.0 ? -1.0 : lb);
    };
    PatchedPreprocessor<K, V, Double, Double> patchedPreprocessor = new PatchedPreprocessor<>(lbPatcher, preprocessor);
    PartitionDataBuilder<K, V, EmptyContext, LabeledVectorSet<LabeledVector>> partDataBuilder = new LabeledDatasetPartitionDataBuilderOnHeap<>(patchedPreprocessor);
    Vector weights;
    try (Dataset<EmptyContext, LabeledVectorSet<LabeledVector>> dataset = datasetBuilder.build(envBuilder, (env, upstream, upstreamSize) -> new EmptyContext(), partDataBuilder, learningEnvironment())) {
        if (mdl != null)
            weights = getStateVector(mdl);
        else {
            // Number of feature columns, merged across partitions (null marks an empty partition).
            final int cols = dataset.compute(org.apache.ignite.ml.structures.Dataset::colSize, (left, right) -> {
                if (left == null)
                    return right == null ? 0 : right;
                return right == null ? left : right;
            });
            // One extra cell reserved for the intercept term.
            weights = initializeWeightsWithZeros(cols + 1);
        }
        for (int iter = 0; iter < this.getAmountOfIterations(); iter++) {
            Vector deltaWeights = calculateUpdates(weights, dataset);
            if (deltaWeights == null)
                return getLastTrainedModelOrThrowEmptyDatasetException(mdl);
            // plus() produces a fresh vector; 'weights' is never mutated in place.
            weights = weights.plus(deltaWeights);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // Cell 0 holds the intercept; the remaining cells hold the hyperplane weights.
    return new SVMLinearClassificationModel(weights.copyOfRange(1, weights.size()), weights.get(0));
}
Also used : IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) Random(java.util.Random) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) SparseVector(org.apache.ignite.ml.math.primitives.vector.impl.SparseVector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Dataset(org.apache.ignite.ml.dataset.Dataset) SingleLabelDatasetTrainer(org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) PartitionDataBuilder(org.apache.ignite.ml.dataset.PartitionDataBuilder) NotNull(org.jetbrains.annotations.NotNull) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) UpstreamEntry(org.apache.ignite.ml.dataset.UpstreamEntry) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) LabeledDatasetPartitionDataBuilderOnHeap(org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) LabeledDatasetPartitionDataBuilderOnHeap(org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap) Dataset(org.apache.ignite.ml.dataset.Dataset) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) SparseVector(org.apache.ignite.ml.math.primitives.vector.impl.SparseVector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Aggregations

Vector (org.apache.ignite.ml.math.primitives.vector.Vector)5 DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)5 PatchedPreprocessor (org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor)5 LabeledVector (org.apache.ignite.ml.structures.LabeledVector)5 Dataset (org.apache.ignite.ml.dataset.Dataset)4 IgniteFunction (org.apache.ignite.ml.math.functions.IgniteFunction)4 Preprocessor (org.apache.ignite.ml.preprocessing.Preprocessor)4 Arrays (java.util.Arrays)3 DatasetBuilder (org.apache.ignite.ml.dataset.DatasetBuilder)3 EmptyContext (org.apache.ignite.ml.dataset.primitive.context.EmptyContext)3 SimpleLabeledDatasetData (org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData)3 SingleLabelDatasetTrainer (org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer)3 NotNull (org.jetbrains.annotations.NotNull)3 Serializable (java.io.Serializable)2 Activators (org.apache.ignite.ml.nn.Activators)2 MLPTrainer (org.apache.ignite.ml.nn.MLPTrainer)2 MultilayerPerceptron (org.apache.ignite.ml.nn.MultilayerPerceptron)2 UpdatesStrategy (org.apache.ignite.ml.nn.UpdatesStrategy)2 MLPArchitecture (org.apache.ignite.ml.nn.architecture.MLPArchitecture)2 LossFunctions (org.apache.ignite.ml.optimization.LossFunctions)2