Use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.
The class AlgorithmSpecificDatasetExample, method main.
/**
* Run example.
*/
public static void main(String[] args) throws Exception {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Algorithm Specific Dataset example started.");

        IgniteCache<Integer, Vector> persons = null;
        try {
            persons = createCache(ignite);

            Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<>(1);

            IgniteFunction<LabeledVector<Double>, LabeledVector<double[]>> func =
                lv -> new LabeledVector<>(lv.features(), new double[] {lv.label()});

            // NOTE: This class is part of the Developer API and all lambdas should be loaded on the server manually.
            Preprocessor<Integer, Vector> preprocessor = new PatchedPreprocessor<>(func, vectorizer);

            // Creates an algorithm-specific dataset to perform linear regression. Here we define the way features and
            // labels are extracted, and how partition data and context are created.
            SimpleLabeledDatasetDataBuilder<Integer, Vector, AlgorithmSpecificPartitionContext> builder =
                new SimpleLabeledDatasetDataBuilder<>(preprocessor);

            IgniteBiFunction<SimpleLabeledDatasetData, AlgorithmSpecificPartitionContext, SimpleLabeledDatasetData> builderFun = (data, ctx) -> {
                double[] features = data.getFeatures();
                int rows = data.getRows();

                // Makes a copy of the features, extended with a column of values equal to 1.0.
                double[] a = new double[features.length + rows];
                Arrays.fill(a, 1.0);
                System.arraycopy(features, 0, a, rows, features.length);

                return new SimpleLabeledDatasetData(a, data.getLabels(), rows);
            };

            try (AlgorithmSpecificDataset dataset = DatasetFactory.create(
                ignite,
                persons,
                (env, upstream, upstreamSize) -> new AlgorithmSpecificPartitionContext(),
                builder.andThen(builderFun)
            ).wrap(AlgorithmSpecificDataset::new)) {
                // Trains a linear regression model using gradient descent.
                double[] linearRegressionMdl = new double[2];

                for (int i = 0; i < 1000; i++) {
                    double[] gradient = dataset.gradient(linearRegressionMdl);

                    if (BLAS.getInstance().dnrm2(gradient.length, gradient, 1) < 1e-4)
                        break;

                    for (int j = 0; j < gradient.length; j++)
                        linearRegressionMdl[j] -= 0.1 / persons.size() * gradient[j];
                }

                System.out.println("Linear Regression Model: " + Arrays.toString(linearRegressionMdl));
            }

            System.out.println(">>> Algorithm Specific Dataset example completed.");
        }
        finally {
            persons.destroy();
        }
    }
    finally {
        System.out.flush();
    }
}
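The builderFun above relies on the feature matrix being stored as a flat column-major array: writing 1.0 into the first rows entries and copying the original features after them prepends a column of ones that serves as the intercept term. The following standalone sketch (plain Java, made-up numbers; the column-major layout is an assumption inferred from the copy offsets above) shows the effect on a tiny 2x2 matrix.

import java.util.Arrays;

public class InterceptColumnSketch {
    public static void main(String[] args) {
        int rows = 2;

        // A 2x2 feature matrix stored column-major: column 0 = {5, 6}, column 1 = {7, 8}.
        double[] features = {5.0, 6.0, 7.0, 8.0};

        // Same steps as builderFun: allocate rows extra cells, fill everything with 1.0,
        // then copy the original features starting at offset rows.
        double[] a = new double[features.length + rows];
        Arrays.fill(a, 1.0);
        System.arraycopy(features, 0, a, rows, features.length);

        // Prints [1.0, 1.0, 5.0, 6.0, 7.0, 8.0]: a 2x3 matrix whose first column is all ones.
        System.out.println(Arrays.toString(a));
    }
}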
Use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.
The class LinearRegressionSGDTrainer, method updateModel.
/**
* {@inheritDoc}
*/
@Override
protected <K, V> LinearRegressionModel updateModel(LinearRegressionModel mdl, DatasetBuilder<K, V> datasetBuilder,
    Preprocessor<K, V> extractor) {
    assert updatesStgy != null;

    IgniteFunction<Dataset<EmptyContext, SimpleLabeledDatasetData>, MLPArchitecture> archSupplier = dataset -> {
        int cols = dataset.compute(data -> {
            if (data.getFeatures() == null)
                return null;
            return data.getFeatures().length / data.getRows();
        }, (a, b) -> {
            if (a == null)
                return b == null ? 0 : b;
            if (b == null)
                return a;
            return b;
        });

        MLPArchitecture architecture = new MLPArchitecture(cols);
        architecture = architecture.withAddedLayer(1, true, Activators.LINEAR);

        return architecture;
    };

    MLPTrainer<?> trainer = new MLPTrainer<>(archSupplier, LossFunctions.MSE, updatesStgy, maxIterations, batchSize,
        locIterations, seed);

    IgniteFunction<LabeledVector<Double>, LabeledVector<double[]>> func =
        lv -> new LabeledVector<>(lv.features(), new double[] {lv.label()});

    PatchedPreprocessor<K, V, Double, double[]> patchedPreprocessor = new PatchedPreprocessor<>(func, extractor);

    MultilayerPerceptron mlp = Optional.ofNullable(mdl)
        .map(this::restoreMLPState)
        .map(m -> trainer.update(m, datasetBuilder, patchedPreprocessor))
        .orElseGet(() -> trainer.fit(datasetBuilder, patchedPreprocessor));

    double[] p = mlp.parameters().getStorage().data();

    return new LinearRegressionModel(new DenseVector(Arrays.copyOf(p, p.length - 1)), p[p.length - 1]);
}
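The last two lines recover the regression coefficients from the flat MLP parameter array: for the single linear output neuron built above, the parameters are laid out as [w1, ..., wn, intercept]. A minimal sketch with made-up numbers, assuming LinearRegressionModel.predict computes the dot product of weights and features plus the intercept:

import java.util.Arrays;
import org.apache.ignite.ml.math.primitives.vector.DenseVector;
import org.apache.ignite.ml.regressions.linear.LinearRegressionModel;

public class MlpParamsToLinRegSketch {
    public static void main(String[] args) {
        // Pretend this came from mlp.parameters().getStorage().data().
        double[] p = {2.0, -0.5, 1.5};

        // Everything but the last element is a weight; the last element is the intercept.
        LinearRegressionModel mdl = new LinearRegressionModel(
            new DenseVector(Arrays.copyOf(p, p.length - 1)), p[p.length - 1]);

        // Expected: 2.0 * 1.0 + (-0.5) * 2.0 + 1.5 = 2.5
        System.out.println(mdl.predict(new DenseVector(new double[] {1.0, 2.0})));
    }
}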
Use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.
The class LinearRegressionLSQRTrainer, method updateModel.
/**
* {@inheritDoc}
*/
@Override
protected <K, V> LinearRegressionModel updateModel(LinearRegressionModel mdl, DatasetBuilder<K, V> datasetBuilder,
    Preprocessor<K, V> extractor) {
    LSQRResult res;

    PatchedPreprocessor<K, V, Double, double[]> patchedPreprocessor =
        new PatchedPreprocessor<>(LinearRegressionLSQRTrainer::extendLabeledVector, extractor);

    try (LSQROnHeap<K, V> lsqr = new LSQROnHeap<>(datasetBuilder, envBuilder,
        new SimpleLabeledDatasetDataBuilder<>(patchedPreprocessor), learningEnvironment())) {
        double[] x0 = null;

        if (mdl != null) {
            int x0Size = mdl.weights().size() + 1;
            Vector weights = mdl.weights().like(x0Size);
            mdl.weights().nonZeroes().forEach(ith -> weights.set(ith.index(), ith.get()));
            weights.set(weights.size() - 1, mdl.intercept());
            x0 = weights.asArray();
        }

        res = lsqr.solve(0, 1e-12, 1e-12, 1e8, -1, false, x0);

        if (res == null)
            return getLastTrainedModelOrThrowEmptyDatasetException(mdl);
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }

    double[] x = res.getX();
    Vector weights = new DenseVector(Arrays.copyOfRange(x, 0, x.length - 1));

    return new LinearRegressionModel(weights, x[x.length - 1]);
}
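The method reference LinearRegressionLSQRTrainer::extendLabeledVector is not shown in this snippet. Judging from how the solution vector is split at the end of updateModel (weights first, intercept last), it has to append a constant 1.0 feature and wrap the Double label into the double[] shape expected by SimpleLabeledDatasetDataBuilder. A plausible sketch, not the actual source:

import org.apache.ignite.ml.math.primitives.vector.DenseVector;
import org.apache.ignite.ml.structures.LabeledVector;

public class ExtendLabeledVectorSketch {
    /** Appends an intercept column of 1.0 and converts the label to a one-element array. */
    static LabeledVector<double[]> extendLabeledVector(LabeledVector<Double> lv) {
        int size = lv.features().size();

        double[] extended = new double[size + 1];
        for (int i = 0; i < size; i++)
            extended[i] = lv.features().get(i);
        extended[size] = 1.0; // Intercept column, matching x[x.length - 1] above.

        return new LabeledVector<>(new DenseVector(extended), new double[] {lv.label()});
    }

    public static void main(String[] args) {
        LabeledVector<Double> lv = new LabeledVector<>(new DenseVector(new double[] {3.0, 4.0}), 7.0);
        LabeledVector<double[]> extended = extendLabeledVector(lv);

        System.out.println(extended.features()); // Features extended to [3.0, 4.0, 1.0].
        System.out.println(extended.label()[0]); // Label 7.0 wrapped into a one-element array.
    }
}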
Use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.
The class LogisticRegressionSGDTrainer, method updateModel.
/**
* {@inheritDoc}
*/
@Override
protected <K, V> LogisticRegressionModel updateModel(LogisticRegressionModel mdl, DatasetBuilder<K, V> datasetBuilder,
    Preprocessor<K, V> extractor) {
    IgniteFunction<Dataset<EmptyContext, SimpleLabeledDatasetData>, MLPArchitecture> archSupplier = dataset -> {
        Integer cols = dataset.compute(data -> {
            if (data.getFeatures() == null)
                return null;
            return data.getFeatures().length / data.getRows();
        }, (a, b) -> {
            // If both are null, null is propagated rather than zero, so that the empty dataset is detected below.
            if (a == null)
                return b;
            return a;
        });

        if (cols == null)
            throw new IllegalStateException("Cannot train on empty dataset");

        MLPArchitecture architecture = new MLPArchitecture(cols);
        architecture = architecture.withAddedLayer(1, true, Activators.SIGMOID);

        return architecture;
    };

    MLPTrainer<?> trainer = new MLPTrainer<>(archSupplier, LossFunctions.L2, updatesStgy, maxIterations, batchSize,
        locIterations, seed).withEnvironmentBuilder(envBuilder);

    MultilayerPerceptron mlp;

    IgniteFunction<LabeledVector<Double>, LabeledVector<double[]>> func =
        lv -> new LabeledVector<>(lv.features(), new double[] {lv.label()});

    PatchedPreprocessor<K, V, Double, double[]> patchedPreprocessor = new PatchedPreprocessor<>(func, extractor);

    if (mdl != null) {
        mlp = restoreMLPState(mdl);
        mlp = trainer.update(mlp, datasetBuilder, patchedPreprocessor);
    }
    else
        mlp = trainer.fit(datasetBuilder, patchedPreprocessor);

    double[] params = mlp.parameters().getStorage().data();

    return new LogisticRegressionModel(new DenseVector(Arrays.copyOf(params, params.length - 1)),
        params[params.length - 1]);
}
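At prediction time the recovered parameters behave like a plain logistic regression: the single sigmoid neuron trained above computes sigmoid(w . x + intercept), which LogisticRegressionModel thresholds at 0.5 by default. A standalone plain-Java sketch of that arithmetic, with made-up numbers and no Ignite APIs:

import java.util.Arrays;

public class SigmoidNeuronSketch {
    public static void main(String[] args) {
        // Pretend this is mlp.parameters().getStorage().data(): [w1, w2, intercept].
        double[] params = {0.8, -1.2, 0.3};

        double[] w = Arrays.copyOf(params, params.length - 1);
        double b = params[params.length - 1];

        double[] x = {1.0, 0.5};

        // w . x + intercept
        double z = b;
        for (int i = 0; i < w.length; i++)
            z += w[i] * x[i];

        // Sigmoid activation, then the 0.5 decision threshold.
        double p = 1.0 / (1.0 + Math.exp(-z));
        System.out.println("P(class = 1) = " + p + ", predicted class = " + (p >= 0.5 ? 1 : 0));
    }
}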
Use of org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor in project ignite by apache.
The class Deltas, method updateModel.
/**
* {@inheritDoc}
*/
@Override
protected <K, V> SVMLinearClassificationModel updateModel(SVMLinearClassificationModel mdl,
    DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> preprocessor) {
    assert datasetBuilder != null;

    IgniteFunction<Double, Double> lbTransformer = lb -> {
        if (lb == 0.0)
            return -1.0;
        else
            return lb;
    };

    IgniteFunction<LabeledVector<Double>, LabeledVector<Double>> func =
        lv -> new LabeledVector<>(lv.features(), lbTransformer.apply(lv.label()));

    PatchedPreprocessor<K, V, Double, Double> patchedPreprocessor = new PatchedPreprocessor<>(func, preprocessor);

    PartitionDataBuilder<K, V, EmptyContext, LabeledVectorSet<LabeledVector>> partDataBuilder =
        new LabeledDatasetPartitionDataBuilderOnHeap<>(patchedPreprocessor);

    Vector weights;

    try (Dataset<EmptyContext, LabeledVectorSet<LabeledVector>> dataset = datasetBuilder.build(
        envBuilder,
        (env, upstream, upstreamSize) -> new EmptyContext(),
        partDataBuilder,
        learningEnvironment()
    )) {
        if (mdl == null) {
            final int cols = dataset.compute(org.apache.ignite.ml.structures.Dataset::colSize, (a, b) -> {
                if (a == null)
                    return b == null ? 0 : b;
                if (b == null)
                    return a;
                return b;
            });

            final int weightVectorSizeWithIntercept = cols + 1;

            weights = initializeWeightsWithZeros(weightVectorSizeWithIntercept);
        }
        else
            weights = getStateVector(mdl);

        for (int i = 0; i < this.getAmountOfIterations(); i++) {
            Vector deltaWeights = calculateUpdates(weights, dataset);

            if (deltaWeights == null)
                return getLastTrainedModelOrThrowEmptyDatasetException(mdl);

            // Creates a new vector.
            weights = weights.plus(deltaWeights);
        }
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }

    return new SVMLinearClassificationModel(weights.copyOfRange(1, weights.size()), weights.get(0));
}
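Here the PatchedPreprocessor only rewrites labels: the SVM formulation expects classes encoded as -1/+1, so upstream 0.0 labels are mapped to -1.0 on the fly while features pass through untouched. A minimal sketch of that remapping applied to two hand-built vectors (class and variable names are illustrative):

import org.apache.ignite.ml.math.functions.IgniteFunction;
import org.apache.ignite.ml.math.primitives.vector.DenseVector;
import org.apache.ignite.ml.structures.LabeledVector;

public class SvmLabelPatchSketch {
    public static void main(String[] args) {
        // Same label transform as in Deltas.updateModel: 0.0 becomes -1.0, everything else passes through.
        IgniteFunction<Double, Double> lbTransformer = lb -> lb == 0.0 ? -1.0 : lb;
        IgniteFunction<LabeledVector<Double>, LabeledVector<Double>> func =
            lv -> new LabeledVector<>(lv.features(), lbTransformer.apply(lv.label()));

        LabeledVector<Double> negative = new LabeledVector<>(new DenseVector(new double[] {1.0, 2.0}), 0.0);
        LabeledVector<Double> positive = new LabeledVector<>(new DenseVector(new double[] {3.0, 4.0}), 1.0);

        System.out.println(func.apply(negative).label()); // -1.0
        System.out.println(func.apply(positive).label()); // 1.0
    }
}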