Use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
The class RandomForestRegressionExample, method main.
/**
* Run example.
*/
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Random Forest regression algorithm over cached dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.BOSTON_HOUSE_PRICES);
            AtomicInteger idx = new AtomicInteger(0);
            RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(
                IntStream.range(0, dataCache.get(1).size() - 1)
                    .mapToObj(x -> new FeatureMeta("", idx.getAndIncrement(), false))
                    .collect(Collectors.toList()))
                .withAmountOfTrees(101)
                .withFeaturesCountSelectionStrgy(FeaturesCountSelectionStrategies.ONE_THIRD)
                .withMaxDepth(4)
                .withMinImpurityDelta(0.)
                .withSubSampleSize(0.3)
                .withSeed(0);
            trainer.withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder()
                .withParallelismStrategyTypeDependency(ParallelismStrategy.ON_DEFAULT_POOL)
                .withLoggingFactoryDependency(ConsoleLogger.Factory.LOW));
            System.out.println(">>> Configured trainer: " + trainer.getClass().getSimpleName());
            Vectorizer<Integer, Vector, Integer, Double> vectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            ModelsComposition randomForestMdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Trained model: " + randomForestMdl.toString(true));
            double mse = 0.0;
            double mae = 0.0;
            int totalAmount = 0;
            try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> observation : observations) {
                    Vector val = observation.getValue();
                    Vector inputs = val.copyOfRange(1, val.size());
                    double groundTruth = val.get(0);
                    double prediction = randomForestMdl.predict(inputs);
                    mse += Math.pow(prediction - groundTruth, 2.0);
                    mae += Math.abs(prediction - groundTruth);
                    totalAmount++;
                }
                System.out.println("\n>>> Evaluated model on " + totalAmount + " data points.");
                mse /= totalAmount;
                System.out.println("\n>>> Mean squared error (MSE) " + mse);
                mae /= totalAmount;
                System.out.println("\n>>> Mean absolute error (MAE) " + mae);
                System.out.println(">>> Random Forest regression algorithm over cached dataset usage example completed.");
            }
        } finally {
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
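The loop above computes MSE and MAE by hand with a ScanQuery. As a hedged alternative, the same figures could be obtained through Ignite's Evaluator, the way the other examples on this page compute RMSE and accuracy. The sketch below continues the example above (it reuses dataCache, randomForestMdl and vectorizer) and assumes that MetricName.MSE and MetricName.MAE are available in the Ignite ML version in use.

// Hedged sketch, continuing the example above: metric computation via Evaluator instead of the
// manual ScanQuery loop. Assumes MetricName.MSE and MetricName.MAE exist in this Ignite version.
double mseByEvaluator = Evaluator.evaluate(dataCache, randomForestMdl, vectorizer, MetricName.MSE);
double maeByEvaluator = Evaluator.evaluate(dataCache, randomForestMdl, vectorizer, MetricName.MAE);
System.out.println(">>> MSE via Evaluator: " + mseByEvaluator);
System.out.println(">>> MAE via Evaluator: " + maeByEvaluator);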
Use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
The class MovieLensSQLExample, method loadMovieLensDataset.
/**
 * Loads the MovieLens dataset into the cache.
 *
 * @param ignite Ignite instance.
 * @param cache Cache to load the ratings into.
 * @param cnt Number of rating points to be loaded.
 * @throws IOException If the dataset is not found.
 */
private static void loadMovieLensDataset(Ignite ignite, IgniteCache<?, ?> cache, int cnt) throws IOException {
    SqlFieldsQuery qry = new SqlFieldsQuery(
        "insert into ratings (rating_id, movie_id, user_id, rating) values (?, ?, ?, ?)");
    int seq = 0;
    for (String s : new SandboxMLCache(ignite).loadDataset(MLSandboxDatasets.MOVIELENS)) {
        String[] line = s.split(",");
        int userId = Integer.valueOf(line[0]);
        int movieId = Integer.valueOf(line[1]);
        double rating = Double.valueOf(line[2]);
        qry.setArgs(seq++, movieId, userId, rating);
        cache.query(qry);
        if (seq == cnt)
            break;
    }
}
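For context, the insert statement above targets a ratings table that has to exist before loadMovieLensDataset runs; in the full MovieLensSQLExample it is created through the SQL API. The sketch below shows roughly what such a DDL call looks like: the column names are taken from the insert statement above, while the column types and any WITH options are assumptions to verify against the actual example.

// Hedged sketch: creating the "ratings" table that the loader above inserts into.
// Column names follow the insert statement; the types are an assumption.
cache.query(new SqlFieldsQuery(
    "create table ratings (rating_id int primary key, movie_id int, user_id int, rating double)"))
    .getAll();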
Use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
The class LinearRegressionLSQRTrainerExample, method main.
/**
* Run example.
*/
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);
            System.out.println(">>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer();
            System.out.println(">>> Perform the training to get the model.");
            // The vectorizer extracts features and labels from upstream entities, which are
            // essentially tuples of the form (key, value) (in our case (Integer, Vector)).
            // The key part of the tuple is ignored in this example.
            // The label is extracted from the 0th entry of the value (which is a Vector),
            // and the features are the remaining part of the vector. Alternatively, we could use the
            // DatasetTrainer#fit(Ignite, IgniteCache, IgniteBiFunction, IgniteBiFunction) method,
            // which takes a separate lambda for extracting the label from (key, value) and a separate
            // lambda for extracting the features (a sketch of that variant follows this example).
            LinearRegressionModel mdl = trainer.fit(ignite, dataCache,
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST));
            double rmse = Evaluator.evaluate(dataCache, mdl,
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST), MetricName.RMSE);
            System.out.println("\n>>> RMSE = " + rmse);
            System.out.println(">>> Linear regression model over cache based dataset usage example completed.");
        } finally {
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
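The comment in the example above mentions an alternative DatasetTrainer#fit overload that takes two separate extractor lambdas instead of a Vectorizer. The sketch below illustrates that variant, reusing ignite, dataCache and trainer from the example; treat it as a sketch only, since the two-extractor overload belongs to older Ignite ML versions and may be deprecated or absent in newer ones.

// Hedged sketch of the two-extractor alternative mentioned in the comment above.
// The label is the 0th coordinate of the value vector and the features are the rest,
// mirroring what the Vectorizer-based call does.
IgniteBiFunction<Integer, Vector, Vector> featureExtractor = (k, v) -> v.copyOfRange(1, v.size());
IgniteBiFunction<Integer, Vector, Double> lbExtractor = (k, v) -> v.get(0);
LinearRegressionModel mdlViaExtractors = trainer.fit(ignite, dataCache, featureExtractor, lbExtractor);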
Use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
The class LinearRegressionSGDTrainerExample, method main.
/**
* Run example.
*/
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);
            System.out.println(">>> Create new linear regression trainer object.");
            LinearRegressionSGDTrainer<?> trainer = new LinearRegressionSGDTrainer<>(
                new UpdatesStrategy<>(new RPropUpdateCalculator(),
                    RPropParameterUpdate.SUM_LOCAL, RPropParameterUpdate.AVG),
                100000, 10, 100, 123L);
            System.out.println(">>> Perform the training to get the model.");
            Vectorizer<Integer, Vector, Integer, Double> vectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            LinearRegressionModel mdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Linear regression model: " + mdl);
            double rmse = Evaluator.evaluate(dataCache, mdl, vectorizer, MetricName.RMSE);
            System.out.println("\n>>> RMSE = " + rmse);
            System.out.println(">>> ---------------------------------");
            System.out.println(">>> Linear regression model over cache based dataset usage example completed.");
        } finally {
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
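The trainer above is wired with an RProp-based UpdatesStrategy. As a hedged variation, the same constructor can be fed a plain gradient-descent strategy built from the SimpleGDUpdateCalculator and SimpleGDParameterUpdate classes that the bagged logistic regression example below uses. The numeric constructor arguments keep the values from the example above, and the learning rate of 0.2 is borrowed from that later example rather than tuned for this dataset.

// Hedged sketch: the same LinearRegressionSGDTrainer configured with a simple gradient-descent
// update strategy instead of RProp. Constructor arguments keep the values used above.
LinearRegressionSGDTrainer<?> gdTrainer = new LinearRegressionSGDTrainer<>(
    new UpdatesStrategy<>(new SimpleGDUpdateCalculator(0.2),
        SimpleGDParameterUpdate.SUM_LOCAL, SimpleGDParameterUpdate.AVG),
    100000, 10, 100, 123L);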
Use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
The class BaggedLogisticRegressionSGDTrainerExample, method main.
/**
* Run example.
*/
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Bagged logistic regression model over partitioned dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);
            System.out.println(">>> Create new logistic regression trainer object.");
            LogisticRegressionSGDTrainer trainer = new LogisticRegressionSGDTrainer()
                .withUpdatesStgy(new UpdatesStrategy<>(new SimpleGDUpdateCalculator(0.2),
                    SimpleGDParameterUpdate.SUM_LOCAL, SimpleGDParameterUpdate.AVG))
                .withMaxIterations(100)
                .withLocIterations(10)
                .withBatchSize(10)
                .withSeed(123L);
            System.out.println(">>> Perform the training to get the model.");
            BaggedTrainer<Double> baggedTrainer = TrainerTransformers.makeBagged(trainer, 10, 0.6, 4, 3,
                new OnMajorityPredictionsAggregator())
                .withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder().withRNGSeed(1));
            System.out.println(">>> Perform evaluation of the model.");
            Vectorizer<Integer, Vector, Integer, Double> vectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            double accuracy = Evaluator.evaluate(dataCache,
                baggedTrainer.fit(ignite, dataCache, vectorizer), vectorizer, MetricName.ACCURACY);
            System.out.println(">>> ---------------------------------");
            System.out.println("\n>>> Accuracy " + accuracy);
            System.out.println(">>> Bagged logistic regression model over partitioned dataset usage example completed.");
        } finally {
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
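As a small follow-up (not part of the original example), the base, non-bagged trainer can be evaluated with exactly the same vectorizer and metric, which makes it easy to compare the accuracy of a single logistic regression model with the bagged ensemble above. The sketch reuses ignite, dataCache, trainer and vectorizer from the example.

// Hedged sketch, continuing the example above: accuracy of the single (non-bagged) model
// for comparison with the bagged ensemble.
double baseAccuracy = Evaluator.evaluate(dataCache,
    trainer.fit(ignite, dataCache, vectorizer), vectorizer, MetricName.ACCURACY);
System.out.println("\n>>> Accuracy of the single model " + baseAccuracy);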