use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
the class OneVsRestClassificationExample method main.
/**
 * Runs the One-vs-Rest SVM example.
 * <p>
 * Fills a cache with the glass identification sandbox dataset and trains a One-vs-Rest multi-class model on top
 * of a linear SVM trainer twice: once on the raw vectors and once through a min-max scaling preprocessor.
 * It then scans the cache and prints, for both models, the per-row predictions, the number of errors,
 * the accuracy and a 3x3 confusion matrix. The cache is destroyed before the method returns.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; presumably raised when the sandbox dataset cannot be read.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> One-vs-Rest SVM Multi-class classification model over cached dataset usage example started.");

    // Bring up the Ignite node; it is stopped automatically when this block exits.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;

        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.GLASS_IDENTIFICATION);

            // One binary linear SVM per class, with a fixed seed.
            OneVsRestTrainer<SVMLinearClassificationModel> trainer = new OneVsRestTrainer<>(
                new SVMLinearClassificationTrainer()
                    .withAmountOfIterations(20)
                    .withAmountOfLocIterations(50)
                    .withLambda(0.2)
                    .withSeed(1234L));

            // Model trained directly on the cached vectors (label at coordinate 0).
            MultiClassModel<SVMLinearClassificationModel> mdl =
                trainer.fit(ignite, dataCache, new DummyVectorizer<Integer>().labeled(0));

            System.out.println(">>> One-vs-Rest SVM Multi-class model");
            System.out.println(mdl.toString());

            // Model trained through a min-max scaling preprocessor fitted on the same cache.
            MinMaxScalerTrainer<Integer, Vector> minMaxScalerTrainer = new MinMaxScalerTrainer<>();
            Preprocessor<Integer, Vector> preprocessor =
                minMaxScalerTrainer.fit(ignite, dataCache, new DummyVectorizer<Integer>().labeled(0));
            MultiClassModel<SVMLinearClassificationModel> mdlWithScaling =
                trainer.fit(ignite, dataCache, preprocessor);

            System.out.println(">>> One-vs-Rest SVM Multi-class model with MinMaxScaling");
            System.out.println(mdlWithScaling.toString());

            System.out.println(">>> ----------------------------------------------------------------");
            System.out.println(">>> | Prediction\t| Prediction with MinMaxScaling\t| Ground Truth\t|");
            System.out.println(">>> ----------------------------------------------------------------");

            int rowCnt = 0;
            int plainErrCnt = 0;
            int scaledErrCnt = 0;

            // Confusion matrices, indexed as [predicted][actual].
            // See https://en.wikipedia.org/wiki/Confusion_matrix
            int[][] plainMtx = new int[3][3];
            int[][] scaledMtx = new int[3][3];

            try (QueryCursor<Cache.Entry<Integer, Vector>> rows = dataCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> row : rows) {
                    Vector labeledRow = row.getValue();

                    // Coordinate 0 holds the label, the remaining coordinates are the features.
                    double groundTruth = labeledRow.get(0);
                    Vector features = labeledRow.copyOfRange(1, labeledRow.size());

                    double prediction = mdl.predict(features);
                    double predictionWithMinMaxScaling = mdlWithScaling.predict(features);

                    rowCnt++;

                    int actualIdx = confusionIdx(groundTruth);

                    // Statistics for the model without scaling.
                    if (!Precision.equals(groundTruth, prediction, Precision.EPSILON)) {
                        plainErrCnt++;
                    }
                    plainMtx[confusionIdx(prediction)][actualIdx]++;

                    // Statistics for the model with min-max scaling.
                    if (!Precision.equals(groundTruth, predictionWithMinMaxScaling, Precision.EPSILON)) {
                        scaledErrCnt++;
                    }
                    scaledMtx[confusionIdx(predictionWithMinMaxScaling)][actualIdx]++;

                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t\t\t\t\t| %.4f\t\t|\n", prediction, predictionWithMinMaxScaling, groundTruth);
                }

                double plainAccuracy = 1 - plainErrCnt / (double) rowCnt;
                double scaledAccuracy = 1 - scaledErrCnt / (double) rowCnt;

                System.out.println(">>> ----------------------------------------------------------------");

                System.out.println("\n>>> -----------------One-vs-Rest SVM model-------------");
                System.out.println("\n>>> Absolute amount of errors " + plainErrCnt);
                System.out.println("\n>>> Accuracy " + plainAccuracy);
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(plainMtx));

                System.out.println("\n>>> -----------------One-vs-Rest SVM model with MinMaxScaling-------------");
                System.out.println("\n>>> Absolute amount of errors " + scaledErrCnt);
                System.out.println("\n>>> Accuracy " + scaledAccuracy);
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(scaledMtx));

                System.out.println(">>> One-vs-Rest SVM model over cache based dataset usage example completed.");
            }
        } finally {
            // The cache may still be null if filling it failed.
            if (dataCache != null) {
                dataCache.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}

/**
 * Maps a class label to its position in the 3x3 confusion matrix: label 1 maps to index 0, label 3 to index 1
 * and any other label to index 2. The label is truncated to an {@code int} before it is compared.
 *
 * @param lb Class label (predicted or actual).
 * @return Confusion matrix index in the range 0..2.
 */
private static int confusionIdx(double lb) {
    int cls = (int) lb;

    if (cls == 1) {
        return 0;
    }

    if (cls == 3) {
        return 1;
    }

    return 2;
}
use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
the class CompoundNaiveBayesExample method main.
/**
 * Runs the compound naive Bayes example.
 * <p>
 * Fills a cache with the mixed sandbox dataset, trains a compound naive Bayes model that combines a Gaussian
 * and a discrete naive Bayes trainer, prints the model and then prints its accuracy evaluated over the same
 * cache. The cache is destroyed and standard output is flushed even when training or evaluation fails.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; presumably raised when the sandbox dataset cannot be read.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Compound Naive Bayes classification model over partitioned dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MIXED_DATASET);

            // Equal prior probability for each of the two classes.
            double[] priorProbabilities = new double[] { .5, .5 };
            // One bucket threshold per feature handled by the discrete trainer.
            double[][] thresholds = new double[][] { { .5 }, { .5 }, { .5 }, { .5 }, { .5 } };

            System.out.println(">>> Create new naive Bayes classification trainer object.");
            // The Gaussian part skips features 3..7 and the discrete part skips features 0..2.
            CompoundNaiveBayesTrainer trainer = new CompoundNaiveBayesTrainer()
                .withPriorProbabilities(priorProbabilities)
                .withGaussianNaiveBayesTrainer(new GaussianNaiveBayesTrainer())
                .withGaussianFeatureIdsToSkip(asList(3, 4, 5, 6, 7))
                .withDiscreteNaiveBayesTrainer(new DiscreteNaiveBayesTrainer().setBucketThresholds(thresholds))
                .withDiscreteFeatureIdsToSkip(asList(0, 1, 2));

            System.out.println(">>> Perform the training to get the model.");
            // The label is stored at the first coordinate of each cached vector.
            Vectorizer<Integer, Vector, Integer, Double> vectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            CompoundNaiveBayesModel mdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Compound Naive Bayes model: " + mdl);

            double accuracy = Evaluator.evaluate(dataCache, mdl, vectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy " + accuracy);

            System.out.println(">>> Compound Naive bayes model over partitioned dataset usage example completed.");
        } finally {
            // Release the example cache; it is still null if filling it failed.
            if (dataCache != null) {
                dataCache.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
the class GaussianNaiveBayesTrainerExample method main.
/**
 * Runs the Gaussian naive Bayes example.
 * <p>
 * Fills a cache with the two-classed iris sandbox dataset, trains a Gaussian naive Bayes model on it, prints
 * the model and then prints its accuracy evaluated over the same cache. The cache is destroyed and standard
 * output is flushed even when training or evaluation fails.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; presumably raised when the sandbox dataset cannot be read.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Naive Bayes classification model over partitioned dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;
        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);

            System.out.println(">>> Create new naive Bayes classification trainer object.");
            GaussianNaiveBayesTrainer trainer = new GaussianNaiveBayesTrainer();

            System.out.println(">>> Perform the training to get the model.");
            // The label is stored at the first coordinate of each cached vector.
            Vectorizer<Integer, Vector, Integer, Double> vectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            GaussianNaiveBayesModel mdl = trainer.fit(ignite, dataCache, vectorizer);
            System.out.println(">>> Naive Bayes model: " + mdl);

            double accuracy = Evaluator.evaluate(dataCache, mdl, vectorizer, MetricName.ACCURACY);
            System.out.println("\n>>> Accuracy " + accuracy);

            System.out.println(">>> Naive bayes model over partitioned dataset usage example completed.");
        } finally {
            // If filling the cache failed, dataCache is still null; without this guard a
            // NullPointerException thrown here would replace the original failure.
            if (dataCache != null) {
                dataCache.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
the class IrisClassificationExample method main.
/**
 * Runs the iris classification example.
 * <p>
 * Fills a cache with the iris sandbox dataset, trains a weighted k-nearest-neighbours classifier (k = 3,
 * Euclidean distance) on the train part of a 60/40 split and prints the accuracy measured on the test part.
 * The cache is destroyed before the method returns.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; presumably raised when the sandbox dataset cannot be read.
 */
public static void main(String[] args) throws IOException {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;

        try {
            System.out.println(">>> Fill dataset cache.");
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.IRIS);

            // The environment builder call needs a cast back to the concrete trainer type
            // before the KNN-specific settings can be chained.
            KNNClassificationTrainer knnTrainer =
                ((KNNClassificationTrainer) new KNNClassificationTrainer()
                    .withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder().withRNGSeed(0)))
                    .withK(3)
                    .withDistanceMeasure(new EuclideanDistance())
                    .withWeighted(true);

            // This vectorizer works with cache values of the Vector class;
            // FIRST means the label is stored at the first coordinate of the vector.
            Vectorizer<Integer, Vector, Integer, Double> labelFirst =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            // Split the dataset into train and test samples in a 60/40 proportion.
            TrainTestSplit<Integer, Vector> trainTest = new TrainTestDatasetSplitter<Integer, Vector>().split(0.6);

            System.out.println(">>> Start traininig.");
            KNNClassificationModel knnMdl = knnTrainer.fit(ignite, dataCache, trainTest.getTrainFilter(), labelFirst);

            System.out.println(">>> Perform scoring.");
            double testAccuracy =
                Evaluator.evaluate(dataCache, trainTest.getTestFilter(), knnMdl, labelFirst, new Accuracy<>());

            System.out.println(">> Model accuracy: " + testAccuracy);
        } finally {
            // The cache may still be null if filling it failed.
            if (dataCache != null) {
                dataCache.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
use of org.apache.ignite.examples.ml.util.SandboxMLCache in project ignite by apache.
the class TrainTestDatasetSplitterExample method main.
/**
 * Runs the train/test dataset splitter example.
 * <p>
 * Fills a cache with the mortality sandbox dataset, trains an LSQR linear regression model on the train part
 * of a 75/25 split and prints the prediction next to the ground truth for every row selected by the test
 * filter. The cache is destroyed before the method returns.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; presumably raised when the sandbox dataset cannot be read.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> Linear regression model over cache based dataset usage example started.");

    // Bring up the Ignite node; it is stopped automatically when this block exits.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;

        try {
            dataCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.MORTALITY_DATA);

            System.out.println(">>> Create new linear regression trainer object.");
            LinearRegressionLSQRTrainer lsqrTrainer = new LinearRegressionLSQRTrainer();

            System.out.println(">>> Create new training dataset splitter object.");
            TrainTestSplit<Integer, Vector> trainTest = new TrainTestDatasetSplitter<Integer, Vector>().split(0.75);

            System.out.println(">>> Perform the training to get the model.");
            // The label is stored at the first coordinate of each cached vector.
            Vectorizer<Integer, Vector, Integer, Double> labelFirst =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);
            LinearRegressionModel regressionMdl =
                lsqrTrainer.fit(ignite, dataCache, trainTest.getTrainFilter(), labelFirst);

            System.out.println(">>> Linear regression model: " + regressionMdl);

            System.out.println(">>> ---------------------------------");
            System.out.println(">>> | Prediction\t| Ground Truth\t|");
            System.out.println(">>> ---------------------------------");

            // Scan only the entries accepted by the test filter of the split.
            ScanQuery<Integer, Vector> testQry = new ScanQuery<>();
            testQry.setFilter(trainTest.getTestFilter());

            try (QueryCursor<Cache.Entry<Integer, Vector>> testRows = dataCache.query(testQry)) {
                for (Cache.Entry<Integer, Vector> row : testRows) {
                    Vector labeledRow = row.getValue();

                    // Coordinate 0 holds the label, the remaining coordinates are the features.
                    double groundTruth = labeledRow.get(0);
                    Vector features = labeledRow.copyOfRange(1, labeledRow.size());

                    double prediction = regressionMdl.predict(features);

                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth);
                }
            }

            System.out.println(">>> ---------------------------------");
            System.out.println(">>> Linear regression model over cache based dataset usage example completed.");
        } finally {
            // The cache may still be null if filling it failed.
            if (dataCache != null) {
                dataCache.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
Aggregations