use of org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair in project ignite by apache.
the class LabeledDatasetTest method testSplitting.
/**
 * Verifies the sizes of the test and train parts produced by {@link LabeledDatasetTestTrainPair}
 * for a six-row dataset and several values of the second constructor argument.
 */
public void testSplitting() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    // Six two-dimensional observations, three per class.
    final double[][] features = {
        { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 },
        { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 }
    };
    final double[] classLabels = { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };

    final LabeledDataset dataset = new LabeledDataset(features, classLabels);

    // Each column is one scenario: split argument -> expected test size / expected train size.
    final double[] splitArgs = { 0.67, 0.65, 0.4, 0.3 };
    final int[] expTestSizes = { 4, 3, 2, 1 };
    final int[] expTrainSizes = { 2, 3, 4, 5 };

    for (int k = 0; k < splitArgs.length; k++) {
        LabeledDatasetTestTrainPair pair = new LabeledDatasetTestTrainPair(dataset, splitArgs[k]);

        assertEquals(expTestSizes[k], pair.test().rowSize());
        assertEquals(expTrainSizes[k], pair.train().rowSize());
    }
}
use of org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair in project ignite by apache.
the class KNNClassificationExample method main.
/**
 * Executes example.
 * <p>
 * Starts an Ignite node and, inside an {@link IgniteThread}, loads a labeled dataset from
 * {@code KNN_IRIS_TXT}, splits it into train and test parts, builds a kNN classifier on the train part,
 * labels the test part with it and prints the amount of errors, accuracy, confusion matrix and
 * per-class precision, recall and F-score.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> kNN classification example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
            KNNClassificationExample.class.getSimpleName(), () -> {
            try {
                // Prepare path to read
                File file = IgniteUtils.resolveIgnitePath(KNN_IRIS_TXT);

                if (file == null)
                    throw new RuntimeException("Can't find file: " + KNN_IRIS_TXT);

                Path path = file.toPath();

                // Read dataset from file
                LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, true, false);

                // Random splitting of iris data as 70% train and 30% test datasets
                LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.3);

                System.out.println("\n>>> Amount of observations in train dataset " + split.train().rowSize());
                System.out.println("\n>>> Amount of observations in test dataset " + split.test().rowSize());

                LabeledDataset test = split.test();
                LabeledDataset train = split.train();

                KNNModel knnMdl = new KNNModel(5, new EuclideanDistance(), KNNStrategy.SIMPLE, train);

                // Remember the original labels before the test dataset is relabelled with predictions.
                // NOTE(review): relies on labels() returning a copy rather than a live view - confirm.
                final double[] labels = test.labels();

                // Save predicted classes to test dataset
                LabellingMachine.assignLabels(test, knnMdl);

                // Calculate amount of errors on test dataset
                int amountOfErrors = 0;

                for (int i = 0; i < test.rowSize(); i++) {
                    if (test.label(i) != labels[i])
                        amountOfErrors++;
                }

                System.out.println("\n>>> Absolute amount of errors " + amountOfErrors);

                // Accuracy is the share of correctly classified observations, i.e. 1 - error rate.
                double errorRate = amountOfErrors / (double) test.rowSize();

                System.out.println("\n>>> Accuracy " + (1 - errorRate));

                // Build confusion matrix. See https://en.wikipedia.org/wiki/Confusion_matrix
                // Rows are indexed by the predicted label, columns by the original label.
                // The (label - 1) indexing into a 3x3 matrix assumes class labels are 1, 2 and 3.
                int[][] confusionMtx = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };

                for (int i = 0; i < test.rowSize(); i++) {
                    int idx1 = (int) test.label(i);
                    int idx2 = (int) labels[i];

                    confusionMtx[idx1 - 1][idx2 - 1]++;
                }

                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtx));

                // Calculate precision, recall and F-metric for each class
                for (int i = 0; i < 3; i++) {
                    // Precision: diagonal cell divided by the sum of row i (everything predicted as class i).
                    double precision = 0.0;

                    for (int j = 0; j < 3; j++)
                        precision += confusionMtx[i][j];

                    precision = confusionMtx[i][i] / precision;

                    double clsLb = (double) (i + 1);

                    System.out.println("\n>>> Precision for class " + clsLb + " is " + precision);

                    // Recall: diagonal cell divided by the sum of column i (everything originally of class i).
                    double recall = 0.0;

                    for (int j = 0; j < 3; j++)
                        recall += confusionMtx[j][i];

                    recall = confusionMtx[i][i] / recall;

                    System.out.println("\n>>> Recall for class " + clsLb + " is " + recall);

                    double fScore = 2 * precision * recall / (precision + recall);

                    System.out.println("\n>>> F-score for class " + clsLb + " is " + fScore);
                }
            }
            catch (IOException e) {
                e.printStackTrace();
                System.out.println("\n>>> Unexpected exception, check resources: " + e);
            }
            finally {
                System.out.println("\n>>> kNN classification example completed.");
            }
        });

        igniteThread.start();

        // Wait for the worker to finish before the try-with-resources block stops the node.
        igniteThread.join();
    }
}
use of org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair in project ignite by apache.
the class KNNRegressionExample method main.
/**
 * Executes example.
 * <p>
 * Starts an Ignite node and, inside an {@link IgniteThread}, loads a labeled dataset from
 * {@code KNN_CLEARED_MACHINES_TXT}, normalizes it, splits it into train and test parts, builds a
 * weighted kNN regression on the train part, labels the test part with it and prints MSE, MAE and R^2.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> kNN regression example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
            KNNRegressionExample.class.getSimpleName(), () -> {
            try {
                // Prepare path to read
                File file = IgniteUtils.resolveIgnitePath(KNN_CLEARED_MACHINES_TXT);

                if (file == null)
                    throw new RuntimeException("Can't find file: " + KNN_CLEARED_MACHINES_TXT);

                Path path = file.toPath();

                // Read dataset from file
                LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, false, false);

                // Normalize dataset
                Normalizer.normalizeWithMiniMax(dataset);

                // Random splitting of the dataset as 80% train and 20% test datasets
                LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.2);

                System.out.println("\n>>> Amount of observations in train dataset: " + split.train().rowSize());
                System.out.println("\n>>> Amount of observations in test dataset: " + split.test().rowSize());

                LabeledDataset test = split.test();
                LabeledDataset train = split.train();

                // Builds weighted kNN-regression with Manhattan Distance
                KNNMultipleLinearRegression knnMdl = new KNNMultipleLinearRegression(7, new ManhattanDistance(),
                    KNNStrategy.WEIGHTED, train);

                // Remember the original labels before the test dataset is relabelled with predictions.
                // NOTE(review): relies on labels() returning a copy rather than a live view - confirm.
                final double[] labels = test.labels();

                // Save predicted values to test dataset
                LabellingMachine.assignLabels(test, knnMdl);

                // Calculate mean squared error (MSE)
                double mse = 0.0;

                for (int i = 0; i < test.rowSize(); i++)
                    mse += Math.pow(test.label(i) - labels[i], 2.0);

                mse = mse / test.rowSize();

                System.out.println("\n>>> Mean squared error (MSE) " + mse);

                // Calculate mean absolute error (MAE)
                double mae = 0.0;

                for (int i = 0; i < test.rowSize(); i++)
                    mae += Math.abs(test.label(i) - labels[i]);

                mae = mae / test.rowSize();

                System.out.println("\n>>> Mean absolute error (MAE) " + mae);

                // Calculate R^2 as 1 - RSS/TSS.
                // TSS is the spread of the original labels around their own mean, so both the mean
                // and TSS are taken from the saved original labels, not from the predictions.
                double avg = 0.0;

                for (int i = 0; i < test.rowSize(); i++)
                    avg += labels[i];

                avg = avg / test.rowSize();

                double detCf = 0.0;
                double tss = 0.0;

                for (int i = 0; i < test.rowSize(); i++) {
                    // Residual sum of squares: prediction versus original label.
                    detCf += Math.pow(test.label(i) - labels[i], 2.0);
                    tss += Math.pow(labels[i] - avg, 2.0);
                }

                detCf = 1 - detCf / tss;

                System.out.println("\n>>> R^2 " + detCf);
            }
            catch (IOException e) {
                e.printStackTrace();
                System.out.println("\n>>> Unexpected exception, check resources: " + e);
            }
            finally {
                System.out.println("\n>>> kNN regression example completed.");
            }
        });

        igniteThread.start();

        // Wait for the worker to finish before the try-with-resources block stops the node.
        igniteThread.join();
    }
}
use of org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair in project ignite by apache.
the class IgniteKNNClassificationBenchmark method test.
/**
 * {@inheritDoc}
 * <p>
 * One iteration obtains a shuffled dataset, splits it into train and test parts, builds a kNN
 * classifier on the train part and applies it to every row of the test part. The work runs in an
 * {@link IgniteThread} and this method waits for it to finish.
 *
 * @return Always {@code true}.
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
    // because we create ignite cache internally.
    IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
        this.getClass().getSimpleName(), new Runnable() {
        /**
         * {@inheritDoc}
         */
        @Override
        public void run() {
            // IMPL NOTE originally taken from KNNClassificationExample.
            // Obtain shuffled dataset.
            LabeledDataset dataset = new Datasets().shuffleIris((int) (DataChanger.next()));

            // Random splitting of iris data as 70% train and 30% test datasets.
            LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.3);

            LabeledDataset test = split.test();
            LabeledDataset train = split.train();

            KNNModel knnMdl = new KNNModel(5, new EuclideanDistance(), KNNStrategy.SIMPLE, train);

            // Calculate predicted classes for every test row (the bound used to be rowSize() - 1,
            // which left the last row out).
            for (int i = 0; i < test.rowSize(); i++)
                knnMdl.apply(test.getRow(i).features());
        }
    });

    igniteThread.start();
    igniteThread.join();

    return true;
}
use of org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair in project ignite by apache.
the class IgniteKNNRegressionBenchmark method test.
/**
 * {@inheritDoc}
 * <p>
 * One iteration obtains a shuffled dataset, normalizes it, splits it into train and test parts,
 * builds a weighted kNN regression on the train part and applies it to every row of the test part.
 * The work runs in an {@link IgniteThread} and this method waits for it to finish.
 *
 * @return Always {@code true}.
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
    // because we create ignite cache internally.
    IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
        this.getClass().getSimpleName(), new Runnable() {
        /**
         * {@inheritDoc}
         */
        @Override
        public void run() {
            // IMPL NOTE originally taken from KNNRegressionExample.
            // Obtain shuffled dataset.
            LabeledDataset dataset = new Datasets().shuffleClearedMachines((int) (DataChanger.next()));

            // Normalize dataset
            Normalizer.normalizeWithMiniMax(dataset);

            // Random splitting of the dataset as 80% train and 20% test datasets.
            LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.2);

            LabeledDataset test = split.test();
            LabeledDataset train = split.train();

            // Builds weighted kNN-regression with Manhattan Distance.
            KNNModel knnMdl = new KNNMultipleLinearRegression(7, new ManhattanDistance(),
                KNNStrategy.WEIGHTED, train);

            // Calculate predicted values for every test row (the bound used to be rowSize() - 1,
            // which left the last row out).
            for (int i = 0; i < test.rowSize(); i++)
                knnMdl.apply(test.getRow(i).features());
        }
    });

    igniteThread.start();
    igniteThread.join();

    return true;
}
Aggregations