Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.
Class KMeansLocalClustererExample, method main.
/**
 * Runs the local K-means example: generates a synthetic 2-column dataset, clusters it into four groups and
 * prints both the generated mass centers and the centers found by the clusterer.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) {
    // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure
    System.out.println(">>> K-means local clusterer example started.");

    int pointCount = 10000;
    DenseLocalOnHeapMatrix data = new DenseLocalOnHeapMatrix(pointCount, 2);

    // Fill the matrix and remember the mass centers reported by the generator.
    DatasetWithObviousStructure generator = new DatasetWithObviousStructure(10000);
    List<Vector> expectedCenters = generator.generate(data);

    EuclideanDistance metric = new EuclideanDistance();

    // The same comparator orders both lists of centers so they are printed in a comparable order.
    Vector[] referenceCenters = generator.centers().values().toArray(new Vector[0]);
    OrderedNodesComparator centerOrder = new OrderedNodesComparator(referenceCenters, metric);
    massCentersSortPlaceholder:
    expectedCenters.sort(centerOrder);

    KMeansModel model = new KMeansLocalClusterer(metric, 100, 1L).cluster(data, 4);
    Vector[] foundCenters = model.centers();
    Arrays.sort(foundCenters, centerOrder);

    System.out.println("Mass centers:");
    for (Vector center : expectedCenters)
        Tracer.showAscii(center);

    System.out.println("Cluster centers:");
    for (Vector center : foundCenters)
        Tracer.showAscii(center);

    System.out.println("\n>>> K-means local clusterer example completed.");
}
Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.
Class KNNClassificationExample, method main.
/**
 * Runs the kNN classification example: starts an Ignite node, loads the dataset from {@code KNN_IRIS_TXT},
 * splits it into train and test parts, classifies the test part with a 5-nearest-neighbours model and prints
 * the error count, accuracy, confusion matrix and per-class precision, recall and F-score.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> kNN classification example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), KNNClassificationExample.class.getSimpleName(), () -> {
            try {
                // Prepare path to read
                File file = IgniteUtils.resolveIgnitePath(KNN_IRIS_TXT);
                if (file == null)
                    throw new RuntimeException("Can't find file: " + KNN_IRIS_TXT);
                Path path = file.toPath();
                // Read dataset from file
                LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, true, false);
                // Random splitting of iris data as 70% train and 30% test datasets
                LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.3);
                System.out.println("\n>>> Amount of observations in train dataset " + split.train().rowSize());
                System.out.println("\n>>> Amount of observations in test dataset " + split.test().rowSize());
                LabeledDataset test = split.test();
                LabeledDataset train = split.train();
                KNNModel knnMdl = new KNNModel(5, new EuclideanDistance(), KNNStrategy.SIMPLE, train);
                // Remember the true labels before they are overwritten with predictions below.
                // NOTE(review): this relies on labels() returning a copy rather than a live view - confirm.
                final double[] labels = test.labels();
                // Save predicted classes to test dataset
                LabellingMachine.assignLabels(test, knnMdl);
                // Calculate amount of errors on test dataset
                int testSize = test.rowSize();
                int amountOfErrors = 0;
                for (int i = 0; i < testSize; i++) {
                    if (test.label(i) != labels[i])
                        amountOfErrors++;
                }
                System.out.println("\n>>> Absolute amount of errors " + amountOfErrors);
                // Accuracy is the share of correct predictions, i.e. one minus the error rate.
                double errorRate = amountOfErrors / (double) testSize;
                System.out.println("\n>>> Accuracy " + (1 - errorRate));
                // Build confusion matrix. See https://en.wikipedia.org/wiki/Confusion_matrix
                // Rows are indexed by predicted label, columns by true label; labels are assumed to be 1, 2 or 3.
                int[][] confusionMtx = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
                for (int i = 0; i < testSize; i++) {
                    int idx1 = (int) test.label(i);
                    int idx2 = (int) labels[i];
                    confusionMtx[idx1 - 1][idx2 - 1]++;
                }
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtx));
                // Calculate precision, recall and F-metric for each class
                for (int i = 0; i < 3; i++) {
                    // Precision: correct predictions of class i over everything predicted as class i (row sum).
                    double precision = 0.0;
                    for (int j = 0; j < 3; j++)
                        precision += confusionMtx[i][j];
                    precision = confusionMtx[i][i] / precision;
                    double clsLb = (double) (i + 1);
                    System.out.println("\n>>> Precision for class " + clsLb + " is " + precision);
                    // Recall: correct predictions of class i over everything that truly is class i (column sum).
                    double recall = 0.0;
                    for (int j = 0; j < 3; j++)
                        recall += confusionMtx[j][i];
                    recall = confusionMtx[i][i] / recall;
                    System.out.println("\n>>> Recall for class " + clsLb + " is " + recall);
                    double fScore = 2 * precision * recall / (precision + recall);
                    System.out.println("\n>>> F-score for class " + clsLb + " is " + fScore);
                }
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("\n>>> Unexpected exception, check resources: " + e);
            } finally {
                System.out.println("\n>>> kNN classification example completed.");
            }
        });
        igniteThread.start();
        igniteThread.join();
    }
}
Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.
Class FuzzyCMeansExample, method main.
/**
 * Runs the Fuzzy C-Means example: starts an Ignite node, stores 16 sample points in a distributed matrix,
 * clusters them into four groups and prints the resulting cluster centers.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> Fuzzy C-Means usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // Start new Ignite thread.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), FuzzyCMeansExample.class.getSimpleName(), () -> {
            // Distance measure that computes distance between two points.
            DistanceMeasure distanceMeasure = new EuclideanDistance();
            // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
            double exponentialWeight = 2.0;
            // Condition that indicates when the algorithm must stop.
            // In this example the algorithm stops if memberships have changed insignificantly.
            BaseFuzzyCMeansClusterer.StopCondition stopCond = BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
            // Maximum difference between new and old membership values with which the algorithm will continue to work.
            double maxDelta = 0.01;
            // The maximum number of FCM iterations.
            int maxIterations = 50;
            // Value that is used to initialize the random numbers generator. You can choose it randomly.
            Long seed = null;
            // Number of steps of primary centers selection (more steps, more candidates).
            int initializationSteps = 2;
            // Number of K-Means iterations used to choose the required number of primary centers from candidates.
            int kMeansMaxIterations = 50;
            // Create new distributed clusterer with parameters described above.
            System.out.println(">>> Create new Distributed Fuzzy C-Means clusterer.");
            FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(distanceMeasure, exponentialWeight, stopCond, maxDelta, maxIterations, seed, initializationSteps, kMeansMaxIterations);
            // Create sample data.
            double[][] points = new double[][] { { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 }, { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 }, { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 }, { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 } };
            // Initialize matrix of data points. Each row contains one point.
            int rows = points.length;
            int cols = points[0].length;
            System.out.println(">>> Create the matrix that contains sample points.");
            SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
            try {
                // Store points into matrix.
                pntMatrix.assign(points);
                // Call clusterization method with some number of centers.
                // It returns model that can predict results for new points.
                System.out.println(">>> Perform clusterization.");
                int numCenters = 4;
                FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);
                // You can also get the centers of clusters computed by the Fuzzy C-Means algorithm.
                Vector[] centers = mdl.centers();
                String res = ">>> Results:\n" + ">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n" + ">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n" + ">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n" + ">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n";
                System.out.println(res);
            }
            finally {
                // Destroy the matrix even if filling or clustering fails, so it is not left behind.
                pntMatrix.destroy();
            }
        });
        igniteThread.start();
        igniteThread.join();
    }
}
Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.
Class LocalModelsTest, method importExportKNNModelTest.
/**
 * Checks that a KNN model saved through a {@code FileExporter} and rebuilt from the loaded
 * {@code KNNModelFormat} is equal to the original model.
 *
 * @throws IOException Declared for the model file handling done by {@code executeModelTest}.
 */
@Test
public void importExportKNNModelTest() throws IOException {
    executeModelTest(mdlFilePath -> {
        // Six 2-D training points: three labelled 1.0 and three labelled 2.0.
        double[][] mtx = new double[][] { { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 }, { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
        double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };
        LabeledDataset training = new LabeledDataset(mtx, lbs);
        KNNModel mdl = new KNNModel(3, new EuclideanDistance(), KNNStrategy.SIMPLE, training);
        Exporter<KNNModelFormat, String> exporter = new FileExporter<>();
        mdl.saveModel(exporter, mdlFilePath);
        KNNModelFormat load = exporter.load(mdlFilePath);
        Assert.assertNotNull("Exporter returned no model for path " + mdlFilePath, load);
        // Rebuild the model from every field of the loaded format and compare it with the source model.
        KNNModel importedMdl = new KNNModel(load.getK(), load.getDistanceMeasure(), load.getStgy(), load.getTraining());
        Assert.assertEquals("Imported KNN model differs from the exported one", mdl, importedMdl);
        return null;
    });
}
Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.
Class FuzzyCMeansDistributedClustererTest, method testTwoDimensionsLittleData.
/**
 * Test that algorithm gives correct results on a small sample - 4 centers on the plane.
 * <p>
 * Sixteen points are placed in four groups around (-10,-10), (10,-10), (10,10) and (-10,10); each computed
 * center must lie within distance 1 of the corresponding point.
 */
public void testTwoDimensionsLittleData() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
    FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS, 0.01, 500, null, 2, 50);
    double[][] points = new double[][] { { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 }, { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 }, { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 }, { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 } };
    // Matrix dimensions follow the sample array, so the two cannot drift apart.
    SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(points.length, points[0].length, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    try {
        for (int i = 0; i < points.length; i++)
            pntMatrix.setRow(i, points[i]);
        FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, 4);
        Vector[] centers = mdl.centers();
        // Order centers by polar angle (-pi..pi) so they line up with the expected points below.
        Arrays.sort(centers, Comparator.comparing(vector -> Math.atan2(vector.get(1), vector.get(0))));
        DistanceMeasure measure = mdl.distanceMeasure();
        assertEquals(0, measure.compute(centers[0], new DenseLocalOnHeapVector(new double[] { -10, -10 })), 1);
        assertEquals(0, measure.compute(centers[1], new DenseLocalOnHeapVector(new double[] { 10, -10 })), 1);
        assertEquals(0, measure.compute(centers[2], new DenseLocalOnHeapVector(new double[] { 10, 10 })), 1);
        assertEquals(0, measure.compute(centers[3], new DenseLocalOnHeapVector(new double[] { -10, 10 })), 1);
    }
    finally {
        // Destroy the matrix even when clustering or an assertion fails.
        pntMatrix.destroy();
    }
}
Aggregations