use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.
the class CustomersClusterizationExample method main.
/**
 * Entry point of the customers clusterization example.
 * <p>
 * Starts an Ignite node, loads the wholesale customers dataset into a cache, then for every
 * cluster count from 1 to 9 trains a KMeans model on the train part of an 80/20 split and
 * prints the mean label entropy measured on the test part. The cache is destroyed afterwards.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; presumably raised while the dataset is read - confirm against
 *      {@code SandboxMLCache}.
 */
public static void main(String[] args) throws IOException {
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> customers = null;

        try {
            System.out.println(">>> Fill dataset cache.");
            customers = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.WHOLESALE_CUSTOMERS);

            System.out.println(">>> Start training and scoring.");

            for (int k = 1; k <= 9; k++) {
                // Fixed RNG seed and iteration cap are the same for every cluster count.
                KMeansTrainer kMeans = new KMeansTrainer()
                    .withAmountOfClusters(k)
                    .withDistance(new EuclideanDistance())
                    .withEnvironmentBuilder(LearningEnvironmentBuilder.defaultBuilder().withRNGSeed(0))
                    .withMaxIterations(50);

                // Cache values are vectors already; the label is taken from the first coordinate.
                Vectorizer<Integer, Vector, Integer, Double> featureExtractor =
                    new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

                // 80% of the entries go to training, the remaining 20% to scoring.
                TrainTestSplit<Integer, Vector> trainTest = new TrainTestDatasetSplitter<Integer, Vector>().split(0.8);

                KMeansModel fitted = kMeans.fit(ignite, customers, trainTest.getTrainFilter(), featureExtractor);

                double meanEntropy = computeMeanEntropy(customers, trainTest.getTestFilter(), featureExtractor, fitted);

                String report = String.format(">> Clusters mean entropy [%d clusters]: %.2f", k, meanEntropy);
                System.out.println(report);
            }
        } finally {
            // The cache stays null when filling it failed, so there is nothing to destroy in that case.
            if (customers != null)
                customers.destroy();
        }
    } finally {
        System.out.flush();
    }
}
use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.
the class CustomersClusterizationExample method computeMeanEntropy.
/**
 * Computes the mean label entropy over the clusters predicted for the test entries.
 * <p>
 * Every cache entry accepted by {@code filter} is vectorized, assigned to a cluster by {@code mdl}
 * and counted per (cluster, label) pair, the label being truncated to {@code int}. For each cluster
 * that received at least one entry the entropy {@code -sum(p * ln(p))} of its label distribution is
 * computed; the result is the arithmetic mean of these entropies.
 *
 * @param cache Dataset cache.
 * @param filter Test dataset filter.
 * @param vectorizer Upstream vectorizer.
 * @param mdl KMeans model.
 * @return Mean entropy over the non-empty clusters; {@code NaN} when no entry passes the filter
 *      (the sum {@code 0.0} is divided by a cluster count of zero).
 */
private static double computeMeanEntropy(IgniteCache<Integer, Vector> cache, IgniteBiPredicate<Integer, Vector> filter, Vectorizer<Integer, Vector, Integer, Double> vectorizer, KMeansModel mdl) {
    // Cluster id -> (label -> number of test entries with that label assigned to the cluster).
    Map<Integer, Map<Integer, Integer>> lbCountsByCluster = new HashMap<>();

    try (QueryCursor<Cache.Entry<Integer, Vector>> cursor = cache.query(new ScanQuery<>(filter))) {
        for (Cache.Entry<Integer, Vector> ent : cursor) {
            LabeledVector<Double> vec = vectorizer.apply(ent.getKey(), ent.getValue());

            int cluster = mdl.predict(vec.features());
            int lb = vec.label().intValue();

            // One lookup creates the per-cluster map on demand, one more bumps the label counter.
            lbCountsByCluster.computeIfAbsent(cluster, c -> new HashMap<>()).merge(lb, 1, Integer::sum);
        }
    }

    double sumOfClusterEntropies = 0.0;

    for (Map<Integer, Integer> lbCounts : lbCountsByCluster.values()) {
        int sizeOfCluster = lbCounts.values().stream().mapToInt(Integer::intValue).sum();

        // Counts are widened to double first so the label probability is not an integer division.
        double entropyInCluster = lbCounts.values().stream()
            .mapToDouble(Integer::doubleValue)
            .map(lblsCount -> lblsCount / sizeOfCluster)
            .map(lblProb -> -lblProb * Math.log(lblProb))
            .sum();

        sumOfClusterEntropies += entropyInCluster;
    }

    return sumOfClusterEntropies / lbCountsByCluster.size();
}
use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.
the class KMeansClusterizationExample method main.
/**
 * Entry point of the KMeans clusterization example.
 * <p>
 * Starts an Ignite node, loads the two-classed iris dataset into a cache, trains a KMeans model with
 * a default trainer, prints the first two centroids and then prints, for every cached observation,
 * the predicted cluster next to the value stored at coordinate 0. The cache is destroyed afterwards.
 *
 * @param args Command line arguments (not used).
 * @throws IOException Declared by the example; presumably raised while the dataset is read - confirm against
 *      {@code SandboxMLCache}.
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> KMeans clustering algorithm over cached dataset usage example started.");

    // The node is stopped automatically when this block is left.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> irisCache = null;

        try {
            irisCache = new SandboxMLCache(ignite).fillCacheWith(MLSandboxDatasets.TWO_CLASSED_IRIS);

            // The label sits at the first coordinate of every cached vector.
            Vectorizer<Integer, Vector, Integer, Double> featureExtractor =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            KMeansModel fitted = new KMeansTrainer().fit(ignite, irisCache, featureExtractor);

            System.out.println(">>> KMeans centroids");
            Tracer.showAscii(fitted.centers()[0]);
            Tracer.showAscii(fitted.centers()[1]);
            System.out.println(">>>");

            System.out.println(">>> --------------------------------------------");
            System.out.println(">>> | Predicted cluster\t| Erased class label\t|");
            System.out.println(">>> --------------------------------------------");

            try (QueryCursor<Cache.Entry<Integer, Vector>> rows = irisCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> row : rows) {
                    Vector stored = row.getValue();

                    // Coordinate 0 holds the class label; the remaining coordinates are the features.
                    Vector features = stored.copyOfRange(1, stored.size());
                    double actualLb = stored.get(0);

                    // Kept as double on purpose: the value is printed with a floating point format.
                    double predictedCluster = fitted.predict(features);

                    System.out.printf(">>> | %.4f\t\t\t| %.4f\t\t|\n", predictedCluster, actualLb);
                }

                System.out.println(">>> ---------------------------------");
                System.out.println(">>> KMeans clustering algorithm over cached dataset usage example completed.");
            }
        } finally {
            // The cache stays null when filling it failed, so there is nothing to destroy in that case.
            if (irisCache != null)
                irisCache.destroy();
        }
    } finally {
        System.out.flush();
    }
}
use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.
the class KMeansFromSparkExample method main.
/**
 * Entry point of the example that applies a KMeans model parsed from a Spark model directory.
 * <p>
 * Starts an Ignite node, loads the Titanic passengers into a cache, parses the Spark KMeans model
 * found at {@code SPARK_MDL_PATH} and prints, for every cached passenger, the predicted cluster next
 * to the label produced by the vectorizer. The cache is destroyed afterwards.
 *
 * @param args Command line arguments (not used).
 * @throws FileNotFoundException Declared by the example; presumably raised when the passengers file is
 *      missing - confirm against {@code TitanicUtils.readPassengers}.
 */
public static void main(String[] args) throws FileNotFoundException {
    System.out.println();
    System.out.println(">>> K-means model loaded from Spark through serialization over partitioned dataset usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> dataCache = null;

        try {
            dataCache = TitanicUtils.readPassengers(ignite);

            // Vectorizer configured with coordinates 0, 5, 6, 4 and label coordinate 1; the label is printed as
            // "Is survived" below.
            final Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>(0, 5, 6, 4).labeled(1);

            KMeansModel mdl = (KMeansModel) SparkModelParser.parse(SPARK_MDL_PATH, SupportedSparkModels.KMEANS, env);

            System.out.println(">>> K-Means model: " + mdl);

            System.out.println(">>> ------------------------------------");
            System.out.println(">>> | Predicted cluster\t| Is survived\t|");
            System.out.println(">>> ------------------------------------");

            try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
                for (Cache.Entry<Integer, Vector> observation : observations) {
                    LabeledVector<Double> lv = vectorizer.apply(observation.getKey(), observation.getValue());
                    Vector inputs = lv.features();
                    double isSurvived = lv.label();
                    // Kept as double on purpose: the value is printed with a floating point format.
                    double clusterId = mdl.predict(inputs);

                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", clusterId, isSurvived);
                }
            }

            System.out.println(">>> ---------------------------------");
        } finally {
            // Loading the passengers may fail before the cache is assigned. An unconditional destroy() would
            // then throw a NullPointerException that replaces the original failure.
            if (dataCache != null)
                dataCache.destroy();
        }
    } finally {
        // Same as the sibling examples: make sure everything printed so far reaches the console.
        System.out.flush();
    }
}
use of org.apache.ignite.ml.clustering.kmeans.KMeansModel in project ignite by apache.
the class LocalModelsTest method importExportKMeansModelTest.
/**
 * Checks that a KMeans model saved through a file exporter and rebuilt from the loaded
 * format equals the model that was saved.
 *
 * @throws IOException Declared by the test; presumably raised by the temporary model file handling
 *      in {@code executeModelTest} - confirm.
 */
@Test
public void importExportKMeansModelTest() throws IOException {
    executeModelTest(mdlPath -> {
        KMeansModel savedMdl = getClusterModel();

        Exporter<KMeansModelFormat, String> fileExporter = new FileExporter<>();
        savedMdl.saveModel(fileExporter, mdlPath);

        // Read the stored format back and make sure something was actually loaded.
        KMeansModelFormat loadedFormat = fileExporter.load(mdlPath);
        Assert.assertNotNull(loadedFormat);

        // Rebuild the model from the loaded centers and distance measure.
        KMeansModel restoredMdl = new KMeansModel(loadedFormat.getCenters(), loadedFormat.getDistance());
        Assert.assertEquals("", savedMdl, restoredMdl);

        return null;
    });
}
Aggregations