Search in sources:

Example 6 with KMeansTrainer

use of org.apache.ignite.ml.clustering.kmeans.KMeansTrainer in project ignite by apache.

the class TrainingWithBinaryObjectExample method main.

/**
 * Runs the example: starts an Ignite node from the example configuration, obtains a cache of binary
 * objects from {@code populateCache}, trains a KMeans model over it with keep-binary enabled and prints
 * the resulting model.
 *
 * @param args Command line arguments (not used).
 */
public static void main(String[] args) {
    System.out.println();
    System.out.println(">>> Model training over cached dataset with binary objects usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteCache<Integer, BinaryObject> dataCache = null;
        try {
            dataCache = populateCache(ignite);
            // Create dataset builder with enabled support of keeping binary for upstream cache.
            CacheBasedDatasetBuilder<Integer, BinaryObject> datasetBuilder = new CacheBasedDatasetBuilder<>(ignite, dataCache).withKeepBinary(true);
            Vectorizer<Integer, BinaryObject, String, Double> vectorizer = new BinaryObjectVectorizer<Integer>("feature1").labeled("label");
            KMeansTrainer trainer = new KMeansTrainer();
            KMeansModel mdl = trainer.fit(datasetBuilder, vectorizer);
            System.out.println(">>> Model trained over binary objects. Model " + mdl);
        } finally {
            // populateCache may throw before the cache reference is assigned. Without this guard the
            // cleanup would raise a NullPointerException and hide the original failure.
            if (dataCache != null) {
                dataCache.destroy();
            }
        }
    } finally {
        System.out.flush();
    }
}
Also used : KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) BinaryObject(org.apache.ignite.binary.BinaryObject) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer) Ignite(org.apache.ignite.Ignite)

Example 7 with KMeansTrainer

use of org.apache.ignite.ml.clustering.kmeans.KMeansTrainer in project ignite by apache.

the class ANNClassificationTrainer method getCentroids.

/**
 * Trains a KMeans model on the supplied dataset and exposes the centers of the trained model as a list.
 * The trainer is configured from the enclosing instance's {@code k}, {@code maxIterations},
 * {@code distance} and {@code epsilon} values.
 *
 * @param vectorizer Upstream vectorizer.
 * @param datasetBuilder The dataset builder.
 * @param <K> Type of a key in {@code upstream} data.
 * @param <V> Type of a value in {@code upstream} data.
 * @param <C> Type parameter that is not referenced by the parameters or the body.
 * @return List view over the centers of the trained model.
 */
private <K, V, C extends Serializable> List<Vector> getCentroids(Preprocessor<K, V> vectorizer, DatasetBuilder<K, V> datasetBuilder) {
    KMeansTrainer kMeans = new KMeansTrainer()
        .withAmountOfClusters(k)
        .withMaxIterations(maxIterations)
        .withDistance(distance)
        .withEpsilon(epsilon);

    KMeansModel trained = kMeans.fit(datasetBuilder, vectorizer);

    return Arrays.asList(trained.centers());
}
Also used : KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer)

Example 8 with KMeansTrainer

use of org.apache.ignite.ml.clustering.kmeans.KMeansTrainer in project ignite by apache.

the class KMeansTrainerTest method testUpdateMdl.

/**
 * Checks that updating a fitted model, first on the same dataset and then on an empty one, yields
 * models whose predictions for two probe vectors match the original model's within {@code PRECISION}.
 */
@Test
public void testUpdateMdl() {
    // Probe points used to compare predictions of the original and the updated models.
    Vector positivePnt = new DenseVector(new double[] { 2.0, 2.0 });
    Vector negativePnt = new DenseVector(new double[] { -2.0, -2.0 });

    KMeansTrainer kMeans = createAndCheckTrainer();
    Vectorizer<Integer, double[], Integer, Double> extractor =
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST);

    KMeansModel baseMdl = kMeans.withAmountOfClusters(1)
        .fit(new LocalDatasetBuilder<>(data, parts), extractor);
    KMeansModel sameDataMdl = kMeans.update(baseMdl, new LocalDatasetBuilder<>(data, parts), extractor);
    KMeansModel emptyDataMdl = kMeans.update(baseMdl, new LocalDatasetBuilder<>(new HashMap<>(), parts), extractor);

    // Update on the same dataset.
    assertEquals(baseMdl.predict(positivePnt), sameDataMdl.predict(positivePnt), PRECISION);
    assertEquals(baseMdl.predict(negativePnt), sameDataMdl.predict(negativePnt), PRECISION);

    // Update on an empty dataset.
    assertEquals(baseMdl.predict(positivePnt), emptyDataMdl.predict(positivePnt), PRECISION);
    assertEquals(baseMdl.predict(negativePnt), emptyDataMdl.predict(negativePnt), PRECISION);
}
Also used : KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) HashMap(java.util.HashMap) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 9 with KMeansTrainer

use of org.apache.ignite.ml.clustering.kmeans.KMeansTrainer in project ignite by apache.

the class KMeansTrainerTest method findOneClusters.

/**
 * A few points, one cluster, one iteration: both probe vectors must be assigned to cluster 0, and the
 * trainer must still report the iteration limit and epsilon it was created with.
 */
@Test
public void findOneClusters() {
    KMeansTrainer trainer = createAndCheckTrainer();
    KMeansModel kMeansMdl = trainer.withAmountOfClusters(1).fit(new LocalDatasetBuilder<>(data, parts), new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST));
    // JUnit's assertEquals takes (expected, actual[, delta]); keeping that order makes failure
    // messages read correctly and matches the assertions in createAndCheckTrainer.
    Vector firstVector = new DenseVector(new double[] { 2.0, 2.0 });
    assertEquals(0.0, kMeansMdl.predict(firstVector), PRECISION);
    Vector secondVector = new DenseVector(new double[] { -2.0, -2.0 });
    assertEquals(0.0, kMeansMdl.predict(secondVector), PRECISION);
    assertEquals(1, trainer.getMaxIterations());
    assertEquals(PRECISION, trainer.getEpsilon(), PRECISION);
}
Also used : KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 10 with KMeansTrainer

use of org.apache.ignite.ml.clustering.kmeans.KMeansTrainer in project ignite by apache.

the class KMeansTrainerTest method createAndCheckTrainer.

/**
 * Builds a trainer configured with Euclidean distance, 10 clusters, one iteration and
 * {@code PRECISION} as epsilon, then verifies that the cluster count and the distance type were applied.
 *
 * @return The configured trainer.
 */
@NotNull
private KMeansTrainer createAndCheckTrainer() {
    KMeansTrainer kMeans = new KMeansTrainer()
        .withDistance(new EuclideanDistance())
        .withAmountOfClusters(10)
        .withMaxIterations(1)
        .withEpsilon(PRECISION);

    assertEquals(10, kMeans.getAmountOfClusters());
    assertTrue(kMeans.getDistance() instanceof EuclideanDistance);

    return kMeans;
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KMeansTrainer(org.apache.ignite.ml.clustering.kmeans.KMeansTrainer) NotNull(org.jetbrains.annotations.NotNull)

Aggregations

KMeansTrainer (org.apache.ignite.ml.clustering.kmeans.KMeansTrainer)10 KMeansModel (org.apache.ignite.ml.clustering.kmeans.KMeansModel)8 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)5 Ignite (org.apache.ignite.Ignite)4 SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache)3 Test (org.junit.Test)3 HashMap (java.util.HashMap)2 BinaryObject (org.apache.ignite.binary.BinaryObject)2 TrainerTest (org.apache.ignite.ml.common.TrainerTest)2 DoubleArrayVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer)2 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)2 DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)2 Path (java.nio.file.Path)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 Cache (javax.cache.Cache)1 IgniteCache (org.apache.ignite.IgniteCache)1 BinaryObjectVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.BinaryObjectVectorizer)1 WeightedMinkowskiDistance (org.apache.ignite.ml.math.distances.WeightedMinkowskiDistance)1 LabeledVector (org.apache.ignite.ml.structures.LabeledVector)1 GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest)1