Search in sources :

Example 1 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class KMeansDistributedClustererTest method testClusterizationOnDatasetWithObviousStructure.

/** */
@Test
public void testClusterizationOnDatasetWithObviousStructure() throws IOException {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
    int ptsCnt = 10000;
    int squareSideLen = 10000;
    Random rnd = new Random(123456L);
    // Let centers be in the vertices of square.
    Map<Integer, Vector> centers = new HashMap<>();
    centers.put(100, new DenseLocalOnHeapVector(new double[] { 0.0, 0.0 }));
    centers.put(900, new DenseLocalOnHeapVector(new double[] { squareSideLen, 0.0 }));
    centers.put(3000, new DenseLocalOnHeapVector(new double[] { 0.0, squareSideLen }));
    centers.put(6000, new DenseLocalOnHeapVector(new double[] { squareSideLen, squareSideLen }));
    int centersCnt = centers.size();
    SparseDistributedMatrix points = new SparseDistributedMatrix(ptsCnt, 2, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    List<Integer> permutation = IntStream.range(0, ptsCnt).boxed().collect(Collectors.toList());
    Collections.shuffle(permutation, rnd);
    Vector[] mc = new Vector[centersCnt];
    Arrays.fill(mc, VectorUtils.zeroes(2));
    int centIndex = 0;
    int totalCount = 0;
    List<Vector> massCenters = new ArrayList<>();
    for (Integer count : centers.keySet()) {
        for (int i = 0; i < count; i++) {
            DenseLocalOnHeapVector pnt = (DenseLocalOnHeapVector) new DenseLocalOnHeapVector(2).assign(centers.get(count));
            // pertrubate point on random value.
            pnt.map(val -> val + rnd.nextDouble() * squareSideLen / 100);
            mc[centIndex] = mc[centIndex].plus(pnt);
            points.assignRow(permutation.get(totalCount), pnt);
            totalCount++;
        }
        massCenters.add(mc[centIndex].times(1 / (double) count));
        centIndex++;
    }
    EuclideanDistance dist = new EuclideanDistance();
    OrderedNodesComparator comp = new OrderedNodesComparator(centers.values().toArray(new Vector[] {}), dist);
    massCenters.sort(comp);
    KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(dist, 3, 100, 1L);
    KMeansModel mdl = clusterer.cluster(points, 4);
    Vector[] resCenters = mdl.centers();
    Arrays.sort(resCenters, comp);
    checkIsInEpsilonNeighbourhood(resCenters, massCenters.toArray(new Vector[] {}), 30.0);
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Example 2 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class KMeansLocalClustererExample method main.

/**
 * Executes example.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) {
    // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure
    System.out.println(">>> K-means local clusterer example started.");
    int ptsCnt = 10000;
    DenseLocalOnHeapMatrix points = new DenseLocalOnHeapMatrix(ptsCnt, 2);
    DatasetWithObviousStructure dataset = new DatasetWithObviousStructure(10000);
    List<Vector> massCenters = dataset.generate(points);
    EuclideanDistance dist = new EuclideanDistance();
    OrderedNodesComparator comp = new OrderedNodesComparator(dataset.centers().values().toArray(new Vector[] {}), dist);
    massCenters.sort(comp);
    KMeansLocalClusterer clusterer = new KMeansLocalClusterer(dist, 100, 1L);
    KMeansModel mdl = clusterer.cluster(points, 4);
    Vector[] resCenters = mdl.centers();
    Arrays.sort(resCenters, comp);
    System.out.println("Mass centers:");
    massCenters.forEach(Tracer::showAscii);
    System.out.println("Cluster centers:");
    Arrays.asList(resCenters).forEach(Tracer::showAscii);
    System.out.println("\n>>> K-means local clusterer example completed.");
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KMeansModel(org.apache.ignite.ml.clustering.KMeansModel) Tracer(org.apache.ignite.ml.math.Tracer) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Vector(org.apache.ignite.ml.math.Vector) KMeansLocalClusterer(org.apache.ignite.ml.clustering.KMeansLocalClusterer)

Example 3 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class OffHeapVectorExample method main.

/**
 * Executes example.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) {
    System.out.println();
    System.out.println(">>> Off-heap vector API usage example started.");
    System.out.println("\n>>> Creating perpendicular off-heap vectors.");
    double[] data1 = new double[] { 1, 0, 3, 0, 5, 0 };
    double[] data2 = new double[] { 0, 2, 0, 4, 0, 6 };
    Vector v1 = new DenseLocalOffHeapVector(data1.length);
    Vector v2 = new DenseLocalOffHeapVector(data2.length);
    v1.assign(data1);
    v2.assign(data2);
    System.out.println(">>> First vector: " + Arrays.toString(data1));
    System.out.println(">>> Second vector: " + Arrays.toString(data2));
    double dotProduct = v1.dot(v2);
    boolean dotProductIsAsExp = dotProduct == 0;
    System.out.println("\n>>> Dot product of vectors: [" + dotProduct + "], it is 0 as expected: [" + dotProductIsAsExp + "].");
    assert dotProductIsAsExp : "Expect dot product of perpendicular vectors to be 0.";
    Vector hypotenuse = v1.plus(v2);
    System.out.println("\n>>> Hypotenuse (sum of vectors): " + Arrays.toString(hypotenuse.getStorage().data()));
    double lenSquared1 = v1.getLengthSquared();
    double lenSquared2 = v2.getLengthSquared();
    double lenSquaredHypotenuse = hypotenuse.getLengthSquared();
    boolean lenSquaredHypotenuseIsAsExp = lenSquaredHypotenuse == lenSquared1 + lenSquared2;
    System.out.println(">>> Squared length of first vector: [" + lenSquared1 + "].");
    System.out.println(">>> Squared length of second vector: [" + lenSquared2 + "].");
    System.out.println(">>> Squared length of hypotenuse: [" + lenSquaredHypotenuse + "], equals sum of squared lengths of two original vectors as expected: [" + lenSquaredHypotenuseIsAsExp + "].");
    assert lenSquaredHypotenuseIsAsExp : "Expect squared length of hypotenuse to be as per Pythagorean theorem.";
    System.out.println("\n>>> Off-heap vector API usage example completed.");
}
Also used : DenseLocalOffHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOffHeapVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOffHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOffHeapVector)

Example 4 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class DistributedLinearRegressionWithSGDTrainerExample method main.

/**
 * Run example.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println();
    System.out.println(">>> Linear regression model over sparse distributed matrix API usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
        // because we create ignite cache internally.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), SparseDistributedMatrixExample.class.getSimpleName(), () -> {
            // Create SparseDistributedMatrix, new cache will be created automagically.
            System.out.println(">>> Create new SparseDistributedMatrix inside IgniteThread.");
            SparseDistributedMatrix distributedMatrix = new SparseDistributedMatrix(data);
            System.out.println(">>> Create new linear regression trainer object.");
            Trainer<LinearRegressionModel, Matrix> trainer = new LinearRegressionSGDTrainer(100_000, 1e-12);
            System.out.println(">>> Perform the training to get the model.");
            LinearRegressionModel model = trainer.train(distributedMatrix);
            System.out.println(">>> Linear regression model: " + model);
            System.out.println(">>> ---------------------------------");
            System.out.println(">>> | Prediction\t| Ground Truth\t|");
            System.out.println(">>> ---------------------------------");
            for (double[] observation : data) {
                Vector inputs = new SparseDistributedVector(Arrays.copyOfRange(observation, 1, observation.length));
                double prediction = model.apply(inputs);
                double groundTruth = observation[0];
                System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth);
            }
            System.out.println(">>> ---------------------------------");
        });
        igniteThread.start();
        igniteThread.join();
    }
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) SparseDistributedMatrixExample(org.apache.ignite.examples.ml.math.matrix.SparseDistributedMatrixExample) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Matrix(org.apache.ignite.ml.math.Matrix) LinearRegressionModel(org.apache.ignite.ml.regressions.linear.LinearRegressionModel) LinearRegressionSGDTrainer(org.apache.ignite.ml.regressions.linear.LinearRegressionSGDTrainer) SparseDistributedVector(org.apache.ignite.ml.math.impls.vector.SparseDistributedVector) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) SparseDistributedVector(org.apache.ignite.ml.math.impls.vector.SparseDistributedVector) Vector(org.apache.ignite.ml.math.Vector)

Example 5 with Vector

use of org.apache.ignite.ml.math.Vector in project ignite by apache.

the class DecisionTreesExample method main.

/**
 * Launches example.
 *
 * @param args Program arguments.
 */
public static void main(String[] args) throws IOException {
    System.out.println(">>> Decision trees example started.");
    String igniteCfgPath;
    CommandLineParser parser = new BasicParser();
    String trainingImagesPath;
    String trainingLabelsPath;
    String testImagesPath;
    String testLabelsPath;
    Map<String, String> mnistPaths = new HashMap<>();
    mnistPaths.put(MNIST_TRAIN_IMAGES, "train-images-idx3-ubyte");
    mnistPaths.put(MNIST_TRAIN_LABELS, "train-labels-idx1-ubyte");
    mnistPaths.put(MNIST_TEST_IMAGES, "t10k-images-idx3-ubyte");
    mnistPaths.put(MNIST_TEST_LABELS, "t10k-labels-idx1-ubyte");
    try {
        // Parse the command line arguments.
        CommandLine line = parser.parse(buildOptions(), args);
        if (line.hasOption(MLExamplesCommonArgs.UNATTENDED)) {
            System.out.println(">>> Skipped example execution because 'unattended' mode is used.");
            System.out.println(">>> Decision trees example finished.");
            return;
        }
        igniteCfgPath = line.getOptionValue(CONFIG, DEFAULT_CONFIG);
    } catch (ParseException e) {
        e.printStackTrace();
        return;
    }
    if (!getMNIST(mnistPaths.values())) {
        System.out.println(">>> You should have MNIST dataset in " + MNIST_DIR + " to run this example.");
        return;
    }
    trainingImagesPath = Objects.requireNonNull(IgniteUtils.resolveIgnitePath(MNIST_DIR + "/" + mnistPaths.get(MNIST_TRAIN_IMAGES))).getPath();
    trainingLabelsPath = Objects.requireNonNull(IgniteUtils.resolveIgnitePath(MNIST_DIR + "/" + mnistPaths.get(MNIST_TRAIN_LABELS))).getPath();
    testImagesPath = Objects.requireNonNull(IgniteUtils.resolveIgnitePath(MNIST_DIR + "/" + mnistPaths.get(MNIST_TEST_IMAGES))).getPath();
    testLabelsPath = Objects.requireNonNull(IgniteUtils.resolveIgnitePath(MNIST_DIR + "/" + mnistPaths.get(MNIST_TEST_LABELS))).getPath();
    try (Ignite ignite = Ignition.start(igniteCfgPath)) {
        IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
        int ptsCnt = 60000;
        int featCnt = 28 * 28;
        Stream<DenseLocalOnHeapVector> trainingMnistStream = MnistUtils.mnist(trainingImagesPath, trainingLabelsPath, new Random(123L), ptsCnt);
        Stream<DenseLocalOnHeapVector> testMnistStream = MnistUtils.mnist(testImagesPath, testLabelsPath, new Random(123L), 10_000);
        IgniteCache<BiIndex, Double> cache = createBiIndexedCache(ignite);
        loadVectorsIntoBiIndexedCache(cache.getName(), trainingMnistStream.iterator(), featCnt + 1, ignite);
        ColumnDecisionTreeTrainer<GiniSplitCalculator.GiniData> trainer = new ColumnDecisionTreeTrainer<>(10, ContinuousSplitCalculators.GINI.apply(ignite), RegionCalculators.GINI, RegionCalculators.MOST_COMMON, ignite);
        System.out.println(">>> Training started");
        long before = System.currentTimeMillis();
        DecisionTreeModel mdl = trainer.train(new BiIndexedCacheColumnDecisionTreeTrainerInput(cache, new HashMap<>(), ptsCnt, featCnt));
        System.out.println(">>> Training finished in " + (System.currentTimeMillis() - before));
        IgniteTriFunction<Model<Vector, Double>, Stream<IgniteBiTuple<Vector, Double>>, Function<Double, Double>, Double> mse = Estimators.errorsPercentage();
        Double accuracy = mse.apply(mdl, testMnistStream.map(v -> new IgniteBiTuple<>(v.viewPart(0, featCnt), v.getX(featCnt))), Function.identity());
        System.out.println(">>> Errs percentage: " + accuracy);
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.out.println(">>> Decision trees example finished.");
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) URL(java.net.URL) Scanner(java.util.Scanner) Random(java.util.Random) BiIndex(org.apache.ignite.ml.trees.trainers.columnbased.BiIndex) ExampleNodeStartup(org.apache.ignite.examples.ExampleNodeStartup) Vector(org.apache.ignite.ml.math.Vector) Estimators(org.apache.ignite.ml.estimators.Estimators) Map(java.util.Map) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) Collection(java.util.Collection) IgniteTriFunction(org.apache.ignite.ml.math.functions.IgniteTriFunction) Collectors(java.util.stream.Collectors) IgniteCache(org.apache.ignite.IgniteCache) IgniteBiTuple(org.apache.ignite.lang.IgniteBiTuple) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) ParseException(org.apache.commons.cli.ParseException) RegionCalculators(org.apache.ignite.ml.trees.trainers.columnbased.regcalcs.RegionCalculators) NotNull(org.jetbrains.annotations.NotNull) DecisionTreeModel(org.apache.ignite.ml.trees.models.DecisionTreeModel) Model(org.apache.ignite.ml.Model) Options(org.apache.commons.cli.Options) HashMap(java.util.HashMap) Function(java.util.function.Function) GiniSplitCalculator(org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.GiniSplitCalculator) BiIndexedCacheColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.BiIndexedCacheColumnDecisionTreeTrainerInput) OptionBuilder(org.apache.commons.cli.OptionBuilder) CacheWriteSynchronizationMode(org.apache.ignite.cache.CacheWriteSynchronizationMode) IgniteUtils(org.apache.ignite.internal.util.IgniteUtils) BasicParser(org.apache.commons.cli.BasicParser) CommandLine(org.apache.commons.cli.CommandLine) MnistUtils(org.apache.ignite.ml.util.MnistUtils) Option(org.apache.commons.cli.Option) ReadableByteChannel(java.nio.channels.ReadableByteChannel) Iterator(java.util.Iterator) ContinuousSplitCalculators(org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.ContinuousSplitCalculators) CommandLineParser(org.apache.commons.cli.CommandLineParser) Channels(java.nio.channels.Channels) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Ignite(org.apache.ignite.Ignite) MLExamplesCommonArgs(org.apache.ignite.examples.ml.MLExamplesCommonArgs) Ignition(org.apache.ignite.Ignition) CacheConfiguration(org.apache.ignite.configuration.CacheConfiguration) IgniteDataStreamer(org.apache.ignite.IgniteDataStreamer) ColumnDecisionTreeTrainer(org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainer) HashMap(java.util.HashMap) IgniteBiTuple(org.apache.ignite.lang.IgniteBiTuple) DecisionTreeModel(org.apache.ignite.ml.trees.models.DecisionTreeModel) IgniteTriFunction(org.apache.ignite.ml.math.functions.IgniteTriFunction) Function(java.util.function.Function) Random(java.util.Random) Ignite(org.apache.ignite.Ignite) GZIPInputStream(java.util.zip.GZIPInputStream) Stream(java.util.stream.Stream) FileOutputStream(java.io.FileOutputStream) FileInputStream(java.io.FileInputStream) CommandLineParser(org.apache.commons.cli.CommandLineParser) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) ColumnDecisionTreeTrainer(org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainer) BiIndex(org.apache.ignite.ml.trees.trainers.columnbased.BiIndex) IOException(java.io.IOException) BiIndexedCacheColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.BiIndexedCacheColumnDecisionTreeTrainerInput) BasicParser(org.apache.commons.cli.BasicParser) CommandLine(org.apache.commons.cli.CommandLine) DecisionTreeModel(org.apache.ignite.ml.trees.models.DecisionTreeModel) Model(org.apache.ignite.ml.Model) ParseException(org.apache.commons.cli.ParseException) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Aggregations

Vector (org.apache.ignite.ml.math.Vector)116 DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)62 Test (org.junit.Test)29 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)20 DenseLocalOnHeapMatrix (org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix)20 Matrix (org.apache.ignite.ml.math.Matrix)19 SparseDistributedMatrix (org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix)17 Random (java.util.Random)12 ArrayList (java.util.ArrayList)11 DistanceMeasure (org.apache.ignite.ml.math.distances.DistanceMeasure)10 Arrays (java.util.Arrays)9 Ignite (org.apache.ignite.Ignite)9 SparseDistributedMatrixStorage (org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage)9 LabeledDataset (org.apache.ignite.ml.structures.LabeledDataset)9 UUID (java.util.UUID)8 Collections (java.util.Collections)7 List (java.util.List)7 MathIllegalArgumentException (org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException)7 DenseLocalOffHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOffHeapVector)7 HashMap (java.util.HashMap)6