Search in sources :

Example 11 with LabeledDataset

use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache.

the class LabeledDatasetTest method testFeatureNames.

/**
 * Checks that a feature name passed to the {@code LabeledDataset} constructor
 * can be read back by its column index.
 */
public void testFeatureNames() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
    double[][] mtx = new double[][] { { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 }, { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
    double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };
    String[] featureNames = new String[] { "x", "y" };
    final LabeledDataset dataset = new LabeledDataset(mtx, lbs, featureNames, false);
    // Expected value goes first so that a failure message reports "expected x but was ..." correctly.
    assertEquals("x", dataset.getFeatureName(0));
}
Also used : LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset)

Example 12 with LabeledDataset

use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache.

the class LabeledDatasetTest method testSplitting.

/**
 * Splits a six-row dataset with several proportions and checks the row counts
 * of the resulting test and train parts.
 */
public void testSplitting() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    final double[][] features = { { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 }, { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
    final double[] targets = { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };
    final LabeledDataset training = new LabeledDataset(features, targets);

    // Parallel tables: proportion passed to the splitter and the row counts expected for it.
    final double[] proportions = { 0.67, 0.65, 0.4, 0.3 };
    final int[] expTestRows = { 4, 3, 2, 1 };
    final int[] expTrainRows = { 2, 3, 4, 5 };

    for (int k = 0; k < proportions.length; k++) {
        final LabeledDatasetTestTrainPair pair = new LabeledDatasetTestTrainPair(training, proportions[k]);

        assertEquals(expTestRows[k], pair.test().rowSize());
        assertEquals(expTrainRows[k], pair.train().rowSize());
    }
}
Also used : LabeledDatasetTestTrainPair(org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset)

Example 13 with LabeledDataset

use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache.

the class KNNClassificationExample method main.

/**
 * Executes example: loads the dataset referenced by {@code KNN_IRIS_TXT}, splits it into
 * train and test parts, builds a kNN classifier on the train part and prints the error count,
 * accuracy, confusion matrix and per-class precision, recall and F-score for the test part.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the wait for the worker thread is interrupted.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> kNN classification example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // The work is done inside an IgniteThread bound to the started instance.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), KNNClassificationExample.class.getSimpleName(), () -> {
            try {
                // Prepare path to read
                File file = IgniteUtils.resolveIgnitePath(KNN_IRIS_TXT);
                if (file == null)
                    throw new RuntimeException("Can't find file: " + KNN_IRIS_TXT);
                Path path = file.toPath();
                // Read dataset from file
                LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, true, false);
                // Random splitting of iris data as 70% train and 30% test datasets
                LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.3);
                System.out.println("\n>>> Amount of observations in train dataset " + split.train().rowSize());
                System.out.println("\n>>> Amount of observations in test dataset " + split.test().rowSize());
                LabeledDataset test = split.test();
                LabeledDataset train = split.train();
                KNNModel knnMdl = new KNNModel(5, new EuclideanDistance(), KNNStrategy.SIMPLE, train);
                // Keep the true labels before they are overwritten with predictions below.
                // NOTE(review): assumes labels() returns a copy rather than a live view - confirm.
                final double[] labels = test.labels();
                // Save predicted classes to test dataset
                LabellingMachine.assignLabels(test, knnMdl);
                // Calculate amount of errors on test dataset
                int amountOfErrors = 0;
                for (int i = 0; i < test.rowSize(); i++) {
                    if (test.label(i) != labels[i])
                        amountOfErrors++;
                }
                System.out.println("\n>>> Absolute amount of errors " + amountOfErrors);
                // Accuracy is the share of correct predictions, i.e. one minus the error rate.
                System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double) test.rowSize()));
                // Build confusion matrix. See https://en.wikipedia.org/wiki/Confusion_matrix
                // Rows are indexed by predicted class, columns by true class; labels are 1-based.
                int[][] confusionMtx = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
                for (int i = 0; i < test.rowSize(); i++) {
                    int idx1 = (int) test.label(i);
                    int idx2 = (int) labels[i];
                    confusionMtx[idx1 - 1][idx2 - 1]++;
                }
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtx));
                // Calculate precision, recall and F-metric for each class
                for (int i = 0; i < 3; i++) {
                    // Precision: diagonal cell over the row sum (everything predicted as class i).
                    double precision = 0.0;
                    for (int j = 0; j < 3; j++) precision += confusionMtx[i][j];
                    precision = confusionMtx[i][i] / precision;
                    double clsLb = (double) (i + 1);
                    System.out.println("\n>>> Precision for class " + clsLb + " is " + precision);
                    // Recall: diagonal cell over the column sum (everything truly of class i).
                    double recall = 0.0;
                    for (int j = 0; j < 3; j++) recall += confusionMtx[j][i];
                    recall = confusionMtx[i][i] / recall;
                    System.out.println("\n>>> Recall for class " + clsLb + " is " + recall);
                    double fScore = 2 * precision * recall / (precision + recall);
                    System.out.println("\n>>> F-score for class " + clsLb + " is " + fScore);
                }
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("\n>>> Unexpected exception, check resources: " + e);
            } finally {
                System.out.println("\n>>> kNN classification example completed.");
            }
        });
        igniteThread.start();
        igniteThread.join();
    }
}
Also used : Path(java.nio.file.Path) IOException(java.io.IOException) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) LabeledDatasetTestTrainPair(org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair) KNNModel(org.apache.ignite.ml.knn.models.KNNModel) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) File(java.io.File)

Example 14 with LabeledDataset

use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache.

the class KNNRegressionExample method main.

/**
 * Executes example: loads the dataset referenced by {@code KNN_CLEARED_MACHINES_TXT}, normalizes it,
 * splits it into train and test parts, builds a weighted kNN regression on the train part and prints
 * MSE, MAE and R^2 for the test part.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the wait for the worker thread is interrupted.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> kNN regression example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // The work is done inside an IgniteThread bound to the started instance.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), KNNRegressionExample.class.getSimpleName(), () -> {
            try {
                // Prepare path to read
                File file = IgniteUtils.resolveIgnitePath(KNN_CLEARED_MACHINES_TXT);
                if (file == null)
                    throw new RuntimeException("Can't find file: " + KNN_CLEARED_MACHINES_TXT);
                Path path = file.toPath();
                // Read dataset from file
                LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, false, false);
                // Normalize dataset
                Normalizer.normalizeWithMiniMax(dataset);
                // Random splitting of the data as 80% train and 20% test datasets
                LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.2);
                System.out.println("\n>>> Amount of observations in train dataset: " + split.train().rowSize());
                System.out.println("\n>>> Amount of observations in test dataset: " + split.test().rowSize());
                LabeledDataset test = split.test();
                LabeledDataset train = split.train();
                // Builds weighted kNN-regression with Manhattan Distance
                KNNMultipleLinearRegression knnMdl = new KNNMultipleLinearRegression(7, new ManhattanDistance(), KNNStrategy.WEIGHTED, train);
                // Keep the observed values before they are overwritten with predictions below.
                // NOTE(review): assumes labels() returns a copy rather than a live view - confirm.
                final double[] labels = test.labels();
                // Save predicted values to test dataset
                LabellingMachine.assignLabels(test, knnMdl);
                // Calculate mean squared error (MSE)
                double mse = 0.0;
                for (int i = 0; i < test.rowSize(); i++) mse += Math.pow(test.label(i) - labels[i], 2.0);
                mse = mse / test.rowSize();
                System.out.println("\n>>> Mean squared error (MSE) " + mse);
                // Calculate mean absolute error (MAE)
                double mae = 0.0;
                for (int i = 0; i < test.rowSize(); i++) mae += Math.abs(test.label(i) - labels[i]);
                mae = mae / test.rowSize();
                System.out.println("\n>>> Mean absolute error (MAE) " + mae);
                // Calculate R^2 as 1 - RSS/TSS.
                // TSS measures the spread of the OBSERVED values around their own mean,
                // so both the mean and TSS are taken from the saved ground-truth labels.
                double avg = 0.0;
                for (int i = 0; i < test.rowSize(); i++) avg += labels[i];
                avg = avg / test.rowSize();
                double detCf = 0.0;
                double tss = 0.0;
                for (int i = 0; i < test.rowSize(); i++) {
                    // RSS term: squared difference between prediction and observation.
                    detCf += Math.pow(test.label(i) - labels[i], 2.0);
                    tss += Math.pow(labels[i] - avg, 2.0);
                }
                detCf = 1 - detCf / tss;
                System.out.println("\n>>> R^2 " + detCf);
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("\n>>> Unexpected exception, check resources: " + e);
            } finally {
                System.out.println("\n>>> kNN regression example completed.");
            }
        });
        igniteThread.start();
        igniteThread.join();
    }
}
Also used : Path(java.nio.file.Path) KNNMultipleLinearRegression(org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression) IOException(java.io.IOException) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) LabeledDatasetTestTrainPair(org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) File(java.io.File) ManhattanDistance(org.apache.ignite.ml.math.distances.ManhattanDistance)

Example 15 with LabeledDataset

use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache.

the class IgniteKNNClassificationBenchmark method test.

/**
 * {@inheritDoc}
 * <p>
 * Runs one benchmark iteration: builds a kNN classifier on the train part of a shuffled dataset
 * and applies it to every row of the test part. The work runs in an {@code IgniteThread} and this
 * method waits for it to finish.
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
    // because we create ignite cache internally.
    IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), this.getClass().getSimpleName(), new Runnable() {

        /**
         * {@inheritDoc}
         */
        @Override
        public void run() {
            // IMPL NOTE originally taken from KNNClassificationExample.
            // Obtain shuffled dataset.
            LabeledDataset dataset = new Datasets().shuffleIris((int) (DataChanger.next()));
            // Random splitting of iris data as 70% train and 30% test datasets.
            LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.3);
            LabeledDataset test = split.test();
            LabeledDataset train = split.train();
            KNNModel knnMdl = new KNNModel(5, new EuclideanDistance(), KNNStrategy.SIMPLE, train);
            // Calculate predicted classes for every test row (the previous bound skipped the last one).
            for (int i = 0; i < test.rowSize(); i++) knnMdl.apply(test.getRow(i).features());
        }
    });
    igniteThread.start();
    igniteThread.join();
    return true;
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) LabeledDatasetTestTrainPair(org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair) KNNModel(org.apache.ignite.ml.knn.models.KNNModel) IgniteThread(org.apache.ignite.thread.IgniteThread) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset)

Aggregations

LabeledDataset (org.apache.ignite.ml.structures.LabeledDataset): 25; EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance): 13; Vector (org.apache.ignite.ml.math.Vector): 12; KNNModel (org.apache.ignite.ml.knn.models.KNNModel): 10; DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector): 10; KNNMultipleLinearRegression (org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression): 6; LabeledDatasetTestTrainPair (org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair): 5; SparseBlockDistributedVector (org.apache.ignite.ml.math.impls.vector.SparseBlockDistributedVector): 4; IgniteThread (org.apache.ignite.thread.IgniteThread): 4; Path (java.nio.file.Path): 3; LabeledVector (org.apache.ignite.ml.structures.LabeledVector): 3; File (java.io.File): 2; IOException (java.io.IOException): 2; Ignite (org.apache.ignite.Ignite): 2; ManhattanDistance (org.apache.ignite.ml.math.distances.ManhattanDistance): 2; ArrayList (java.util.ArrayList): 1; KNNModelFormat (org.apache.ignite.ml.knn.models.KNNModelFormat): 1; NoDataException (org.apache.ignite.ml.math.exceptions.NoDataException): 1; EmptyFileException (org.apache.ignite.ml.math.exceptions.knn.EmptyFileException): 1; FileParsingException (org.apache.ignite.ml.math.exceptions.knn.FileParsingException): 1