Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class IgniteKNNRegressionBenchmark, method test.
/**
 * Runs the kNN-regression benchmark body on an Ignite thread.
 *
 * @param ctx Benchmark context (unused here).
 * @return {@code true} always; failures surface as exceptions.
 * @throws Exception If the worker thread is interrupted.
 * {@inheritDoc}
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // Create IgniteThread: we must work with SparseDistributedMatrix inside an IgniteThread
    // because an ignite cache is created internally.
    IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
        this.getClass().getSimpleName(), () -> {
        // IMPL NOTE originally taken from KNNRegressionExample.
        // Obtain shuffled dataset.
        LabeledDataset dataset = new Datasets().shuffleClearedMachines((int)(DataChanger.next()));

        // Normalize dataset.
        Normalizer.normalizeWithMiniMax(dataset);

        // Random splitting of the data as 80% train and 20% test datasets.
        LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.2);

        LabeledDataset test = split.test();
        LabeledDataset train = split.train();

        // Build weighted kNN-regression with Manhattan distance.
        KNNModel knnMdl = new KNNMultipleLinearRegression(7, new ManhattanDistance(), KNNStrategy.WEIGHTED, train);

        // Calculate predicted values for the test rows; results are intentionally discarded —
        // this is a throughput benchmark.
        // NOTE(review): the bound skips the last row (rowSize() - 1) — preserved from the
        // original; confirm whether the last row should be included.
        for (int i = 0; i < test.rowSize() - 1; i++)
            knnMdl.apply(test.getRow(i).features());
    });

    igniteThread.start();
    igniteThread.join();

    return true;
}
Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class LabeledDatasetLoader, method loadFromTxtFile.
/**
 * Loads a {@link LabeledDataset} from a text file.
 *
 * Datafile should keep class labels in the first column; the remaining columns are
 * parsed as feature values.
 *
 * @param pathToFile Path to file.
 * @param separator Element to tokenize row on separate tokens.
 * @param isDistributed Generates distributed dataset if true.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @return Labeled Dataset parsed from file.
 * @throws IOException If the file cannot be read.
 */
public static LabeledDataset loadFromTxtFile(Path pathToFile, String separator, boolean isDistributed,
    boolean isFallOnBadData) throws IOException {
    List<String> list = new ArrayList<>();

    // Files.lines holds an open file handle; close it with try-with-resources
    // (the original leaked the stream).
    try (Stream<String> stream = Files.lines(pathToFile)) {
        stream.forEach(list::add);
    }

    final int rowSize = list.size();

    List<Double> labels = new ArrayList<>();
    List<Vector> vectors = new ArrayList<>();

    if (rowSize > 0) {
        // First column is the label, so feature count is column count minus one.
        final int colSize = getColumnSize(separator, list) - 1;

        if (colSize > 0) {
            for (int i = 0; i < rowSize; i++) {
                Double clsLb;

                String[] rowData = list.get(i).split(separator);

                try {
                    clsLb = Double.parseDouble(rowData[0]);
                    Vector vec = parseFeatures(pathToFile, isDistributed, isFallOnBadData, colSize, i, rowData);
                    labels.add(clsLb);
                    vectors.add(vec);
                } catch (NumberFormatException e) {
                    // Rows with an unparsable label are skipped unless strict mode is on.
                    if (isFallOnBadData)
                        throw new FileParsingException(rowData[0], i, pathToFile);
                }
            }

            LabeledVector[] data = new LabeledVector[vectors.size()];
            for (int i = 0; i < vectors.size(); i++)
                data[i] = new LabeledVector(vectors.get(i), labels.get(i));

            return new LabeledDataset(data, colSize);
        } else
            throw new NoDataException("File should contain first row with data");
    } else
        throw new EmptyFileException(pathToFile.toString());
}
Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class Deltas, method fit.
/**
 * Trains model based on the specified data.
 *
 * @param datasetBuilder Dataset builder.
 * @param featureExtractor Feature extractor.
 * @param lbExtractor Label extractor.
 * @param cols Number of columns.
 * @return Model.
 */
@Override
public SVMLinearBinaryClassificationModel fit(DatasetBuilder<K, V> datasetBuilder,
    IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor, int cols) {
    assert datasetBuilder != null;

    PartitionDataBuilder<K, V, SVMPartitionContext, LabeledDataset<Double, LabeledVector>> partDataBuilder =
        new SVMPartitionDataBuilderOnHeap<>(featureExtractor, lbExtractor, cols);

    Vector weightVec;

    try (Dataset<SVMPartitionContext, LabeledDataset<Double, LabeledVector>> dataset =
        datasetBuilder.build((upstream, upstreamSize) -> new SVMPartitionContext(), partDataBuilder)) {
        // One extra component for the intercept term.
        final int weightVectorSizeWithIntercept = cols + 1;

        weightVec = initializeWeightsWithZeros(weightVectorSizeWithIntercept);

        final int iterations = this.getAmountOfIterations();

        for (int iter = 0; iter < iterations; iter++) {
            Vector update = calculateUpdates(weightVec, dataset);

            // plus() produces a new vector; weights stay immutable per iteration.
            weightVec = weightVec.plus(update);
        }
    } catch (Exception e) {
        // Dataset close() is declared to throw Exception; rethrow unchecked at this boundary.
        throw new RuntimeException(e);
    }

    // Component 0 holds the intercept; the remaining components are feature weights.
    return new SVMLinearBinaryClassificationModel(weightVec.viewPart(1, weightVec.size() - 1), weightVec.get(0));
}
Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class KNNClassificationTest, method testBinaryClassificationTest.
/**
 * Checks simple-strategy kNN binary classification on a tiny two-cluster dataset.
 */
public void testBinaryClassificationTest() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    // Two clusters: three points near (1, 1) labeled 1.0, three near (-1, -1) labeled 2.0.
    double[][] mtx = new double[][] { { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 }, { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
    double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };

    LabeledDataset training = new LabeledDataset(mtx, lbs);

    KNNModel knnMdl = new KNNModel(3, new EuclideanDistance(), KNNStrategy.SIMPLE, training);

    // assertEquals takes (expected, actual, delta); the original had actual first and
    // relied on boxed-Double Object comparison.
    Vector firstVector = new DenseLocalOnHeapVector(new double[] { 2.0, 2.0 });
    assertEquals(1.0, knnMdl.apply(firstVector), 0);

    Vector secondVector = new DenseLocalOnHeapVector(new double[] { -2.0, -2.0 });
    assertEquals(2.0, knnMdl.apply(secondVector), 0);
}
Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class KNNClassificationTest, method testBinaryClassificationFarPointsWithWeightedStrategy.
/**
 * Checks that the weighted strategy lets a single very close neighbor (label 1.0)
 * outvote two nearby neighbors of the other class.
 */
public void testBinaryClassificationFarPointsWithWeightedStrategy() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    // Class 1.0 includes two far points and one point at (-1, -1) next to the query.
    double[][] mtx = new double[][] { { 10.0, 10.0 }, { 10.0, 20.0 }, { -1, -1 }, { -2, -2 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
    double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };

    LabeledDataset training = new LabeledDataset(mtx, lbs);

    KNNModel knnMdl = new KNNModel(3, new EuclideanDistance(), KNNStrategy.WEIGHTED, training);

    // assertEquals takes (expected, actual, delta); the original had actual first and
    // relied on boxed-Double Object comparison.
    Vector vector = new DenseLocalOnHeapVector(new double[] { -1.01, -1.01 });
    assertEquals(1.0, knnMdl.apply(vector), 0);
}
Aggregations