
Example 6 with LabeledVector

Use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.

The class MnistDistributed, method loadIntoCache.

/**
 * Load MNIST into cache.
 *
 * @param trainingMnistLst List with MNIST data.
 * @param labeledVectorsCache Cache to load MNIST into.
 */
private void loadIntoCache(List<DenseLocalOnHeapVector> trainingMnistLst, IgniteCache<Integer, LabeledVector<Vector, Vector>> labeledVectorsCache) {
    String cacheName = labeledVectorsCache.getName();
    try (IgniteDataStreamer<Integer, LabeledVector<Vector, Vector>> streamer = ignite.dataStreamer(cacheName)) {
        int sampleIdx = 0;
        streamer.perNodeBufferSize(10000);
        for (DenseLocalOnHeapVector vector : trainingMnistLst) {
            streamer.addData(sampleIdx, asLabeledVector(vector, FEATURES_CNT));
            if (sampleIdx % 5000 == 0)
                X.println("Loaded " + sampleIdx + " samples.");
            sampleIdx++;
        }
    }
}
Also used: LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)
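The snippet delegates to an asLabeledVector helper that is not shown above. A minimal sketch of what such a helper could look like, under the assumption (not confirmed by the listing) that each source vector holds FEATURES_CNT pixel values followed by the digit label as its last entry, and that the label is one-hot encoded into a 10-element output vector:

/**
 * Hypothetical reconstruction of the helper referenced above; the actual Ignite test utility may differ.
 *
 * @param vec Source vector assumed to hold featsCnt features followed by the digit label.
 * @param featsCnt Number of feature entries.
 */
private static LabeledVector<Vector, Vector> asLabeledVector(DenseLocalOnHeapVector vec, int featsCnt) {
    Vector features = new DenseLocalOnHeapVector(featsCnt);
    for (int i = 0; i < featsCnt; i++)
        features.set(i, vec.get(i));
    // One-hot encode the digit label (0-9) so it matches a 10-output network.
    Vector lb = new DenseLocalOnHeapVector(10);
    lb.set((int)vec.get(featsCnt), 1.0);
    return new LabeledVector<>(features, lb);
}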

Example 7 with LabeledVector

Use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.

The class LabeledDatasetTest, method testAccessMethods.

/**
 * Tests LabeledDataset access methods: colSize, rowSize, getRow and setLabel.
 */
public void testAccessMethods() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
    double[][] mtx = new double[][] { { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 }, { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
    double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };
    final LabeledDataset dataset = new LabeledDataset(mtx, lbs, null, false);
    assertEquals(dataset.colSize(), 2);
    assertEquals(dataset.rowSize(), 6);
    final LabeledVector<Vector, Double> row = (LabeledVector<Vector, Double>) dataset.getRow(0);
    assertEquals(row.features().get(0), 1.0);
    assertEquals(row.label(), 1.0);
    dataset.setLabel(0, 2.0);
    assertEquals(row.label(), 2.0);
}
Also used: LabeledVector(org.apache.ignite.ml.structures.LabeledVector) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) Vector(org.apache.ignite.ml.math.Vector)
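The test exercises only the LabeledVector constructor and its features()/label() accessors. A minimal standalone sketch of that API, using DenseLocalOnHeapVector for the feature vector:

Vector features = new DenseLocalOnHeapVector(new double[] { 1.0, 2.0 });
LabeledVector<Vector, Double> point = new LabeledVector<>(features, 1.0);
// Read the feature values and the label back.
double firstFeature = point.features().get(0); // 1.0
double lb = point.label(); // 1.0

Note that the test also shows getRow(0) returning a live view into the dataset: after dataset.setLabel(0, 2.0), the previously fetched row reports the new label.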

Example 8 with LabeledVector

Use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.

The class MLPGroupTrainerExample, method main.

/**
 * Executes example.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) throws InterruptedException {
    // IMPL NOTE based on MLPGroupTrainerTest#testXOR
    System.out.println(">>> Distributed multilayer perceptron example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // Create an IgniteThread: we must work with SparseDistributedMatrix inside an IgniteThread
        // because we create an Ignite cache internally.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), MLPGroupTrainerExample.class.getSimpleName(), () -> {
            int samplesCnt = 10000;
            Matrix xorInputs = new DenseLocalOnHeapMatrix(new double[][] { { 0.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 1.0, 1.0 } }, StorageConstants.ROW_STORAGE_MODE).transpose();
            Matrix xorOutputs = new DenseLocalOnHeapMatrix(new double[][] { { 0.0 }, { 1.0 }, { 1.0 }, { 0.0 } }, StorageConstants.ROW_STORAGE_MODE).transpose();
            MLPArchitecture conf = new MLPArchitecture(2).withAddedLayer(10, true, Activators.RELU).withAddedLayer(1, false, Activators.SIGMOID);
            IgniteCache<Integer, LabeledVector<Vector, Vector>> cache = LabeledVectorsCache.createNew(ignite);
            String cacheName = cache.getName();
            Random rnd = new Random(12345L);
            try (IgniteDataStreamer<Integer, LabeledVector<Vector, Vector>> streamer = ignite.dataStreamer(cacheName)) {
                streamer.perNodeBufferSize(100);
                for (int i = 0; i < samplesCnt; i++) {
                    int col = rnd.nextInt(4);
                    streamer.addData(i, new LabeledVector<>(xorInputs.getCol(col), xorOutputs.getCol(col)));
                }
            }
            int totalCnt = 100;
            int failCnt = 0;
            MLPGroupUpdateTrainer<RPropParameterUpdate> trainer = MLPGroupUpdateTrainer.getDefault(ignite).withSyncPeriod(3).withTolerance(0.001).withMaxGlobalSteps(20);
            for (int i = 0; i < totalCnt; i++) {
                MLPGroupUpdateTrainerCacheInput trainerInput = new MLPGroupUpdateTrainerCacheInput(conf, new RandomInitializer(rnd), 6, cache, 10);
                MultilayerPerceptron mlp = trainer.train(trainerInput);
                Matrix predict = mlp.apply(xorInputs);
                System.out.println(">>> Prediction data at step " + i + " of total " + totalCnt + ":");
                Tracer.showAscii(predict);
                System.out.println("Difference estimate: " + xorOutputs.getRow(0).minus(predict.getRow(0)).kNorm(2));
                failCnt += closeEnough(xorOutputs.getRow(0), predict.getRow(0)) ? 0 : 1;
            }
            double failRatio = (double) failCnt / totalCnt;
            System.out.println("\n>>> Fail percentage: " + (failRatio * 100) + "%.");
            System.out.println("\n>>> Distributed multilayer perceptron example completed.");
        });
        igniteThread.start();
        igniteThread.join();
    }
}
Also used: MLPArchitecture(org.apache.ignite.ml.nn.architecture.MLPArchitecture) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) MultilayerPerceptron(org.apache.ignite.ml.nn.MultilayerPerceptron) Matrix(org.apache.ignite.ml.math.Matrix) Random(java.util.Random) RPropParameterUpdate(org.apache.ignite.ml.optimization.updatecalculators.RPropParameterUpdate) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) RandomInitializer(org.apache.ignite.ml.nn.initializers.RandomInitializer) MLPGroupUpdateTrainerCacheInput(org.apache.ignite.ml.nn.MLPGroupUpdateTrainerCacheInput)
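The loop above relies on a closeEnough helper that is not part of the listing. A plausible sketch, assuming it only checks that the prediction row agrees with the expected row within a tolerance (the 0.5 threshold here is an assumption, not taken from the Ignite example):

/** Hypothetical helper: treats a prediction as correct if it lies within tolerance of the expected row. */
private static boolean closeEnough(Vector expected, Vector actual) {
    return expected.minus(actual).kNorm(2) < 0.5;
}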

Example 9 with LabeledVector

Use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.

The class LabeledDatasetLoader, method loadFromTxtFile.

/**
 * The data file should keep class labels in the first column.
 *
 * @param pathToFile Path to file.
 * @param separator Separator used to split each row into tokens.
 * @param isDistributed Generates a distributed dataset if true.
 * @param isFallOnBadData Fail on incorrect data if true.
 * @return Labeled dataset parsed from the file.
 */
public static LabeledDataset loadFromTxtFile(Path pathToFile, String separator, boolean isDistributed, boolean isFallOnBadData) throws IOException {
    List<String> list = new ArrayList<>();
    try (Stream<String> stream = Files.lines(pathToFile)) {
        stream.forEach(list::add);
    }
    final int rowSize = list.size();
    List<Double> labels = new ArrayList<>();
    List<Vector> vectors = new ArrayList<>();
    if (rowSize > 0) {
        final int colSize = getColumnSize(separator, list) - 1;
        if (colSize > 0) {
            for (int i = 0; i < rowSize; i++) {
                Double clsLb;
                String[] rowData = list.get(i).split(separator);
                try {
                    clsLb = Double.parseDouble(rowData[0]);
                    Vector vec = parseFeatures(pathToFile, isDistributed, isFallOnBadData, colSize, i, rowData);
                    labels.add(clsLb);
                    vectors.add(vec);
                } catch (NumberFormatException e) {
                    if (isFallOnBadData)
                        throw new FileParsingException(rowData[0], i, pathToFile);
                }
            }
            LabeledVector[] data = new LabeledVector[vectors.size()];
            for (int i = 0; i < vectors.size(); i++) data[i] = new LabeledVector(vectors.get(i), labels.get(i));
            return new LabeledDataset(data, colSize);
        } else
            throw new NoDataException("File should contain first row with data");
    } else
        throw new EmptyFileException(pathToFile.toString());
}
Also used: FileParsingException(org.apache.ignite.ml.math.exceptions.knn.FileParsingException) ArrayList(java.util.ArrayList) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) EmptyFileException(org.apache.ignite.ml.math.exceptions.knn.EmptyFileException) NoDataException(org.apache.ignite.ml.math.exceptions.NoDataException) Vector(org.apache.ignite.ml.math.Vector)
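A minimal usage sketch for this loader, assuming a hypothetical local file data/iris.txt whose rows look like "1.0,5.1,3.5,1.4,0.2" with the class label in the first column; Path comes from java.nio.file, and the call declares IOException, so it belongs in a method that throws or handles it:

// Non-distributed dataset; fail on rows that cannot be parsed.
Path path = Paths.get("data/iris.txt");
LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, ",", false, true);
System.out.println("Rows: " + dataset.rowSize() + ", columns: " + dataset.colSize());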

Example 10 with LabeledVector

Use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.

The class Deltas, method fit.

/**
 * Trains model based on the specified data.
 *
 * @param datasetBuilder   Dataset builder.
 * @param featureExtractor Feature extractor.
 * @param lbExtractor      Label extractor.
 * @param cols             Number of columns.
 * @return Model.
 */
@Override
public SVMLinearBinaryClassificationModel fit(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor, int cols) {
    assert datasetBuilder != null;
    PartitionDataBuilder<K, V, SVMPartitionContext, LabeledDataset<Double, LabeledVector>> partDataBuilder = new SVMPartitionDataBuilderOnHeap<>(featureExtractor, lbExtractor, cols);
    Vector weights;
    try (Dataset<SVMPartitionContext, LabeledDataset<Double, LabeledVector>> dataset = datasetBuilder.build((upstream, upstreamSize) -> new SVMPartitionContext(), partDataBuilder)) {
        final int weightVectorSizeWithIntercept = cols + 1;
        weights = initializeWeightsWithZeros(weightVectorSizeWithIntercept);
        for (int i = 0; i < this.getAmountOfIterations(); i++) {
            Vector deltaWeights = calculateUpdates(weights, dataset);
            // plus() creates a new weight vector rather than mutating in place.
            weights = weights.plus(deltaWeights);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return new SVMLinearBinaryClassificationModel(weights.viewPart(1, weights.size() - 1), weights.get(0));
}
Also used: LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)
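The fit method above uses a private initializeWeightsWithZeros helper that is not shown. A plausible sketch, assuming it only allocates a zero-filled dense weight vector of the requested size:

/** Hypothetical helper: DenseLocalOnHeapVector is zero-filled on allocation. */
private Vector initializeWeightsWithZeros(int vectorSize) {
    return new DenseLocalOnHeapVector(vectorSize);
}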

Aggregations

LabeledVector (org.apache.ignite.ml.structures.LabeledVector) 14
Vector (org.apache.ignite.ml.math.Vector) 9
DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) 7
Matrix (org.apache.ignite.ml.math.Matrix) 4
Random (java.util.Random) 3
DenseLocalOnHeapMatrix (org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) 3
MLPArchitecture (org.apache.ignite.ml.nn.architecture.MLPArchitecture) 3
LabeledDataset (org.apache.ignite.ml.structures.LabeledDataset) 3
ArrayList (java.util.ArrayList) 2
Ignite (org.apache.ignite.Ignite) 2
MLPGroupUpdateTrainerCacheInput (org.apache.ignite.ml.nn.MLPGroupUpdateTrainerCacheInput) 2
MultilayerPerceptron (org.apache.ignite.ml.nn.MultilayerPerceptron) 2
RandomInitializer (org.apache.ignite.ml.nn.initializers.RandomInitializer) 2
RPropParameterUpdate (org.apache.ignite.ml.optimization.updatecalculators.RPropParameterUpdate) 2
HashMap (java.util.HashMap) 1
HashSet (java.util.HashSet) 1
List (java.util.List) 1
Set (java.util.Set) 1
TreeMap (java.util.TreeMap) 1
Stream (java.util.stream.Stream) 1