Search in sources :

Example 6 with LabeledVectorSet

use of org.apache.ignite.ml.structures.LabeledVectorSet in project ignite by apache.

the class CollectionsTest method test.

/**
 * Feeds pairs of ML structure and model instances to the {@code test(...)} and {@code specialTest(...)} helpers.
 * <p>
 * Every pair passed to {@code test(...)} consists of two instances of the same class that were built from different
 * constructor arguments (or, for the first {@link FeatureMetadata}, renamed after construction). The pairs passed to
 * {@code specialTest(...)} are two identically constructed distance measures.
 * <p>
 * NOTE(review): the helpers are defined outside this snippet; presumably they verify equals/hashCode/toString
 * behavior of the supplied objects - confirm against their implementation.
 */
@Test
@SuppressWarnings("unchecked")
public void test() {
    // Matrix view over dense matrices of different shapes.
    test(new VectorizedViewMatrix(new DenseMatrix(2, 2), 1, 1, 1, 1), new VectorizedViewMatrix(new DenseMatrix(3, 2), 2, 1, 1, 1));
    // Distance measures: both arguments are created by the same no-arg constructor.
    specialTest(new ManhattanDistance(), new ManhattanDistance());
    specialTest(new HammingDistance(), new HammingDistance());
    specialTest(new EuclideanDistance(), new EuclideanDistance());
    // The first metadata object is created as "name2" and then renamed to "name1", so the pair differs by name.
    FeatureMetadata data = new FeatureMetadata("name2");
    data.setName("name1");
    test(data, new FeatureMetadata("name2"));
    // Row and labeled-vector wrappers around a default vs. a size-1 dense vector.
    test(new DatasetRow<>(new DenseVector()), new DatasetRow<>(new DenseVector(1)));
    test(new LabeledVector<>(new DenseVector(), null), new LabeledVector<>(new DenseVector(1), null));
    // Empty dataset vs. a dataset holding one row and one feature metadata entry.
    test(new Dataset<DatasetRow<Vector>>(new DatasetRow[] {}, new FeatureMetadata[] {}), new Dataset<DatasetRow<Vector>>(new DatasetRow[] { new DatasetRow() }, new FeatureMetadata[] { new FeatureMetadata() }));
    // Models and model formats that differ in one constructor argument.
    test(new LogisticRegressionModel(new DenseVector(), 1.0), new LogisticRegressionModel(new DenseVector(), 0.5));
    test(new KMeansModelFormat(new Vector[] {}, new ManhattanDistance()), new KMeansModelFormat(new Vector[] {}, new HammingDistance()));
    test(new KMeansModel(new Vector[] {}, new ManhattanDistance()), new KMeansModel(new Vector[] {}, new HammingDistance()));
    test(new SVMLinearClassificationModel(null, 1.0), new SVMLinearClassificationModel(null, 0.5));
    test(new ANNClassificationModel(new LabeledVectorSet<>(), new ANNClassificationTrainer.CentroidStat()), new ANNClassificationModel(new LabeledVectorSet<>(1, 1), new ANNClassificationTrainer.CentroidStat()));
    test(new ANNModelFormat(1, new ManhattanDistance(), false, new LabeledVectorSet<>(), new ANNClassificationTrainer.CentroidStat()), new ANNModelFormat(2, new ManhattanDistance(), false, new LabeledVectorSet<>(), new ANNClassificationTrainer.CentroidStat()));
}
Also used : FeatureMetadata(org.apache.ignite.ml.structures.FeatureMetadata) HammingDistance(org.apache.ignite.ml.math.distances.HammingDistance) KMeansModel(org.apache.ignite.ml.clustering.kmeans.KMeansModel) LogisticRegressionModel(org.apache.ignite.ml.regressions.logistic.LogisticRegressionModel) ANNModelFormat(org.apache.ignite.ml.knn.ann.ANNModelFormat) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) KMeansModelFormat(org.apache.ignite.ml.clustering.kmeans.KMeansModelFormat) DenseMatrix(org.apache.ignite.ml.math.primitives.matrix.impl.DenseMatrix) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) DatasetRow(org.apache.ignite.ml.structures.DatasetRow) VectorizedViewMatrix(org.apache.ignite.ml.math.primitives.vector.impl.VectorizedViewMatrix) ANNClassificationModel(org.apache.ignite.ml.knn.ann.ANNClassificationModel) SVMLinearClassificationModel(org.apache.ignite.ml.svm.SVMLinearClassificationModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) ManhattanDistance(org.apache.ignite.ml.math.distances.ManhattanDistance) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) Test(org.junit.Test)

Example 7 with LabeledVectorSet

use of org.apache.ignite.ml.structures.LabeledVectorSet in project ignite by apache.

the class Deltas method updateModel.

/**
 * {@inheritDoc}
 */
@Override
protected <K, V> SVMLinearClassificationModel updateModel(SVMLinearClassificationModel mdl, DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> preprocessor) {
    assert datasetBuilder != null;

    // A label of 0.0 is remapped to -1.0; every other label is passed through untouched.
    IgniteFunction<Double, Double> relabel = lbl -> {
        if (lbl != 0.0)
            return lbl;

        return -1.0;
    };

    // Rebuilds each labeled vector with the remapped label and the original features.
    IgniteFunction<LabeledVector<Double>, LabeledVector<Double>> relabelVector =
        vec -> new LabeledVector<>(vec.features(), relabel.apply(vec.label()));

    PatchedPreprocessor<K, V, Double, Double> relabelingPreprocessor = new PatchedPreprocessor<>(relabelVector, preprocessor);

    PartitionDataBuilder<K, V, EmptyContext, LabeledVectorSet<LabeledVector>> partBuilder =
        new LabeledDatasetPartitionDataBuilderOnHeap<>(relabelingPreprocessor);

    Vector weights;

    try (Dataset<EmptyContext, LabeledVectorSet<LabeledVector>> dataset = datasetBuilder.build(
        envBuilder,
        (env, upstream, upstreamSize) -> new EmptyContext(),
        partBuilder,
        learningEnvironment()
    )) {
        if (mdl != null)
            weights = getStateVector(mdl);
        else {
            // Reduce the per-partition column counts: a missing side yields the other one (0 when both are
            // missing), and when both are present the right-hand value is taken.
            final int featureCnt = dataset.compute(org.apache.ignite.ml.structures.Dataset::colSize, (left, right) -> {
                if (right == null)
                    return left == null ? 0 : left;

                return right;
            });

            // One extra slot for the intercept.
            weights = initializeWeightsWithZeros(featureCnt + 1);
        }

        int iter = 0;

        while (iter < this.getAmountOfIterations()) {
            Vector delta = calculateUpdates(weights, dataset);

            if (delta == null)
                return getLastTrainedModelOrThrowEmptyDatasetException(mdl);

            // plus() produces a new vector rather than updating in place.
            weights = weights.plus(delta);

            iter++;
        }
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }

    // Element 0 is handed to the model separately; the remaining elements form its weight vector.
    Vector coefficients = weights.copyOfRange(1, weights.size());

    return new SVMLinearClassificationModel(coefficients, weights.get(0));
}
Also used : IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Preprocessor(org.apache.ignite.ml.preprocessing.Preprocessor) Random(java.util.Random) DatasetBuilder(org.apache.ignite.ml.dataset.DatasetBuilder) SparseVector(org.apache.ignite.ml.math.primitives.vector.impl.SparseVector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) Dataset(org.apache.ignite.ml.dataset.Dataset) SingleLabelDatasetTrainer(org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) PartitionDataBuilder(org.apache.ignite.ml.dataset.PartitionDataBuilder) NotNull(org.jetbrains.annotations.NotNull) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector) UpstreamEntry(org.apache.ignite.ml.dataset.UpstreamEntry) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) LabeledDatasetPartitionDataBuilderOnHeap(org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap) EmptyContext(org.apache.ignite.ml.dataset.primitive.context.EmptyContext) LabeledDatasetPartitionDataBuilderOnHeap(org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap) Dataset(org.apache.ignite.ml.dataset.Dataset) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) PatchedPreprocessor(org.apache.ignite.ml.preprocessing.developer.PatchedPreprocessor) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) SparseVector(org.apache.ignite.ml.math.primitives.vector.impl.SparseVector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) DenseVector(org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)

Example 8 with LabeledVectorSet

use of org.apache.ignite.ml.structures.LabeledVectorSet in project ignite by apache.

the class LabeledDatasetLoader method loadFromTxtFile.

/**
 * Datafile should keep class labels in the first column.
 * <p>
 * All lines are read into memory first and the underlying file is closed before any parsing starts. Each line is
 * then split with {@link String#split(String)}, so {@code separator} is interpreted as a regular expression.
 * A row that triggers a {@link NumberFormatException} is skipped silently when {@code isFallOnBadData} is
 * {@code false}.
 *
 * @param pathToFile Path to file.
 * @param separator Element to tokenize row on separate tokens.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @return Labeled Dataset parsed from file.
 * @throws IOException If the file cannot be opened.
 * @throws EmptyFileException If the file contains no lines.
 * @throws NoDataException If the computed number of feature columns is not positive.
 * @throws FileParsingException If a row cannot be parsed as numbers and {@code isFallOnBadData} is {@code true}.
 */
public static LabeledVectorSet loadFromTxtFile(Path pathToFile, String separator, boolean isFallOnBadData) throws IOException {
    List<String> list = new ArrayList<>();

    // Files.lines keeps the file handle open until the stream is closed, so the stream must be scoped
    // by try-with-resources; otherwise every call leaks a descriptor.
    try (Stream<String> stream = Files.lines(pathToFile)) {
        stream.forEach(list::add);
    }

    final int rowSize = list.size();

    if (rowSize <= 0)
        throw new EmptyFileException(pathToFile.toString());

    // The first column holds the label, so it is not counted as a feature column.
    final int colSize = getColumnSize(separator, list) - 1;

    if (colSize <= 0)
        throw new NoDataException("File should contain first row with data");

    List<Double> labels = new ArrayList<>();
    List<Vector> vectors = new ArrayList<>();

    for (int i = 0; i < rowSize; i++) {
        String[] rowData = list.get(i).split(separator);

        try {
            Double clsLb = Double.parseDouble(rowData[0]);
            Vector vec = parseFeatures(pathToFile, isFallOnBadData, colSize, i, rowData);

            // Both lists are appended only after the whole row parsed, which keeps them index-aligned.
            labels.add(clsLb);
            vectors.add(vec);
        }
        catch (NumberFormatException e) {
            if (isFallOnBadData)
                throw new FileParsingException(rowData[0], i, pathToFile);
            // Otherwise the malformed row is intentionally dropped and loading continues.
        }
    }

    LabeledVector[] data = new LabeledVector[vectors.size()];

    for (int i = 0; i < vectors.size(); i++)
        data[i] = new LabeledVector(vectors.get(i), labels.get(i));

    return new LabeledVectorSet(data, colSize);
}
Also used : FileParsingException(org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException) ArrayList(java.util.ArrayList) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) EmptyFileException(org.apache.ignite.ml.math.exceptions.datastructures.EmptyFileException) NoDataException(org.apache.ignite.ml.math.exceptions.math.NoDataException) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector)

Example 9 with LabeledVectorSet

use of org.apache.ignite.ml.structures.LabeledVectorSet in project ignite by apache.

the class LocalModelsTest method importExportANNModelTest.

/**
 * Saves an ANN classification model through a {@link FileExporter}, loads the stored format back, rebuilds a model
 * from it and asserts that the rebuilt model equals the original one.
 */
@Test
public void importExportANNModelTest() throws IOException {
    executeModelTest(mdlFilePath -> {
        final LabeledVectorSet<LabeledVector> centroids = new LabeledVectorSet<>();

        // Model under test: empty candidate set, k = 4, Manhattan distance, weighted.
        NNClassificationModel original = new ANNClassificationModel(centroids, new ANNClassificationTrainer.CentroidStat())
            .withK(4)
            .withDistanceMeasure(new ManhattanDistance())
            .withWeighted(true);

        Exporter<KNNModelFormat, String> fileExporter = new FileExporter<>();

        original.saveModel(fileExporter, mdlFilePath);

        ANNModelFormat restoredFormat = (ANNModelFormat) fileExporter.load(mdlFilePath);

        Assert.assertNotNull(restoredFormat);

        // Rebuild the model from the loaded candidates, k and distance measure.
        NNClassificationModel restored = new ANNClassificationModel(restoredFormat.getCandidates(), new ANNClassificationTrainer.CentroidStat())
            .withK(restoredFormat.getK())
            .withDistanceMeasure(restoredFormat.getDistanceMeasure())
            .withWeighted(true);

        Assert.assertEquals("", original, restored);

        return null;
    });
}
Also used : ANNClassificationModel(org.apache.ignite.ml.knn.ann.ANNClassificationModel) NNClassificationModel(org.apache.ignite.ml.knn.NNClassificationModel) FileExporter(org.apache.ignite.ml.FileExporter) KNNModelFormat(org.apache.ignite.ml.knn.ann.KNNModelFormat) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) ANNClassificationModel(org.apache.ignite.ml.knn.ann.ANNClassificationModel) ANNModelFormat(org.apache.ignite.ml.knn.ann.ANNModelFormat) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) ManhattanDistance(org.apache.ignite.ml.math.distances.ManhattanDistance) Test(org.junit.Test)

Aggregations

LabeledVector (org.apache.ignite.ml.structures.LabeledVector)9 LabeledVectorSet (org.apache.ignite.ml.structures.LabeledVectorSet)9 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)6 Dataset (org.apache.ignite.ml.dataset.Dataset)4 EmptyContext (org.apache.ignite.ml.dataset.primitive.context.EmptyContext)4 LabeledDatasetPartitionDataBuilderOnHeap (org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap)4 DatasetBuilder (org.apache.ignite.ml.dataset.DatasetBuilder)3 PartitionDataBuilder (org.apache.ignite.ml.dataset.PartitionDataBuilder)3 DenseVector (org.apache.ignite.ml.math.primitives.vector.impl.DenseVector)3 Preprocessor (org.apache.ignite.ml.preprocessing.Preprocessor)3 NotNull (org.jetbrains.annotations.NotNull)3 Serializable (java.io.Serializable)2 TreeMap (java.util.TreeMap)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 KMeansModel (org.apache.ignite.ml.clustering.kmeans.KMeansModel)2 LearningEnvironmentBuilder (org.apache.ignite.ml.environment.LearningEnvironmentBuilder)2 ANNClassificationModel (org.apache.ignite.ml.knn.ann.ANNClassificationModel)2 ANNModelFormat (org.apache.ignite.ml.knn.ann.ANNModelFormat)2 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)2 ManhattanDistance (org.apache.ignite.ml.math.distances.ManhattanDistance)2