use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class GenericLinearRegressionTrainerTest method testTrainOnDiabetesDataset.
/**
* Test trainer on diabetes dataset.
*/
@Test
public void testTrainOnDiabetesDataset() {
Matrix data = loadDataset("datasets/regression/diabetes.csv", 442, 10);
LinearRegressionModel mdl = trainer.train(data);
Vector expWeights = vectorCreator.apply(new double[] { -10.01219782, -239.81908937, 519.83978679, 324.39042769, -792.18416163, 476.74583782, 101.04457032, 177.06417623, 751.27932109, 67.62538639 });
double expIntercept = 152.13348416;
TestUtils.assertEquals("Wrong weights", expWeights, mdl.getWeights(), precision);
TestUtils.assertEquals("Wrong intercept", expIntercept, mdl.getIntercept(), precision);
}
use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class SVMModelTest method testPredictOnAnObservationWithWrongCardinality.
/**
*/
@Test(expected = CardinalityException.class)
public void testPredictOnAnObservationWithWrongCardinality() {
Vector weights = new DenseLocalOnHeapVector(new double[] { 2.0, 3.0 });
SVMLinearBinaryClassificationModel mdl = new SVMLinearBinaryClassificationModel(weights, 1.0);
Vector observation = new DenseLocalOnHeapVector(new double[] { 1.0 });
mdl.apply(observation);
}
use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class ColumnDecisionTreeTrainerBenchmark method tstMNISTSparseDistributedMatrix.
/**
* Run decision tree classifier on MNIST using sparse distributed matrix as a storage for dataset.
* To run this test rename this method so it starts from 'test'.
*
* @throws IOException In case of loading MNIST dataset errors.
*/
public void tstMNISTSparseDistributedMatrix() throws IOException {
IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
int ptsCnt = 30_000;
int featCnt = 28 * 28;
Properties props = loadMNISTProperties();
Stream<DenseLocalOnHeapVector> trainingMnistStream = MnistUtils.mnist(props.getProperty(PROP_TRAINING_IMAGES), props.getProperty(PROP_TRAINING_LABELS), new Random(123L), ptsCnt);
Stream<DenseLocalOnHeapVector> testMnistStream = MnistUtils.mnist(props.getProperty(PROP_TEST_IMAGES), props.getProperty(PROP_TEST_LABELS), new Random(123L), 10_000);
SparseDistributedMatrix m = new SparseDistributedMatrix(ptsCnt, featCnt + 1, StorageConstants.COLUMN_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
SparseDistributedMatrixStorage sto = (SparseDistributedMatrixStorage) m.getStorage();
loadVectorsIntoSparseDistributedMatrixCache(sto.cache().getName(), sto.getUUID(), trainingMnistStream.iterator(), featCnt + 1);
ColumnDecisionTreeTrainer<GiniSplitCalculator.GiniData> trainer = new ColumnDecisionTreeTrainer<>(10, ContinuousSplitCalculators.GINI.apply(ignite), RegionCalculators.GINI, RegionCalculators.MOST_COMMON, ignite);
X.println("Training started");
long before = System.currentTimeMillis();
DecisionTreeModel mdl = trainer.train(new MatrixColumnDecisionTreeTrainerInput(m, new HashMap<>()));
X.println("Training finished in " + (System.currentTimeMillis() - before));
IgniteTriFunction<Model<Vector, Double>, Stream<IgniteBiTuple<Vector, Double>>, Function<Double, Double>, Double> mse = Estimators.errorsPercentage();
Double accuracy = mse.apply(mdl, testMnistStream.map(v -> new IgniteBiTuple<>(v.viewPart(0, featCnt), v.getX(featCnt))), Function.identity());
X.println("Errors percentage: " + accuracy);
Assert.assertEquals(0, SplitCache.getOrCreate(ignite).size());
Assert.assertEquals(0, FeaturesCache.getOrCreate(ignite).size());
Assert.assertEquals(0, ContextCache.getOrCreate(ignite).size());
Assert.assertEquals(0, ProjectionsCache.getOrCreate(ignite).size());
}
use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class ColumnDecisionTreeTrainerBenchmark method tstMNISTBiIndexedCache.
/**
* Run decision tree classifier on MNIST using bi-indexed cache as a storage for dataset.
* To run this test rename this method so it starts from 'test'.
*
* @throws IOException In case of loading MNIST dataset errors.
*/
public void tstMNISTBiIndexedCache() throws IOException {
IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
int ptsCnt = 40_000;
int featCnt = 28 * 28;
Properties props = loadMNISTProperties();
Stream<DenseLocalOnHeapVector> trainingMnistStream = MnistUtils.mnist(props.getProperty(PROP_TRAINING_IMAGES), props.getProperty(PROP_TRAINING_LABELS), new Random(123L), ptsCnt);
Stream<DenseLocalOnHeapVector> testMnistStream = MnistUtils.mnist(props.getProperty(PROP_TEST_IMAGES), props.getProperty(PROP_TEST_LABELS), new Random(123L), 10_000);
IgniteCache<BiIndex, Double> cache = createBiIndexedCache();
loadVectorsIntoBiIndexedCache(cache.getName(), trainingMnistStream.iterator(), featCnt + 1);
ColumnDecisionTreeTrainer<GiniSplitCalculator.GiniData> trainer = new ColumnDecisionTreeTrainer<>(10, ContinuousSplitCalculators.GINI.apply(ignite), RegionCalculators.GINI, RegionCalculators.MOST_COMMON, ignite);
X.println("Training started.");
long before = System.currentTimeMillis();
DecisionTreeModel mdl = trainer.train(new BiIndexedCacheColumnDecisionTreeTrainerInput(cache, new HashMap<>(), ptsCnt, featCnt));
X.println("Training finished in " + (System.currentTimeMillis() - before));
IgniteTriFunction<Model<Vector, Double>, Stream<IgniteBiTuple<Vector, Double>>, Function<Double, Double>, Double> mse = Estimators.errorsPercentage();
Double accuracy = mse.apply(mdl, testMnistStream.map(v -> new IgniteBiTuple<>(v.viewPart(0, featCnt), v.getX(featCnt))), Function.identity());
X.println("Errors percentage: " + accuracy);
Assert.assertEquals(0, SplitCache.getOrCreate(ignite).size());
Assert.assertEquals(0, FeaturesCache.getOrCreate(ignite).size());
Assert.assertEquals(0, ContextCache.getOrCreate(ignite).size());
Assert.assertEquals(0, ProjectionsCache.getOrCreate(ignite).size());
}
use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class FuzzyCMeansLocalClustererTest method checkCentersOfTheSamePointsTwoDimensions.
/**
* Test FCM on points which have the equal coordinates.
*/
@Test
public void checkCentersOfTheSamePointsTwoDimensions() {
FuzzyCMeansLocalClusterer clusterer = new FuzzyCMeansLocalClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS, 0.01, 10, null);
double[][] points = new double[][] { { 3.3, 10 }, { 3.3, 10 }, { 3.3, 10 }, { 3.3, 10 }, { 3.3, 10 } };
DenseLocalOnHeapMatrix pntMatrix = new DenseLocalOnHeapMatrix(points);
int k = 2;
FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, k);
Vector exp = new DenseLocalOnHeapVector(new double[] { 3.3, 10 });
for (int i = 0; i < k; i++) {
Vector center = mdl.centers()[i];
for (int j = 0; j < 2; j++) assertEquals(exp.getX(j), center.getX(j), 1);
}
}
Aggregations