use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class KMeansDistributedClustererTest method testClusterizationOnDatasetWithObviousStructure.
/**
 * Generates points scattered near the four vertices of a square, clusters them into four groups with
 * {@code KMeansDistributedClusterer} and checks that every resulting center lies within {@code 30.0}
 * of the mass center of the corresponding generated group.
 *
 * @throws IOException Declared by the signature; nothing in the visible body throws it directly.
 */
@Test
public void testClusterizationOnDatasetWithObviousStructure() throws IOException {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    final int pointsTotal = 10000;
    final int side = 10000;
    final Random rnd = new Random(123456L);

    // Cluster size -> cluster center; the centers are the vertices of a square.
    Map<Integer, Vector> centers = new HashMap<>();
    centers.put(100, new DenseLocalOnHeapVector(new double[] {0.0, 0.0}));
    centers.put(900, new DenseLocalOnHeapVector(new double[] {side, 0.0}));
    centers.put(3000, new DenseLocalOnHeapVector(new double[] {0.0, side}));
    centers.put(6000, new DenseLocalOnHeapVector(new double[] {side, side}));

    SparseDistributedMatrix points = new SparseDistributedMatrix(pointsTotal, 2,
        StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    // Shuffled row indices, so that points of one cluster are not stored in adjacent rows.
    List<Integer> rowOrder = IntStream.range(0, pointsTotal).boxed().collect(Collectors.toList());
    Collections.shuffle(rowOrder, rnd);

    // Single zero vector used as the starting value of every per-cluster sum.
    Vector zero = VectorUtils.zeroes(2);

    List<Vector> massCenters = new ArrayList<>();
    int generated = 0;

    for (Map.Entry<Integer, Vector> cluster : centers.entrySet()) {
        int clusterSize = cluster.getKey();
        Vector sum = zero;

        for (int k = 0; k < clusterSize; k++) {
            DenseLocalOnHeapVector pnt =
                (DenseLocalOnHeapVector) new DenseLocalOnHeapVector(2).assign(cluster.getValue());

            // Perturb the point by a random value.
            pnt.map(val -> val + rnd.nextDouble() * side / 100);

            sum = sum.plus(pnt);
            points.assignRow(rowOrder.get(generated), pnt);
            generated++;
        }

        massCenters.add(sum.times(1.0 / clusterSize));
    }

    EuclideanDistance dist = new EuclideanDistance();
    OrderedNodesComparator comp = new OrderedNodesComparator(centers.values().toArray(new Vector[0]), dist);

    // Both center sets are sorted with the same comparator so that they can be compared pairwise.
    massCenters.sort(comp);

    KMeansModel mdl = new KMeansDistributedClusterer(dist, 3, 100, 1L).cluster(points, 4);

    Vector[] actualCenters = mdl.centers();
    Arrays.sort(actualCenters, comp);

    checkIsInEpsilonNeighbourhood(actualCenters, massCenters.toArray(new Vector[0]), 30.0);
}
use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class KMeansLocalClustererExample method main.
/**
 * Runs the local k-means example: generates a dataset, clusters it into four groups and prints both
 * the mass centers returned by the generator and the centers found by the clusterer.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) {
    // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure
    System.out.println(">>> K-means local clusterer example started.");

    final int rows = 10000;

    DenseLocalOnHeapMatrix data = new DenseLocalOnHeapMatrix(rows, 2);
    DatasetWithObviousStructure generator = new DatasetWithObviousStructure(10000);

    // The generator fills the matrix and returns the mass centers of what it generated.
    List<Vector> expected = generator.generate(data);

    EuclideanDistance metric = new EuclideanDistance();
    Vector[] refCenters = generator.centers().values().toArray(new Vector[0]);
    OrderedNodesComparator order = new OrderedNodesComparator(refCenters, metric);

    expected.sort(order);

    KMeansModel model = new KMeansLocalClusterer(metric, 100, 1L).cluster(data, 4);

    Vector[] found = model.centers();
    Arrays.sort(found, order);

    System.out.println("Mass centers:");
    for (Vector center : expected) {
        Tracer.showAscii(center);
    }

    System.out.println("Cluster centers:");
    for (Vector center : found) {
        Tracer.showAscii(center);
    }

    System.out.println("\n>>> K-means local clusterer example completed.");
}
use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class OffHeapVectorExample method main.
/**
 * Runs the off-heap vector example: builds two vectors whose non-zero components do not overlap,
 * prints their dot product, their sum and the squared lengths, and asserts that the dot product is
 * zero and that the squared length of the sum equals the sum of the squared lengths.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) {
    System.out.println();
    System.out.println(">>> Off-heap vector API usage example started.");

    System.out.println("\n>>> Creating perpendicular off-heap vectors.");

    double[] first = {1, 0, 3, 0, 5, 0};
    double[] second = {0, 2, 0, 4, 0, 6};

    Vector a = new DenseLocalOffHeapVector(first.length);
    Vector b = new DenseLocalOffHeapVector(second.length);

    a.assign(first);
    b.assign(second);

    System.out.println(">>> First vector: " + Arrays.toString(first));
    System.out.println(">>> Second vector: " + Arrays.toString(second));

    // The non-zero components never share an index, so the dot product is expected to be exactly 0.
    double dot = a.dot(b);
    boolean dotIsZero = dot == 0;

    System.out.println("\n>>> Dot product of vectors: [" + dot + "], it is 0 as expected: [" + dotIsZero + "].");

    assert dotIsZero : "Expect dot product of perpendicular vectors to be 0.";

    Vector sum = a.plus(b);

    System.out.println("\n>>> Hypotenuse (sum of vectors): " + Arrays.toString(sum.getStorage().data()));

    double sqLenA = a.getLengthSquared();
    double sqLenB = b.getLengthSquared();
    double sqLenSum = sum.getLengthSquared();

    boolean pythagorasHolds = sqLenSum == sqLenA + sqLenB;

    System.out.println(">>> Squared length of first vector: [" + sqLenA + "].");
    System.out.println(">>> Squared length of second vector: [" + sqLenB + "].");
    System.out.println(">>> Squared length of hypotenuse: [" + sqLenSum + "], equals sum of squared lengths of two original vectors as expected: [" + pythagorasHolds + "].");

    assert pythagorasHolds : "Expect squared length of hypotenuse to be as per Pythagorean theorem.";

    System.out.println("\n>>> Off-heap vector API usage example completed.");
}
use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class DistributedLinearRegressionWithSGDTrainerExample method main.
/**
 * Run example.
 *
 * Starts an Ignite node from {@code examples/config/example-ignite.xml}, trains a linear regression
 * model with {@code LinearRegressionSGDTrainer} on a {@code SparseDistributedMatrix} built from the
 * {@code data} field (declared outside this method), and prints the prediction next to the ground
 * truth for every row of {@code data}.
 *
 * @param args Command line arguments; not read by this method.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println();
    System.out.println(">>> Linear regression model over sparse distributed matrix API usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
        // because we create ignite cache internally.
        // The thread is named after this example class (it previously carried the name of
        // SparseDistributedMatrixExample, which mislabelled it in thread dumps and logs).
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
            DistributedLinearRegressionWithSGDTrainerExample.class.getSimpleName(), () -> {
                // Create SparseDistributedMatrix, new cache will be created automagically.
                System.out.println(">>> Create new SparseDistributedMatrix inside IgniteThread.");
                SparseDistributedMatrix distributedMatrix = new SparseDistributedMatrix(data);

                System.out.println(">>> Create new linear regression trainer object.");
                Trainer<LinearRegressionModel, Matrix> trainer = new LinearRegressionSGDTrainer(100_000, 1e-12);

                System.out.println(">>> Perform the training to get the model.");
                LinearRegressionModel model = trainer.train(distributedMatrix);
                System.out.println(">>> Linear regression model: " + model);

                System.out.println(">>> ---------------------------------");
                System.out.println(">>> | Prediction\t| Ground Truth\t|");
                System.out.println(">>> ---------------------------------");
                for (double[] observation : data) {
                    // Element 0 of a row is the ground truth; the remaining elements are the model inputs.
                    Vector inputs = new SparseDistributedVector(Arrays.copyOfRange(observation, 1, observation.length));
                    double prediction = model.apply(inputs);
                    double groundTruth = observation[0];
                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth);
                }
                System.out.println(">>> ---------------------------------");
            });

        igniteThread.start();
        // Wait for the worker to finish before the try-with-resources block closes the node.
        igniteThread.join();
    }
}
use of org.apache.ignite.ml.math.Vector in project ignite by apache.
the class DecisionTreesExample method main.
/**
 * Launches example.
 *
 * Parses the command line, returns early in 'unattended' mode or when the MNIST files are not
 * available, otherwise starts Ignite, trains a column decision tree on the MNIST training set and
 * prints the value produced by {@code Estimators.errorsPercentage()} on the MNIST test set.
 *
 * @param args Program arguments.
 * @throws IOException Declared by the signature; an {@code IOException} raised inside the Ignite
 *      section is caught and its stack trace printed.
 */
public static void main(String[] args) throws IOException {
    System.out.println(">>> Decision trees example started.");

    CommandLineParser cliParser = new BasicParser();
    String cfgPath;

    try {
        // Parse the command line arguments.
        CommandLine cmd = cliParser.parse(buildOptions(), args);

        if (cmd.hasOption(MLExamplesCommonArgs.UNATTENDED)) {
            System.out.println(">>> Skipped example execution because 'unattended' mode is used.");
            System.out.println(">>> Decision trees example finished.");
            return;
        }

        cfgPath = cmd.getOptionValue(CONFIG, DEFAULT_CONFIG);
    } catch (ParseException e) {
        e.printStackTrace();
        return;
    }

    // Logical dataset part -> file name inside the MNIST directory.
    Map<String, String> mnistFiles = new HashMap<>();
    mnistFiles.put(MNIST_TRAIN_IMAGES, "train-images-idx3-ubyte");
    mnistFiles.put(MNIST_TRAIN_LABELS, "train-labels-idx1-ubyte");
    mnistFiles.put(MNIST_TEST_IMAGES, "t10k-images-idx3-ubyte");
    mnistFiles.put(MNIST_TEST_LABELS, "t10k-labels-idx1-ubyte");

    if (!getMNIST(mnistFiles.values())) {
        System.out.println(">>> You should have MNIST dataset in " + MNIST_DIR + " to run this example.");
        return;
    }

    String dirPrefix = MNIST_DIR + "/";

    String trainImgs = Objects.requireNonNull(
        IgniteUtils.resolveIgnitePath(dirPrefix + mnistFiles.get(MNIST_TRAIN_IMAGES))).getPath();
    String trainLbls = Objects.requireNonNull(
        IgniteUtils.resolveIgnitePath(dirPrefix + mnistFiles.get(MNIST_TRAIN_LABELS))).getPath();
    String testImgs = Objects.requireNonNull(
        IgniteUtils.resolveIgnitePath(dirPrefix + mnistFiles.get(MNIST_TEST_IMAGES))).getPath();
    String testLbls = Objects.requireNonNull(
        IgniteUtils.resolveIgnitePath(dirPrefix + mnistFiles.get(MNIST_TEST_LABELS))).getPath();

    try (Ignite ignite = Ignition.start(cfgPath)) {
        IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

        final int trainSize = 60000;
        final int featuresCnt = 28 * 28;

        Stream<DenseLocalOnHeapVector> trainStream =
            MnistUtils.mnist(trainImgs, trainLbls, new Random(123L), trainSize);
        Stream<DenseLocalOnHeapVector> testStream =
            MnistUtils.mnist(testImgs, testLbls, new Random(123L), 10_000);

        IgniteCache<BiIndex, Double> cache = createBiIndexedCache(ignite);

        // Each vector is loaded with featuresCnt + 1 components; the component at index featuresCnt
        // is the one later read back as the label.
        loadVectorsIntoBiIndexedCache(cache.getName(), trainStream.iterator(), featuresCnt + 1, ignite);

        ColumnDecisionTreeTrainer<GiniSplitCalculator.GiniData> trainer = new ColumnDecisionTreeTrainer<>(
            10,
            ContinuousSplitCalculators.GINI.apply(ignite),
            RegionCalculators.GINI,
            RegionCalculators.MOST_COMMON,
            ignite);

        System.out.println(">>> Training started");

        long startedAt = System.currentTimeMillis();

        DecisionTreeModel mdl = trainer.train(
            new BiIndexedCacheColumnDecisionTreeTrainerInput(cache, new HashMap<>(), trainSize, featuresCnt));

        long elapsed = System.currentTimeMillis() - startedAt;
        System.out.println(">>> Training finished in " + elapsed);

        IgniteTriFunction<Model<Vector, Double>, Stream<IgniteBiTuple<Vector, Double>>, Function<Double, Double>, Double> errsEstimator =
            Estimators.errorsPercentage();

        // Split every test vector into (features, label) before handing it to the estimator.
        Double errsPercentage = errsEstimator.apply(
            mdl,
            testStream.map(v -> new IgniteBiTuple<>(v.viewPart(0, featuresCnt), v.getX(featuresCnt))),
            Function.identity());

        System.out.println(">>> Errs percentage: " + errsPercentage);
    } catch (IOException e) {
        e.printStackTrace();
    }

    System.out.println(">>> Decision trees example finished.");
}
Aggregations