use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.
the class KMeansTrainer method initClusterCentersRandomly.
/**
 * Picks {@code k} initial cluster centers at random from the dataset.
 *
 * <p>Every partition contributes up to {@code k} distinct rows (all of its rows when it holds
 * {@code k} rows or fewer). The per-partition candidates are concatenated, shuffled, and then
 * {@code k} of them are drawn without replacement.
 *
 * @param dataset The dataset to pick up random centers.
 * @param k Amount of clusters.
 * @return K cluster centers.
 * @throws RuntimeException If fewer than {@code k} candidate vectors were collected.
 */
private Vector[] initClusterCentersRandomly(Dataset<EmptyContext, LabeledVectorSet<LabeledVector>> dataset, int k) {
    // Allocated as Vector[] rather than DenseVector[]: features() is only declared to return a Vector,
    // and storing a non-dense implementation into a DenseVector[] would throw ArrayStoreException.
    Vector[] initCenters = new Vector[k];

    // Gets k or less vectors from each partition.
    List<LabeledVector> rndPnts = dataset.compute(data -> {
        List<LabeledVector> rndPnt = new ArrayList<>();
        int rowCnt = data.rowSize();

        if (rowCnt > k) {
            // There are enough rows in the partition to pick k distinct vectors.
            final Random random = environment.randomNumbersGenerator();

            // Shared by all k picks so that rows chosen earlier are actually remembered.
            Set<Integer> uniqueIndices = new HashSet<>();

            for (int i = 0; i < k; i++) {
                int nextIdx = random.nextInt(rowCnt);

                // Bounds the number of random retries so that the search is finite.
                int maxRandomSearch = k;
                int cntr = 0;

                // Repeat nextIdx generation if it was picked earlier.
                while (uniqueIndices.contains(nextIdx) && cntr < maxRandomSearch) {
                    nextIdx = random.nextInt(rowCnt);
                    cntr++;
                }

                // Retry budget exhausted on a duplicate: walk to the next free index.
                // rowCnt > k guarantees that a free index exists, so this loop terminates.
                while (uniqueIndices.contains(nextIdx)) {
                    nextIdx = (nextIdx + 1) % rowCnt;
                }

                uniqueIndices.add(nextIdx);
                rndPnt.add(data.getRow(nextIdx));
            }
        }
        else {
            // Not enough rows to choose from: take every row of the partition (possibly none).
            for (int i = 0; i < rowCnt; i++) {
                rndPnt.add(data.getRow(i));
            }
        }

        return rndPnt;
    }, (a, b) -> {
        if (a == null)
            return b == null ? new ArrayList<>() : b;
        if (b == null)
            return a;
        return Stream.concat(a.stream(), b.stream()).collect(Collectors.toList());
    });

    // NOTE(review): compute() presumably may yield null when no partition produced a result
    // (the reducer above already tolerates null partial results); treat that as "no candidates".
    if (rndPnts == null || rndPnts.size() < k)
        throw new RuntimeException("The KMeans Trainer required more than " + k + " vectors to find " + k + " clusters");

    // Shuffle them.
    Collections.shuffle(rndPnts);

    // Pick k vectors randomly, removing each picked candidate by position so it cannot be drawn twice.
    for (int i = 0; i < k; i++) {
        int idx = environment.randomNumbersGenerator().nextInt(rndPnts.size());

        initCenters[i] = rndPnts.remove(idx).features();
    }

    return initCenters;
}
use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.
the class KMeansTrainer method updateModel.
/**
 * {@inheritDoc}
 *
 * <p>Starts from the centers of {@code mdl} when they are available, otherwise from randomly picked
 * rows of the dataset. Each iteration replaces every center that received data with the mean
 * (sum divided by count) reported by {@code calcDataForNewCentroids}. Iteration stops when no
 * updated center is farther than {@code epsilon * epsilon} from its predecessor according to
 * {@code distance}, or when {@code maxIterations} is reached.
 */
@Override
protected <K, V> KMeansModel updateModel(KMeansModel mdl, DatasetBuilder<K, V> datasetBuilder, Preprocessor<K, V> preprocessor) {
    assert datasetBuilder != null;

    PartitionDataBuilder<K, V, EmptyContext, LabeledVectorSet<LabeledVector>> partDataBuilder = new LabeledDatasetPartitionDataBuilderOnHeap<>(preprocessor);

    Vector[] centers;

    try (Dataset<EmptyContext, LabeledVectorSet<LabeledVector>> dataset = datasetBuilder.build(envBuilder, (env, upstream, upstreamSize) -> new EmptyContext(), partDataBuilder, learningEnvironment())) {
        // Column count as reported by the partitions; null partial results are tolerated by the reducer.
        final Integer cols = dataset.compute(org.apache.ignite.ml.structures.Dataset::colSize, (a, b) -> {
            if (a == null)
                return b == null ? 0 : b;
            if (b == null)
                return a;
            return b;
        });

        if (cols == null)
            return getLastTrainedModelOrThrowEmptyDatasetException(mdl);

        // Reuse the previous model's centers if there are any, otherwise initialize randomly.
        centers = Optional.ofNullable(mdl).map(KMeansModel::centers).orElseGet(() -> initClusterCentersRandomly(dataset, k));

        boolean converged = false;
        int iteration = 0;

        while (iteration < maxIterations && !converged) {
            // Sized by centers.length (not k) because it is indexed by the same keys as centers,
            // and typed as Vector[] because times() is only declared to return a Vector:
            // a DenseVector[] would throw ArrayStoreException for any other implementation.
            Vector[] newCentroids = new Vector[centers.length];

            TotalCostAndCounts totalRes = calcDataForNewCentroids(centers, dataset, cols);

            converged = true;

            for (Map.Entry<Integer, Vector> entry : totalRes.sums.entrySet()) {
                // Mean of the vectors accumulated for this center.
                Vector massCenter = entry.getValue().times(1.0 / totalRes.counts.get(entry.getKey()));

                if (converged && distance.compute(massCenter, centers[entry.getKey()]) > epsilon * epsilon)
                    converged = false;

                newCentroids[entry.getKey()] = massCenter;
            }

            iteration++;

            // Centers without an entry in this round keep their previous position.
            for (int i = 0; i < centers.length; i++) {
                if (newCentroids[i] != null)
                    centers[i] = newCentroids[i];
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return new KMeansModel(centers, distance);
}
use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.
the class SparkModelParser method readSVMCoefficients.
/**
 * Reads the SVM coefficients stored in the given parquet group into a dense vector.
 *
 * @param g Coefficient group.
 * @return Vector of coefficients.
 */
private static Vector readSVMCoefficients(SimpleGroup g) {
    final Group coeffGroup = g.getGroup(0, 0).getGroup(3, 0);
    final int size = coeffGroup.getFieldRepetitionCount(0);

    final Vector res = new DenseVector(size);

    // Copy every repeated entry of field 0 into the matching vector position.
    int idx = 0;
    while (idx < size) {
        res.set(idx, coeffGroup.getGroup(0, idx).getDouble(0, 0));
        idx++;
    }

    return res;
}
use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.
the class LogisticRegressionModelTest method testPredictOnAnObservationWithWrongCardinality.
/**
 * Predicting on an observation whose size differs from the size of the weights vector
 * is expected to fail with {@link CardinalityException}.
 */
@Test(expected = CardinalityException.class)
public void testPredictOnAnObservationWithWrongCardinality() {
    final double[] twoWeights = {2.0, 3.0};
    final double[] singleFeature = {1.0};

    LogisticRegressionModel model = new LogisticRegressionModel(new DenseVector(twoWeights), 1.0);

    // One feature against two weights.
    model.predict(new DenseVector(singleFeature));
}
use of org.apache.ignite.ml.math.primitives.vector.impl.DenseVector in project ignite by apache.
the class SVMModelTest method testPredictOnAnObservationWithWrongCardinality.
/**
 * Predicting on an observation whose size differs from the size of the weights vector
 * is expected to fail with {@link CardinalityException}.
 */
@Test(expected = CardinalityException.class)
public void testPredictOnAnObservationWithWrongCardinality() {
    final double[] twoWeights = {2.0, 3.0};
    final double[] singleFeature = {1.0};

    SVMLinearClassificationModel model = new SVMLinearClassificationModel(new DenseVector(twoWeights), 1.0);

    // One feature against two weights.
    model.predict(new DenseVector(singleFeature));
}
Aggregations