use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray in project elki by elki-project.
the class IntegerRankTieNormalizationTest method defaultParameters.
/**
 * Test with default parameters.
 */
@Test
public void defaultParameters() {
  String filename = UNITTEST + "normalization-test-1.csv";
  IntegerRankTieNormalization filter = new ELKIBuilder<>(IntegerRankTieNormalization.class).build();
  MultipleObjectsBundle bundle = readBundle(filename, filter);
  int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  IntegerArray coldata = new IntegerArray(bundle.dataLength());
  for (int col = 0; col < dim; col++) {
    coldata.clear();
    // Extract the column:
    for (int row = 0; row < bundle.dataLength(); row++) {
      IntegerVector obj = get(bundle, row, 0, IntegerVector.class);
      coldata.add(obj.intValue(col));
    }
    // Sort values:
    coldata.sort();
    // Verify that the gap matches the frequency of each value.
    final int size = coldata.size;
    assertEquals("First value", coldata.get(0), coldata.get(coldata.get(0)));
    for (int i = 0; i < size;) {
      // s: start, i: end, v: value, f: frequency
      int s = i, v = coldata.get(i), f = 1;
      while (++i < size && v == coldata.get(i)) {
        f++;
      }
      // The value is odd if and only if the frequency is even: v and f
      // always have opposite parity.
      assertNotSame("Even/odd rule", (f & 1), (v & 1));
      assertEquals("Bad value at position " + s, s + i - 1, v);
      assertEquals("Bad frequency at position " + s, i - s, f);
    }
  }
}
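For reference, the invariant the test checks can be traced on a tiny hand-made column. After IntegerRankTieNormalization, a run of f tied values starting at 0-based rank s is encoded as v = s + (s + f - 1), the sum of the first and last rank it occupies; hence v + f = 2 * (s + f) - 1 is always odd, which is the even/odd rule above. The following minimal standalone sketch illustrates this; the class name and the input values are made up for illustration and are not part of ELKI:

import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray;

public class RankTieInvariantSketch {
  public static void main(String[] args) {
    // Example column after normalization: ranks {0,1} tied -> v = 0 + 1 = 1,
    // singleton at rank 2 -> v = 2 + 2 = 4.
    IntegerArray coldata = new IntegerArray(3);
    coldata.add(1);
    coldata.add(1);
    coldata.add(4);
    for (int i = 0; i < coldata.size();) {
      int s = i, v = coldata.get(i), f = 1;
      while (++i < coldata.size() && v == coldata.get(i)) {
        f++;
      }
      // v = s + i - 1 and f = i - s, exactly as asserted in the test above.
      assert v == s + i - 1 && f == i - s : "Invariant violated at " + s;
    }
  }
}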
use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray in project elki by elki-project.
the class NNChain method nnChainCore.
/**
 * Uses NNChain as in "Modern hierarchical, agglomerative clustering
 * algorithms" by Daniel Müllner
 *
 * @param mat Matrix view
 * @param builder Result builder
 */
private void nnChainCore(MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder) {
  final DBIDArrayIter ix = mat.ix;
  final double[] distances = mat.matrix;
  final int size = mat.size;
  // The maximum chain size = number of ids + 1
  IntegerArray chain = new IntegerArray(size + 1);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running NNChain", size - 1, LOG) : null;
  for (int k = 1, end = size; k < size; k++) {
    int a = -1, b = -1;
    if (chain.size() <= 3) {
      // Accessing two arbitrary not yet merged elements could be optimized to
      // work in O(1) like in Müllner;
      // however this usually does not have a huge impact (empirically just
      // about 1/5000 of total performance)
      a = findUnlinked(0, end, ix, builder);
      b = findUnlinked(a + 1, end, ix, builder);
      chain.clear();
      chain.add(a);
    } else {
      // Chain is expected to look like (.... a, b, c, b) with b and c merged.
      int lastIndex = chain.size;
      int c = chain.get(lastIndex - 2);
      b = chain.get(lastIndex - 3);
      a = chain.get(lastIndex - 4);
      // Ensure we had a loop at the end:
      assert (chain.get(lastIndex - 1) == c || chain.get(lastIndex - 1) == b);
      // if c < b, then we merged b -> c, otherwise c -> b
      b = c < b ? c : b;
      // Cut the tail:
      chain.size -= 3;
    }
    // For ties, always prefer the second-last element b:
    double minDist = mat.get(a, b);
    do {
      int c = b;
      final int ta = MatrixParadigm.triangleSize(a);
      for (int i = 0; i < a; i++) {
        if (i != b && !builder.isLinked(ix.seek(i))) {
          double dist = distances[ta + i];
          if (dist < minDist) {
            minDist = dist;
            c = i;
          }
        }
      }
      for (int i = a + 1; i < size; i++) {
        if (i != b && !builder.isLinked(ix.seek(i))) {
          double dist = distances[MatrixParadigm.triangleSize(i) + a];
          if (dist < minDist) {
            minDist = dist;
            c = i;
          }
        }
      }
      b = a;
      a = c;
      chain.add(a);
    } while (chain.size() < 3 || a != chain.get(chain.size - 1 - 2));
    // We always merge the larger into the smaller index:
    if (a < b) {
      int tmp = a;
      a = b;
      b = tmp;
    }
    assert (minDist == mat.get(a, b));
    assert (b < a);
    merge(size, mat, builder, minDist, a, b);
    // Shrink working set
    end = AGNES.shrinkActiveSet(ix, builder, end, a);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
}
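This method follows the NN-chain pattern from Müllner's paper: extend a chain by nearest neighbors until two points are reciprocal nearest neighbors, merge them, and keep the remainder of the chain for reuse. To make the control flow easier to follow in isolation, here is a self-contained toy version using single linkage on a plain double[][] matrix. The class name and distance values are made up, and this simplified form cuts the reciprocal pair off the chain per merge instead of replaying the (..., a, b, c, b) tail bookkeeping used above; it is a sketch of the algorithm, not the ELKI implementation:

public class NNChainSketch {
  public static void main(String[] args) {
    // Toy symmetric distance matrix; the values are made up for illustration.
    double[][] d = { //
        { 0, 2, 9, 8 }, //
        { 2, 0, 7, 5 }, //
        { 9, 7, 0, 1 }, //
        { 8, 5, 1, 0 } };
    final int n = d.length;
    boolean[] dead = new boolean[n];
    int[] chain = new int[n + 1];
    int len = 0;
    for (int merges = 0; merges < n - 1; merges++) {
      if (len == 0) { // (Re)start the chain from any live point.
        int s = 0;
        while (dead[s]) {
          s++;
        }
        chain[len++] = s;
      }
      while (true) {
        int a = chain[len - 1];
        int b = len > 1 ? chain[len - 2] : -1;
        // Find the nearest live neighbor of a, preferring the predecessor b
        // on ties (strict < below), as in the ELKI code:
        int c = b;
        double best = b >= 0 ? d[a][b] : Double.POSITIVE_INFINITY;
        for (int i = 0; i < n; i++) {
          if (i != a && i != b && !dead[i] && d[a][i] < best) {
            best = d[a][i];
            c = i;
          }
        }
        if (c == b && b >= 0) { // Reciprocal nearest neighbors: merge.
          System.out.println("merge " + a + " + " + b + " at distance " + best);
          int keep = Math.min(a, b), drop = Math.max(a, b);
          for (int i = 0; i < n; i++) { // Single-linkage distance update.
            d[keep][i] = d[i][keep] = Math.min(d[keep][i], d[drop][i]);
          }
          dead[drop] = true;
          len -= 2; // Drop the pair, but keep and reuse the chain tail.
          break;
        }
        chain[len++] = c;
      }
    }
  }
}

Note that the merges are not found in ascending order of distance; that is a known property of NN-chain, and it is why the method can run in O(n^2) for reducible linkages such as single, complete, average, and Ward.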
use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray in project elki by elki-project.
the class NearestNeighborAffinityMatrixBuilder method computePij.
/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output of the sparse affinities pij
 * @param indices Output of the corresponding neighbor indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
  final double logPerp = FastMath.log(perplexity);
  // Scratch arrays, resizable
  DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
  IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
  // Compute nearest-neighbor sparse affinity matrix
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
  MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
  for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
    dists.clear();
    inds.clear();
    KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
    convertNeighbors(ids, ix, square, neighbours, dists, inds);
    double beta = computeSigma(//
        ix.getOffset(), //
        dists, //
        perplexity, //
        logPerp, pij[ix.getOffset()] = new double[dists.size()]);
    if (mv != null) {
      // Sigma
      mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
    }
    indices[ix.getOffset()] = inds.toArray();
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Sum of the sparse affinity matrix:
  double sum = 0.;
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int j = 0; j < pij_i.length; j++) {
      sum += pij_i[j];
    }
  }
  final double scale = initialScale / (2 * sum);
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        assert (indices[j][offj] == i);
        // Exploit symmetry:
        if (i < j) {
          // Symmetrize
          final double val = pij_i[offi] + pij[j][offj];
          pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
        }
      } else {
        // Not found
        // TODO: the original code produces a symmetric matrix,
        // and it will now not sum to EARLY_EXAGGERATION anymore.
        pij_i[offi] = MathUtil.max(pij_i[offi] * scale, MIN_PIJ);
      }
    }
  }
  if (LOG.isStatistics()) {
    // timer != null, mv != null
    LOG.statistics(timer.end());
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
  }
}
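The second double loop symmetrizes the sparse conditional affinities: wherever both directions of a pair were found, the entry becomes the scaled sum of p(j|i) and p(i|j). Because computeSigma calibrates each row to the target perplexity with the row summing to (approximately) one, the factor initialScale / (2 * sum) reduces to the familiar t-SNE rule p_ij = (p(j|i) + p(i|j)) / (2N) when initialScale is 1. A dense minimal sketch of that rule follows; the class and parameter names are hypothetical, not the ELKI code:

public class SymmetrizeSketch {
  // Dense analogue of the sparse symmetrization above: cond[i][j] holds the
  // conditional affinity p(j|i); returns p[i][j] = scale * (p(j|i) + p(i|j)),
  // with scale = initialScale / (2 * total mass), clamped to minPij.
  static double[][] symmetrize(double[][] cond, double initialScale, double minPij) {
    final int n = cond.length;
    double sum = 0.;
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < n; j++) {
        sum += cond[i][j];
      }
    }
    final double scale = initialScale / (2 * sum);
    double[][] p = new double[n][n];
    for (int i = 0; i < n; i++) {
      for (int j = i + 1; j < n; j++) {
        p[i][j] = p[j][i] = Math.max((cond[i][j] + cond[j][i]) * scale, minPij);
      }
    }
    return p;
  }

  public static void main(String[] args) {
    // Each row sums to 1, so sum = N = 3 here.
    double[][] cond = { { 0., .7, .3 }, { .6, 0., .4 }, { .5, .5, 0. } };
    double[][] p = symmetrize(cond, 1., 1e-12);
    System.out.println(p[0][1] + " == " + p[1][0]); // (0.7 + 0.6) / (2 * 3)
  }
}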
use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray in project elki by elki-project.
the class IntrinsicNearestNeighborAffinityMatrixBuilder method computePij.
/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output of the sparse affinities pij
 * @param indices Output of the corresponding neighbor indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
  final double logPerp = FastMath.log(perplexity);
  // Scratch arrays, resizable
  DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
  IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
  // Compute nearest-neighbor sparse affinity matrix
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
  MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
  Mean mid = LOG.isStatistics() ? new Mean() : null;
  for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
    dists.clear();
    inds.clear();
    KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
    convertNeighbors(ids, ix, square, neighbours, dists, inds, mid);
    double beta = computeSigma(//
        ix.getOffset(), //
        dists, //
        perplexity, //
        logPerp, pij[ix.getOffset()] = new double[dists.size()]);
    if (mv != null) {
      // Sigma
      mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
    }
    indices[ix.getOffset()] = inds.toArray();
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (mid != null) {
    LOG.statistics(new DoubleStatistic(getClass() + ".average-original-id", mid.getMean()));
  }
  // Sum of the sparse affinity matrix:
  double sum = 0.;
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      if (j > i) {
        // Exploit symmetry.
        continue;
      }
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        sum += FastMath.sqrt(pij_i[offi] * pij[j][offj]);
      }
    }
  }
  final double scale = initialScale / (2 * sum);
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        assert (indices[j][offj] == i);
        // Exploit symmetry:
        if (i < j) {
          // Symmetrize
          final double val = FastMath.sqrt(pij_i[offi] * pij[j][offj]);
          pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
        }
      } else {
        // Not found, so zero.
        pij_i[offi] = 0;
      }
    }
  }
  if (LOG.isStatistics()) {
    // timer != null, mv != null
    LOG.statistics(timer.end());
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
  }
}
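Compared with NearestNeighborAffinityMatrixBuilder.computePij above, this intrinsic variant combines the two directions by geometric rather than arithmetic mean, and it drops entries found in only one of the two neighbor lists, so the resulting matrix stays exactly symmetric (at the cost of some probability mass, as the TODO in the arithmetic version notes). A minimal sketch of just that combining rule, with hypothetical names:

public class GeometricSymmetrizeSketch {
  // pji = p(j|i), pij = p(i|j): combined only when both directions are
  // present; a one-sided entry is dropped to keep the matrix symmetric.
  static double combine(double pji, double pij, double scale, double minPij) {
    return (pji > 0 && pij > 0) ? Math.max(Math.sqrt(pji * pij) * scale, minPij) : 0.;
  }

  public static void main(String[] args) {
    System.out.println(combine(0.2, 0.05, 1., 1e-12)); // sqrt(0.01) = 0.1
    System.out.println(combine(0.2, 0., 1., 1e-12)); // one-sided: 0.0
  }
}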
use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray in project elki by elki-project.
the class MiniMaxNNChain method nnChainCore.
/**
 * Uses NNChain as in "Modern hierarchical, agglomerative clustering
 * algorithms" by Daniel Müllner
 *
 * @param mat distance matrix
 * @param prots computed prototypes
 * @param dq distance query of the data set
 * @param builder Result builder
 * @param clusters current clusters
 */
private void nnChainCore(MatrixParadigm mat, DBIDArrayMIter prots, DistanceQuery<O> dq, PointerHierarchyRepresentationBuilder builder, Int2ObjectOpenHashMap<ModifiableDBIDs> clusters) {
  final DBIDArrayIter ix = mat.ix;
  final double[] distances = mat.matrix;
  final int size = mat.size;
  // The maximum chain size = number of ids + 1
  IntegerArray chain = new IntegerArray(size + 1);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running MiniMax-NNChain", size - 1, LOG) : null;
  for (int k = 1, end = size; k < size; k++) {
    int a = -1, b = -1;
    if (chain.size() <= 3) {
      // Accessing two arbitrary not yet merged elements could be optimized to
      // work in O(1) like in Müllner;
      // however this usually does not have a huge impact (empirically just
      // about 1/5000 of total performance)
      a = NNChain.findUnlinked(0, end, ix, builder);
      b = NNChain.findUnlinked(a + 1, end, ix, builder);
      chain.clear();
      chain.add(a);
    } else {
      // Chain is expected to look like (.... a, b, c, b) with b and c merged.
      int lastIndex = chain.size;
      int c = chain.get(lastIndex - 2);
      b = chain.get(lastIndex - 3);
      a = chain.get(lastIndex - 4);
      // Ensure we had a loop at the end:
      assert (chain.get(lastIndex - 1) == c || chain.get(lastIndex - 1) == b);
      // if c < b, then we merged b -> c, otherwise c -> b
      b = c < b ? c : b;
      // Cut the tail:
      chain.size -= 3;
    }
    // For ties, always prefer the second-last element b:
    double minDist = mat.get(a, b);
    do {
      int c = b;
      final int ta = MatrixParadigm.triangleSize(a);
      for (int i = 0; i < a; i++) {
        if (i != b && !builder.isLinked(ix.seek(i))) {
          double dist = distances[ta + i];
          if (dist < minDist) {
            minDist = dist;
            c = i;
          }
        }
      }
      for (int i = a + 1; i < size; i++) {
        if (i != b && !builder.isLinked(ix.seek(i))) {
          double dist = distances[MatrixParadigm.triangleSize(i) + a];
          if (dist < minDist) {
            minDist = dist;
            c = i;
          }
        }
      }
      b = a;
      a = c;
      chain.add(a);
    } while (chain.size() < 3 || a != chain.get(chain.size - 1 - 2));
    // We always merge the larger into the smaller index:
    if (a < b) {
      int tmp = a;
      a = b;
      b = tmp;
    }
    assert (minDist == mat.get(a, b));
    assert (b < a);
    MiniMax.merge(size, mat, prots, builder, clusters, dq, a, b);
    // Shrink working set
    end = AGNES.shrinkActiveSet(ix, builder, end, a);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
}
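The chain logic here is identical to NNChain.nnChainCore above; the difference is the merge step, which maintains minimax prototypes: the distance between two clusters is the smallest radius r such that some member (the prototype) of the merged cluster is within r of every other member. A brute-force sketch of that criterion follows; the ELKI implementation in MiniMax.merge updates prototypes incrementally instead, and the class name and data below are made up:

public class MiniMaxLinkageSketch {
  /**
   * Brute-force minimax linkage: over all candidate prototypes p in the union
   * of clusters a and b, minimize the maximum distance from p to any member.
   */
  static double miniMax(double[][] d, int[] a, int[] b) {
    int[] union = new int[a.length + b.length];
    System.arraycopy(a, 0, union, 0, a.length);
    System.arraycopy(b, 0, union, a.length, b.length);
    double best = Double.POSITIVE_INFINITY;
    for (int p : union) {
      double radius = 0.;
      for (int x : union) {
        radius = Math.max(radius, d[p][x]);
      }
      best = Math.min(best, radius); // p attaining this would be the prototype
    }
    return best;
  }

  public static void main(String[] args) {
    double[][] d = { { 0, 1, 4 }, { 1, 0, 3 }, { 4, 3, 0 } };
    // Merging {0,1} with {2}: prototype 1 gives radius max(1, 3) = 3.
    System.out.println(miniMax(d, new int[] { 0, 1 }, new int[] { 2 }));
  }
}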