use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class GaussianAffinityMatrixBuilder method buildDistanceMatrix.
/**
* Build a distance matrix of squared distances.
*
* @param ids DBIDs
* @param dq Distance query
* @return Distance matrix
*/
protected double[][] buildDistanceMatrix(ArrayDBIDs ids, DistanceQuery<?> dq) {
final int size = ids.size();
double[][] dmat = new double[size][size];
final boolean square = !dq.getDistanceFunction().isSquared();
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing distance matrix", (size * (size - 1)) >>> 1, LOG) : null;
Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.distancematrix").begin() : null;
DBIDArrayIter ix = ids.iter(), iy = ids.iter();
for (ix.seek(0); ix.valid(); ix.advance()) {
double[] dmat_x = dmat[ix.getOffset()];
for (iy.seek(ix.getOffset() + 1); iy.valid(); iy.advance()) {
final double dist = dq.distance(ix, iy);
dmat[iy.getOffset()][ix.getOffset()] = dmat_x[iy.getOffset()] = square ? (dist * dist) : dist;
}
if (prog != null) {
int row = ix.getOffset() + 1;
prog.setProcessed(row * size - ((row * (row + 1)) >>> 1), LOG);
}
}
LOG.ensureCompleted(prog);
if (timer != null) {
LOG.statistics(timer.end());
}
return dmat;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class SNE method run.
public Relation<DoubleVector> run(Relation<O> relation) {
AffinityMatrix pij = affinity.computeAffinityMatrix(relation, 1.);
// Create initial solution.
final int size = pij.size();
double[][] sol = randomInitialSolution(size, dim, random.getSingleThreadedRandom());
projectedDistances.setLong(0L);
optimizeSNE(pij, sol);
LOG.statistics(projectedDistances);
// Remove the original (unprojected) data unless configured otherwise.
removePreviousRelation(relation);
// Transform into output data format.
DBIDs ids = relation.getDBIDs();
WritableDataStore<DoubleVector> proj = DataStoreFactory.FACTORY.makeStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_SORTED, DoubleVector.class);
VectorFieldTypeInformation<DoubleVector> otype = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
for (DBIDArrayIter it = pij.iterDBIDs(); it.valid(); it.advance()) {
proj.put(it, DoubleVector.wrap(sol[it.getOffset()]));
}
return new MaterializedRelation<>("SNE", "SNE", otype, proj, ids);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class DistanceQuantileSampler method run.
/**
* Run the distance quantile sampler.
*
* @param database
* @param rel
* @return Distances sample
*/
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
int size = rel.size();
long pairs = (size * (long) size) >> 1;
final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
if (ssize > Integer.MAX_VALUE) {
throw new AbortException("Sampling size too large.");
}
final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);
DoubleMaxHeap heap = new DoubleMaxHeap(qsize);
ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
Random r = rand.getSingleThreadedRandom();
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
for (long i = 0; i < ssize; i++) {
int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
double dist = dq.distance(i1.seek(x), i2.seek(y));
// Skip NaN, and/or zeros.
if (dist != dist || (nozeros && dist < Double.MIN_NORMAL)) {
continue;
}
heap.add(dist, qsize);
LOG.incrementProcessed(prog);
}
LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
LOG.statistics(new DoubleStatistic(PREFIX + ".distance", heap.peek()));
LOG.ensureCompleted(prog);
Collection<String> header = Arrays.asList(new String[] { "Distance" });
Collection<double[]> data = Arrays.asList(new double[][] { new double[] { heap.peek() } });
return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class EvaluateRankingQuality method run.
@Override
public HistogramResult run(Database database) {
final Relation<V> relation = database.getRelation(getInputTypeRestriction()[0]);
final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
final KNNQuery<V> knnQuery = database.getKNNQuery(distQuery, relation.size());
if (LOG.isVerbose()) {
LOG.verbose("Preprocessing clusters...");
}
// Cluster by labels
Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
// Compute cluster averages and covariance matrix
HashMap<Cluster<?>, double[]> averages = new HashMap<>(split.size());
HashMap<Cluster<?>, double[][]> covmats = new HashMap<>(split.size());
for (Cluster<?> clus : split) {
CovarianceMatrix covmat = CovarianceMatrix.make(relation, clus.getIDs());
averages.put(clus, covmat.getMeanVector());
covmats.put(clus, covmat.destroyToPopulationMatrix());
}
MeanVarianceStaticHistogram hist = new MeanVarianceStaticHistogram(numbins, 0.0, 1.0);
if (LOG.isVerbose()) {
LOG.verbose("Processing points...");
}
FiniteProgress rocloop = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), LOG) : null;
ROCEvaluation roc = new ROCEvaluation();
// sort neighbors
for (Cluster<?> clus : split) {
ModifiableDoubleDBIDList cmem = DBIDUtil.newDistanceDBIDList(clus.size());
double[] av = averages.get(clus);
double[][] covm = covmats.get(clus);
for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
double d = mahalanobisDistance(covm, relation.get(iter).toArray(), av);
cmem.add(d, iter);
}
cmem.sort();
for (DBIDArrayIter it = cmem.iter(); it.valid(); it.advance()) {
KNNList knn = knnQuery.getKNNForDBID(it, relation.size());
double result = EvaluateClustering.evaluateRanking(roc, clus, knn);
hist.put(((double) it.getOffset()) / clus.size(), result);
LOG.incrementProcessed(rocloop);
}
}
LOG.ensureCompleted(rocloop);
// Collections.sort(results);
// Transform Histogram into a Double Vector array.
Collection<double[]> res = new ArrayList<>(relation.size());
for (ObjHistogram.Iter<MeanVariance> iter = hist.iter(); iter.valid(); iter.advance()) {
res.add(new double[] { iter.getCenter(), iter.getValue().getCount(), iter.getValue().getMean(), iter.getValue().getSampleVariance() });
}
return new HistogramResult("Ranking Quality Histogram", "ranking-histogram", res);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class HiCS method calculateContrast.
/**
* Calculates the actual contrast of a given subspace.
*
* @param relation Relation to process
* @param subspace Subspace
* @param subspaceIndex Subspace indexes
*/
private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int card = subspace.cardinality();
final double alpha1 = FastMath.pow(alpha, (1.0 / card));
final int windowsize = (int) (relation.size() * alpha1);
final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;
int retries = 0;
double deviationSum = 0.0;
for (int i = 0; i < m; i++) {
// Choose a random set bit.
int chosen = -1;
for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
chosen = subspace.nextSetBit(chosen + 1);
}
// initialize sample
DBIDs conditionalSample = relation.getDBIDs();
for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
if (j == chosen) {
continue;
}
ArrayDBIDs sortedIndices = subspaceIndex.get(j);
ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
// initialize index block
DBIDArrayIter iter = sortedIndices.iter();
iter.seek(random.nextInt(relation.size() - windowsize));
for (int k = 0; k < windowsize; k++, iter.advance()) {
// select index block
indexBlock.add(iter);
}
conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
}
if (conditionalSample.size() < 10) {
retries++;
if (LOG.isDebugging()) {
LOG.debug("Sample size very small. Retry no. " + retries);
}
if (retries >= MAX_RETRIES) {
LOG.warning("Too many retries, for small samples: " + retries);
} else {
i--;
continue;
}
}
// Project conditional set
double[] sampleValues = new double[conditionalSample.size()];
{
int l = 0;
for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
sampleValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
// Project full set
double[] fullValues = new double[relation.size()];
{
int l = 0;
for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
fullValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
double contrast = statTest.deviation(fullValues, sampleValues);
if (Double.isNaN(contrast)) {
i--;
LOG.warning("Contrast was NaN");
continue;
}
deviationSum += contrast;
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
subspace.contrast = deviationSum / m;
}
Aggregations