Use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
From the class SpacefillingKNNPreprocessor, method preprocess.
/**
 * Build the randomized curve orderings and the DBID -> curve position index.
 *
 * Creates {@code variants} lists of (DBID, vector) pairs, sorts each of them
 * with a randomly chosen generator from {@code curvegen} using randomly
 * shifted and scaled bounds and a random axis permutation, and finally
 * records for every DBID its position in each sorted list. If a projection
 * family is configured ({@code proj != null}), every curve works on its own
 * random projection of the data instead of the original vectors.
 *
 * The construction time is logged as a statistic when statistics logging is
 * enabled.
 */
protected void preprocess() {
  final long starttime = System.currentTimeMillis();
  final int size = relation.size();
  final int numgen = curvegen.size();
  // One curve per variant (not numgen * variants); the generator is drawn
  // randomly for each curve below.
  final int numcurves = variants;
  curves = new ArrayList<>(numcurves);
  for (int i = 0; i < numcurves; i++) {
    curves.add(new ArrayList<SpatialPair<DBID, NumberVector>>(size));
  }
  if (proj == null) {
    // Without projection, all curves share the same pair objects.
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      final NumberVector v = relation.get(iditer);
      SpatialPair<DBID, NumberVector> ref = new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), v);
      for (List<SpatialPair<DBID, NumberVector>> curve : curves) {
        curve.add(ref);
      }
    }
    // Sort spatially
    final double[] mms = SpatialSorter.computeMinMax(curves.get(0));
    // Find maximum extent over all dimensions (mms is laid out as min, max pairs).
    double extend = 0;
    for (int d2 = 0; d2 < mms.length; d2 += 2) {
      extend = Math.max(extend, mms[d2 + 1] - mms[d2]);
    }
    final double[] mmscratch = new double[mms.length];
    final int idim = mms.length >>> 1;
    final int dim = (odim < 0) ? idim : Math.min(odim, idim);
    final int[] permutation = range(0, idim);
    // When only a subset of the axes is used, the prefix of the shuffled
    // permutation is copied into a shorter array; otherwise share the array.
    final int[] apermutation = (dim != idim) ? new int[dim] : permutation;
    for (int j = 0; j < numcurves; j++) {
      final int ctype = numgen > 1 ? random.nextInt(numgen) : 0;
      // Scale all axes by the same factor:
      final double scale = 1. + random.nextDouble();
      for (int d2 = 0; d2 < mms.length; d2 += 2) {
        // Note: use global extend, to be unbiased against different scales.
        mmscratch[d2] = mms[d2] - extend * random.nextDouble();
        mmscratch[d2 + 1] = mmscratch[d2] + extend * scale;
      }
      // Generate permutation:
      randomPermutation(permutation, random);
      System.arraycopy(permutation, 0, apermutation, 0, dim);
      curvegen.get(ctype).sort(curves.get(j), 0, size, mmscratch, apermutation);
    }
  } else {
    // With projections, min/max management gets more tricky and expensive.
    final int idim = RelationUtil.dimensionality(relation);
    final int dim = (odim < 0) ? idim : odim;
    final int[] permutation = range(0, dim);
    NumberVector.Factory<O> factory = RelationUtil.getNumberVectorFactory(relation);
    // Size by the effective output dimensionality: odim may be negative
    // ("use input dimensionality"), which must not be used as array length.
    final double[] mms = new double[dim << 1];
    for (int j = 0; j < numcurves; j++) {
      final List<SpatialPair<DBID, NumberVector>> curve = curves.get(j);
      final RandomProjectionFamily.Projection mat = proj.generateProjection(idim, dim);
      final int ctype = numgen > 1 ? random.nextInt(numgen) : 0;
      // Initialize min/max:
      for (int d2 = 0; d2 < mms.length; d2 += 2) {
        mms[d2] = Double.POSITIVE_INFINITY;
        mms[d2 + 1] = Double.NEGATIVE_INFINITY;
      }
      // Project data set, tracking the bounds of the projected data:
      for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final double[] projected = mat.project(relation.get(iditer));
        curve.add(new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), factory.newNumberVector(projected)));
        for (int d2 = 0, d = 0; d2 < mms.length; d2 += 2, d++) {
          mms[d2] = Math.min(mms[d2], projected[d]);
          mms[d2 + 1] = Math.max(mms[d2 + 1], projected[d]);
        }
      }
      // Find maximum extent.
      double extend = 0.;
      for (int d2 = 0; d2 < mms.length; d2 += 2) {
        extend = Math.max(extend, mms[d2 + 1] - mms[d2]);
      }
      // Scale all axes by the same factor:
      final double scale = 1. + random.nextDouble();
      for (int d2 = 0; d2 < mms.length; d2 += 2) {
        // Note: use global extend, to be unbiased against different scales.
        mms[d2] -= extend * random.nextDouble();
        mms[d2 + 1] = mms[d2] + extend * scale;
      }
      // Generate permutation:
      randomPermutation(permutation, random);
      // Sort spatially.
      curvegen.get(ctype).sort(curve, 0, size, mms, permutation);
    }
  }
  // Build position index, DBID -> position in each of the curves
  positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
  for (int cnum = 0; cnum < numcurves; cnum++) {
    Iterator<SpatialPair<DBID, NumberVector>> it = curves.get(cnum).iterator();
    for (int i = 0; it.hasNext(); i++) {
      SpatialPair<DBID, NumberVector> r = it.next();
      final int[] data;
      if (cnum == 0) {
        // First curve: allocate the per-object position array.
        data = new int[numcurves];
        positions.put(r.first, data);
      } else {
        data = positions.get(r.first);
      }
      data[cnum] = i;
    }
  }
  final long end = System.currentTimeMillis();
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(this.getClass().getCanonicalName() + ".construction-time.ms", end - starttime));
  }
}
Use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
From the class SpacefillingMaterializeKNNPreprocessor, method preprocess.
/**
 * Materialize approximate kNN lists using space-filling curves.
 *
 * For every variant, the bounds are randomly enlarged and a random axis
 * permutation is drawn; each curve generator then sorts its own copy of the
 * data. For every object, the neighbors within a window around its position
 * on each curve are collected as candidates, and the k nearest of these
 * (by {@code distanceQuery}) are stored.
 */
@Override
protected void preprocess() {
  // Prepare space filling curves:
  final long starttime = System.currentTimeMillis();
  final int size = relation.size();
  final int numgen = curvegen.size();
  final int numcurves = numgen * variants;

  List<List<SpatialPair<DBID, NumberVector>>> curves = new ArrayList<>(numcurves);
  for (int c = 0; c < numcurves; c++) {
    curves.add(new ArrayList<SpatialPair<DBID, NumberVector>>(size));
  }
  // Every curve receives the same pair object for each data point.
  for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
    SpatialPair<DBID, NumberVector> pair = new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(obj), relation.get(obj));
    for (int c = 0; c < numcurves; c++) {
      curves.get(c).add(pair);
    }
  }

  // Sort spatially
  final double[] mms = SpatialSorter.computeMinMax(curves.get(0));
  final double[] mmscratch = new double[mms.length];
  final int numdim = mms.length >>> 1;
  final int[] permutation = new int[numdim];
  for (int variant = 0; variant < variants; variant++) {
    // Randomly enlarge the bounds on both sides, per dimension.
    for (int lo = 0; lo < mms.length; lo += 2) {
      final int hi = lo + 1;
      final double len = mms[hi] - mms[lo];
      mmscratch[lo] = mms[lo] - len * random.nextDouble();
      mmscratch[hi] = mms[hi] + len * random.nextDouble();
    }
    // Reset to the identity permutation ...
    for (int d = 0; d < numdim; d++) {
      permutation[d] = d;
    }
    // ... then shuffle it, Knuth / Fisher-Yates style.
    int last = numdim - 1;
    while (last > 0) {
      // Swap with a random element at or before this position.
      final int pick = random.nextInt(last + 1);
      final int held = permutation[pick];
      permutation[pick] = permutation[last];
      permutation[last] = held;
      last--;
    }
    // Each generator sorts its own list for this variant.
    final int offset = numgen * variant;
    for (int g = 0; g < numgen; g++) {
      curvegen.get(g).sort(curves.get(g + offset), 0, size, mmscratch, permutation);
    }
  }

  // Build position index, DBID -> position in each of the curves
  WritableDataStore<int[]> positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
  for (int c = 0; c < numcurves; c++) {
    int rank = 0;
    for (SpatialPair<DBID, NumberVector> entry : curves.get(c)) {
      final int[] slots;
      if (c != 0) {
        slots = positions.get(entry.first);
      } else {
        // First curve: allocate the per-object position array.
        slots = new int[numcurves];
        positions.put(entry.first, slots);
      }
      slots[c] = rank;
      rank++;
    }
  }

  // Convert to final storage
  final int wsize = (int) Math.ceil(window * k);
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
  HashSetModifiableDBIDs cands = DBIDUtil.newHashSet(2 * wsize * numcurves);
  for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
    // Collect the window around the object on every curve.
    cands.clear();
    final int[] posi = positions.get(obj);
    for (int c = 0; c < posi.length; c++) {
      final List<SpatialPair<DBID, NumberVector>> curve = curves.get(c);
      final int center = posi[c];
      final int stop = Math.min(center + wsize + 1, curve.size());
      int pos = Math.max(0, center - wsize);
      while (pos < stop) {
        cands.add(curve.get(pos).first);
        pos++;
      }
    }
    // Refine the candidates with true distances.
    final KNNHeap heap = DBIDUtil.newHeap(k);
    final O vec = relation.get(obj);
    int distc = 0;
    for (DBIDIter cand = cands.iter(); cand.valid(); cand.advance()) {
      heap.insert(distanceQuery.distance(vec, cand), cand);
      distc++;
    }
    storage.put(obj, heap.toKNNList());
    // Track distance computations relative to k.
    mean.put(distc / (double) k);
  }

  final long end = System.currentTimeMillis();
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(this.getClass().getCanonicalName() + ".construction-time.ms", end - starttime));
  }
}
Use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
From the class MaterializeKNNPreprocessor, method preprocess.
/**
 * The actual preprocessing step.
 *
 * Creates the storage and materializes the k nearest neighbors of every
 * object in the relation. If {@code usebulk} is set, a bulk query is tried
 * first; when the bulk query yields no result ({@code null}), the objects
 * are queried one by one instead, so that the storage is always populated.
 * In the one-by-one path with a metric distance, objects at distance 0 of
 * an already processed object share its neighbor list.
 */
@Override
protected void preprocess() {
  // Could be subclass
  final Logging log = getLogger();
  createStorage();
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  if (log.isStatistics()) {
    log.statistics(new LongStatistic(this.getClass().getName() + ".k", k));
  }
  Duration duration = log.isStatistics() ? log.newDuration(this.getClass().getName() + ".precomputation-time").begin() : null;
  FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Materializing k nearest neighbors (k=" + k + ")", ids.size(), log) : null;
  // Try bulk; a null result is treated as "bulk not available".
  final List<? extends KNNList> kNNList = usebulk ? knnQuery.getKNNForBulkDBIDs(ids, k) : null;
  if (kNNList != null) {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      storage.put(id, kNNList.get(i));
      log.incrementProcessed(progress);
    }
  } else {
    // Individual queries: either bulk was not requested, or it returned
    // nothing - in both cases every object must still get a result.
    final boolean ismetric = getDistanceQuery().getDistanceFunction().isMetric();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      if (ismetric && storage.get(iter) != null) {
        log.incrementProcessed(progress);
        // Previously computed (duplicate point?)
        continue;
      }
      KNNList knn = knnQuery.getKNNForDBID(iter, k);
      storage.put(iter, knn);
      if (ismetric) {
        // Reuse the result for the leading zero-distance entries
        // (presumably the list is ordered by ascending distance).
        for (DoubleDBIDListIter it = knn.iter(); it.valid() && it.doubleValue() == 0.; it.advance()) {
          storage.put(it, knn);
        }
      }
      log.incrementProcessed(progress);
    }
  }
  log.ensureCompleted(progress);
  if (duration != null) {
    log.statistics(duration.end());
  }
}
Use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
From the class AbstractRStarTree, method logStatistics.
/**
 * Log the statistics of this tree: the superclass statistics, the tree
 * height, and the collected {@code statistics} object. Does nothing when
 * statistics logging is disabled.
 */
@Override
public void logStatistics() {
  final Logging logger = getLogger();
  if (!logger.isStatistics()) {
    // Statistics logging is off; nothing to report.
    return;
  }
  super.logStatistics();
  final String key = this.getClass().getName() + ".height";
  logger.statistics(new LongStatistic(key, height));
  statistics.logStatistics();
}
Aggregations