Search in sources :

Example 41 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class SpacefillingKNNPreprocessor method preprocess.

/**
 * Builds the space-filling curve orderings and the per-object position index.
 * <p>
 * Creates {@code variants} curves. Without a projection family, all curves
 * share the original vectors, and each curve is sorted by a randomly chosen
 * curve generator using a randomly shifted and scaled bounding box and a
 * random axis permutation. With a projection family, every curve receives
 * its own random projection of the data and the bounding box is computed on
 * the projected vectors. Afterwards, the position of every object within
 * every curve is recorded in {@code positions}. The construction time is
 * logged as a statistic when statistics logging is enabled.
 */
protected void preprocess() {
    final long starttime = System.currentTimeMillis();
    final int size = relation.size();
    final int numgen = curvegen.size();
    // One curve per variant (not numgen * variants): each curve picks one
    // generator at random below.
    final int numcurves = variants;
    curves = new ArrayList<>(numcurves);
    for (int i = 0; i < numcurves; i++) {
        curves.add(new ArrayList<SpatialPair<DBID, NumberVector>>(size));
    }
    if (proj == null) {
        // No projection: every curve references the same (id, vector) pairs.
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            final NumberVector v = relation.get(iditer);
            SpatialPair<DBID, NumberVector> ref = new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), v);
            for (List<SpatialPair<DBID, NumberVector>> curve : curves) {
                curve.add(ref);
            }
        }
        // Sort spatially; mms holds (min, max) pairs, one pair per dimension.
        final double[] mms = SpatialSorter.computeMinMax(curves.get(0));
        // Find maximum extent over all dimensions.
        double extent = 0;
        for (int d2 = 0; d2 < mms.length; d2 += 2) {
            extent = Math.max(extent, mms[d2 + 1] - mms[d2]);
        }
        final double[] mmscratch = new double[mms.length];
        final int idim = mms.length >>> 1;
        final int dim = (odim < 0) ? idim : Math.min(odim, idim);
        final int[] permutation = range(0, idim);
        // When fewer dimensions are used, only a prefix of the permutation is
        // passed on; otherwise the permutation array itself is used.
        final int[] apermutation = (dim != idim) ? new int[dim] : permutation;
        for (int j = 0; j < numcurves; j++) {
            final int ctype = numgen > 1 ? random.nextInt(numgen) : 0;
            // Scale all axes by the same factor:
            final double scale = 1. + random.nextDouble();
            for (int d2 = 0; d2 < mms.length; d2 += 2) {
                // Note: use global extent, to be unbiased against different scales.
                mmscratch[d2] = mms[d2] - extent * random.nextDouble();
                mmscratch[d2 + 1] = mmscratch[d2] + extent * scale;
            }
            // Generate permutation:
            randomPermutation(permutation, random);
            System.arraycopy(permutation, 0, apermutation, 0, dim);
            curvegen.get(ctype).sort(curves.get(j), 0, size, mmscratch, apermutation);
        }
    } else {
        // With projections, min/max management gets more tricky and expensive.
        final int idim = RelationUtil.dimensionality(relation);
        final int dim = (odim < 0) ? idim : odim;
        final int[] permutation = range(0, dim);
        NumberVector.Factory<O> factory = RelationUtil.getNumberVectorFactory(relation);
        // Sized by the effective output dimensionality. Using odim directly
        // would request a negative array size whenever odim < 0.
        final double[] mms = new double[dim << 1];
        for (int j = 0; j < numcurves; j++) {
            final List<SpatialPair<DBID, NumberVector>> curve = curves.get(j);
            final RandomProjectionFamily.Projection mat = proj.generateProjection(idim, dim);
            final int ctype = numgen > 1 ? random.nextInt(numgen) : 0;
            // Initialize min/max:
            for (int d2 = 0; d2 < mms.length; d2 += 2) {
                mms[d2] = Double.POSITIVE_INFINITY;
                mms[d2 + 1] = Double.NEGATIVE_INFINITY;
            }
            // Project data set, tracking the bounding box of the projected data:
            for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
                final double[] projected = mat.project(relation.get(iditer));
                curve.add(new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), factory.newNumberVector(projected)));
                for (int d2 = 0, d = 0; d2 < mms.length; d2 += 2, d++) {
                    mms[d2] = Math.min(mms[d2], projected[d]);
                    mms[d2 + 1] = Math.max(mms[d2 + 1], projected[d]);
                }
            }
            // Find maximum extent.
            double extent = 0.;
            for (int d2 = 0; d2 < mms.length; d2 += 2) {
                extent = Math.max(extent, mms[d2 + 1] - mms[d2]);
            }
            // Scale all axes by the same factor:
            final double scale = 1. + random.nextDouble();
            for (int d2 = 0; d2 < mms.length; d2 += 2) {
                // Note: use global extent, to be unbiased against different scales.
                mms[d2] -= extent * random.nextDouble();
                mms[d2 + 1] = mms[d2] + extent * scale;
            }
            // Generate permutation:
            randomPermutation(permutation, random);
            // Sort spatially.
            curvegen.get(ctype).sort(curve, 0, size, mms, permutation);
        }
    }
    // Build position index, DBID -> position in each of the curves
    positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
    for (int cnum = 0; cnum < numcurves; cnum++) {
        Iterator<SpatialPair<DBID, NumberVector>> it = curves.get(cnum).iterator();
        for (int i = 0; it.hasNext(); i++) {
            SpatialPair<DBID, NumberVector> r = it.next();
            final int[] data;
            if (cnum == 0) {
                // First curve: allocate the per-object position array.
                data = new int[numcurves];
                positions.put(r.first, data);
            } else {
                data = positions.get(r.first);
            }
            data[cnum] = i;
        }
    }
    final long end = System.currentTimeMillis();
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(this.getClass().getCanonicalName() + ".construction-time.ms", end - starttime));
    }
}
Also used : RandomProjectionFamily(de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 42 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class SpacefillingMaterializeKNNPreprocessor method preprocess.

/**
 * Materializes approximate kNN lists using space-filling curves.
 * <p>
 * Builds {@code numgen * variants} orderings of the data (every curve
 * generator applied once per variant, each variant using a randomly
 * enlarged bounding box and a random axis permutation), records the position
 * of each object in each ordering, and then, for every object, collects the
 * objects within a window around its position in every ordering as
 * candidates. The candidates are ranked by the actual distance and the
 * resulting kNN list is stored. The construction time is logged as a
 * statistic when statistics logging is enabled.
 */
@Override
protected void preprocess() {
    // Prepare space filling curves:
    final long starttime = System.currentTimeMillis();
    final int size = relation.size();
    final int numgen = curvegen.size();
    final int numcurves = numgen * variants;
    List<List<SpatialPair<DBID, NumberVector>>> curves = new ArrayList<>(numcurves);
    int created = 0;
    while (created < numcurves) {
        curves.add(new ArrayList<SpatialPair<DBID, NumberVector>>(size));
        created++;
    }
    // All curves share the same (id, vector) pair objects.
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        SpatialPair<DBID, NumberVector> ref = new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), relation.get(iditer));
        for (int c = 0; c < curves.size(); c++) {
            curves.get(c).add(ref);
        }
    }
    // Sort spatially; bounds holds (min, max) pairs, one pair per dimension.
    final double[] bounds = SpatialSorter.computeMinMax(curves.get(0));
    final double[] scratch = new double[bounds.length];
    final int numdim = bounds.length >>> 1;
    final int[] permutation = new int[numdim];
    for (int variant = 0; variant < variants; variant++) {
        // Randomly enlarge the bounding box on both sides of every axis.
        for (int lo = 0, hi = 1; lo < bounds.length; lo += 2, hi += 2) {
            final double len = bounds[hi] - bounds[lo];
            scratch[lo] = bounds[lo] - len * random.nextDouble();
            scratch[hi] = bounds[hi] + len * random.nextDouble();
        }
        // Reset to the identity permutation:
        for (int d = 0; d < numdim; d++) {
            permutation[d] = d;
        }
        // Knuth / Fisher-Yates style shuffle
        for (int last = numdim - 1; last > 0; last--) {
            // Swap with a random element at or before this position.
            final int pick = random.nextInt(last + 1);
            final int held = permutation[pick];
            permutation[pick] = permutation[last];
            permutation[last] = held;
        }
        // Apply every generator once for this variant.
        final int offset = numgen * variant;
        for (int g = 0; g < numgen; g++) {
            curvegen.get(g).sort(curves.get(g + offset), 0, size, scratch, permutation);
        }
    }
    // Build position index, DBID -> position in each of the curves
    WritableDataStore<int[]> positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
    for (int cnum = 0; cnum < numcurves; cnum++) {
        int rank = 0;
        for (SpatialPair<DBID, NumberVector> entry : curves.get(cnum)) {
            final int[] slots;
            if (cnum != 0) {
                slots = positions.get(entry.first);
            } else {
                // First curve: allocate the per-object position array.
                slots = new int[numcurves];
                positions.put(entry.first, slots);
            }
            slots[cnum] = rank;
            rank++;
        }
    }
    // Convert to final storage
    final int wsize = (int) Math.ceil(window * k);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    HashSetModifiableDBIDs cands = DBIDUtil.newHashSet(2 * wsize * numcurves);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // Get candidates: a window around the object's position in each curve.
        cands.clear();
        final int[] posi = positions.get(iditer);
        for (int c = 0; c < posi.length; c++) {
            final List<SpatialPair<DBID, NumberVector>> curve = curves.get(c);
            final int from = Math.max(0, posi[c] - wsize);
            final int to = Math.min(posi[c] + wsize + 1, curve.size());
            for (int p = from; p < to; p++) {
                cands.add(curve.get(p).first);
            }
        }
        // Refine candidates with the actual distance, counting computations.
        int distc = 0;
        final KNNHeap heap = DBIDUtil.newHeap(k);
        final O vec = relation.get(iditer);
        DBIDIter cand = cands.iter();
        while (cand.valid()) {
            heap.insert(distanceQuery.distance(vec, cand), cand);
            distc++;
            cand.advance();
        }
        storage.put(iditer, heap.toKNNList());
        mean.put(distc / (double) k);
    }
    final long finished = System.currentTimeMillis();
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(this.getClass().getCanonicalName() + ".construction-time.ms", finished - starttime));
    }
}
Also used : DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayList(java.util.ArrayList) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayList(java.util.ArrayList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List)

Example 43 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class MaterializeKNNPreprocessor method preprocess.

/**
 * The actual preprocessing step.
 * <p>
 * Creates the storage and materializes the k nearest neighbors of every
 * object. If bulk querying is enabled and the bulk query yields a result,
 * that result is stored directly. Otherwise (bulk disabled, or the bulk
 * query returned {@code null}) the neighbors are computed one object at a
 * time; for metric distance functions, the result list is shared with all
 * neighbors at distance zero, and objects that already have a stored list
 * are skipped. Logs {@code k} and the precomputation time as statistics when
 * statistics logging is enabled.
 */
@Override
protected void preprocess() {
    // Could be subclass
    final Logging log = getLogger();
    createStorage();
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    if (log.isStatistics()) {
        log.statistics(new LongStatistic(this.getClass().getName() + ".k", k));
    }
    Duration duration = log.isStatistics() ? log.newDuration(this.getClass().getName() + ".precomputation-time").begin() : null;
    FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Materializing k nearest neighbors (k=" + k + ")", ids.size(), log) : null;
    // Try bulk
    List<? extends KNNList> kNNList = null;
    if (usebulk) {
        kNNList = knnQuery.getKNNForBulkDBIDs(ids, k);
    }
    if (kNNList != null) {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            storage.put(id, kNNList.get(i));
            log.incrementProcessed(progress);
        }
    } else {
        // Bulk disabled or without result: query each object individually,
        // so the storage is never silently left unpopulated.
        final boolean ismetric = getDistanceQuery().getDistanceFunction().isMetric();
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            if (ismetric && storage.get(iter) != null) {
                log.incrementProcessed(progress);
                // Previously computed (duplicate point?)
                continue;
            }
            KNNList knn = knnQuery.getKNNForDBID(iter, k);
            storage.put(iter, knn);
            if (ismetric) {
                // Neighbors at distance zero get the same list assigned.
                for (DoubleDBIDListIter it = knn.iter(); it.valid() && it.doubleValue() == 0.; it.advance()) {
                    // Reuse
                    storage.put(it, knn);
                }
            }
            log.incrementProcessed(progress);
        }
    }
    log.ensureCompleted(progress);
    if (duration != null) {
        log.statistics(duration.end());
    }
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 44 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class AbstractRStarTree method logStatistics.

/**
 * Logs the statistics of this tree, including its height.
 * <p>
 * Does nothing unless statistics logging is enabled for this logger; in that
 * case the superclass statistics, the tree height and the collected
 * {@code statistics} object are logged, in this order.
 */
@Override
public void logStatistics() {
    final Logging logger = getLogger();
    if (!logger.isStatistics()) {
        return;
    }
    super.logStatistics();
    final String heightKey = this.getClass().getName() + ".height";
    logger.statistics(new LongStatistic(heightKey, height));
    statistics.logStatistics();
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Aggregations

LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)44 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)27 ArrayList (java.util.ArrayList)20 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)19 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)14 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)12 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)11 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)9 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)8 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)7 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)7 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)5 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)4 Logging (de.lmu.ifi.dbs.elki.logging.Logging)4 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)4