Search in sources :

Example 71 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class NaiveProjectedKNNPreprocessor method preprocess.

/**
 * Builds the projected index for the relation.
 *
 * For every output dimension one (value, DBID) list is filled with the
 * projected coordinate of each object and then sorted. Afterwards a position
 * index is built that stores, for every DBID, its rank in each of these lists.
 *
 * If no projection family is configured ({@code proj == null}), the output
 * dimensions are a (possibly shuffled) selection of the input axes; otherwise
 * a projection generated by {@code proj} is applied to every vector.
 */
protected void preprocess() {
    final long starttime = System.nanoTime();
    final int size = relation.size();
    final int idim = RelationUtil.dimensionality(relation);
    final int requested = (projections > 0) ? projections : idim;
    // Without a projection family only existing axes can be selected, so at
    // most idim output dimensions are available. Clamping avoids indexing
    // past the end of the permutation array below (assumes range(0, idim)
    // yields idim entries - TODO confirm against its definition).
    final int odim = (proj == null) ? Math.min(requested, idim) : requested;
    projected = new ArrayList<>(odim);
    for (int j = 0; j < odim; j++) {
        projected.add(DBIDUtil.newDistanceDBIDList(size));
    }
    if (proj == null) {
        // Generate permutation:
        final int[] permutation = range(0, idim);
        // Shuffling is only needed when a strict subset of the axes is used.
        if (odim < idim) {
            randomPermutation(permutation, random);
        }
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            final NumberVector v = relation.get(iditer);
            for (int j = 0; j < odim; j++) {
                projected.get(j).add(v.doubleValue(permutation[j]), iditer);
            }
        }
    } else {
        final RandomProjectionFamily.Projection mat = proj.generateProjection(idim, odim);
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            double[] v = mat.project(relation.get(iditer));
            for (int j = 0; j < odim; j++) {
                projected.get(j).add(v[j], iditer);
            }
        }
    }
    // Sort each projected list.
    for (int j = 0; j < odim; j++) {
        projected.get(j).sort();
    }
    // Build position index, DBID -> position in each projected list
    positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
    for (int cnum = 0; cnum < odim; cnum++) {
        DoubleDBIDListIter it = projected.get(cnum).iter();
        for (int i = 0; it.valid(); i++, it.advance()) {
            final int[] data;
            if (cnum == 0) {
                // First list: allocate the per-object rank array.
                data = new int[odim];
                positions.put(it, data);
            } else {
                data = positions.get(it);
            }
            data[cnum] = i;
        }
    }
    final long end = System.nanoTime();
    if (LOG.isVerbose()) {
        // Nanoseconds are converted to milliseconds for the message.
        LOG.verbose("SFC preprocessor took " + ((end - starttime) / 1.E6) + " milliseconds.");
    }
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) RandomProjectionFamily(de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily)

Example 72 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class SpacefillingKNNPreprocessor method preprocess.

/**
 * Builds the space-filling-curve index for the relation.
 *
 * {@code variants} curves are created. Each curve is a list of
 * (DBID, vector) pairs that is sorted by a curve generator drawn at random
 * from {@code curvegen}, using a randomly shifted and scaled bounding box and
 * a random axis permutation. Finally a position index is built that stores,
 * for every DBID, its rank in each curve.
 *
 * Bounding boxes are stored interleaved: index {@code 2*d} holds the minimum
 * and index {@code 2*d+1} the maximum of dimension {@code d}.
 */
protected void preprocess() {
    final long starttime = System.currentTimeMillis();
    final int size = relation.size();
    final int numgen = curvegen.size();
    // One curve per variant (not numgen * variants): the generator used for
    // each curve is drawn at random below.
    final int numcurves = variants;
    curves = new ArrayList<>(numcurves);
    for (int i = 0; i < numcurves; i++) {
        curves.add(new ArrayList<SpatialPair<DBID, NumberVector>>(size));
    }
    if (proj == null) {
        // No projection: all curves share the same pair objects.
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            final NumberVector v = relation.get(iditer);
            SpatialPair<DBID, NumberVector> ref = new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), v);
            for (List<SpatialPair<DBID, NumberVector>> curve : curves) {
                curve.add(ref);
            }
        }
        // Sort spatially
        final double[] mms = SpatialSorter.computeMinMax(curves.get(0));
        // Find maximum extent over all dimensions.
        double extent = 0;
        for (int d2 = 0; d2 < mms.length; d2 += 2) {
            extent = Math.max(extent, mms[d2 + 1] - mms[d2]);
        }
        final double[] mmscratch = new double[mms.length];
        final int idim = mms.length >>> 1;
        // Only existing axes can be selected, so clamp to the data dimensionality.
        final int dim = (odim < 0) ? idim : Math.min(odim, idim);
        final int[] permutation = range(0, idim);
        // When a subset of axes is used, the first dim entries of the shuffled
        // permutation are copied into a shorter array; otherwise it is shared.
        final int[] apermutation = (dim != idim) ? new int[dim] : permutation;
        for (int j = 0; j < numcurves; j++) {
            final int ctype = numgen > 1 ? random.nextInt(numgen) : 0;
            // Scale all axes by the same factor:
            final double scale = 1. + random.nextDouble();
            for (int d2 = 0; d2 < mms.length; d2 += 2) {
                // Note: use global extent, to be unbiased against different scales.
                mmscratch[d2] = mms[d2] - extent * random.nextDouble();
                mmscratch[d2 + 1] = mmscratch[d2] + extent * scale;
            }
            // Generate permutation:
            randomPermutation(permutation, random);
            System.arraycopy(permutation, 0, apermutation, 0, dim);
            curvegen.get(ctype).sort(curves.get(j), 0, size, mmscratch, apermutation);
        }
    } else {
        // With projections, min/max management gets more tricky and expensive.
        final int idim = RelationUtil.dimensionality(relation);
        final int dim = (odim < 0) ? idim : odim;
        final int[] permutation = range(0, dim);
        NumberVector.Factory<O> factory = RelationUtil.getNumberVectorFactory(relation);
        // Size by the effective dimensionality: odim may be negative (meaning
        // "use the input dimensionality"), which would make odim << 1 an
        // invalid array length.
        final double[] mms = new double[dim << 1];
        for (int j = 0; j < numcurves; j++) {
            final List<SpatialPair<DBID, NumberVector>> curve = curves.get(j);
            final RandomProjectionFamily.Projection mat = proj.generateProjection(idim, dim);
            final int ctype = numgen > 1 ? random.nextInt(numgen) : 0;
            // Initialize min/max:
            for (int d2 = 0; d2 < mms.length; d2 += 2) {
                mms[d2] = Double.POSITIVE_INFINITY;
                mms[d2 + 1] = Double.NEGATIVE_INFINITY;
            }
            // Project data set, tracking the bounding box of the projected data:
            for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
                // Named differently from the field "proj" to avoid shadowing it.
                final double[] pvec = mat.project(relation.get(iditer));
                curve.add(new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), factory.newNumberVector(pvec)));
                for (int d2 = 0, d = 0; d2 < mms.length; d2 += 2, d++) {
                    mms[d2] = Math.min(mms[d2], pvec[d]);
                    mms[d2 + 1] = Math.max(mms[d2 + 1], pvec[d]);
                }
            }
            // Find maximum extent.
            double extent = 0.;
            for (int d2 = 0; d2 < mms.length; d2 += 2) {
                extent = Math.max(extent, mms[d2 + 1] - mms[d2]);
            }
            // Scale all axes by the same factor:
            final double scale = 1. + random.nextDouble();
            for (int d2 = 0; d2 < mms.length; d2 += 2) {
                // Note: use global extent, to be unbiased against different scales.
                mms[d2] -= extent * random.nextDouble();
                mms[d2 + 1] = mms[d2] + extent * scale;
            }
            // Generate permutation:
            randomPermutation(permutation, random);
            // Sort spatially.
            curvegen.get(ctype).sort(curve, 0, size, mms, permutation);
        }
    }
    // Build position index, DBID -> position in each curve
    positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
    for (int cnum = 0; cnum < numcurves; cnum++) {
        Iterator<SpatialPair<DBID, NumberVector>> it = curves.get(cnum).iterator();
        for (int i = 0; it.hasNext(); i++) {
            SpatialPair<DBID, NumberVector> r = it.next();
            final int[] data;
            if (cnum == 0) {
                // First curve: allocate the per-object rank array.
                data = new int[numcurves];
                positions.put(r.first, data);
            } else {
                data = positions.get(r.first);
            }
            data[cnum] = i;
        }
    }
    final long end = System.currentTimeMillis();
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(this.getClass().getCanonicalName() + ".construction-time.ms", end - starttime));
    }
}
Also used : RandomProjectionFamily(de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 73 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class SpacefillingMaterializeKNNPreprocessor method preprocess.

/**
 * Materializes approximate nearest-neighbor lists using space-filling curves.
 *
 * The method sorts {@code numgen * variants} copies of the data along curves
 * (each variant uses its own randomly widened bounding box and axis
 * permutation), records the rank of every object in every curve, and then,
 * per object, collects the objects within a rank window on each curve as
 * candidates. The candidates are ranked with {@code distanceQuery} and the
 * resulting kNN list is written to {@code storage}.
 */
@Override
protected void preprocess() {
    final long starttime = System.currentTimeMillis();
    final int size = relation.size();
    final int numgen = curvegen.size();
    final int numcurves = numgen * variants;

    // Allocate the curves and fill all of them with the same pair objects.
    List<List<SpatialPair<DBID, NumberVector>>> curves = new ArrayList<>(numcurves);
    for (int c = 0; c < numcurves; c++) {
        curves.add(new ArrayList<SpatialPair<DBID, NumberVector>>(size));
    }
    DBIDIter fill = relation.iterDBIDs();
    while (fill.valid()) {
        final NumberVector vector = relation.get(fill);
        SpatialPair<DBID, NumberVector> shared = new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(fill), vector);
        for (List<SpatialPair<DBID, NumberVector>> target : curves) {
            target.add(shared);
        }
        fill.advance();
    }

    // Spatial sorting; bounds are interleaved as (min, max) per dimension.
    final double[] mms = SpatialSorter.computeMinMax(curves.get(0));
    final double[] mmscratch = new double[mms.length];
    final int numdim = mms.length >>> 1;
    final int[] permutation = new int[numdim];
    for (int variant = 0; variant < variants; variant++) {
        // Randomly widen the bounding box on both sides, per dimension.
        for (int lo = 0, hi = 1; lo < mms.length; lo += 2, hi += 2) {
            final double len = mms[hi] - mms[lo];
            mmscratch[lo] = mms[lo] - len * random.nextDouble();
            mmscratch[hi] = mms[hi] + len * random.nextDouble();
        }
        // Start from the identity permutation ...
        for (int d = 0; d < numdim; d++) {
            permutation[d] = d;
        }
        // ... and shuffle it (Knuth / Fisher-Yates style): swap each slot,
        // from the back, with a randomly chosen slot at or before it.
        int last = numdim - 1;
        while (last > 0) {
            final int pick = random.nextInt(last + 1);
            final int swap = permutation[pick];
            permutation[pick] = permutation[last];
            permutation[last] = swap;
            last--;
        }
        // Every generator sorts its own curve of this variant.
        final int offset = numgen * variant;
        for (int g = 0; g < numgen; g++) {
            curvegen.get(g).sort(curves.get(offset + g), 0, size, mmscratch, permutation);
        }
    }

    // Position index: DBID -> rank of the object in each curve.
    WritableDataStore<int[]> positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
    for (int c = 0; c < numcurves; c++) {
        int rank = 0;
        for (SpatialPair<DBID, NumberVector> entry : curves.get(c)) {
            final int[] slots;
            if (c > 0) {
                slots = positions.get(entry.first);
            } else {
                // The rank array is allocated while walking the first curve.
                slots = new int[numcurves];
                positions.put(entry.first, slots);
            }
            slots[c] = rank;
            rank++;
        }
    }

    // Convert to final storage.
    final int wsize = (int) Math.ceil(window * k);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    HashSetModifiableDBIDs cands = DBIDUtil.newHashSet(2 * wsize * numcurves);
    DBIDIter query = relation.iterDBIDs();
    while (query.valid()) {
        // Gather candidates from a window of +-wsize ranks on every curve.
        cands.clear();
        final int[] posi = positions.get(query);
        for (int c = 0; c < posi.length; c++) {
            final List<SpatialPair<DBID, NumberVector>> curve = curves.get(c);
            final int center = posi[c];
            final int from = Math.max(0, center - wsize);
            final int to = Math.min(center + wsize + 1, curve.size());
            for (int p = from; p < to; p++) {
                cands.add(curve.get(p).first);
            }
        }
        // Rank the candidates by distance, counting the distance computations.
        KNNHeap heap = DBIDUtil.newHeap(k);
        O vec = relation.get(query);
        int distc = 0;
        for (DBIDIter cand = cands.iter(); cand.valid(); cand.advance()) {
            heap.insert(distanceQuery.distance(vec, cand), cand);
            distc++;
        }
        storage.put(query, heap.toKNNList());
        mean.put(distc / (double) k);
        query.advance();
    }

    final long end = System.currentTimeMillis();
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(this.getClass().getCanonicalName() + ".construction-time.ms", end - starttime));
    }
}
Also used : DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayList(java.util.ArrayList) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayList(java.util.ArrayList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List)

Example 74 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class KNNJoinTest method doKNNJoin.

/**
 * Actual test routine: runs a {@code KNNJoin} (constructed with the value 2,
 * per the assertion messages a 2NN join) once with the Euclidean and once
 * with the Manhattan distance, and compares mean and sample variance of the
 * result list sizes with the expected values (tolerance 0.00001).
 *
 * @param inputparams parameters passed on to
 *        {@code AbstractSimpleAlgorithmTest.makeSimpleDatabase}
 */
void doKNNJoin(ListParameterization inputparams) {
    final Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds, inputparams);
    final Relation<NumberVector> relation = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // Euclidean
    {
        final KNNJoin<DoubleVector, ?, ?> join = new KNNJoin<DoubleVector, RStarTreeNode, SpatialEntry>(EuclideanDistanceFunction.STATIC, 2);
        final Relation<KNNList> neighbors = join.run(db);
        final MeanVariance sizes = new MeanVariance();
        DBIDIter id = relation.getDBIDs().iter();
        while (id.valid()) {
            sizes.put(neighbors.get(id).size());
            id.advance();
        }
        org.junit.Assert.assertEquals("Euclidean mean 2NN set size", mean2nnEuclid, sizes.getMean(), 0.00001);
        org.junit.Assert.assertEquals("Euclidean variance 2NN", var2nnEuclid, sizes.getSampleVariance(), 0.00001);
    }
    // Manhattan
    {
        final KNNJoin<DoubleVector, ?, ?> join = new KNNJoin<DoubleVector, RStarTreeNode, SpatialEntry>(ManhattanDistanceFunction.STATIC, 2);
        final Relation<KNNList> neighbors = join.run(db);
        final MeanVariance sizes = new MeanVariance();
        DBIDIter id = relation.getDBIDs().iter();
        while (id.valid()) {
            sizes.put(neighbors.get(id).size());
            id.advance();
        }
        org.junit.Assert.assertEquals("Manhattan mean 2NN", mean2nnManhattan, sizes.getMean(), 0.00001);
        org.junit.Assert.assertEquals("Manhattan variance 2NN", var2nnManhattan, sizes.getSampleVariance(), 0.00001);
    }
}
Also used : RStarTreeNode(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeNode) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) SpatialEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 75 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class KNNJoinTest method testLinearScan.

/**
 * Reference test using linear-scan kNN queries: for the Euclidean and the
 * Manhattan distance, the sizes of the 2NN results of all objects are
 * collected, and their mean and sample variance are compared with the
 * expected values (tolerance 0.00001).
 */
@Test
public void testLinearScan() {
    final Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
    final Relation<NumberVector> relation = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // Euclidean
    {
        final DistanceQuery<NumberVector> distances = db.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
        final KNNQuery<NumberVector> scan = QueryUtil.getLinearScanKNNQuery(distances);
        final MeanVariance sizes = new MeanVariance();
        DBIDIter id = relation.iterDBIDs();
        while (id.valid()) {
            sizes.put(scan.getKNNForDBID(id, 2).size());
            id.advance();
        }
        org.junit.Assert.assertEquals("Euclidean mean 2NN", mean2nnEuclid, sizes.getMean(), 0.00001);
        org.junit.Assert.assertEquals("Euclidean variance 2NN", var2nnEuclid, sizes.getSampleVariance(), 0.00001);
    }
    // Manhattan
    {
        final DistanceQuery<NumberVector> distances = db.getDistanceQuery(relation, ManhattanDistanceFunction.STATIC);
        final KNNQuery<NumberVector> scan = QueryUtil.getLinearScanKNNQuery(distances);
        final MeanVariance sizes = new MeanVariance();
        DBIDIter id = relation.iterDBIDs();
        while (id.valid()) {
            sizes.put(scan.getKNNForDBID(id, 2).size());
            id.advance();
        }
        org.junit.Assert.assertEquals("Manhattan mean 2NN", mean2nnManhattan, sizes.getMean(), 0.00001);
        org.junit.Assert.assertEquals("Manhattan variance 2NN", var2nnManhattan, sizes.getSampleVariance(), 0.00001);
    }
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DistanceQuery(de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Test(org.junit.Test)

Aggregations

NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 85, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 40, ArrayList (java.util.ArrayList): 16, LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic): 9, DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 8, MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 8, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 8, Database (de.lmu.ifi.dbs.elki.database.Database): 7, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 7, DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic): 7, Random (java.util.Random): 7, Test (org.junit.Test): 7, VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation): 5, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 5, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 5, EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult): 5, MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup): 5, List (java.util.List): 5, SparseNumberVector (de.lmu.ifi.dbs.elki.data.SparseNumberVector): 4, RandomProjectionFamily (de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily): 4