Search in sources :

Example 31 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class GaussianAffinityMatrixBuilder method buildDistanceMatrix.

/**
 * Build a distance matrix of squared distances.
 *
 * @param ids DBIDs
 * @param dq Distance query
 * @return Distance matrix
 */
protected double[][] buildDistanceMatrix(ArrayDBIDs ids, DistanceQuery<?> dq) {
    final int size = ids.size();
    double[][] dmat = new double[size][size];
    final boolean square = !dq.getDistanceFunction().isSquared();
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing distance matrix", (size * (size - 1)) >>> 1, LOG) : null;
    Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.distancematrix").begin() : null;
    DBIDArrayIter ix = ids.iter(), iy = ids.iter();
    for (ix.seek(0); ix.valid(); ix.advance()) {
        double[] dmat_x = dmat[ix.getOffset()];
        for (iy.seek(ix.getOffset() + 1); iy.valid(); iy.advance()) {
            final double dist = dq.distance(ix, iy);
            dmat[iy.getOffset()][ix.getOffset()] = dmat_x[iy.getOffset()] = square ? (dist * dist) : dist;
        }
        if (prog != null) {
            int row = ix.getOffset() + 1;
            prog.setProcessed(row * size - ((row * (row + 1)) >>> 1), LOG);
        }
    }
    LOG.ensureCompleted(prog);
    if (timer != null) {
        LOG.statistics(timer.end());
    }
    return dmat;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 32 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class SNE method run.

public Relation<DoubleVector> run(Relation<O> relation) {
    AffinityMatrix pij = affinity.computeAffinityMatrix(relation, 1.);
    // Create initial solution.
    final int size = pij.size();
    double[][] sol = randomInitialSolution(size, dim, random.getSingleThreadedRandom());
    projectedDistances.setLong(0L);
    optimizeSNE(pij, sol);
    LOG.statistics(projectedDistances);
    // Remove the original (unprojected) data unless configured otherwise.
    removePreviousRelation(relation);
    // Transform into output data format.
    DBIDs ids = relation.getDBIDs();
    WritableDataStore<DoubleVector> proj = DataStoreFactory.FACTORY.makeStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_SORTED, DoubleVector.class);
    VectorFieldTypeInformation<DoubleVector> otype = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    for (DBIDArrayIter it = pij.iterDBIDs(); it.valid(); it.advance()) {
        proj.put(it, DoubleVector.wrap(sol[it.getOffset()]));
    }
    return new MaterializedRelation<>("SNE", "SNE", otype, proj, ids);
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 33 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class DistanceQuantileSampler method run.

/**
 * Run the distance quantile sampler.
 *
 * @param database
 * @param rel
 * @return Distances sample
 */
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
    DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
    int size = rel.size();
    long pairs = (size * (long) size) >> 1;
    final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
    if (ssize > Integer.MAX_VALUE) {
        throw new AbortException("Sampling size too large.");
    }
    final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);
    DoubleMaxHeap heap = new DoubleMaxHeap(qsize);
    ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
    DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
    Random r = rand.getSingleThreadedRandom();
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
    for (long i = 0; i < ssize; i++) {
        int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
        double dist = dq.distance(i1.seek(x), i2.seek(y));
        // Skip NaN, and/or zeros.
        if (dist != dist || (nozeros && dist < Double.MIN_NORMAL)) {
            continue;
        }
        heap.add(dist, qsize);
        LOG.incrementProcessed(prog);
    }
    LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
    LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
    LOG.statistics(new DoubleStatistic(PREFIX + ".distance", heap.peek()));
    LOG.ensureCompleted(prog);
    Collection<String> header = Arrays.asList(new String[] { "Distance" });
    Collection<double[]> data = Arrays.asList(new double[][] { new double[] { heap.peek() } });
    return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DoubleMaxHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) CollectionResult(de.lmu.ifi.dbs.elki.result.CollectionResult) Random(java.util.Random) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 34 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class EvaluateRankingQuality method run.

@Override
public HistogramResult run(Database database) {
    final Relation<V> relation = database.getRelation(getInputTypeRestriction()[0]);
    final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<V> knnQuery = database.getKNNQuery(distQuery, relation.size());
    if (LOG.isVerbose()) {
        LOG.verbose("Preprocessing clusters...");
    }
    // Cluster by labels
    Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
    // Compute cluster averages and covariance matrix
    HashMap<Cluster<?>, double[]> averages = new HashMap<>(split.size());
    HashMap<Cluster<?>, double[][]> covmats = new HashMap<>(split.size());
    for (Cluster<?> clus : split) {
        CovarianceMatrix covmat = CovarianceMatrix.make(relation, clus.getIDs());
        averages.put(clus, covmat.getMeanVector());
        covmats.put(clus, covmat.destroyToPopulationMatrix());
    }
    MeanVarianceStaticHistogram hist = new MeanVarianceStaticHistogram(numbins, 0.0, 1.0);
    if (LOG.isVerbose()) {
        LOG.verbose("Processing points...");
    }
    FiniteProgress rocloop = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), LOG) : null;
    ROCEvaluation roc = new ROCEvaluation();
    // sort neighbors
    for (Cluster<?> clus : split) {
        ModifiableDoubleDBIDList cmem = DBIDUtil.newDistanceDBIDList(clus.size());
        double[] av = averages.get(clus);
        double[][] covm = covmats.get(clus);
        for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
            double d = mahalanobisDistance(covm, relation.get(iter).toArray(), av);
            cmem.add(d, iter);
        }
        cmem.sort();
        for (DBIDArrayIter it = cmem.iter(); it.valid(); it.advance()) {
            KNNList knn = knnQuery.getKNNForDBID(it, relation.size());
            double result = EvaluateClustering.evaluateRanking(roc, clus, knn);
            hist.put(((double) it.getOffset()) / clus.size(), result);
            LOG.incrementProcessed(rocloop);
        }
    }
    LOG.ensureCompleted(rocloop);
    // Collections.sort(results);
    // Transform Histogram into a Double Vector array.
    Collection<double[]> res = new ArrayList<>(relation.size());
    for (ObjHistogram.Iter<MeanVariance> iter = hist.iter(); iter.valid(); iter.advance()) {
        res.add(new double[] { iter.getCenter(), iter.getValue().getCount(), iter.getValue().getMean(), iter.getValue().getSampleVariance() });
    }
    return new HistogramResult("Ranking Quality Histogram", "ranking-histogram", res);
}
Also used : ObjHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram) HistogramResult(de.lmu.ifi.dbs.elki.result.HistogramResult) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ROCEvaluation(de.lmu.ifi.dbs.elki.evaluation.scores.ROCEvaluation) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MeanVarianceStaticHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.MeanVarianceStaticHistogram) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) ByLabelOrAllInOneClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 35 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class HiCS method calculateContrast.

/**
 * Calculates the actual contrast of a given subspace.
 *
 * @param relation Relation to process
 * @param subspace Subspace
 * @param subspaceIndex Subspace indexes
 */
private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
    final int card = subspace.cardinality();
    final double alpha1 = FastMath.pow(alpha, (1.0 / card));
    final int windowsize = (int) (relation.size() * alpha1);
    final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;
    int retries = 0;
    double deviationSum = 0.0;
    for (int i = 0; i < m; i++) {
        // Choose a random set bit.
        int chosen = -1;
        for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
            chosen = subspace.nextSetBit(chosen + 1);
        }
        // initialize sample
        DBIDs conditionalSample = relation.getDBIDs();
        for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
            if (j == chosen) {
                continue;
            }
            ArrayDBIDs sortedIndices = subspaceIndex.get(j);
            ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
            // initialize index block
            DBIDArrayIter iter = sortedIndices.iter();
            iter.seek(random.nextInt(relation.size() - windowsize));
            for (int k = 0; k < windowsize; k++, iter.advance()) {
                // select index block
                indexBlock.add(iter);
            }
            conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
        }
        if (conditionalSample.size() < 10) {
            retries++;
            if (LOG.isDebugging()) {
                LOG.debug("Sample size very small. Retry no. " + retries);
            }
            if (retries >= MAX_RETRIES) {
                LOG.warning("Too many retries, for small samples: " + retries);
            } else {
                i--;
                continue;
            }
        }
        // Project conditional set
        double[] sampleValues = new double[conditionalSample.size()];
        {
            int l = 0;
            for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
                sampleValues[l] = relation.get(iter).doubleValue(chosen);
                l++;
            }
        }
        // Project full set
        double[] fullValues = new double[relation.size()];
        {
            int l = 0;
            for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
                fullValues[l] = relation.get(iter).doubleValue(chosen);
                l++;
            }
        }
        double contrast = statTest.deviation(fullValues, sampleValues);
        if (Double.isNaN(contrast)) {
            i--;
            LOG.warning("Contrast was NaN");
            continue;
        }
        deviationSum += contrast;
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    subspace.contrast = deviationSum / m;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)64 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)17 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)15 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)15 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)13 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)12 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)9 Test (org.junit.Test)9 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 IOException (java.io.IOException)5 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)4 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)4 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)3 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)3