Search in sources :

Example 36 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In the class AbstractAggarwalYuOutlier, method buildRanges:

/**
 * Discretizes the data into a grid of equi-depth ranges.
 * <p>
 * For every dimension, the object ids are sorted by their value in that
 * dimension and then cut into {@code phi} consecutive ranges, so that each
 * range holds a fraction f=1/phi of the records (up to rounding; the last
 * range always extends to the end of the sorted ids).
 *
 * @param relation Relation to process
 * @return range map: one list of {@code phi} id ranges per dimension
 */
protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    final int total = relation.size();
    // Fractional number of objects per range; cut points are multiples of it.
    final double binWidth = total * 1.0 / phi;
    final ArrayModifiableDBIDs sorted = DBIDUtil.newArray(relation.getDBIDs());
    final SortDBIDsBySingleDimension comparator = new SortDBIDsBySingleDimension(relation);
    final ArrayList<ArrayList<DBIDs>> grid = new ArrayList<>();
    for (int axis = 0; axis < dimensionality; axis++) {
        // Re-sort the same id array by the current dimension.
        comparator.setDimension(axis);
        sorted.sort(comparator);
        final ArrayList<DBIDs> bins = new ArrayList<>(phi + 1);
        final DBIDArrayIter cursor = sorted.iter();
        int lower = 0;
        for (int bin = 1; bin <= phi; bin++) {
            // The final bin absorbs whatever rounding left over.
            final int upper;
            if (bin < phi) {
                upper = (int) (binWidth * bin);
            } else {
                upper = total;
            }
            final ArrayModifiableDBIDs members = DBIDUtil.newArray(upper - lower);
            cursor.seek(lower);
            while (cursor.getOffset() < upper) {
                members.add(cursor);
                cursor.advance();
            }
            bins.add(members);
            lower = upper;
        }
        grid.add(bins);
    }
    return grid;
}
Also used : SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ArrayList(java.util.ArrayList) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 37 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In the class SelectionTableWindow, method handleDelete:

/**
 * Handles the delete action.
 * <p>
 * The objects at the currently selected table rows are taken out of the
 * selection first and are then deleted from the database.
 */
protected void handleDelete() {
    final ModifiableDBIDs doomed = DBIDUtil.newHashSet();
    final ModifiableDBIDs survivors = DBIDUtil.newHashSet(dbids);
    final DBIDArrayIter cursor = dbids.iter();
    // Translate each selected row index into the object at that offset.
    for (int selectedRow : table.getSelectedRows()) {
        cursor.seek(selectedRow);
        doomed.add(cursor);
        survivors.remove(cursor);
    }
    // Shrink the selection before the objects go away ...
    context.setSelection(new DBIDSelection(survivors));
    // ... and only then remove them from the database.
    DBIDIter victim = doomed.iter();
    while (victim.valid()) {
        database.delete(victim);
        victim.advance();
    }
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDSelection(de.lmu.ifi.dbs.elki.result.DBIDSelection) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 38 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In the class LuceneDistanceKNNQuery, method getKNNForDBID:

/**
 * Retrieves up to {@code k} neighbors of an object via a Lucene search.
 * <p>
 * The query is built by {@code mlt.like} from the object's offset in the id
 * range. Each hit is converted to a distance of {@code 1 / score}; hits
 * whose score is not positive get an infinite distance.
 *
 * @param id Query object
 * @param k Maximum number of results requested from the search
 * @return neighbor list built from the search hits
 * @throws AbortException if the search fails with an I/O error
 */
@Override
public KNNList getKNNForDBID(DBIDRef id, int k) {
    try {
        final TopDocs hits = is.search(mlt.like(range.getOffset(id)), k);
        final ScoreDoc[] docs = hits.scoreDocs;
        final DoubleIntegerDBIDKNNList neighbors = new DoubleIntegerDBIDKNNList(k, docs.length);
        final DBIDArrayIter cursor = range.iter();
        for (int i = 0; i < docs.length; i++) {
            final ScoreDoc hit = docs[i];
            // Non-positive scores cannot be inverted into a finite distance.
            double distance = Double.POSITIVE_INFINITY;
            if (hit.score > 0.) {
                distance = 1. / hit.score;
            }
            // The document number doubles as the offset into the id range.
            cursor.seek(hit.doc);
            neighbors.add(distance, cursor);
        }
        return neighbors;
    } catch (IOException e) {
        throw new AbortException("I/O error in lucene.", e);
    }
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) Query(org.apache.lucene.search.Query) AbstractDistanceKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.AbstractDistanceKNNQuery) DistanceQuery(de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IOException(java.io.IOException) DoubleIntegerDBIDKNNList(de.lmu.ifi.dbs.elki.database.ids.integer.DoubleIntegerDBIDKNNList) ScoreDoc(org.apache.lucene.search.ScoreDoc) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 39 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In the class SameSizeKMeansAlgorithm, method refineResult:

/**
 * Perform k-means style iterations to improve the clustering result.
 * <p>
 * In every iteration the distances are updated, the objects are re-sorted,
 * and each object is offered to the other clusters in its order of
 * preference. An object changes cluster either by swapping with an object
 * waiting in the target cluster's transfer list (when the combined gain is
 * positive), or on its own when the gain is positive and the cluster size
 * bounds permit it. Iteration stops when an iteration performed no transfer,
 * or after {@code maxiter} iterations ({@code maxiter <= 0} means no limit).
 *
 * @param relation Data relation
 * @param means Means list
 * @param clusters Cluster list
 * @param metas Metadata storage
 * @param tids DBIDs array (re-sorted in place on every iteration)
 * @return final means
 */
protected double[][] refineResult(Relation<V> relation, double[][] means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
    NumberVectorDistanceFunction<? super V> df = getDistanceFunction();
    // Our desired cluster size:
    // rounded down
    final int minsize = tids.size() / k;
    // rounded up
    final int maxsize = (tids.size() + k - 1) / k;
    // Comparator: sort by largest gain by transfer
    // NOTE(review): this orders ascending by Meta.priority(); confirm that
    // priority() is defined such that the largest gain sorts first.
    final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {

        @Override
        public int compare(DBIDRef o1, DBIDRef o2) {
            Meta c1 = metas.get(o1), c2 = metas.get(o2);
            return Double.compare(c1.priority(), c2.priority());
        }
    };
    // List for sorting cluster preferences
    final int[] preferences = MathUtil.sequence(0, k);
    // Comparator for this list.
    final PreferenceComparator pcomp = new PreferenceComparator();
    // Initialize transfer lists:
    // one list per cluster, holding objects that want to leave that cluster.
    ArrayModifiableDBIDs[] transfers = new ArrayModifiableDBIDs[k];
    for (int i = 0; i < k; i++) {
        transfers[i] = DBIDUtil.newArray();
    }
    // Single iterator over tids, rewound with seek(0) for every pass.
    DBIDArrayIter id = tids.iter();
    // maxiter <= 0 disables the iteration limit.
    for (int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
        updateDistances(relation, means, metas, df);
        tids.sort(comp);
        // Track if anything has changed
        int active = 0;
        for (id.seek(0); id.valid(); id.advance()) {
            Meta c = metas.get(id);
            // Order the cluster indexes by this object's preference.
            IntegerArrayQuickSort.sort(preferences, pcomp.select(c));
            ModifiableDBIDs source = clusters.get(c.primary);
            assert (source.contains(id));
            tloop: for (int i : preferences) {
                if (i == c.primary) {
                    // Already assigned here
                    continue;
                }
                ModifiableDBIDs dest = clusters.get(i);
                // Can we pair this transfer?
                // I.e. swap with an object waiting to leave cluster i, which
                // keeps both cluster sizes unchanged.
                final double gain = c.gain(i);
                for (DBIDMIter other = transfers[i].iter(); other.valid(); other.advance()) {
                    Meta c2 = metas.get(other);
                    // Swap only if the two moves together are an improvement.
                    if (gain + c2.gain(c.primary) > 0) {
                        transfer(metas, c2, dest, source, other, c.primary);
                        transfer(metas, c, source, dest, id, i);
                        // Two objects changed cluster.
                        active += 2;
                        // last, as this invalidates the reference!
                        other.remove();
                        // We are assigned here now.
                        source = dest;
                        // Can try another transfer, with next cluster.
                        continue tloop;
                    }
                }
                // If cluster sizes allow, move a single object.
                if (gain > 0 && (dest.size() < maxsize && source.size() > minsize)) {
                    transfer(metas, c, source, dest, id, i);
                    active += 1;
                    // We are assigned here now.
                    source = dest;
                    continue tloop;
                }
            }
            // If the object is still not in its first-preference cluster, and
            // its dists entry for that cluster is strictly smaller than for its
            // current one, put it on the current cluster's outgoing transfer
            // list.
            if (c.primary != preferences[0] && c.dists[c.primary] > c.dists[preferences[0]]) {
                transfers[c.primary].add(id);
            }
        }
        // TODO: try to get more transfers out of the transfer lists done by
        // considering more than one object?
        // Count the transfer requests that found no partner (for logging only).
        int pending = 0;
        // Clear transfer lists for next iteration.
        for (int i = 0; i < k; i++) {
            pending += transfers[i].size();
            transfers[i].clear();
        }
        if (LOG.isDebuggingFine()) {
            LOG.debugFine("Iteration #" + iter + ": performed " + active + " transfers skipped " + pending);
        }
        // Stop once an iteration performed no transfer at all.
        if (active <= 0) {
            break;
        }
        // Recompute means after reassignment
        means = means(clusters, means, relation);
    }
    return means;
}
Also used : DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IntegerComparator(de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator) Comparator(java.util.Comparator) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 40 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In the class SimilarityBasedInitializationWithMedian, method getSimilarityMatrix:

/**
 * Builds the symmetric, centered similarity matrix for the given objects.
 * <p>
 * Every off-diagonal entry is the pairwise similarity minus half of each of
 * the two self-similarities. The diagonal is finally filled with the
 * configured quantile of all off-diagonal entries.
 *
 * @param db Database to obtain the similarity query from
 * @param relation Data relation
 * @param ids Objects to process; row and column {@code i} belong to the
 *        object at offset {@code i}
 * @return {@code size x size} similarity matrix
 */
@Override
public double[][] getSimilarityMatrix(Database db, Relation<O> relation, ArrayDBIDs ids) {
    final int size = ids.size();
    SimilarityQuery<O> sq = db.getSimilarityQuery(relation, similarity);
    double[][] mat = new double[size][size];
    // Number of unordered pairs. The product is computed in long arithmetic,
    // because size * (size - 1) overflows int for size > 46341 although the
    // halved result still fits into an int up to size 65536.
    double[] flat = new double[(int) (((long) size * (size - 1)) >> 1)];
    DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
    // Compute self-similarities first, for centering:
    // the diagonal temporarily holds half of each self-similarity.
    for (int i = 0; i < size; i++, i1.advance()) {
        mat[i][i] = sq.similarity(i1, i1) * .5;
    }
    i1.seek(0);
    // j is the write position in flat, advancing once per pair.
    for (int i = 0, j = 0; i < size; i++, i1.advance()) {
        // Probably faster access.
        final double[] mati = mat[i];
        i2.seek(i + 1);
        for (int k = i + 1; k < size; k++, i2.advance()) {
            mati[k] = sq.similarity(i1, i2) - mati[i] - mat[k][k];
            // symmetry.
            mat[k][i] = mati[k];
            flat[j] = mati[k];
            j++;
        }
    }
    double median = QuickSelect.quantile(flat, quantile);
    // On the diagonal, we place the median
    // (overwriting the halved self-similarities used for centering).
    for (int i = 0; i < size; i++) {
        mat[i][i] = median;
    }
    return mat;
}
Also used : DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Aggregations

DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)64 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)17 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)15 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)15 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)13 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)12 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)9 Test (org.junit.Test)9 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 IOException (java.io.IOException)5 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)4 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)4 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)3 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)3