Search in sources :

Example 56 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

From the class JudgeOutlierScores, the method processNewResult:

/**
 * Evaluate every outlier result found in the hierarchy against the
 * objects whose label matches the configured positive class.
 *
 * @param hier Result hierarchy to scan for outlier results
 * @param result Newly added result (unused; the whole hierarchy is scanned)
 */
@Override
public void processNewResult(ResultHierarchy hier, Result result) {
    Database db = ResultUtil.findDatabase(hier);
    List<OutlierResult> outlierResults = ResultUtil.filterResults(hier, OutlierResult.class);
    if (outlierResults == null || outlierResults.isEmpty()) {
        // Nothing to evaluate.
        return;
    }
    // Start from all scored objects, then remove the labeled outliers,
    // leaving the inlier id set.
    ModifiableDBIDs inliers = DBIDUtil.newHashSet(outlierResults.get(0).getScores().getDBIDs());
    DBIDs labeledOutliers = DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName);
    inliers.removeDBIDs(labeledOutliers);
    // Attach a score evaluation result to each outlier result.
    for (OutlierResult outres : outlierResults) {
        db.getHierarchy().add(outres, computeScore(inliers, labeledOutliers, outres));
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Database(de.lmu.ifi.dbs.elki.database.Database) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 57 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

From the class CTLuRandomWalkEC, the method run:

/**
 * Run the algorithm.
 *
 * Builds the exponential-similarity graph matrix E over all objects,
 * column-normalizes it, inverts (I - c*E_normalized) scaled by (1 - c)
 * to obtain per-object similarity vectors, and finally scores each object
 * by the geometric mean of the vector angles to its k nearest neighbors.
 *
 * @param spatial Spatial neighborhood relation
 * @param relation Attribute value relation
 * @return Outlier result
 */
public OutlierResult run(Relation<P> spatial, Relation<? extends NumberVector> relation) {
    DistanceQuery<P> distFunc = getDistanceFunction().instantiate(spatial);
    // Per-object similarity vector (one matrix column each); filled in below.
    WritableDataStore<double[]> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
    // k nearest neighbors of each object, kept for the final scoring pass.
    WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
    // Make a static IDs array for matrix column indexing
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    // construct the relation Matrix of the ec-graph
    double[][] E = new double[ids.size()][ids.size()];
    // Heap is reused across iterations; it must be drained each time.
    KNNHeap heap = DBIDUtil.newHeap(k);
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Only the first attribute dimension is used by this algorithm.
            final double val = relation.get(id).doubleValue(0);
            // Invariant: heap was fully drained by the previous iteration.
            assert (heap.size() == 0);
            int j = 0;
            for (DBIDIter n = ids.iter(); n.valid(); n.advance(), j++) {
                if (i == j) {
                    // Skip self-pairs; diagonal stays 0 until the identity step below.
                    continue;
                }
                final double e;
                final double distance = distFunc.distance(id, n);
                heap.insert(distance, n);
                if (distance == 0) {
                    // Edge weight would be infinite; substitute 0 instead.
                    // NOTE(review): the pair is still inserted into the kNN heap
                    // above despite the "skipping" message — confirm intended.
                    LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
                    e = 0;
                } else {
                    // Attribute-value difference drives the edge weight.
                    double diff = Math.abs(val - relation.get(n).doubleValue(0));
                    double exp = FastMath.exp(FastMath.pow(diff, alpha));
                    // Implementation note: not inverting exp worked a lot better.
                    // Therefore we diverge from the article here.
                    e = exp / distance;
                }
                // Column i holds the edges *into* object i.
                E[j][i] = e;
            }
            // Convert kNN Heap into DBID array
            ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
            while (heap.size() > 0) {
                nids.add(heap.poll());
            }
            neighbors.put(id, nids);
        }
    }
    // Normalize every column to sum 1 (empty columns are left as-is via sum=1).
    // Also do the -c multiplication in this process.
    for (int i = 0; i < E[0].length; i++) {
        double sum = 0.0;
        for (int j = 0; j < E.length; j++) {
            sum += E[j][i];
        }
        if (sum == 0) {
            sum = 1.0;
        }
        for (int j = 0; j < E.length; j++) {
            E[j][i] = -c * E[j][i] / sum;
        }
    }
    // Add identity matrix. The diagonal should still be 0s, so this is trivial.
    assert (E.length == E[0].length);
    for (int col = 0; col < E[0].length; col++) {
        assert (E[col][col] == 0.0);
        E[col][col] = 1.0;
    }
    // E now holds (1 - c) * (I - c * W)^-1, the random-walk similarity matrix.
    E = timesEquals(inverse(E), 1 - c);
    // Split the matrix into columns
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Note: matrix times ith unit vector = ith column
            double[] sim = getCol(E, i);
            similarityVectors.put(id, sim);
        }
    }
    // Release the matrix; only the per-object vectors are needed from here on.
    E = null;
    // compute the relevance scores between specified Object and its neighbors
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        // Geometric mean of vector angles to the k nearest neighbors.
        double gmean = 1.0;
        int cnt = 0;
        for (DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
            if (DBIDUtil.equal(id, iter)) {
                continue;
            }
            double sim = VMath.angle(similarityVectors.get(id), similarityVectors.get(iter));
            gmean *= sim;
            cnt++;
        }
        // NOTE(review): if cnt is 0 (no neighbors besides itself), 1.0/cnt is
        // infinite and the score becomes NaN — verify k >= 1 upstream.
        final double score = FastMath.pow(gmean, 1.0 / cnt);
        minmax.put(score);
        scores.putDouble(id, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 58 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

From the class SelectionTableWindow, the method handleDelete:

/**
 * Handle delete. <br>
 * Remove the rows currently selected in the table from the database.
 */
protected void handleDelete() {
    ModifiableDBIDs deletions = DBIDUtil.newHashSet();
    ModifiableDBIDs kept = DBIDUtil.newHashSet(dbids);
    DBIDArrayIter pos = dbids.iter();
    // Resolve each selected table row to its object id.
    for (int row : table.getSelectedRows()) {
        pos.seek(row);
        deletions.add(pos);
        kept.remove(pos);
    }
    // Update the selection first, so it never references deleted ids.
    context.setSelection(new DBIDSelection(kept));
    // Then remove the objects from the database.
    for (DBIDIter di = deletions.iter(); di.valid(); di.advance()) {
        database.delete(di);
    }
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDSelection(de.lmu.ifi.dbs.elki.result.DBIDSelection) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 59 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

From the class SameSizeKMeansAlgorithm, the method refineResult:

/**
 * Perform k-means style iterations to improve the clustering result.
 *
 * Objects are processed in order of decreasing reassignment gain. A
 * reassignment happens either as a paired swap with an object queued in a
 * transfer list (sizes stay constant, so no bound check is needed), or as a
 * single move when the size bounds minsize/maxsize permit it.
 *
 * @param relation Data relation
 * @param means Means list
 * @param clusters Cluster list
 * @param metas Metadata storage
 * @param tids DBIDs array
 * @return final means
 */
protected double[][] refineResult(Relation<V> relation, double[][] means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
    NumberVectorDistanceFunction<? super V> df = getDistanceFunction();
    // Our desired cluster size:
    // rounded down
    final int minsize = tids.size() / k;
    // rounded up
    final int maxsize = (tids.size() + k - 1) / k;
    // Comparator: sort by largest gain by transfer
    final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {

        @Override
        public int compare(DBIDRef o1, DBIDRef o2) {
            Meta c1 = metas.get(o1), c2 = metas.get(o2);
            return Double.compare(c1.priority(), c2.priority());
        }
    };
    // List for sorting cluster preferences
    final int[] preferences = MathUtil.sequence(0, k);
    // Comparator for this list.
    final PreferenceComparator pcomp = new PreferenceComparator();
    // Initialize transfer lists: per cluster, the objects waiting to leave
    // it in exchange for an incoming partner.
    ArrayModifiableDBIDs[] transfers = new ArrayModifiableDBIDs[k];
    for (int i = 0; i < k; i++) {
        transfers[i] = DBIDUtil.newArray();
    }
    DBIDArrayIter id = tids.iter();
    // maxiter <= 0 means: iterate until convergence (no active transfers).
    for (int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
        updateDistances(relation, means, metas, df);
        // Process highest-gain objects first.
        tids.sort(comp);
        // Track if anything has changed
        int active = 0;
        for (id.seek(0); id.valid(); id.advance()) {
            Meta c = metas.get(id);
            // Rank candidate clusters for this object.
            IntegerArrayQuickSort.sort(preferences, pcomp.select(c));
            ModifiableDBIDs source = clusters.get(c.primary);
            assert (source.contains(id));
            tloop: for (int i : preferences) {
                if (i == c.primary) {
                    // Already assigned here
                    continue;
                }
                ModifiableDBIDs dest = clusters.get(i);
                // Can we pair this transfer?
                final double gain = c.gain(i);
                for (DBIDMIter other = transfers[i].iter(); other.valid(); other.advance()) {
                    Meta c2 = metas.get(other);
                    // Swap only if the combined gain is positive; sizes stay
                    // constant, so no min/max check is needed here.
                    if (gain + c2.gain(c.primary) > 0) {
                        transfer(metas, c2, dest, source, other, c.primary);
                        transfer(metas, c, source, dest, id, i);
                        active += 2;
                        // last, as this invalidates the reference!
                        other.remove();
                        // We are assigned here now.
                        source = dest;
                        // Can try another transfer, with next cluster.
                        continue tloop;
                    }
                }
                // If cluster sizes allow, move a single object.
                if (gain > 0 && (dest.size() < maxsize && source.size() > minsize)) {
                    transfer(metas, c, source, dest, id, i);
                    active += 1;
                    // We are assigned here now.
                    source = dest;
                    continue tloop;
                }
            }
            // Object would rather be elsewhere: queue it in the pending
            // transfer list.
            if (c.primary != preferences[0] && c.dists[c.primary] > c.dists[preferences[0]]) {
                transfers[c.primary].add(id);
            }
        }
        // TODO: try to get more transfers out of the transfer lists done by
        // considering more than one object?
        int pending = 0;
        // Clear transfer lists for next iteration.
        for (int i = 0; i < k; i++) {
            pending += transfers[i].size();
            transfers[i].clear();
        }
        if (LOG.isDebuggingFine()) {
            LOG.debugFine("Iteration #" + iter + ": performed " + active + " transfers skipped " + pending);
        }
        // Converged: nothing moved in this iteration.
        if (active <= 0) {
            break;
        }
        // Recompute means after reassignment
        means = means(clusters, means, relation);
    }
    return means;
}
Also used : DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IntegerComparator(de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator) Comparator(java.util.Comparator) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 60 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

From the class SameSizeKMeansAlgorithm, the method run:

/**
 * Run k-means with cluster size constraints.
 *
 * Chooses initial means, assigns all objects once, then iteratively
 * refines the assignment under the same-size constraint.
 *
 * @param database Database
 * @param relation relation to use
 * @return result
 */
@Override
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
    // Objects to cluster.
    final DBIDs ids = relation.getDBIDs();
    // Initial cluster centers.
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // One id set per cluster, presized for roughly equal shares.
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        clusters.add(DBIDUtil.newHashSet(relation.size() / k + 2));
    }
    // Per-object metadata (distances, assignment info).
    final WritableDataStore<Meta> metas = initializeMeta(relation, means);
    // Initial size-constrained assignment.
    ArrayModifiableDBIDs tids = initialAssignment(clusters, metas, ids);
    // Recompute the means after the initial assignment.
    means = means(clusters, means, relation);
    // Refine via k-means-like iterations.
    means = refineResult(relation, means, clusters, metas, tids);
    // Wrap the clusters and means into the result object.
    Clustering<MeanModel> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering");
    for (int c = 0; c < clusters.size(); c++) {
        result.addToplevelCluster(new Cluster<>(clusters.get(c), new MeanModel(means[c])));
    }
    return result;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Aggregations

ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)44 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)30 ArrayList (java.util.ArrayList)30 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)28 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)18 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)15 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)14 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)12 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)12 Model (de.lmu.ifi.dbs.elki.data.model.Model)11 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)11 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)10 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)9 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)9 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)8 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)7