Search in sources :

Example 6 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class CTLuGLSBackwardSearchAlgorithm method singleIteration.

/**
 * Run a single iteration of the GLS-SOD modeling step.
 * <p>
 * Builds the design matrix X (a quadratic polynomial in the two spatial
 * coordinates), the attribute matrix Y and the neighborhood matrix F, fits
 * the regression coefficients, and returns the object whose normalized
 * residual row has the largest squared sum.
 *
 * @param relationx Geo relation (asserted to be 2-dimensional)
 * @param relationy Attribute relation
 * @return Top outlier and associated score
 */
private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
    final int dim = RelationUtil.dimensionality(relationx);
    final int dimy = RelationUtil.dimensionality(relationy);
    assert (dim == 2);
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
    // We need stable indexed DBIDs
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
    // Sort, so we can do a binary search below.
    ids.sort();
    // init F,X,Y
    double[][] X = new double[ids.size()][6];
    double[][] F = new double[ids.size()][ids.size()];
    double[][] Y = new double[ids.size()][dimy];
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Fill the data matrix: 1, la, lo, la*lo, la^2, lo^2
            {
                V vec = relationx.get(id);
                double la = vec.doubleValue(0);
                double lo = vec.doubleValue(1);
                X[i][0] = 1.0;
                X[i][1] = la;
                X[i][2] = lo;
                X[i][3] = la * lo;
                X[i][4] = la * la;
                X[i][5] = lo * lo;
            }
            // Copy the attribute vector into row i of Y.
            {
                final NumberVector vecy = relationy.get(id);
                for (int d = 0; d < dimy; d++) {
                    double idy = vecy.doubleValue(d);
                    Y[i][d] = idy;
                }
            }
            // Fill the neighborhood matrix F:
            {
                // Query k + 1 neighbors, because the query object itself may
                // be part of the result and is filtered out below.
                KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
                ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
                for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
                    if (DBIDUtil.equal(id, neighbor)) {
                        continue;
                    }
                    neighborhood.add(neighbor);
                }
                // Weight object itself positively.
                F[i][i] = 1.0;
                // Floating point division: an integer division would truncate
                // the weight to 0 for any neighborhood with more than one
                // member. For an empty neighborhood the value is unused, as
                // the loop below does not execute.
                final double nweight = -1.0 / neighborhood.size();
                // We need to find the index positions of the neighbors,
                // unfortunately.
                for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
                    int pos = ids.binarySearch(iter);
                    assert (pos >= 0);
                    F[pos][i] = nweight;
                }
            }
        }
    }
    // Estimate the parameter beta
    // Common term that we can save recomputing.
    double[][] common = times(transposeTimesTranspose(X, F), F);
    double[][] b = times(inverse(times(common, X)), times(common, Y));
    // Estimate sigma_0 and sigma:
    // sigma_sum_square = sigma_0*sigma_0 + sigma*sigma
    double[][] sigmaMat = times(F, minusEquals(times(X, b), times(F, Y)));
    final double sigma_sum_square = normF(sigmaMat) / (relationx.size() - 6 - 1);
    final double norm = 1 / FastMath.sqrt(sigma_sum_square);
    // calculate the absolute values of standard residuals
    double[][] E = timesEquals(times(F, minus(Y, times(X, b))), norm);
    // Find the object with the largest squared residual row.
    DBIDVar worstid = DBIDUtil.newVar();
    double worstscore = Double.NEGATIVE_INFINITY;
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
        double err = squareSum(getRow(E, i));
        if (err > worstscore) {
            worstscore = err;
            worstid.set(id);
        }
    }
    return new Pair<>(worstid, FastMath.sqrt(worstscore));
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 7 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class CanopyPreClustering method run.

/**
 * Run canopy pre-clustering.
 * <p>
 * Repeatedly takes one remaining object as the canopy center, assigns every
 * remaining object that is not farther away than {@code t1} to the canopy,
 * and removes those within {@code t2} from further consideration.
 *
 * @param database Database
 * @param relation Relation to process
 * @return Clustering with one cluster per canopy
 */
public Clustering<PrototypeModel<O>> run(Database database, Relation<O> relation) {
    // Negated form on purpose: this also rejects NaN thresholds.
    if (!(t1 >= t2)) {
        throw new AbortException("T1 must be at least as large as T2.");
    }
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    ModifiableDBIDs remaining = DBIDUtil.newHashSet(relation.getDBIDs());
    ArrayList<Cluster<PrototypeModel<O>>> canopies = new ArrayList<>();
    final int total = relation.size();
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Canopy clustering", total, LOG);
    }
    DBIDVar center = DBIDUtil.newVar();
    while (!remaining.isEmpty()) {
        // Take one of the remaining objects as the canopy center.
        remaining.pop(center);
        // The new canopy initially contains just its center.
        ModifiableDBIDs members = DBIDUtil.newArray();
        members.add(center);
        // Test every object that has not been removed yet.
        DBIDMIter cand = remaining.iter();
        while (cand.valid()) {
            final double dist = distQuery.distance(center, cand);
            // Within the inclusion threshold: member of this canopy.
            if (!(dist > t1)) {
                members.add(cand);
                // Within the removal threshold: cannot start or join
                // another canopy.
                if (dist <= t2) {
                    cand.remove();
                }
            }
            cand.advance();
        }
        // TODO: remember the central object using a CanopyModel?
        canopies.add(new Cluster<>(members, new SimplePrototypeModel<>(relation.get(center))));
        if (progress != null) {
            progress.setProcessed(total - remaining.size(), LOG);
        }
    }
    LOG.ensureCompleted(progress);
    return new Clustering<>("Canopy clustering", "canopy-clustering", canopies);
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) SimplePrototypeModel(de.lmu.ifi.dbs.elki.data.model.SimplePrototypeModel) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 8 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class SNNClustering method expandCluster.

/**
 * DBSCAN-function expandCluster adapted to SNN criterion.
 * <p>
 * Border-Objects become members of the first possible cluster.
 *
 * @param snnInstance shared nearest neighbors
 * @param startObjectID potential seed of a new potential cluster
 * @param objprog the progress object to report about the progress of
 *        clustering (progress is only reported when both progress objects
 *        are non-null)
 * @param clusprog the progress object to report the number of clusters
 *        found (progress is only reported when both progress objects are
 *        non-null)
 */
protected void expandCluster(SimilarityQuery<O> snnInstance, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
    ArrayModifiableDBIDs seeds = findSNNNeighbors(snnInstance, startObjectID);
    // startObject is no core-object
    if (seeds.size() < minpts) {
        noise.add(startObjectID);
        processedIDs.add(startObjectID);
        if (objprog != null && clusprog != null) {
            objprog.setProcessed(processedIDs.size(), LOG);
            clusprog.setProcessed(resultList.size(), LOG);
        }
        return;
    }
    // try to expand the cluster
    ModifiableDBIDs currentCluster = DBIDUtil.newArray();
    for (DBIDIter seed = seeds.iter(); seed.valid(); seed.advance()) {
        if (!processedIDs.contains(seed)) {
            currentCluster.add(seed);
            processedIDs.add(seed);
        } else if (noise.contains(seed)) {
            // Former noise objects are claimed by this cluster.
            currentCluster.add(seed);
            noise.remove(seed);
        }
    }
    DBIDVar o = DBIDUtil.newVar();
    while (seeds.size() > 0) {
        seeds.pop(o);
        ArrayModifiableDBIDs neighborhood = findSNNNeighbors(snnInstance, o);
        // Only core objects expand the cluster further.
        if (neighborhood.size() >= minpts) {
            for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
                boolean inNoise = noise.contains(iter);
                boolean unclassified = !processedIDs.contains(iter);
                if (inNoise || unclassified) {
                    // Only previously unprocessed objects become new seeds.
                    if (unclassified) {
                        seeds.add(iter);
                    }
                    currentCluster.add(iter);
                    processedIDs.add(iter);
                    if (inNoise) {
                        noise.remove(iter);
                    }
                }
            }
        }
        if (objprog != null && clusprog != null) {
            objprog.setProcessed(processedIDs.size(), LOG);
            // Count the cluster under construction using the same threshold
            // that decides below whether it is kept.
            int numClusters = currentCluster.size() >= minpts ? resultList.size() + 1 : resultList.size();
            clusprog.setProcessed(numClusters, LOG);
        }
        // Stop early once every object is processed and no noise remains.
        if (processedIDs.size() == snnInstance.getRelation().size() && noise.size() == 0) {
            break;
        }
    }
    if (currentCluster.size() >= minpts) {
        resultList.add(currentCluster);
    } else {
        // Too small to be a cluster: everything collected becomes noise.
        noise.addDBIDs(currentCluster);
        noise.add(startObjectID);
        processedIDs.add(startObjectID);
    }
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 9 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class DBSCAN method expandCluster.

/**
 * Expand a DBSCAN cluster starting at the given object.
 * <p>
 * If the start object is not a core object, it is recorded as noise.
 * Otherwise a new cluster is grown from it by repeatedly querying the
 * neighborhood of each seed. Border objects become members of the first
 * possible cluster.
 *
 * @param relation Database relation to run on
 * @param rangeQuery Range query to use
 * @param startObjectID potential seed of a new potential cluster
 * @param seeds Array to store the current seeds
 * @param objprog Number of objects processed (may be {@code null})
 * @param clusprog Number of clusters found (may be {@code null})
 */
protected void expandCluster(Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, ArrayModifiableDBIDs seeds, FiniteProgress objprog, IndefiniteProgress clusprog) {
    final DoubleDBIDList startRange = rangeQuery.getRangeForDBID(startObjectID, epsilon);
    ncounter += startRange.size();
    final boolean isCore = startRange.size() >= minpts;
    if (!isCore) {
        // Not a core object: record as noise and stop.
        noise.add(startObjectID);
        processedIDs.add(startObjectID);
        if (objprog != null) {
            objprog.incrementProcessed(LOG);
        }
        return;
    }
    // Start a new cluster with the core object.
    ModifiableDBIDs currentCluster = DBIDUtil.newArray();
    currentCluster.add(startObjectID);
    processedIDs.add(startObjectID);
    // The seed buffer is expected to be empty; clear it anyway.
    assert (seeds.size() == 0);
    seeds.clear();
    processNeighbors(startRange.iter(), currentCluster, seeds);
    // Process seeds until none are left.
    DBIDVar current = DBIDUtil.newVar();
    while (!seeds.isEmpty()) {
        DoubleDBIDList range = rangeQuery.getRangeForDBID(seeds.pop(current), epsilon);
        ncounter += range.size();
        // Only core objects contribute their neighbors.
        if (range.size() >= minpts) {
            processNeighbors(range.iter(), currentCluster, seeds);
        }
        if (objprog != null) {
            objprog.incrementProcessed(LOG);
        }
    }
    resultList.add(currentCluster);
    if (clusprog != null) {
        clusprog.setProcessed(resultList.size(), LOG);
    }
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 10 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class FarthestSumPointsInitialMeans method chooseInitialMeans.

/**
 * Choose initial means by repeatedly picking the object with the largest
 * sum of distances to the previously chosen means.
 * <p>
 * The first mean is sampled randomly. When {@code dropfirst} is set, this
 * random object only serves to find the first farthest object and is then
 * discarded.
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation to choose the means from
 * @param k Number of means to choose
 * @param distanceFunction Distance function to use
 * @return The chosen means, as returned by {@code unboxVectors}
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    // Get a distance query
    DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
    DBIDs ids = relation.getDBIDs();
    // Accumulated distance sums; NaN marks objects that were already chosen.
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    // Chose first mean
    List<T> means = new ArrayList<>(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    T prevmean = relation.get(first);
    means.add(prevmean);
    if (!dropfirst) {
        // The first mean is kept, so mark it as chosen; otherwise it could
        // be selected a second time in a later round.
        store.putDouble(first, Double.NaN);
    }
    // Find farthest object each.
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        double maxdist = Double.NEGATIVE_INFINITY;
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double dsum = prev + distQ.distance(prevmean, it);
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, dsum);
            }
            if (dsum > maxdist) {
                maxdist = dsum;
                best.set(it);
            }
        }
        // Add new mean (and drop the initial mean when desired)
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean = relation.get(best);
        means.add(prevmean);
    }
    // Explicitly destroy temporary data.
    store.destroy();
    return unboxVectors(means);
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)26 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)7 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)6 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)6 DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)5 ArrayList (java.util.ArrayList)5 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)4 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)4 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)3 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)3 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)2 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)2 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)2 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)2 List (java.util.List)2 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)1 Model (de.lmu.ifi.dbs.elki.data.model.Model)1