Search in sources :

Example 26 with ArrayDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

the class LinearScanRKNNQuery method getRKNNForDBID.

/**
 * Reverse k-nearest-neighbor query for a stored object, answered by a full
 * scan: the kNN lists of all objects of the relation are requested in bulk,
 * and every object whose list contains {@code id} is reported.
 *
 * @param id Query object reference
 * @param k Number of neighbors requested from the kNN query
 * @return List of the objects whose kNN list contains {@code id}, each paired
 *         with the distance stored in that kNN list entry; {@code sort()} has
 *         been called on the list before it is returned
 */
@Override
public DoubleDBIDList getRKNNForDBID(DBIDRef id, int k) {
    final ArrayDBIDs candidates = DBIDUtil.ensureArray(relation.getDBIDs());
    // One kNN list per candidate, accessed below by the candidate's position.
    final List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(candidates, k);
    final ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    int pos = 0;
    for (DBIDIter cand = candidates.iter(); cand.valid(); cand.advance(), pos++) {
        for (DoubleDBIDListIter nb = neighborhoods.get(pos).iter(); nb.valid(); nb.advance()) {
            if (!DBIDUtil.equal(nb, id)) {
                continue;
            }
            // The query object is among the neighbors of this candidate.
            result.add(nb.doubleValue(), cand);
        }
    }
    result.sort();
    return result;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 27 with ArrayDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

the class LinearScanRKNNQuery method getRKNNForObject.

/**
 * Reverse k-nearest-neighbor query for an arbitrary object, answered by a
 * full scan: the kNN lists of all objects of the relation are requested in
 * bulk, and an object is reported when its kNN list holds fewer than
 * {@code k} entries or when its distance to {@code obj} does not exceed the
 * distance of its last considered kNN entry.
 *
 * @param obj Query object
 * @param k Number of neighbors requested from the kNN query
 * @return List of the qualifying objects paired with their distance to
 *         {@code obj}; {@code sort()} has been called on the list before it
 *         is returned
 */
@Override
public DoubleDBIDList getRKNNForObject(O obj, int k) {
    final ArrayDBIDs candidates = DBIDUtil.ensureArray(relation.getDBIDs());
    // One kNN list per candidate, accessed below by the candidate's position.
    final List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(candidates, k);
    final ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    final int kthIndex = k - 1;
    int pos = 0;
    for (DBIDIter cand = candidates.iter(); cand.valid(); cand.advance(), pos++) {
        final KNNList knn = neighborhoods.get(pos);
        // Index of the k-th entry, or of the final entry if the list is shorter.
        final int last = Math.min(kthIndex, knn.size() - 1);
        final double dist = distanceQuery.distance(obj, cand);
        final boolean shortList = last < kthIndex;
        // Short-circuit matters: knn.get(last) is only evaluated for full lists.
        if (shortList || dist <= knn.get(last).doubleValue()) {
            result.add(dist, cand);
        }
    }
    result.sort();
    return result;
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 28 with ArrayDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

the class HiCS method run.

/**
 * Perform HiCS on a given database.
 * <p>
 * Builds the one-dimensional indexes, determines the high-contrast
 * subspaces, runs the configured outlier algorithm on a projected view of
 * the relation for each of these subspaces, and finally sums the per-subspace
 * scores of every object (NaN scores are left out of the sum).
 *
 * @param relation the database
 * @return The aggregated resulting scores that were assigned by the given
 *         outlier detection algorithm
 */
public OutlierResult run(Relation<V> relation) {
    final DBIDs ids = relation.getDBIDs();
    final ArrayList<ArrayDBIDs> oneDimIndexes = buildOneDimIndexes(relation);
    final Set<HiCSSubspace> subspaces = calculateSubspaces(relation, oneDimIndexes, rnd.getSingleThreadedRandom());
    if (LOG.isVerbose()) {
        LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
    }

    // Phase 1: one outlier detection run per high-contrast subspace.
    final List<DoubleRelation> perSubspaceScores = new ArrayList<>();
    final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
    for (HiCSSubspace subspace : subspaces) {
        if (LOG.isVerbose()) {
            LOG.verbose("Performing outlier detection in subspace " + subspace);
        }
        // The algorithm only gets to see the dimensions of the current subspace.
        final ProxyDatabase projected = new ProxyDatabase(ids);
        projected.addRelation(new ProjectedView<>(relation, new NumericalFeatureSelection<V>(subspace)));
        // Run the configured outlier algorithm and keep its score relation.
        final OutlierResult subspaceResult = outlierAlgorithm.run(projected);
        perSubspaceScores.add(subspaceResult.getScores());
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    // Phase 2: aggregate by summation, tracking the observed score range.
    final WritableDoubleDataStore aggregated = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    final DoubleMinMax range = new DoubleMinMax();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        double total = 0.0;
        for (DoubleRelation scoresOfSubspace : perSubspaceScores) {
            final double value = scoresOfSubspace.doubleValue(it);
            if (Double.isNaN(value)) {
                // NaN scores do not contribute to the sum.
                continue;
            }
            total += value;
        }
        aggregated.putDouble(it, total);
        range.put(total);
    }
    final OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax());
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", aggregated, relation.getDBIDs());
    return new OutlierResult(meta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) NumericalFeatureSelection(de.lmu.ifi.dbs.elki.data.projection.NumericalFeatureSelection) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 29 with ArrayDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

the class HiCS method calculateContrast.

/**
 * Calculates the actual contrast of a given subspace.
 * <p>
 * Performs {@code m} Monte-Carlo iterations. Each iteration chooses one
 * random dimension of the subspace, builds a conditional sample by
 * intersecting one random index block per remaining dimension, and passes
 * the values of the chosen dimension for the full data set and for the
 * conditional sample to {@code statTest.deviation}. The sum of the
 * deviations divided by {@code m} is stored in {@code subspace.contrast}.
 *
 * @param relation Relation to process
 * @param subspace Subspace; its {@code contrast} field receives the result
 * @param subspaceIndex Subspace indexes, one id array per dimension, accessed
 *        by dimension number (presumably ordered by attribute value, going by
 *        the local name {@code sortedIndices} - confirm against
 *        {@code buildOneDimIndexes})
 * @param random Random generator used to choose the dimension and the block
 *        offsets
 */
private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
    final int card = subspace.cardinality();
    final double alpha1 = FastMath.pow(alpha, (1.0 / card));
    final int size = relation.size();
    final int windowsize = (int) (size * alpha1);
    // A block of windowsize consecutive positions may start at any offset in
    // 0 .. size - windowsize (inclusive). The previous bound of
    // size - windowsize excluded the last offset, so the final index entry
    // could never be sampled, and it was an illegal bound of 0 whenever
    // windowsize == size.
    final int numOffsets = size - windowsize + 1;
    final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;
    int retries = 0;
    double deviationSum = 0.0;
    for (int i = 0; i < m; i++) {
        // Choose a random set bit.
        int chosen = -1;
        for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
            chosen = subspace.nextSetBit(chosen + 1);
        }
        // initialize sample
        DBIDs conditionalSample = relation.getDBIDs();
        for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
            if (j == chosen) {
                // The chosen dimension is the one being tested, not conditioned on.
                continue;
            }
            ArrayDBIDs sortedIndices = subspaceIndex.get(j);
            ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
            // initialize index block
            DBIDArrayIter iter = sortedIndices.iter();
            iter.seek(random.nextInt(numOffsets));
            for (int k = 0; k < windowsize; k++, iter.advance()) {
                // select index block
                indexBlock.add(iter);
            }
            conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
        }
        if (conditionalSample.size() < 10) {
            retries++;
            if (LOG.isDebugging()) {
                LOG.debug("Sample size very small. Retry no. " + retries);
            }
            if (retries >= MAX_RETRIES) {
                // Retry budget used up: warn and continue with the small sample.
                // NOTE(review): retries is never reset, so from here on every
                // small sample is accepted - confirm this is intended.
                LOG.warning("Too many retries, for small samples: " + retries);
            } else {
                // Repeat this iteration with a fresh sample.
                i--;
                continue;
            }
        }
        // Project conditional set
        double[] sampleValues = new double[conditionalSample.size()];
        {
            int l = 0;
            for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
                sampleValues[l] = relation.get(iter).doubleValue(chosen);
                l++;
            }
        }
        // Project full set
        double[] fullValues = new double[size];
        {
            int l = 0;
            for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
                fullValues[l] = relation.get(iter).doubleValue(chosen);
                l++;
            }
        }
        double contrast = statTest.deviation(fullValues, sampleValues);
        if (Double.isNaN(contrast)) {
            // Repeat this iteration; NaN results are not counted.
            // NOTE(review): this retry is unbounded - if the test keeps
            // returning NaN the loop does not terminate.
            i--;
            LOG.warning("Contrast was NaN");
            continue;
        }
        deviationSum += contrast;
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    subspace.contrast = deviationSum / m;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 30 with ArrayDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

the class CTLuRandomWalkEC method run.

/**
 * Run the algorithm.
 * <p>
 * Steps, as implemented below: fill a square matrix with pairwise weights
 * (computed from the difference in the first attribute and the spatial
 * distance) while collecting the neighbors kept by a size-{@code k} heap for
 * each object; scale every column by {@code -c} over its column sum; put
 * ones on the diagonal; invert and multiply by {@code 1 - c}; store column
 * {@code i} as the similarity vector of the i-th object; finally score each
 * object by the geometric mean of {@code VMath.angle} between its own vector
 * and the vectors of its collected neighbors.
 *
 * @param spatial Spatial neighborhood relation
 * @param relation Attribute value relation
 * @return Outlier result
 */
public OutlierResult run(Relation<P> spatial, Relation<? extends NumberVector> relation) {
    DistanceQuery<P> distQuery = getDistanceFunction().instantiate(spatial);
    WritableDataStore<double[]> simVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
    WritableDataStore<DBIDs> neighborhoods = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
    // Fixed id array: the position of an id doubles as its matrix index.
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int n = ids.size();
    // Weight matrix of the ec-graph, written as graph[other][current].
    double[][] graph = new double[n][n];
    KNNHeap knnHeap = DBIDUtil.newHeap(k);
    {
        int cur = 0;
        for (DBIDIter curId = ids.iter(); curId.valid(); curId.advance(), cur++) {
            final double curValue = relation.get(curId).doubleValue(0);
            // The heap is drained at the end of every outer iteration.
            assert (knnHeap.size() == 0);
            int oth = 0;
            for (DBIDIter othId = ids.iter(); othId.valid(); othId.advance(), oth++) {
                if (cur != oth) {
                    final double distance = distQuery.distance(curId, othId);
                    knnHeap.insert(distance, othId);
                    final double weight;
                    if (distance != 0) {
                        double diff = Math.abs(curValue - relation.get(othId).doubleValue(0));
                        double affinity = FastMath.exp(FastMath.pow(diff, alpha));
                        // Implementation note: not inverting exp worked a lot better.
                        // Therefore we diverge from the article here.
                        weight = affinity / distance;
                    } else {
                        LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(curId) + " " + DBIDUtil.toString(othId));
                        weight = 0;
                    }
                    graph[oth][cur] = weight;
                }
            }
            // Move the heap contents into a DBID array, emptying the heap.
            ModifiableDBIDs collected = DBIDUtil.newArray(knnHeap.size());
            while (knnHeap.size() > 0) {
                collected.add(knnHeap.poll());
            }
            neighborhoods.put(curId, collected);
        }
    }
    // Column-wise normalization, combined with the multiplication by -c.
    final int numCols = graph[0].length;
    final int numRows = graph.length;
    for (int column = 0; column < numCols; column++) {
        double colSum = 0.0;
        for (int row = 0; row < numRows; row++) {
            colSum += graph[row][column];
        }
        // Avoid dividing by zero for all-zero columns.
        if (colSum == 0) {
            colSum = 1.0;
        }
        for (int row = 0; row < numRows; row++) {
            graph[row][column] = -c * graph[row][column] / colSum;
        }
    }
    // Add the identity matrix; the diagonal was never written, so just set it.
    assert (graph.length == graph[0].length);
    for (int d = 0; d < graph[0].length; d++) {
        assert (graph[d][d] == 0.0);
        graph[d][d] = 1.0;
    }
    graph = timesEquals(inverse(graph), 1 - c);
    // Hand out the matrix column by column.
    {
        int column = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), column++) {
            // Note: matrix times ith unit vector = ith column
            simVectors.put(id, getCol(graph, column));
        }
    }
    // Drop the reference to the matrix; only the column copies are used below.
    graph = null;
    // Relevance score of each object with respect to its collected neighbors.
    DoubleMinMax range = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        double product = 1.0;
        int count = 0;
        for (DBIDIter nb = neighborhoods.get(id).iter(); nb.valid(); nb.advance()) {
            if (!DBIDUtil.equal(id, nb)) {
                double[] own = simVectors.get(id);
                double[] other = simVectors.get(nb);
                product *= VMath.angle(own, other);
                count++;
            }
        }
        // Geometric mean: count-th root of the product.
        final double score = FastMath.pow(product, 1.0 / count);
        range.put(score);
        scores.putDouble(id, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)45 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)23 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)14 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)13 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)12 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)10 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)9 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)9 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)8 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)7 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)7 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)7 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)7 ArrayList (java.util.ArrayList)7 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)6 KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap)6 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)6 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)5