Search in sources :

Example 76 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class CTLuRandomWalkEC method run.

/**
 * Run the algorithm.
 *
 * @param spatial Spatial neighborhood relation
 * @param relation Attribute value relation
 * @return Outlier result
 */
public OutlierResult run(Relation<P> spatial, Relation<? extends NumberVector> relation) {
    DistanceQuery<P> distFunc = getDistanceFunction().instantiate(spatial);
    WritableDataStore<double[]> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
    WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
    // Make a static IDs array for matrix column indexing
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    // construct the relation Matrix of the ec-graph
    double[][] E = new double[ids.size()][ids.size()];
    KNNHeap heap = DBIDUtil.newHeap(k);
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            final double val = relation.get(id).doubleValue(0);
            assert (heap.size() == 0);
            int j = 0;
            for (DBIDIter n = ids.iter(); n.valid(); n.advance(), j++) {
                if (i == j) {
                    continue;
                }
                final double e;
                final double distance = distFunc.distance(id, n);
                heap.insert(distance, n);
                if (distance == 0) {
                    LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
                    e = 0;
                } else {
                    double diff = Math.abs(val - relation.get(n).doubleValue(0));
                    double exp = FastMath.exp(FastMath.pow(diff, alpha));
                    // Implementation note: not inverting exp worked a lot better.
                    // Therefore we diverge from the article here.
                    e = exp / distance;
                }
                E[j][i] = e;
            }
            // Convert kNN Heap into DBID array
            ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
            while (heap.size() > 0) {
                nids.add(heap.poll());
            }
            neighbors.put(id, nids);
        }
    }
    // Also do the -c multiplication in this process.
    for (int i = 0; i < E[0].length; i++) {
        double sum = 0.0;
        for (int j = 0; j < E.length; j++) {
            sum += E[j][i];
        }
        if (sum == 0) {
            sum = 1.0;
        }
        for (int j = 0; j < E.length; j++) {
            E[j][i] = -c * E[j][i] / sum;
        }
    }
    // Add identity matrix. The diagonal should still be 0s, so this is trivial.
    assert (E.length == E[0].length);
    for (int col = 0; col < E[0].length; col++) {
        assert (E[col][col] == 0.0);
        E[col][col] = 1.0;
    }
    E = timesEquals(inverse(E), 1 - c);
    // Split the matrix into columns
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Note: matrix times ith unit vector = ith column
            double[] sim = getCol(E, i);
            similarityVectors.put(id, sim);
        }
    }
    E = null;
    // compute the relevance scores between specified Object and its neighbors
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        double gmean = 1.0;
        int cnt = 0;
        for (DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
            if (DBIDUtil.equal(id, iter)) {
                continue;
            }
            double sim = VMath.angle(similarityVectors.get(id), similarityVectors.get(iter));
            gmean *= sim;
            cnt++;
        }
        final double score = FastMath.pow(gmean, 1.0 / cnt);
        minmax.put(score);
        scores.putDouble(id, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 77 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class SOF method run.

/**
 * The main run method
 *
 * @param database Database to use (actually unused)
 * @param spatial Relation for neighborhood
 * @param relation Attributes to evaluate
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, spatial);
    DistanceQuery<O> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
    WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax lofminmax = new DoubleMinMax();
    // Compute densities
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        double avg = 0;
        for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
            avg += distFunc.distance(iditer, iter);
        }
        double lrd = 1 / (avg / neighbors.size());
        if (Double.isNaN(lrd)) {
            lrd = 0;
        }
        lrds.putDouble(iditer, lrd);
    }
    // Compute density quotients
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        double avg = 0;
        for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
            avg += lrds.doubleValue(iter);
        }
        final double lrd = (avg / neighbors.size()) / lrds.doubleValue(iditer);
        if (!Double.isNaN(lrd)) {
            lofs.putDouble(iditer, lrd);
            lofminmax.put(lrd);
        } else {
            lofs.putDouble(iditer, 0.0);
        }
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Spatial Outlier Factor", "sof-outlier", lofs, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 78 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class LibSVMOneClassOutlierDetection method run.

/**
 * Run one-class SVM.
 *
 * @param relation Data relation
 * @return Outlier result.
 */
public OutlierResult run(Relation<V> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    svm.svm_set_print_string_function(LOG_HELPER);
    svm_parameter param = new svm_parameter();
    param.svm_type = svm_parameter.ONE_CLASS;
    param.kernel_type = svm_parameter.LINEAR;
    param.degree = 3;
    switch(kernel) {
        case LINEAR:
            param.kernel_type = svm_parameter.LINEAR;
            break;
        case QUADRATIC:
            param.kernel_type = svm_parameter.POLY;
            param.degree = 2;
            break;
        case CUBIC:
            param.kernel_type = svm_parameter.POLY;
            param.degree = 3;
            break;
        case RBF:
            param.kernel_type = svm_parameter.RBF;
            break;
        case SIGMOID:
            param.kernel_type = svm_parameter.SIGMOID;
            break;
        default:
            throw new AbortException("Invalid kernel parameter: " + kernel);
    }
    // TODO: expose additional parameters to the end user!
    param.nu = nu;
    param.coef0 = 0.;
    param.cache_size = 10000;
    param.C = 1;
    // not used by one-class?
    param.eps = 1e-4;
    // not used by one-class?
    param.p = 0.1;
    param.shrinking = 0;
    param.probability = 0;
    param.nr_weight = 0;
    param.weight_label = new int[0];
    param.weight = new double[0];
    param.gamma = 1. / dim;
    // Transform data:
    svm_problem prob = new svm_problem();
    prob.l = relation.size();
    prob.x = new svm_node[prob.l][];
    prob.y = new double[prob.l];
    {
        DBIDIter iter = ids.iter();
        for (int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
            V vec = relation.get(iter);
            // TODO: support compact sparse vectors, too!
            svm_node[] x = new svm_node[dim];
            for (int d = 0; d < dim; d++) {
                x[d] = new svm_node();
                x[d].index = d + 1;
                x[d].value = vec.doubleValue(d);
            }
            prob.x[i] = x;
            prob.y[i] = +1;
        }
    }
    if (LOG.isVerbose()) {
        LOG.verbose("Training one-class SVM...");
    }
    String err = svm.svm_check_parameter(prob, param);
    if (err != null) {
        LOG.warning("svm_check_parameter: " + err);
    }
    svm_model model = svm.svm_train(prob, param);
    if (LOG.isVerbose()) {
        LOG.verbose("Predicting...");
    }
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    DoubleMinMax mm = new DoubleMinMax();
    {
        DBIDIter iter = ids.iter();
        double[] buf = new double[svm.svm_get_nr_class(model)];
        for (int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
            V vec = relation.get(iter);
            svm_node[] x = new svm_node[dim];
            for (int d = 0; d < dim; d++) {
                x[d] = new svm_node();
                x[d].index = d + 1;
                x[d].value = vec.doubleValue(d);
            }
            svm.svm_predict_values(model, x, buf);
            // / param.gamma; // Heuristic rescaling, sorry.
            double score = -buf[0];
            // Unfortunately, libsvm one-class currently yields a binary decision.
            scores.putDouble(iter, score);
            mm.put(score);
        }
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("One-Class SVM Decision", "svm-outlier", scores, ids);
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : libsvm.svm_node(libsvm.svm_node) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) libsvm.svm_parameter(libsvm.svm_parameter) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) libsvm.svm_model(libsvm.svm_model) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) libsvm.svm_problem(libsvm.svm_problem) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 79 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class OutlierSqrtScaling method prepare.

@Override
public void prepare(OutlierResult or) {
    if (pmin == null || pmax == null) {
        DoubleMinMax mm = new DoubleMinMax();
        DoubleRelation scores = or.getScores();
        for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
            double val = scores.doubleValue(id);
            if (!Double.isInfinite(val)) {
                mm.put(val);
            }
        }
        min = (pmin == null) ? mm.getMin() : pmin;
        max = (pmax == null) ? mm.getMax() : pmax;
    }
    factor = FastMath.sqrt(max - min);
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 80 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class HeDESNormalizationOutlierScaling method prepare.

@Override
public void prepare(OutlierResult or) {
    MeanVariance mv = new MeanVariance();
    DoubleMinMax minmax = new DoubleMinMax();
    DoubleRelation scores = or.getScores();
    for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
        double val = scores.doubleValue(id);
        if (!Double.isNaN(val) && !Double.isInfinite(val)) {
            mv.put(val);
            minmax.put(val);
        }
    }
    mean = mv.getMean();
    stddev = mv.getSampleStddev();
    scaledmax = getScaled(minmax.getMax());
    scaledmin = getScaled(minmax.getMin());
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)89 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)72 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)70 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)70 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)70 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)69 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)65 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)38 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)21 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)18 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)14 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)13 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)13 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)11 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)11 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)9 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)8 Mean (de.lmu.ifi.dbs.elki.math.Mean)8