Search in sources :

Example 6 with WritableIntegerDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.

the class KMediansLloyd method run.

@Override
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("k-Medians Clustering", "kmedians-clustering");
    }
    // Choose initial medians
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] medians = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Setup cluster assignment store
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] distsum = new double[k];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medians iteration", LOG) : null;
    int iteration = 0;
    for (; maxiter <= 0 || iteration < maxiter; iteration++) {
        LOG.incrementProcessed(prog);
        boolean changed = assignToNearestCluster(relation, medians, clusters, assignment, distsum);
        // Stop if no cluster assignment changed.
        if (!changed) {
            break;
        }
        // Recompute medians.
        medians = medians(clusters, medians, relation);
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Wrap result
    Clustering<MeanModel> result = new Clustering<>("k-Medians Clustering", "kmedians-clustering");
    for (int i = 0; i < clusters.size(); i++) {
        MeanModel model = new MeanModel(medians[i]);
        result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) ArrayList(java.util.ArrayList) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 7 with WritableIntegerDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.

the class KMedoidsPAM method run.

/**
 * Run k-medoids
 *
 * @param database Database
 * @param relation relation to use
 * @return result
 */
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("PAM Clustering", "pam-clustering");
    }
    if (k > 0x7FFF) {
        throw new NotImplementedException("PAM supports at most " + 0x7FFF + " clusters.");
    }
    DistanceQuery<V> distQ = DatabaseUtil.precomputedDistanceQuery(database, relation, getDistanceFunction(), LOG);
    DBIDs ids = relation.getDBIDs();
    // Choose initial medoids
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, ids, distQ));
    if (medoids.size() != k) {
        throw new AbortException("Initializer " + initializer.toString() + " did not return " + k + " means, but " + medoids.size());
    }
    // Setup cluster assignment store
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
    run(distQ, ids, medoids, assignment);
    ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
    // Wrap result
    Clustering<MedoidModel> result = new Clustering<>("PAM Clustering", "pam-clustering");
    for (DBIDArrayIter it = medoids.iter(); it.valid(); it.advance()) {
        result.addToplevelCluster(new Cluster<>(clusters[it.getOffset()], new MedoidModel(DBIDUtil.deref(it))));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) NotImplementedException(de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) MedoidModel(de.lmu.ifi.dbs.elki.data.model.MedoidModel) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 8 with WritableIntegerDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.

the class SingleAssignmentKMeans method run.

@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Assignment", "kmeans-assignment");
    }
    // Choose initial means
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Setup cluster assignment store
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] varsum = new double[k];
    assignToNearestCluster(relation, means, clusters, assignment, varsum);
    // Wrap result
    Clustering<KMeansModel> result = new Clustering<>("Nearest Centroid Clustering", "nearest-center-clustering");
    for (int i = 0; i < clusters.size(); i++) {
        KMeansModel model = new KMeansModel(means[i], varsum[i]);
        result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
    }
    return result;
}
Also used : StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) ArrayList(java.util.ArrayList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 9 with WritableIntegerDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.

the class SimpleCOP method run.

public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
    DBIDs ids = data.getDBIDs();
    WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableDataStore<double[]> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
    WritableDataStore<double[][]> cop_datav = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[][].class);
    WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
    WritableDataStore<CorrelationAnalysisSolution<?>> cop_sol = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, CorrelationAnalysisSolution.class);
    {
        // compute neighbors of each db object
        FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null;
        double sqrt2 = MathUtil.SQRT2;
        for (DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) {
            KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
            ModifiableDBIDs nids = DBIDUtil.newArray(neighbors);
            nids.remove(id);
            // TODO: do we want to use the query point as centroid?
            CorrelationAnalysisSolution<V> depsol = dependencyDerivator.generateModel(data, nids);
            double stddev = depsol.getStandardDeviation();
            double distance = depsol.distance(data.get(id));
            double prob = NormalDistribution.erf(distance / (stddev * sqrt2));
            cop_score.putDouble(id, prob);
            cop_err_v.put(id, times(depsol.errorVector(data.get(id)), -1));
            double[][] datav = depsol.dataProjections(data.get(id));
            cop_datav.put(id, datav);
            cop_dim.putInt(id, depsol.getCorrelationDimensionality());
            cop_sol.put(id, depsol);
            LOG.incrementProcessed(progressLocalPCA);
        }
        LOG.ensureCompleted(progressLocalPCA);
    }
    // combine results.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Original Correlation Outlier Probabilities", "origcop-outlier", cop_score, ids);
    OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    // extra results
    result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
    result.addChildResult(new MaterializedRelation<>("Error vectors", COP.COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
    result.addChildResult(new MaterializedRelation<>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids));
    result.addChildResult(new MaterializedRelation<>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids));
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) CorrelationAnalysisSolution(de.lmu.ifi.dbs.elki.data.model.CorrelationAnalysisSolution) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 10 with WritableIntegerDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.

the class OPTICSOF method run.

/**
 * Perform OPTICS-based outlier detection.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, minpts);
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    DBIDs ids = relation.getDBIDs();
    // FIXME: implicit preprocessor.
    WritableDataStore<KNNList> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
    WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        KNNList minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
        double d = minptsNeighbours.getKNNDistance();
        nMinPts.put(iditer, minptsNeighbours);
        coreDistance.putDouble(iditer, d);
        minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
    }
    // Pass 2
    WritableDataStore<List<Double>> reachDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
    WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        List<Double> core = new ArrayList<>();
        double lrd = 0;
        // TODO: optimize for double distances
        for (DoubleDBIDListIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
            double coreDist = coreDistance.doubleValue(neighbor);
            double dist = distQuery.distance(iditer, neighbor);
            double rd = MathUtil.max(coreDist, dist);
            lrd = rd + lrd;
            core.add(rd);
        }
        lrd = minPtsNeighborhoodSize.intValue(iditer) / lrd;
        reachDistance.put(iditer, core);
        lrds.putDouble(iditer, lrd);
    }
    // Pass 3
    DoubleMinMax ofminmax = new DoubleMinMax();
    WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double of = 0;
        for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
            double lrd = lrds.doubleValue(iditer);
            double lrdN = lrds.doubleValue(neighbor);
            of = of + lrdN / lrd;
        }
        of = of / minPtsNeighborhoodSize.intValue(iditer);
        ofs.putDouble(iditer, of);
        // update minimum and maximum
        ofminmax.put(of);
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("OPTICS Outlier Scores", "optics-outlier", ofs, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayList(java.util.ArrayList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)21 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)14 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 ArrayList (java.util.ArrayList)14 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)12 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)12 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)11 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)10 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)7 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)5 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)4 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)4 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)4 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)2 MedoidModel (de.lmu.ifi.dbs.elki.data.model.MedoidModel)2