Search in sources :

Example 6 with ProxyDatabase

use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.

the class CASH method buildDerivatorDB.

/**
 * Creates a proxy database over the given ids for the derivator. The database
 * carries a single relation of {@link DoubleVector}s, one per id, each
 * wrapping the column vector of the parameterization function stored for that
 * id.
 *
 * @param relation the relation storing the parameterization functions
 * @param ids the ids to build the database from
 * @return a database for the derivator restricted to the given ids
 */
private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, DBIDs ids) {
    final ProxyDatabase derivatorDB = new ProxyDatabase(ids);
    final int dimensionality = dimensionality(relation);
    final SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dimensionality);
    final MaterializedRelation<DoubleVector> vectors = new MaterializedRelation<>(vectorType, ids);
    // The relation is registered first and filled afterwards.
    derivatorDB.addRelation(vectors);
    // Project every parameterization function onto its column vector.
    DBIDIter it = ids.iter();
    while (it.valid()) {
        vectors.insert(it, DoubleVector.wrap(relation.get(it).getColumnVector()));
        it.advance();
    }
    return derivatorDB;
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 7 with ProxyDatabase

use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.

the class CASH method buildDerivatorDB.

/**
 * Creates a proxy database for the derivator over the ids contained in the
 * given interval. For every id, the column vector of its parameterization
 * function is wrapped as a {@link DoubleVector} and placed in a data store
 * that backs the single relation added to the database.
 *
 * @param relation the relation storing the parameterization functions
 * @param interval the interval whose ids the database is built from
 * @return a database for the derivator consisting of the ids in the specified
 *         interval
 */
private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, CASHInterval interval) {
    final DBIDs intervalIds = interval.getIDs();
    final ProxyDatabase derivatorDB = new ProxyDatabase(intervalIds);
    final int dimensionality = dimensionality(relation);
    final SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dimensionality);
    final WritableDataStore<DoubleVector> vectorStore = DataStoreUtil.makeStorage(intervalIds, DataStoreFactory.HINT_HOT, DoubleVector.class);
    // Project every parameterization function onto its column vector.
    DBIDIter it = intervalIds.iter();
    while (it.valid()) {
        vectorStore.put(it, DoubleVector.wrap(relation.get(it).getColumnVector()));
        it.advance();
    }
    if (LOG.isDebugging()) {
        LOG.debugFine("db fuer derivator : " + intervalIds.size());
    }
    // The relation is created on top of the already filled store.
    final MaterializedRelation<DoubleVector> vectors = new MaterializedRelation<>(vectorType, intervalIds, null, vectorStore);
    derivatorDB.addRelation(vectors);
    return derivatorDB;
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 8 with ProxyDatabase

use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.

the class HiCS method run.

/**
 * Runs HiCS on the given relation: determines the high-contrast subspaces,
 * applies the configured outlier algorithm to a projected view of each of
 * them, and sums the resulting scores per object (NaN scores are skipped).
 *
 * @param relation the data relation
 * @return the aggregated scores assigned by the configured outlier detection
 *         algorithm, together with their observed minimum and maximum
 */
public OutlierResult run(Relation<V> relation) {
    final DBIDs ids = relation.getDBIDs();
    ArrayList<ArrayDBIDs> oneDimIndexes = buildOneDimIndexes(relation);
    Set<HiCSSubspace> contrastSubspaces = calculateSubspaces(relation, oneDimIndexes, rnd.getSingleThreadedRandom());
    if (LOG.isVerbose()) {
        LOG.verbose("Number of high-contrast subspaces: " + contrastSubspaces.size());
    }
    List<DoubleRelation> subspaceScores = new ArrayList<>();
    // The progress tracker only exists when verbose logging is on.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", contrastSubspaces.size(), LOG);
    }
    // NOTE(review): the original comment at this point was cut off; the
    // surviving fragment asks whether a materialized relation should be used
    // instead of SubspaceEuclideanDistanceFunction.
    for (HiCSSubspace subspace : contrastSubspaces) {
        if (LOG.isVerbose()) {
            LOG.verbose("Performing outlier detection in subspace " + subspace);
        }
        // Expose only the dimensions of this subspace to the outlier algorithm.
        ProxyDatabase projectedDB = new ProxyDatabase(ids);
        projectedDB.addRelation(new ProjectedView<>(relation, new NumericalFeatureSelection<V>(subspace)));
        // Run the configured outlier algorithm and keep its scores.
        OutlierResult subspaceResult = outlierAlgorithm.run(projectedDB);
        subspaceScores.add(subspaceResult.getScores());
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    WritableDoubleDataStore aggregated = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax range = new DoubleMinMax();
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        double total = 0.0;
        for (DoubleRelation scoresOfSubspace : subspaceScores) {
            final double score = scoresOfSubspace.doubleValue(it);
            if (Double.isNaN(score)) {
                // NaN scores do not contribute to the sum.
                continue;
            }
            total += score;
        }
        aggregated.putDouble(it, total);
        range.put(total);
        it.advance();
    }
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax());
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", aggregated, relation.getDBIDs());
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) NumericalFeatureSelection(de.lmu.ifi.dbs.elki.data.projection.NumericalFeatureSelection) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax)

Example 9 with ProxyDatabase

use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.

the class SUBCLU method runDBSCAN.

/**
 * Applies DBSCAN, restricted to the dimensions of the given subspace, to the
 * objects identified by {@code ids}. When {@code ids} is {@code null}, all
 * ids of the relation are used instead.
 *
 * @param relation the relation holding the objects to run DBSCAN on
 * @param ids the ids defining the partition to run DBSCAN on, or
 *        {@code null} to run on every id of the relation
 * @param subspace the subspace to run DBSCAN on
 * @return the clusters found by DBSCAN, without the noise clusters
 */
private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
    // Restrict the distance function to the dimensions of the subspace.
    distanceFunction.setSelectedDimensions(subspace.getDimensions());
    if (ids == null) {
        // TODO: in this case, we might want to use an index - the proxy below
        // will prevent this!
        ids = relation.getDBIDs();
    }
    final ProxyDatabase partitionDB = new ProxyDatabase(ids, relation);
    final DBSCAN<V> clusterer = new DBSCAN<>(distanceFunction, epsilon, minpts);
    if (LOG.isVerbose()) {
        LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
    }
    final Clustering<Model> dbscanResult = clusterer.run(partitionDB);
    // Keep the real clusters only and drop everything flagged as noise.
    final List<Cluster<Model>> withoutNoise = new ArrayList<>();
    for (Cluster<Model> candidate : dbscanResult.getAllClusters()) {
        if (candidate.isNoise()) {
            continue;
        }
        withoutNoise.add(candidate);
    }
    return withoutNoise;
}
Also used : SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayList(java.util.ArrayList) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DBSCAN(de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN) Cluster(de.lmu.ifi.dbs.elki.data.Cluster)

Example 10 with ProxyDatabase

use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.

the class XMeans method splitCluster.

/**
 * Tries to split a cluster in two with the inner k-means and decides, via the
 * information criterion, whether the split is kept.
 *
 * @param parentCluster Cluster to split
 * @param database Database
 * @param relation Data relation
 * @return A list holding only the parent cluster when it has fewer than two
 *         members or when the split is rejected by the information
 *         criterion, otherwise the child clusters of the split.
 */
protected List<Cluster<M>> splitCluster(Cluster<M> parentCluster, Database database, Relation<V> relation) {
    // Wrap the parent cluster into a clustering of its own, so that it can be
    // scored by the information criterion.
    ArrayList<Cluster<M>> parentOnly = new ArrayList<>(1);
    parentOnly.add(parentCluster);
    Clustering<M> parentClustering = new Clustering<>(parentCluster.getName(), parentCluster.getName(), parentOnly);
    if (parentCluster.size() < 2) {
        // Split is not possible
        return parentOnly;
    }
    // Run the inner k-means with k=2 on the members of the parent cluster.
    ProxyDatabase parentMembersDB = new ProxyDatabase(parentCluster.getIDs(), database);
    splitInitializer.setInitialMeans(splitCentroid(parentCluster, relation));
    innerKMeans.setK(2);
    Clustering<M> childClustering = innerKMeans.run(parentMembersDB);
    double parentEvaluation = informationCriterion.quality(parentClustering, getDistanceFunction(), relation);
    double childrenEvaluation = informationCriterion.quality(childClustering, getDistanceFunction(), relation);
    if (LOG.isDebugging()) {
        LOG.debug("parentEvaluation: " + parentEvaluation);
        LOG.debug("childrenEvaluation: " + childrenEvaluation);
    }
    // Check if split is an improvement: the parent is kept when exactly one
    // of "children score higher" and "criterion is ascending" holds.
    final boolean childrenScoreHigher = childrenEvaluation > parentEvaluation;
    if (childrenScoreHigher ^ informationCriterion.ascending()) {
        return parentOnly;
    }
    return childClustering.getAllClusters();
}
Also used : ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Aggregations

ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase)11 MaterializedRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)6 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)5 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 ArrayList (java.util.ArrayList)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)3 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)3 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)2 DBSCAN (de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN)1 ParameterizationFunction (de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.ParameterizationFunction)1 Model (de.lmu.ifi.dbs.elki.data.model.Model)1 SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel)1 NumericalFeatureSelection (de.lmu.ifi.dbs.elki.data.projection.NumericalFeatureSelection)1 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)1 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)1 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)1 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)1