Use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.
Class CASH, method buildDerivatorDB (DBIDs variant).
/**
 * Builds a proxy database for the derivator that is restricted to the given
 * ids. For every id, the column vector of its parameterization function is
 * wrapped into a {@code DoubleVector} and stored in a new relation.
 *
 * @param relation the relation storing the parameterization functions
 * @param ids the ids to build the database from
 * @return a database for the derivator containing exactly the given ids
 */
private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, DBIDs ids) {
  ProxyDatabase derivatorDb = new ProxyDatabase(ids);
  int dimension = dimensionality(relation);
  SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dimension);
  MaterializedRelation<DoubleVector> vectors = new MaterializedRelation<>(vectorType, ids);
  // The relation is registered first and populated afterwards.
  derivatorDb.addRelation(vectors);
  // Copy each function's column vector into the new relation.
  DBIDIter it = ids.iter();
  while (it.valid()) {
    ParameterizationFunction function = relation.get(it);
    vectors.insert(it, DoubleVector.wrap(function.getColumnVector()));
    it.advance();
  }
  return derivatorDb;
}
Use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.
Class CASH, method buildDerivatorDB (CASHInterval variant).
/**
 * Builds a proxy database for the derivator from the ids of the given
 * interval. The column vector of each parameterization function is wrapped
 * into a {@code DoubleVector}, collected in a data store, and the store is
 * then exposed through a materialized relation.
 *
 * @param relation the relation storing the parameterization functions
 * @param interval the interval whose ids the database is built from
 * @return a database for the derivator consisting of the ids in the specified
 *         interval
 */
private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, CASHInterval interval) {
  DBIDs intervalIds = interval.getIDs();
  ProxyDatabase derivatorDb = new ProxyDatabase(intervalIds);
  int dimension = dimensionality(relation);
  SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dimension);
  WritableDataStore<DoubleVector> store = DataStoreUtil.makeStorage(intervalIds, DataStoreFactory.HINT_HOT, DoubleVector.class);
  // Fill the store before it is handed to the relation.
  DBIDIter it = intervalIds.iter();
  while (it.valid()) {
    ParameterizationFunction function = relation.get(it);
    store.put(it, DoubleVector.wrap(function.getColumnVector()));
    it.advance();
  }
  if (LOG.isDebugging()) {
    LOG.debugFine("db fuer derivator : " + intervalIds.size());
  }
  // Wrap the pre-filled store in a relation and register it with the proxy.
  MaterializedRelation<DoubleVector> vectors = new MaterializedRelation<>(vectorType, intervalIds, null, store);
  derivatorDb.addRelation(vectors);
  return derivatorDb;
}
Use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.
Class HiCS, method run.
/**
 * Performs HiCS on a given relation: selects high-contrast subspaces, runs
 * the configured outlier detection algorithm on a projection of the data
 * into each subspace, and sums the per-subspace scores for every object.
 *
 * @param relation the data relation
 * @return the aggregated scores that were assigned by the given outlier
 *         detection algorithm; the meta data carries the minimum and maximum
 *         of the summed scores
 */
public OutlierResult run(Relation<V> relation) {
  final DBIDs ids = relation.getDBIDs();
  ArrayList<ArrayDBIDs> oneDimIndexes = buildOneDimIndexes(relation);
  Set<HiCSSubspace> contrastSubspaces = calculateSubspaces(relation, oneDimIndexes, rnd.getSingleThreadedRandom());
  if (LOG.isVerbose()) {
    LOG.verbose("Number of high-contrast subspaces: " + contrastSubspaces.size());
  }

  List<DoubleRelation> subspaceScores = new ArrayList<>();
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", contrastSubspaces.size(), LOG);
  }
  // Each subspace is handed to the outlier algorithm as a projected view of
  // the input relation, wrapped in its own proxy database.
  for (HiCSSubspace subspace : contrastSubspaces) {
    if (LOG.isVerbose()) {
      LOG.verbose("Performing outlier detection in subspace " + subspace);
    }
    ProxyDatabase subspaceDb = new ProxyDatabase(ids);
    subspaceDb.addRelation(new ProjectedView<>(relation, new NumericalFeatureSelection<V>(subspace)));
    // Run the configured outlier algorithm and keep its score relation.
    OutlierResult subspaceResult = outlierAlgorithm.run(subspaceDb);
    subspaceScores.add(subspaceResult.getScores());
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  // Aggregate: sum the scores of all subspaces per object, ignoring NaN.
  WritableDoubleDataStore summed = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax range = new DoubleMinMax();
  DBIDIter it = relation.iterDBIDs();
  while (it.valid()) {
    double total = 0.0;
    for (DoubleRelation scoreRelation : subspaceScores) {
      final double score = scoreRelation.doubleValue(it);
      if (Double.isNaN(score)) {
        continue;
      }
      total += score;
    }
    summed.putDouble(it, total);
    range.put(total);
    it.advance();
  }

  OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax());
  DoubleRelation aggregated = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", summed, relation.getDBIDs());
  return new OutlierResult(meta, aggregated);
}
Use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.
Class SUBCLU, method runDBSCAN.
/**
 * Runs DBSCAN on a partition of the relation, restricted to the dimensions
 * of the given subspace, and returns only the non-noise clusters.
 *
 * @param relation the relation holding the objects to run DBSCAN on
 * @param ids the IDs defining the partition to run DBSCAN on; if
 *        {@code null}, all IDs of the relation are used
 * @param subspace the subspace to run DBSCAN on
 * @return the clusters found by DBSCAN, with noise clusters removed
 */
private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
  // Restrict the shared distance function to the subspace dimensions.
  distanceFunction.setSelectedDimensions(subspace.getDimensions());

  // TODO: when no ids are given, we might want to use an index - the proxy
  // below will prevent this!
  final DBIDs partition = (ids != null) ? ids : relation.getDBIDs();
  final ProxyDatabase partitionDb = new ProxyDatabase(partition, relation);

  DBSCAN<V> clusterer = new DBSCAN<>(distanceFunction, epsilon, minpts);
  if (LOG.isVerbose()) {
    LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
  }
  Clustering<Model> dbscanResult = clusterer.run(partitionDb);

  // Keep the proper clusters, drop the noise.
  List<Cluster<Model>> nonNoise = new ArrayList<>();
  for (Cluster<Model> candidate : dbscanResult.getAllClusters()) {
    if (candidate.isNoise()) {
      continue;
    }
    nonNoise.add(candidate);
  }
  return nonNoise;
}
Use of de.lmu.ifi.dbs.elki.database.ProxyDatabase in project elki by elki-project.
Class XMeans, method splitCluster.
/**
 * Conditionally splits a cluster in two, based on the information criterion.
 * The inner k-means is run with k=2 on a proxy database restricted to the
 * ids of the parent cluster, and the criterion value of the resulting
 * children is compared with that of the parent alone.
 *
 * @param parentCluster Cluster to split
 * @param database Database
 * @param relation Data relation
 * @return Parent cluster when split decreases clustering quality or child
 *         clusters when split improves clustering. A parent with fewer than
 *         two members is always returned unsplit.
 */
protected List<Cluster<M>> splitCluster(Cluster<M> parentCluster, Database database, Relation<V> relation) {
  // Wrap the parent cluster into a single-cluster clustering.
  ArrayList<Cluster<M>> parentOnly = new ArrayList<Cluster<M>>(1);
  parentOnly.add(parentCluster);
  Clustering<M> parentClustering = new Clustering<>(parentCluster.getName(), parentCluster.getName(), parentOnly);

  // Split is not possible with fewer than two members.
  if (parentCluster.size() < 2) {
    return parentOnly;
  }

  // Run 2-means on the members of the parent cluster only.
  ProxyDatabase parentDb = new ProxyDatabase(parentCluster.getIDs(), database);
  splitInitializer.setInitialMeans(splitCentroid(parentCluster, relation));
  innerKMeans.setK(2);
  Clustering<M> childClustering = innerKMeans.run(parentDb);

  double parentEvaluation = informationCriterion.quality(parentClustering, getDistanceFunction(), relation);
  double childrenEvaluation = informationCriterion.quality(childClustering, getDistanceFunction(), relation);
  if (LOG.isDebugging()) {
    LOG.debug("parentEvaluation: " + parentEvaluation);
    LOG.debug("childrenEvaluation: " + childrenEvaluation);
  }

  // Check if split is an improvement: the parent is kept when exactly one of
  // "children score higher" and "criterion is ascending" holds.
  final boolean childrenScoreHigher = childrenEvaluation > parentEvaluation;
  if (childrenScoreHigher ^ informationCriterion.ascending()) {
    return parentOnly;
  }
  return childClustering.getAllClusters();
}
Aggregations