use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.
the class RepresentativeUncertainClustering method run.
/**
 * Wrapping run method for the representative clustering of uncertain data.
 *
 * It is called from {@link AbstractAlgorithm#run(Database)}. It draws
 * {@code numsamples} samples of the uncertain relation, clusters each sample
 * with {@link #runClusteringAlgorithm}, runs the meta algorithm on the
 * resulting clusterings, and finally evaluates one representative clustering
 * per meta cluster.
 *
 * @param database Database (supplies the result hierarchy to attach to)
 * @param relation Data relation of uncertain objects
 * @return Clustering result of the meta algorithm
 */
public Clustering<?> run(Database database, Relation<? extends UncertainObject> relation) {
  final ResultHierarchy resultHierarchy = database.getHierarchy();
  final ArrayList<Clustering<?>> sampleClusterings = new ArrayList<>();
  final int dim = RelationUtil.dimensionality(relation);
  final DBIDs ids = relation.getDBIDs();
  // Parent result below which every sample relation is registered.
  final Result samples = new BasicResult("Samples", "samples");

  // Step 1: draw samples and cluster each of them.
  final Random rnd = random.getSingleThreadedRandom();
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Clustering samples", numsamples, LOG);
  }
  for (int s = 0; s < numsamples; s++) {
    final WritableDataStore<DoubleVector> drawn = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, DoubleVector.class);
    for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
      drawn.put(obj, relation.get(obj).drawSample(rnd));
    }
    final Clustering<?> sampled = runClusteringAlgorithm(resultHierarchy, samples, ids, drawn, dim, "Sample " + s);
    sampleClusterings.add(sampled);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  // Step 2: meta clustering; each sample clustering becomes one object.
  final DBIDRange rids = DBIDFactory.FACTORY.generateStaticDBIDRange(sampleClusterings.size());
  final WritableDataStore<Clustering<?>> datastore = DataStoreUtil.makeStorage(rids, DataStoreFactory.HINT_DB, Clustering.class);
  final Iterator<Clustering<?>> source = sampleClusterings.iterator();
  for (DBIDIter target = rids.iter(); target.valid(); target.advance()) {
    datastore.put(target, source.next());
  }
  assert (rids.size() == sampleClusterings.size());

  // Relation of clusterings plus a distance matrix over it.
  final Relation<Clustering<?>> crel = new MaterializedRelation<Clustering<?>>(Clustering.TYPE, rids, "Clusterings", datastore);
  final PrecomputedDistanceMatrix<Clustering<?>> mat = new PrecomputedDistanceMatrix<>(crel, rids, distance);
  mat.initialize();
  final ProxyDatabase metaDatabase = new ProxyDatabase(rids, crel);
  metaDatabase.getHierarchy().add(crel, mat);
  final Clustering<?> metaClustering = metaAlgorithm.run(metaDatabase);
  // Unhook the meta clustering from the temporary database again.
  metaDatabase.getHierarchy().remove(metaDatabase, metaClustering);

  // Step 3: evaluation, choosing one representative per meta cluster.
  final Result reps = new BasicResult("Representants", "representative");
  resultHierarchy.add(relation, reps);
  final DistanceQuery<Clustering<?>> dq = mat.getDistanceQuery(distance);
  final List<? extends Cluster<?>> metaClusters = metaClustering.getAllClusters();
  final List<DoubleObjPair<Clustering<?>>> evaluated = new ArrayList<>(metaClusters.size());
  for (Cluster<?> metaCluster : metaClusters) {
    Clustering<?> representative = null;
    double bestTau = Double.POSITIVE_INFINITY;
    for (DBIDIter cand = metaCluster.getIDs().iter(); cand.valid(); cand.advance()) {
      final Clustering<?> candidate = crel.get(cand);
      // Largest distance from the candidate to any other cluster member.
      double tau = 0.;
      for (DBIDIter other = metaCluster.getIDs().iter(); other.valid(); other.advance()) {
        if (!DBIDUtil.equal(cand, other)) {
          final double dist = dq.distance(candidate, other);
          if (dist > tau) {
            tau = dist;
          }
        }
      }
      // Keep the member with the least maximum distance.
      if (tau < bestTau) {
        bestTau = tau;
        representative = candidate;
      }
    }
    if (representative == null) {
      // No member was found, e.g. a degenerate empty cluster.
      continue;
    }
    // Global tau: largest distance from the representative to any sample.
    double globalTau = 0.;
    for (DBIDIter any = crel.iterDBIDs(); any.valid(); any.advance()) {
      final double dist = dq.distance(representative, any);
      if (dist > globalTau) {
        globalTau = dist;
      }
    }
    final double confidence = computeConfidence(metaCluster.size(), crel.size());
    // Attach the evaluation result to the representative clustering.
    resultHierarchy.add(representative, new RepresentativenessEvaluation(globalTau, bestTau, confidence));
    evaluated.add(new DoubleObjPair<Clustering<?>>(confidence, representative));
  }

  // Order the evaluated representatives by confidence (reversed natural
  // order of the pairs).
  Collections.sort(evaluated, Collections.reverseOrder());
  for (DoubleObjPair<Clustering<?>> entry : evaluated) {
    // Attach the parent relation (= sample) of each representative to reps.
    It<Relation<?>> parents = resultHierarchy.iterParents(entry.second).filter(Relation.class);
    while (parents.valid()) {
      resultHierarchy.add(reps, parents.get());
      parents.advance();
    }
  }

  // Either expose all samples below the input relation, or discard the
  // subtree of the samples result.
  if (!keep) {
    resultHierarchy.removeSubtree(samples);
  }
  else {
    resultHierarchy.add(relation, samples);
  }
  return metaClustering;
}
use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.
the class RepresentativeUncertainClustering method runClusteringAlgorithm.
/**
 * Cluster one sampled instance and register it in the result hierarchy.
 *
 * The sample is wrapped into a relation and clustered inside a temporary
 * {@link ProxyDatabase}. Both the relation and the clustering are then
 * removed from the temporary hierarchy and re-attached to the given one:
 * the relation below {@code parent}, the clustering below the relation.
 *
 * @param hierarchy Result hierarchy to attach the results to
 * @param parent Parent result to attach to
 * @param ids Object IDs to process
 * @param store Input data
 * @param dim Dimensionality
 * @param title Title of relation
 * @return Clustering result
 */
protected Clustering<?> runClusteringAlgorithm(ResultHierarchy hierarchy, Result parent, DBIDs ids, DataStore<DoubleVector> store, int dim, String title) {
  final SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  final Relation<DoubleVector> sampleRelation = new MaterializedRelation<>(vectorType, ids, title, store);
  final ProxyDatabase proxy = new ProxyDatabase(ids, sampleRelation);
  final Clustering<?> clustering = samplesAlgorithm.run(proxy);

  // Detach both results from the temporary database ...
  final ResultHierarchy proxyHierarchy = proxy.getHierarchy();
  proxyHierarchy.remove(sampleRelation);
  proxyHierarchy.remove(clustering);

  // ... and hook them into the caller's hierarchy instead.
  hierarchy.add(parent, sampleRelation);
  hierarchy.add(sampleRelation, clustering);
  return clustering;
}
use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.
the class GreedyEnsembleExperiment method applyPrescaling.
/**
 * Build a rescaled copy of a relation.
 *
 * Every vector is converted to a double array; unless its id is contained in
 * {@code skip}, the array is passed through {@code applyScaling} with the
 * given scaling function. The resulting vectors are stored in a new relation
 * named "rescaled". If no scaling function is given, the input relation is
 * returned as is.
 *
 * @param scaling Scaling function, may be {@code null} (no rescaling)
 * @param relation Relation to read
 * @param skip DBIDs to pass unmodified
 * @return New relation, or {@code relation} itself if {@code scaling} is
 *         {@code null}
 */
public static Relation<NumberVector> applyPrescaling(ScalingFunction scaling, Relation<NumberVector> relation, DBIDs skip) {
  // Nothing to do without a scaling function.
  if (null == scaling) {
    return relation;
  }
  final NumberVector.Factory<NumberVector> vectorFactory = RelationUtil.getNumberVectorFactory(relation);
  final DBIDs ids = relation.getDBIDs();
  final WritableDataStore<NumberVector> rescaled = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT, NumberVector.class);
  for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
    final double[] values = relation.get(obj).toArray();
    final boolean passThrough = skip.contains(obj);
    if (!passThrough) {
      applyScaling(values, scaling);
    }
    final NumberVector vector = vectorFactory.newNumberVector(values, ArrayLikeUtil.DOUBLEARRAYADAPTER);
    rescaled.put(obj, vector);
  }
  return new MaterializedRelation<>(relation.getDataTypeInformation(), ids, "rescaled", rescaled);
}
use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.
the class StaticArrayDatabase method initialize.
/**
 * Load the initial data from the database connection, if one is set.
 *
 * The connection is cleared after loading, so the data is loaded at most
 * once. For every column of the loaded bundle a materialized relation is
 * created and registered, and every index factory whose input type
 * restriction accepts the column type gets an index instantiated and
 * initialized on that relation. Finally an insertion event is fired for all
 * ids.
 */
@Override
public void initialize() {
  if (databaseConnection == null) {
    // No connection (or already loaded): nothing to do.
    return;
  }
  if (LOG.isDebugging()) {
    LOG.debugFine("Loading data from database connection.");
  }
  final MultipleObjectsBundle bundle = databaseConnection.loadData();
  // Run at most once.
  databaseConnection = null;

  // Determine the DBIDs to use for the bundle.
  final DBIDs bundleIds = bundle.getDBIDs();
  if (bundleIds == null) {
    // Bundle carries no ids: generate a fresh static range.
    this.ids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
  }
  else if (bundleIds instanceof ArrayStaticDBIDs) {
    this.ids = (ArrayStaticDBIDs) bundleIds;
  }
  else {
    this.ids = (ArrayStaticDBIDs) DBIDUtil.makeUnmodifiable(bundleIds);
  }

  // Replace id representation (it would be nicer if we would not need
  // DBIDView at all)
  this.idrep = new DBIDView(this.ids);
  relations.add(this.idrep);
  getHierarchy().add(this, idrep);

  final DBIDArrayIter pos = this.ids.iter();
  final int columns = bundle.metaLength();
  for (int col = 0; col < columns; col++) {
    @SuppressWarnings("unchecked")
    final SimpleTypeInformation<Object> ometa = (SimpleTypeInformation<Object>) bundle.meta(col);
    final WritableDataStore<Object> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, ometa.getRestrictionClass());
    // Copy the column into the store, row by row.
    pos.seek(0);
    while (pos.valid()) {
      store.put(pos, bundle.data(pos.getOffset(), col));
      pos.advance();
    }
    final Relation<?> relation = new MaterializedRelation<>(ometa, ids, null, store);
    relations.add(relation);
    getHierarchy().add(this, relation);

    // Try to add indexes where appropriate
    for (IndexFactory<?, ?> factory : indexFactories) {
      if (!factory.getInputTypeRestriction().isAssignableFromType(ometa)) {
        continue;
      }
      @SuppressWarnings("unchecked")
      final IndexFactory<Object, ?> ofact = (IndexFactory<Object, ?>) factory;
      @SuppressWarnings("unchecked")
      final Relation<Object> orep = (Relation<Object>) relation;
      final Index index = ofact.instantiate(orep);
      // Time the index construction only when statistics are logged.
      Duration duration = null;
      if (LOG.isStatistics()) {
        duration = LOG.newDuration(index.getClass().getName() + ".construction").begin();
      }
      index.initialize();
      if (duration != null) {
        LOG.statistics(duration.end());
      }
      getHierarchy().add(relation, index);
    }
  }

  // fire insertion event
  eventManager.fireObjectsInserted(ids);
}
use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.
the class SOD method run.
/**
 * Run the SOD algorithm on the given relation.
 *
 * For each object, a neighborhood is obtained via
 * {@code getNearestNeighbors}. If it is non-empty, its centroid and the
 * per-dimension variances are computed; dimensions whose variance is below
 * {@code alpha} times the mean variance are flagged in a bit set, and the
 * score is computed by {@code subspaceOutlierDegree}. Objects with an empty
 * neighborhood get the score 0. If models were requested, a {@code SODModel}
 * is stored per object and attached to the result.
 *
 * @param relation Data relation to process
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
  final SimilarityQuery<V> snnInstance = similarityFunction.instantiate(relation);
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG);
  }
  final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  // Model storage is only allocated when models were requested.
  final WritableDataStore<SODModel> modelStore;
  if (models) {
    modelStore = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
  }
  else {
    modelStore = null;
  }

  final DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
    LOG.incrementProcessed(progress);
    final DBIDs neighborhood = getNearestNeighbors(relation, snnInstance, obj);

    final double[] center;
    final long[] weightVector;
    final double sod;
    if (neighborhood.size() > 0) {
      center = Centroid.make(relation, neighborhood).getArrayRef();
      // Note: per-dimension variances; no covariances.
      final double[] variances = computePerDimensionVariances(relation, center, neighborhood);
      final double threshold = alpha * Mean.of(variances);
      weightVector = BitsUtil.zero(variances.length);
      // Flag every dimension whose variance is below the threshold.
      for (int dim = 0; dim < variances.length; dim++) {
        if (variances[dim] < threshold) {
          BitsUtil.setI(weightVector, dim);
        }
      }
      sod = subspaceOutlierDegree(relation.get(obj), center, weightVector);
    }
    else {
      // Empty neighborhood: the object is its own center, score 0.
      center = relation.get(obj).toArray();
      weightVector = null;
      sod = 0.;
    }

    if (modelStore != null) {
      modelStore.put(obj, new SODModel(center, weightVector));
    }
    scores.putDouble(obj, sod);
    minmax.put(sod);
  }
  LOG.ensureCompleted(progress);

  // Combine the results.
  final OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
  final MaterializedDoubleRelation scoreRelation = new MaterializedDoubleRelation("Subspace Outlier Degree", "sod-outlier", scores, relation.getDBIDs());
  final OutlierResult sodResult = new OutlierResult(meta, scoreRelation);
  if (modelStore != null) {
    final Relation<SODModel> modelRelation = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<>(SODModel.class), modelStore, relation.getDBIDs());
    sodResult.addChildResult(modelRelation);
  }
  return sodResult;
}
Aggregations