Search in sources :

Example 6 with MaterializedRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.

the class RepresentativeUncertainClustering method run.

/**
 * This run method will do the wrapping.
 *
 * Its called from {@link AbstractAlgorithm#run(Database)} and performs the
 * call to the algorithms particular run method as well as the storing and
 * comparison of the resulting Clusterings.
 *
 * @param database Database
 * @param relation Data relation of uncertain objects
 * @return Clustering result
 */
public Clustering<?> run(Database database, Relation<? extends UncertainObject> relation) {
    ResultHierarchy hierarchy = database.getHierarchy();
    ArrayList<Clustering<?>> clusterings = new ArrayList<>();
    final int dim = RelationUtil.dimensionality(relation);
    DBIDs ids = relation.getDBIDs();
    // To collect samples
    Result samples = new BasicResult("Samples", "samples");
    // Step 1: Cluster sampled possible worlds:
    Random rand = random.getSingleThreadedRandom();
    FiniteProgress sampleP = LOG.isVerbose() ? new FiniteProgress("Clustering samples", numsamples, LOG) : null;
    for (int i = 0; i < numsamples; i++) {
        WritableDataStore<DoubleVector> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, DoubleVector.class);
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            store.put(iter, relation.get(iter).drawSample(rand));
        }
        clusterings.add(runClusteringAlgorithm(hierarchy, samples, ids, store, dim, "Sample " + i));
        LOG.incrementProcessed(sampleP);
    }
    LOG.ensureCompleted(sampleP);
    // Step 2: perform the meta clustering (on samples only).
    DBIDRange rids = DBIDFactory.FACTORY.generateStaticDBIDRange(clusterings.size());
    WritableDataStore<Clustering<?>> datastore = DataStoreUtil.makeStorage(rids, DataStoreFactory.HINT_DB, Clustering.class);
    {
        Iterator<Clustering<?>> it2 = clusterings.iterator();
        for (DBIDIter iter = rids.iter(); iter.valid(); iter.advance()) {
            datastore.put(iter, it2.next());
        }
    }
    assert (rids.size() == clusterings.size());
    // Build a relation, and a distance matrix.
    Relation<Clustering<?>> crel = new MaterializedRelation<Clustering<?>>(Clustering.TYPE, rids, "Clusterings", datastore);
    PrecomputedDistanceMatrix<Clustering<?>> mat = new PrecomputedDistanceMatrix<>(crel, rids, distance);
    mat.initialize();
    ProxyDatabase d = new ProxyDatabase(rids, crel);
    d.getHierarchy().add(crel, mat);
    Clustering<?> c = metaAlgorithm.run(d);
    // Detach from database
    d.getHierarchy().remove(d, c);
    // Evaluation
    Result reps = new BasicResult("Representants", "representative");
    hierarchy.add(relation, reps);
    DistanceQuery<Clustering<?>> dq = mat.getDistanceQuery(distance);
    List<? extends Cluster<?>> cl = c.getAllClusters();
    List<DoubleObjPair<Clustering<?>>> evaluated = new ArrayList<>(cl.size());
    for (Cluster<?> clus : cl) {
        double besttau = Double.POSITIVE_INFINITY;
        Clustering<?> bestc = null;
        for (DBIDIter it1 = clus.getIDs().iter(); it1.valid(); it1.advance()) {
            double tau = 0.;
            Clustering<?> curc = crel.get(it1);
            for (DBIDIter it2 = clus.getIDs().iter(); it2.valid(); it2.advance()) {
                if (DBIDUtil.equal(it1, it2)) {
                    continue;
                }
                double di = dq.distance(curc, it2);
                tau = di > tau ? di : tau;
            }
            // Cluster member with the least maximum distance.
            if (tau < besttau) {
                besttau = tau;
                bestc = curc;
            }
        }
        if (bestc == null) {
            // E.g. degenerate empty clusters
            continue;
        }
        // Global tau:
        double gtau = 0.;
        for (DBIDIter it2 = crel.iterDBIDs(); it2.valid(); it2.advance()) {
            double di = dq.distance(bestc, it2);
            gtau = di > gtau ? di : gtau;
        }
        final double cprob = computeConfidence(clus.size(), crel.size());
        // Build an evaluation result
        hierarchy.add(bestc, new RepresentativenessEvaluation(gtau, besttau, cprob));
        evaluated.add(new DoubleObjPair<Clustering<?>>(cprob, bestc));
    }
    // Sort evaluated results by confidence:
    Collections.sort(evaluated, Collections.reverseOrder());
    for (DoubleObjPair<Clustering<?>> pair : evaluated) {
        // Attach parent relation (= sample) to the representative samples.
        for (It<Relation<?>> it = hierarchy.iterParents(pair.second).filter(Relation.class); it.valid(); it.advance()) {
            hierarchy.add(reps, it.get());
        }
    }
    // Add the random samples below the representative results only:
    if (keep) {
        hierarchy.add(relation, samples);
    } else {
        hierarchy.removeSubtree(samples);
    }
    return c;
}
Also used : ArrayList(java.util.ArrayList) Result(de.lmu.ifi.dbs.elki.result.Result) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) Random(java.util.Random) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult) Iterator(java.util.Iterator) ResultHierarchy(de.lmu.ifi.dbs.elki.result.ResultHierarchy) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) PrecomputedDistanceMatrix(de.lmu.ifi.dbs.elki.index.distancematrix.PrecomputedDistanceMatrix) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleObjPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Example 7 with MaterializedRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.

the class RepresentativeUncertainClustering method runClusteringAlgorithm.

/**
 * Run a clustering algorithm on a single instance.
 *
 * @param parent Parent result to attach to
 * @param ids Object IDs to process
 * @param store Input data
 * @param dim Dimensionality
 * @param title Title of relation
 * @return Clustering result
 */
protected Clustering<?> runClusteringAlgorithm(ResultHierarchy hierarchy, Result parent, DBIDs ids, DataStore<DoubleVector> store, int dim, String title) {
    SimpleTypeInformation<DoubleVector> t = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    Relation<DoubleVector> sample = new MaterializedRelation<>(t, ids, title, store);
    ProxyDatabase d = new ProxyDatabase(ids, sample);
    Clustering<?> clusterResult = samplesAlgorithm.run(d);
    d.getHierarchy().remove(sample);
    d.getHierarchy().remove(clusterResult);
    hierarchy.add(parent, sample);
    hierarchy.add(sample, clusterResult);
    return clusterResult;
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 8 with MaterializedRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.

the class GreedyEnsembleExperiment method applyPrescaling.

/**
 * Prescale each vector (except when in {@code skip}) with the given scaling
 * function.
 *
 * @param scaling Scaling function
 * @param relation Relation to read
 * @param skip DBIDs to pass unmodified
 * @return New relation
 */
public static Relation<NumberVector> applyPrescaling(ScalingFunction scaling, Relation<NumberVector> relation, DBIDs skip) {
    if (scaling == null) {
        return relation;
    }
    NumberVector.Factory<NumberVector> factory = RelationUtil.getNumberVectorFactory(relation);
    DBIDs ids = relation.getDBIDs();
    WritableDataStore<NumberVector> contents = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT, NumberVector.class);
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        NumberVector v = relation.get(iter);
        double[] raw = v.toArray();
        if (!skip.contains(iter)) {
            applyScaling(raw, scaling);
        }
        contents.put(iter, factory.newNumberVector(raw, ArrayLikeUtil.DOUBLEARRAYADAPTER));
    }
    return new MaterializedRelation<>(relation.getDataTypeInformation(), ids, "rescaled", contents);
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 9 with MaterializedRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.

the class StaticArrayDatabase method initialize.

/**
 * Initialize the database by getting the initial data from the database
 * connection.
 */
@Override
public void initialize() {
    if (databaseConnection != null) {
        if (LOG.isDebugging()) {
            LOG.debugFine("Loading data from database connection.");
        }
        MultipleObjectsBundle bundle = databaseConnection.loadData();
        // Run at most once.
        databaseConnection = null;
        // Find DBIDs for bundle
        {
            DBIDs bids = bundle.getDBIDs();
            if (bids instanceof ArrayStaticDBIDs) {
                this.ids = (ArrayStaticDBIDs) bids;
            } else if (bids == null) {
                this.ids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
            } else {
                this.ids = (ArrayStaticDBIDs) DBIDUtil.makeUnmodifiable(bids);
            }
        }
        // Replace id representation (it would be nicer if we would not need
        // DBIDView at all)
        this.idrep = new DBIDView(this.ids);
        relations.add(this.idrep);
        getHierarchy().add(this, idrep);
        DBIDArrayIter it = this.ids.iter();
        int numrel = bundle.metaLength();
        for (int i = 0; i < numrel; i++) {
            SimpleTypeInformation<?> meta = bundle.meta(i);
            @SuppressWarnings("unchecked") SimpleTypeInformation<Object> ometa = (SimpleTypeInformation<Object>) meta;
            WritableDataStore<Object> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, ometa.getRestrictionClass());
            for (it.seek(0); it.valid(); it.advance()) {
                store.put(it, bundle.data(it.getOffset(), i));
            }
            Relation<?> relation = new MaterializedRelation<>(ometa, ids, null, store);
            relations.add(relation);
            getHierarchy().add(this, relation);
            // Try to add indexes where appropriate
            for (IndexFactory<?, ?> factory : indexFactories) {
                if (factory.getInputTypeRestriction().isAssignableFromType(ometa)) {
                    @SuppressWarnings("unchecked") final IndexFactory<Object, ?> ofact = (IndexFactory<Object, ?>) factory;
                    @SuppressWarnings("unchecked") final Relation<Object> orep = (Relation<Object>) relation;
                    final Index index = ofact.instantiate(orep);
                    Duration duration = LOG.isStatistics() ? LOG.newDuration(index.getClass().getName() + ".construction").begin() : null;
                    index.initialize();
                    if (duration != null) {
                        LOG.statistics(duration.end());
                    }
                    getHierarchy().add(relation, index);
                }
            }
        }
        // fire insertion event
        eventManager.fireObjectsInserted(ids);
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Index(de.lmu.ifi.dbs.elki.index.Index) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) IndexFactory(de.lmu.ifi.dbs.elki.index.IndexFactory) DBIDView(de.lmu.ifi.dbs.elki.database.relation.DBIDView)

Example 10 with MaterializedRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation in project elki by elki-project.

the class SOD method run.

/**
 * Performs the SOD algorithm on the given database.
 *
 * @param relation Data relation to process
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
    SimilarityQuery<V> snnInstance = similarityFunction.instantiate(relation);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null;
    final WritableDoubleDataStore sod_scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    WritableDataStore<SODModel> sod_models = null;
    if (models) {
        // Models requested
        sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
    }
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        LOG.incrementProcessed(progress);
        DBIDs neighborhood = getNearestNeighbors(relation, snnInstance, iter);
        double[] center;
        long[] weightVector;
        double sod;
        if (neighborhood.size() > 0) {
            center = Centroid.make(relation, neighborhood).getArrayRef();
            // Note: per-dimension variances; no covariances.
            double[] variances = computePerDimensionVariances(relation, center, neighborhood);
            double expectationOfVariance = Mean.of(variances);
            weightVector = BitsUtil.zero(variances.length);
            for (int d = 0; d < variances.length; d++) {
                if (variances[d] < alpha * expectationOfVariance) {
                    BitsUtil.setI(weightVector, d);
                }
            }
            sod = subspaceOutlierDegree(relation.get(iter), center, weightVector);
        } else {
            center = relation.get(iter).toArray();
            weightVector = null;
            sod = 0.;
        }
        if (sod_models != null) {
            sod_models.put(iter, new SODModel(center, weightVector));
        }
        sod_scores.putDouble(iter, sod);
        minmax.put(sod);
    }
    LOG.ensureCompleted(progress);
    // combine results.
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    OutlierResult sodResult = new OutlierResult(meta, new MaterializedDoubleRelation("Subspace Outlier Degree", "sod-outlier", sod_scores, relation.getDBIDs()));
    if (sod_models != null) {
        Relation<SODModel> models = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<>(SODModel.class), sod_models, relation.getDBIDs());
        sodResult.addChildResult(models);
    }
    return sodResult;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

MaterializedRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)15 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)7 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)7 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)6 ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase)6 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)4 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)3 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)3 ParameterizationFunction (de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.ParameterizationFunction)2 Index (de.lmu.ifi.dbs.elki.index.Index)2 IndexFactory (de.lmu.ifi.dbs.elki.index.IndexFactory)2 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)1 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)1 ArrayStaticDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs)1 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)1