Search in sources :

Example 26 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class StaticArrayDatabase method initialize.

/**
 * Initialize the database by getting the initial data from the database
 * connection.
 *
 * <p>
 * The data is loaded at most once: the connection reference is cleared right
 * after loading, so any later call returns immediately. For every column of
 * the loaded bundle a materialized relation is created and registered in the
 * hierarchy; every configured index factory whose input type restriction
 * accepts the column type is instantiated and initialized on that relation.
 * Finally an insertion event is fired for all ids.
 * </p>
 */
@Override
public void initialize() {
    if (databaseConnection == null) {
        // Nothing to load, or the data was already loaded by an earlier call.
        return;
    }
    if (LOG.isDebugging()) {
        LOG.debugFine("Loading data from database connection.");
    }
    MultipleObjectsBundle bundle = databaseConnection.loadData();
    // Run at most once.
    databaseConnection = null;
    // Find DBIDs for bundle
    DBIDs bids = bundle.getDBIDs();
    if (bids instanceof ArrayStaticDBIDs) {
        this.ids = (ArrayStaticDBIDs) bids;
    } else if (bids == null) {
        // The bundle carries no ids: generate a fresh static range.
        this.ids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    } else {
        // Convert to an array-based id collection before wrapping it. Without
        // this step a non-array id collection would be wrapped in a non-array
        // unmodifiable view and the cast below would fail with a
        // ClassCastException.
        this.ids = (ArrayStaticDBIDs) DBIDUtil.makeUnmodifiable(DBIDUtil.ensureArray(bids));
    }
    // Replace id representation (it would be nicer if we would not need
    // DBIDView at all)
    this.idrep = new DBIDView(this.ids);
    relations.add(this.idrep);
    getHierarchy().add(this, idrep);
    // One iterator is shared by all columns; it is rewound for each column.
    DBIDArrayIter it = this.ids.iter();
    int numrel = bundle.metaLength();
    for (int i = 0; i < numrel; i++) {
        SimpleTypeInformation<?> meta = bundle.meta(i);
        @SuppressWarnings("unchecked") SimpleTypeInformation<Object> ometa = (SimpleTypeInformation<Object>) meta;
        WritableDataStore<Object> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, ometa.getRestrictionClass());
        // The iterator offset is used as the row number within the bundle.
        for (it.seek(0); it.valid(); it.advance()) {
            store.put(it, bundle.data(it.getOffset(), i));
        }
        Relation<?> relation = new MaterializedRelation<>(ometa, ids, null, store);
        relations.add(relation);
        getHierarchy().add(this, relation);
        // Try to add indexes where appropriate
        for (IndexFactory<?, ?> factory : indexFactories) {
            if (!factory.getInputTypeRestriction().isAssignableFromType(ometa)) {
                continue;
            }
            @SuppressWarnings("unchecked") final IndexFactory<Object, ?> ofact = (IndexFactory<Object, ?>) factory;
            @SuppressWarnings("unchecked") final Relation<Object> orep = (Relation<Object>) relation;
            final Index index = ofact.instantiate(orep);
            // Only measure the construction time when statistics logging is on.
            Duration duration = LOG.isStatistics() ? LOG.newDuration(index.getClass().getName() + ".construction").begin() : null;
            index.initialize();
            if (duration != null) {
                LOG.statistics(duration.end());
            }
            getHierarchy().add(relation, index);
        }
    }
    // fire insertion event
    eventManager.fireObjectsInserted(ids);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Index(de.lmu.ifi.dbs.elki.index.Index) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) IndexFactory(de.lmu.ifi.dbs.elki.index.IndexFactory) DBIDView(de.lmu.ifi.dbs.elki.database.relation.DBIDView)

Example 27 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class SigmoidOutlierScalingFunction method prepare.

@Override
public void prepare(OutlierResult or) {
    // Starting point: unit slope, and an offset that shifts the scores by
    // their mean (a * score + b == score - mean).
    final MeanVariance stats = new MeanVariance();
    final DoubleRelation scores = or.getScores();
    for (DBIDIter cursor = scores.iterDBIDs(); cursor.valid(); cursor.advance()) {
        stats.put(scores.doubleValue(cursor));
    }
    double a = 1.0;
    double b = -stats.getMean();
    int round = 0;
    final ArrayDBIDs ids = DBIDUtil.ensureArray(or.getScores().getDBIDs());
    final DBIDArrayIter pos = ids.iter();
    // One bit per object; a set bit marks an object with a * score + b > 0.
    final long[] t = BitsUtil.zero(ids.size());
    for (;;) {
        // E-Step: re-assign every object by the sign of its current target.
        boolean changed = false;
        pos.seek(0);
        for (int i = 0; i < ids.size(); i++, pos.advance()) {
            final double val = or.getScores().doubleValue(pos);
            final double targ = a * val + b;
            final boolean wanted = targ > 0;
            if (wanted != BitsUtil.get(t, i)) {
                if (wanted) {
                    BitsUtil.setI(t, i);
                } else {
                    BitsUtil.clearI(t, i);
                }
                changed = true;
            }
        }
        if (!changed) {
            // Assignment is stable: converged.
            break;
        }
        // M-Step, implementation based on:
        // H.-T. Lin, C.-J. Lin, R. C. Weng:
        // A Note on Platt's Probabilistic Outputs for Support Vector Machines
        final double[] fitted = MStepLevenbergMarquardt(a, b, ids, t, or.getScores());
        a = fitted[0];
        b = fitted[1];
        if (++round > 100) {
            LOG.warning("Max iterations met in sigmoid fitting.");
            break;
        }
    }
    Afinal = a;
    Bfinal = b;
    LOG.debugFine("A = " + Afinal + " B = " + Bfinal);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 28 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class SigmoidOutlierScalingFunction method MStepLevenbergMarquardt.

/**
 * M-Step using a modified Levenberg-Marquardt method.
 *
 * <p>
 * Starting from the given {@code a} and {@code b}, performs at most 10
 * damped Newton iterations, each followed by a backtracking line search that
 * halves the step size until the objective decreases sufficiently or the
 * step size drops below 1e-8. The iteration stops early once both gradient
 * components are below 1e-5 in absolute value.
 * </p>
 *
 * <p>
 * Implementation based on:<br />
 * H.-T. Lin, C.-J. Lin, R. C. Weng:<br />
 * A Note on Platt’s Probabilistic Outputs for Support Vector Machines
 * </p>
 *
 * @param a A parameter (initial slope)
 * @param b B parameter (initial offset)
 * @param ids Ids to process; position i in this array corresponds to bit i of
 *        {@code t}
 * @param t Bitset containing the assignment
 * @param scores Scores
 * @return new values for A and B, as a two element array {@code {a, b}}.
 */
private final double[] MStepLevenbergMarquardt(double a, double b, ArrayDBIDs ids, long[] t, DoubleRelation scores) {
    // Number of set and unset bits in the assignment.
    final int prior1 = BitsUtil.cardinality(t);
    final int prior0 = ids.size() - prior1;
    // A single iterator is reused (and rewound) for every pass over the ids.
    DBIDArrayIter iter = ids.iter();
    // Maximum number of Newton iterations.
    final int maxiter = 10;
    // Smallest step size tried by the line search.
    final double minstep = 1e-8;
    // Regularization added to the diagonal of the Hessian.
    final double sigma = 1e-12;
    // Smoothed target derived from the number of set bits. In the loops below
    // it is applied to objects whose bit is NOT set.
    final double loTarget = (prior1 + 1.0) / (prior1 + 2.0);
    // Smoothed target derived from the number of unset bits. In the loops below
    // it is applied to objects whose bit IS set.
    final double hiTarget = 1.0 / (prior0 + 2.0);
    // t[i] := t.get(i) ? hiTarget : loTarget.
    // NOTE(review): the names (and the earlier comments, which labeled loTarget
    // as the target for "set" objects) look swapped relative to the reference
    // paper - confirm that this assignment is intended.
    // Reset, or continue with previous values?
    // a = 0.0;
    // b = FastMath.log((prior0 + 1.0) / (prior1 + 1.0));
    // Objective value at the starting point. Both branches only call exp()
    // with a non-positive argument.
    double fval = 0.0;
    iter.seek(0);
    for (int i = 0; i < ids.size(); i++, iter.advance()) {
        final double val = scores.doubleValue(iter);
        final double fApB = val * a + b;
        final double ti = BitsUtil.get(t, i) ? hiTarget : loTarget;
        if (fApB >= 0) {
            fval += ti * fApB + FastMath.log(1 + FastMath.exp(-fApB));
        } else {
            fval += (ti - 1) * fApB + FastMath.log(1 + FastMath.exp(fApB));
        }
    }
    for (int it = 0; it < maxiter; it++) {
        // logger.debugFinest("Iter: " + it + "a: " + a + " b: " + b);
        // Update Gradient and Hessian (use H’ = H + sigma I)
        double h11 = sigma;
        double h22 = sigma;
        double h21 = 0.0;
        double g1 = 0.0;
        double g2 = 0.0;
        iter.seek(0);
        for (int i = 0; i < ids.size(); i++, iter.advance()) {
            final double val = scores.doubleValue(iter);
            final double fApB = val * a + b;
            // p = 1 / (1 + exp(fApB)) and q = 1 - p, written per branch so that
            // exp() is only called with a non-positive argument.
            final double p;
            final double q;
            if (fApB >= 0) {
                p = FastMath.exp(-fApB) / (1.0 + FastMath.exp(-fApB));
                q = 1.0 / (1.0 + FastMath.exp(-fApB));
            } else {
                p = 1.0 / (1.0 + FastMath.exp(fApB));
                q = FastMath.exp(fApB) / (1.0 + FastMath.exp(fApB));
            }
            // Second-derivative weight of this object.
            final double d2 = p * q;
            h11 += val * val * d2;
            h22 += d2;
            h21 += val * d2;
            // First-derivative term: target minus p.
            final double d1 = (BitsUtil.get(t, i) ? hiTarget : loTarget) - p;
            g1 += val * d1;
            g2 += d1;
        }
        // Stop condition
        if (Math.abs(g1) < 1e-5 && Math.abs(g2) < 1e-5) {
            break;
        }
        // Compute modified Newton directions
        // (solves the regularized 2x2 system via its determinant)
        final double det = h11 * h22 - h21 * h21;
        final double dA = -(h22 * g1 - h21 * g2) / det;
        final double dB = -(-h21 * g1 + h11 * g2) / det;
        // Directional derivative along (dA, dB), used in the decrease test.
        final double gd = g1 * dA + g2 * dB;
        double stepsize = 1.0;
        while (stepsize >= minstep) {
            // Line search
            final double newA = a + stepsize * dA;
            final double newB = b + stepsize * dB;
            // Objective value at the candidate point (same formula as for fval).
            double newf = 0.0;
            iter.seek(0);
            for (int i = 0; i < ids.size(); i++, iter.advance()) {
                final double val = scores.doubleValue(iter);
                final double fApB = val * newA + newB;
                final double ti = BitsUtil.get(t, i) ? hiTarget : loTarget;
                if (fApB >= 0) {
                    newf += ti * fApB + FastMath.log(1 + FastMath.exp(-fApB));
                } else {
                    newf += (ti - 1) * fApB + FastMath.log(1 + FastMath.exp(fApB));
                }
            }
            if (newf < fval + 0.0001 * stepsize * gd) {
                // Accept the candidate point.
                a = newA;
                b = newB;
                fval = newf;
                // Sufficient decrease satisfied
                break;
            } else {
                // Backtrack: try half the step.
                stepsize /= 2.0;
            }
            // Same bound as the loop condition; checked here only to log it.
            if (stepsize < minstep) {
                LOG.debug("Minstep hit.");
                break;
            }
        }
        // Log when the iteration budget is exhausted without convergence.
        if (it + 1 >= maxiter) {
            LOG.debug("Maximum iterations hit.");
            break;
        }
    }
    return new double[] { a, b };
}
Also used : DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 29 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class ComputeSimilarityMatrixImage method computeSimilarityMatrixImage.

/**
 * Render the pairwise distances of the relation as a symmetric gray-scale
 * image.
 *
 * <p>
 * The objects are arranged in the order delivered by {@code iter}. A first
 * pass over the upper triangle collects the minimum and maximum of the
 * finite distances (optionally ignoring non-positive ones), a second pass
 * maps each distance linearly onto that range and paints the pixel and its
 * mirror image.
 * </p>
 *
 * @param relation Relation
 * @param iter DBID iterator
 * @return result object
 * @throws IllegalStateException when the iterator does not deliver exactly as
 *         many objects as the relation contains
 */
private SimilarityMatrix computeSimilarityMatrixImage(Relation<O> relation, DBIDIter iter) {
    final ArrayModifiableDBIDs order = DBIDUtil.newArray(relation.size());
    while (iter.valid()) {
        order.add(iter);
        iter.advance();
    }
    if (order.size() != relation.size()) {
        throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
    }
    final DistanceQuery<O> dq = distanceFunction.instantiate(relation);
    final int size = order.size();
    // Progress is reported once per row in each of the two passes, hence
    // 2 * size steps in total.
    final FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Similarity Matrix Image", 2 * size, LOG) : null;

    // Pass 1: value range of the distances. Note: we assume that an efficient
    // distance cache is available, as both passes compute O(n*n) distances.
    final DoubleMinMax minmax = new DoubleMinMax();
    final DBIDArrayIter rangeRow = order.iter();
    final DBIDArrayIter rangeCol = order.iter();
    for (; rangeRow.valid(); rangeRow.advance()) {
        for (rangeCol.seek(rangeRow.getOffset()); rangeCol.valid(); rangeCol.advance()) {
            final double d = dq.distance(rangeRow, rangeCol);
            if (Double.isNaN(d) || Double.isInfinite(d)) {
                continue;
            }
            if (skipzero && !(d > 0.0)) {
                continue;
            }
            minmax.put(d);
        }
        LOG.incrementProcessed(prog);
    }

    // Linear map of the observed range; only inverted when the range is
    // strictly positive.
    final double range = minmax.getMax() - minmax.getMin();
    final double zoom = range > 0.0 ? 1. / range : range;
    final LinearScaling scale = new LinearScaling(zoom, -minmax.getMin() * zoom);

    // Pass 2: paint the upper triangle and mirror it.
    final BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    final DBIDArrayIter paintRow = order.iter();
    final DBIDArrayIter paintCol = order.iter();
    for (int x = 0; x < size && paintRow.valid(); x++, paintRow.advance()) {
        paintCol.seek(paintRow.getOffset());
        for (int y = x; y < size && paintCol.valid(); y++, paintCol.advance()) {
            final double raw = dq.distance(paintRow, paintCol);
            // Non-positive distances bypass the linear map.
            double level = raw > 0.0 ? scale.getScaled(raw) : raw;
            // Apply extra scaling
            if (scaling != null) {
                level = scaling.getScaled(level);
            }
            final int gray = 0xFF & (int) (255 * level);
            // Replicate the gray level into the R, G and B channels.
            final int argb = 0xff000000 | (gray * 0x010101);
            img.setRGB(x, y, argb);
            img.setRGB(y, x, argb);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return new SimilarityMatrix(img, relation, order);
}
Also used : LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) BufferedImage(java.awt.image.BufferedImage) ImageIO(javax.imageio.ImageIO)

Example 30 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class InMemoryIDistanceIndex method initialize.

/**
 * Build the index: choose the reference points, assign every object of the
 * relation to the reference point with the smallest distance, and sort each
 * per-reference list.
 */
@Override
public void initialize() {
    referencepoints = DBIDUtil.ensureArray(initialization.chooseInitialMedoids(numref, relation.getDBIDs(), distanceQuery));
    // Use the number of reference points actually chosen (should be the same
    // as requested anyway).
    final int k = referencepoints.size();
    index = new ModifiableDoubleDBIDList[k];
    int slot = 0;
    while (slot < k) {
        index[slot] = DBIDUtil.newDistanceDBIDList(relation.size() / (2 * k));
        slot++;
    }
    // TODO: add optimized codepath for primitive distances.
    final DBIDArrayIter ref = referencepoints.iter();
    final DBIDIter obj = relation.iterDBIDs();
    while (obj.valid()) {
        double nearest = Double.POSITIVE_INFINITY;
        int owner = -1;
        ref.seek(0);
        while (ref.valid()) {
            final double d = distanceQuery.distance(obj, ref);
            // Strict comparison: on ties the earliest reference point wins.
            if (d < nearest) {
                nearest = d;
                owner = ref.getOffset();
            }
            ref.advance();
        }
        assert (owner >= 0 && owner < k);
        index[owner].add(nearest, obj);
        obj.advance();
    }
    // Sort index.
    for (ModifiableDoubleDBIDList list : index) {
        list.sort();
    }
}
Also used : DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)64 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)17 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)15 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)15 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)13 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)12 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)9 Test (org.junit.Test)9 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 IOException (java.io.IOException)5 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)4 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)4 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)3 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)3