Search in sources :

Example 31 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The method filter of the class FastMultidimensionalScalingTransform.

/**
 * Embed each supported column of the bundle into {@code tdim} dimensions.
 * <p>
 * Columns whose type is not accepted by the distance function's input type
 * restriction (e.g. labels) are appended to the result unchanged. For every
 * other column, a squared distance matrix is computed and double-centered,
 * {@code tdim} eigenvectors are extracted, and each entry of the column list
 * is overwritten with its projected vector.
 *
 * @param objects Bundle to transform
 * @return the input bundle itself if it contains no objects, otherwise a new
 *         bundle holding the pass-through and projected columns
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int size = objects.dataLength();
    if (size == 0) {
        return objects;
    }
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    for (int col = 0; col < objects.metaLength(); col++) {
        final SimpleTypeInformation<? extends Object> meta = objects.meta(col);
        @SuppressWarnings("unchecked")
        final List<Object> values = (List<Object>) objects.getColumn(col);
        // Pass unsupported columns (e.g. labels) through untouched.
        if (!dist.getInputTypeRestriction().isAssignableFromType(meta)) {
            result.appendColumn(meta, values);
            continue;
        }
        @SuppressWarnings("unchecked")
        final List<I> typed = (List<I>) values;
        // Choose the vector factory that produces the replacement objects.
        final NumberVector.Factory<? extends NumberVector> factory;
        if (meta instanceof VectorFieldTypeInformation) {
            final VectorFieldTypeInformation<?> wild = (VectorFieldTypeInformation<?>) meta;
            // Note two-step cast, to make stricter compilers happy.
            @SuppressWarnings("unchecked")
            final VectorFieldTypeInformation<? extends NumberVector> vmeta = (VectorFieldTypeInformation<? extends NumberVector>) wild;
            factory = FilterUtil.guessFactory(vmeta);
        } else {
            factory = DoubleVector.FACTORY;
        }
        // The column is registered now; its entries are replaced in place below.
        result.appendColumn(new VectorFieldTypeInformation<>(factory, tdim), typed);
        // Squared distance matrix, double-centered.
        final double[][] dmat = computeSquaredDistanceMatrix(typed, dist);
        doubleCenterSymmetric(dmat);
        // Eigen decomposition: one row of 'vectors' per target dimension.
        final double[][] vectors = new double[tdim][size];
        final double[] scale = new double[tdim];
        findEigenVectors(dmat, vectors, scale);
        // Undo squared, unless we were given a squared distance function:
        if (!dist.isSquared()) {
            for (int d = 0; d < tdim; d++) {
                scale[d] = FastMath.sqrt(Math.abs(scale[d]));
            }
        }
        // Write the projected coordinates of every object back into the column.
        final double[] coords = new double[tdim];
        for (int row = 0; row < size; row++) {
            for (int d = 0; d < tdim; d++) {
                coords[d] = scale[d] * vectors[d][row];
            }
            values.set(row, factory.newNumberVector(coords));
        }
    }
    return result;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) List(java.util.List)

Example 32 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The method initialize of the class StaticArrayDatabase.

/**
 * Load the data from the database connection (if one is still set) and
 * build the relations and indexes for it.
 * <p>
 * The connection reference is cleared after loading, so a repeated call does
 * nothing.
 */
@Override
public void initialize() {
    if (databaseConnection == null) {
        return;
    }
    if (LOG.isDebugging()) {
        LOG.debugFine("Loading data from database connection.");
    }
    final MultipleObjectsBundle bundle = databaseConnection.loadData();
    // Run at most once.
    databaseConnection = null;
    // Use the DBIDs of the bundle if present, otherwise generate a range.
    final DBIDs bundleIds = bundle.getDBIDs();
    if (bundleIds == null) {
        this.ids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    } else if (bundleIds instanceof ArrayStaticDBIDs) {
        this.ids = (ArrayStaticDBIDs) bundleIds;
    } else {
        this.ids = (ArrayStaticDBIDs) DBIDUtil.makeUnmodifiable(bundleIds);
    }
    // Replace id representation (it would be nicer if we would not need
    // DBIDView at all)
    this.idrep = new DBIDView(this.ids);
    relations.add(this.idrep);
    getHierarchy().add(this, idrep);
    final DBIDArrayIter iter = this.ids.iter();
    final int columns = bundle.metaLength();
    for (int c = 0; c < columns; c++) {
        final SimpleTypeInformation<?> rawMeta = bundle.meta(c);
        @SuppressWarnings("unchecked")
        final SimpleTypeInformation<Object> ometa = (SimpleTypeInformation<Object>) rawMeta;
        final WritableDataStore<Object> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, ometa.getRestrictionClass());
        // Copy the column into the store; the iterator offset is the bundle row.
        iter.seek(0);
        while (iter.valid()) {
            store.put(iter, bundle.data(iter.getOffset(), c));
            iter.advance();
        }
        final Relation<?> relation = new MaterializedRelation<>(ometa, ids, null, store);
        relations.add(relation);
        getHierarchy().add(this, relation);
        // Try to add indexes where appropriate
        for (IndexFactory<?, ?> factory : indexFactories) {
            if (!factory.getInputTypeRestriction().isAssignableFromType(ometa)) {
                continue;
            }
            @SuppressWarnings("unchecked")
            final IndexFactory<Object, ?> ofact = (IndexFactory<Object, ?>) factory;
            @SuppressWarnings("unchecked")
            final Relation<Object> orep = (Relation<Object>) relation;
            final Index index = ofact.instantiate(orep);
            // Only time the construction when statistics logging is enabled.
            Duration timer = null;
            if (LOG.isStatistics()) {
                timer = LOG.newDuration(index.getClass().getName() + ".construction").begin();
            }
            index.initialize();
            if (timer != null) {
                LOG.statistics(timer.end());
            }
            getHierarchy().add(relation, index);
        }
    }
    // fire insertion event
    eventManager.fireObjectsInserted(ids);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Index(de.lmu.ifi.dbs.elki.index.Index) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) IndexFactory(de.lmu.ifi.dbs.elki.index.IndexFactory) DBIDView(de.lmu.ifi.dbs.elki.database.relation.DBIDView)

Example 33 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The method run of the class RangeQueryBenchmarkAlgorithm.

/**
 * Run the algorithm, with a separate query set.
 * <p>
 * The query set must contain a vector column with one dimension more than
 * the relation: the first {@code dim} values are the query point, the last
 * value is the query radius. A random sample of the query set is executed as
 * range queries; the mean result size and a checksum of the returned ids are
 * written to the statistics log.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no query set was configured
 * @throws IncompatibleDataException if the query set has no column of the
 *         expected vector type
 */
public Result run(Database database, Relation<O> relation) {
    if (queries == null) {
        throw new AbortException("A query set is required for this 'run' method.");
    }
    // Get a distance and range query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
    int dim = RelationUtil.dimensionality(relation);
    // Separate query set: vectors of dim + 1 values (point plus radius).
    TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
    MultipleObjectsBundle bundle = queries.loadData();
    // Use the first column with a compatible type.
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (res.isAssignableFromType(bundle.meta(i))) {
            col = i;
            break;
        }
    }
    if (col < 0) {
        // Report the expected type and all types that were actually found.
        StringBuilder buf = new StringBuilder();
        buf.append("No compatible data type in query input was found. Expected: ");
        buf.append(res.toString());
        buf.append(" have: ");
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (i > 0) {
                buf.append(' ');
            }
            buf.append(bundle.meta(i).toString());
        }
        throw new IncompatibleDataException(buf.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    // This benchmark issues range queries, so label the progress accordingly.
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance();
    double[] buf = new double[dim];
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
        // Map the sampled id back to its row in the query bundle.
        int off = sids.binarySearch(iditer);
        assert (off >= 0);
        NumberVector o = (NumberVector) bundle.data(off, col);
        // Split the query vector into the point (first dim values) ...
        for (int i = 0; i < dim; i++) {
            buf[i] = o.doubleValue(i);
        }
        O v = ofactory.newNumberVector(buf);
        // ... and the radius (last value).
        double r = o.doubleValue(dim);
        DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
        // Fold the returned ids into a per-query checksum.
        int ichecksum = 0;
        for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
            ichecksum += DBIDUtil.asInteger(it);
        }
        hash = Util.mixHashCodes(hash, ichecksum);
        mv.put(rres.size());
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 34 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The method run of the class ValidateApproximativeKNNIndex.

/**
 * Run the algorithm.
 * <p>
 * Compares an index-accelerated kNN query against a reference kNN query on
 * a random sample of query objects. The queries are taken from the database
 * itself (optionally restricted to objects whose label matches
 * {@code pattern}), or from the separate query set if one is configured and
 * no pattern is given. Result size, recall and k-distance errors are written
 * to the statistics log.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no accelerated query is available, or if the
 *         separate query set has no column of a compatible type
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // Approximate query:
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
    if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
        throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
    }
    // Exact query:
    KNNQuery<O> truekNNQuery;
    if (forcelinear) {
        truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
    } else {
        truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
    }
    if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
        LOG.warning("Query classes are the same. This experiment may be invalid!");
    }
    // Accumulators shared by both query sources.
    MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
    MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
    int misses = 0;
    if (queries == null || pattern != null) {
        // No query set - use original database.
        // Relation to filter on
        Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
                // Query index:
                KNNList knns = knnQuery.getKNNForDBID(iditer, k);
                // Query reference:
                KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
                if (!recordKNNQuality(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
                    misses++;
                }
            }
            // Skipped objects still count as processed for the progress.
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    } else {
        // Separate query set.
        TypeInformation res = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        // Use the first column with a compatible type.
        int col = -1;
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (res.isAssignableFromType(bundle.meta(i))) {
                col = i;
                break;
            }
        }
        if (col < 0) {
            throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
        }
        // Random sampling is a bit of hack, sorry.
        // But currently, we don't (yet) have an "integer random sample" function.
        DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            // Map the sampled id back to its row in the query bundle.
            int off = sids.binarySearch(iditer);
            assert (off >= 0);
            @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
            // Query index:
            KNNList knns = knnQuery.getKNNForObject(o, k);
            // Query reference:
            KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
            if (!recordKNNQuality(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
                misses++;
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    }
    logQualityStatistics(mv, mvrec, mvdist, mvdaerr, mvdrerr, misses);
    return null;
}

/**
 * Record the quality measures of a single approximate kNN result.
 *
 * @param knns Result of the index query
 * @param trueknns Result of the reference query
 * @param mv Accumulator for the adjusted result size
 * @param mvrec Accumulator for the recall
 * @param mvdist Accumulator for the approximate k-distance
 * @param mvdaerr Accumulator for the absolute k-distance error
 * @param mvdrerr Accumulator for the relative k-distance error
 * @return {@code false} if the index returned less than {@code k} objects
 */
private boolean recordKNNQuality(KNNList knns, KNNList trueknns, MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr) {
    final double truesize = trueknns.size();
    // Put adjusted knn size:
    mv.put(knns.size() * k / truesize);
    // Put recall:
    mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / truesize);
    if (knns.size() < k) {
        // Less than k objects.
        return false;
    }
    final double kdist = knns.getKNNDistance();
    final double tdist = trueknns.getKNNDistance();
    // Skip zero reference distances: the relative error would be undefined.
    if (tdist > 0.0) {
        mvdist.put(kdist);
        mvdaerr.put(kdist - tdist);
        mvdrerr.put(kdist / tdist);
    }
    return true;
}

/**
 * Write the collected quality measures to the statistics log, if enabled.
 * The same set of statistics is reported for both query sources.
 *
 * @param mv Adjusted result sizes
 * @param mvrec Recall values
 * @param mvdist Approximate k-distances
 * @param mvdaerr Absolute k-distance errors
 * @param mvdrerr Relative k-distance errors
 * @param misses Number of queries that returned less than {@code k} objects
 */
private void logQualityStatistics(MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr, int misses) {
    if (!LOG.isStatistics()) {
        return;
    }
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
    if (mvdist.getCount() > 0) {
        LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
        LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
        LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
    }
    if (misses > 0) {
        LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) LinearScanQuery(de.lmu.ifi.dbs.elki.database.query.LinearScanQuery) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 35 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The method delete of the class HashmapDatabase.

/**
 * Deletes the given objects: their values are first collected from every
 * relation into a bundle, then each object is removed via
 * {@link #doDelete(DBIDRef)}, and finally a deletion event is fired.
 *
 * {@inheritDoc}
 */
@Override
public MultipleObjectsBundle delete(DBIDs ids) {
    // Snapshot the values first: one bundle column per relation.
    final MultipleObjectsBundle removed = new MultipleObjectsBundle();
    for (Relation<?> rel : relations) {
        final ArrayList<Object> values = new ArrayList<>(ids.size());
        DBIDIter it = ids.iter();
        while (it.valid()) {
            values.add(rel.get(it));
            it.advance();
        }
        removed.appendColumn(rel.getDataTypeInformation(), values);
    }
    // Now remove the objects from the database.
    DBIDIter del = ids.iter();
    while (del.valid()) {
        doDelete(del);
        del.advance();
    }
    // Notify listeners about the removal.
    eventManager.fireObjectsRemoved(ids);
    return removed;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) 72, AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) 37, Test (org.junit.Test) 37, DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector) 27, ArrayList (java.util.ArrayList) 19, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) 13, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector) 10, ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) 10, VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) 9, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance) 8, List (java.util.List) 7, LabelList (de.lmu.ifi.dbs.elki.data.LabelList) 5, SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) 5, InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) 5, InputStream (java.io.InputStream) 5, ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel) 4, TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation) 4, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 4, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs) 4, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) 4