Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
The class FastMultidimensionalScalingTransform, method filter:
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  if (size == 0) {
    return objects;
  }
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  for (int r = 0; r < objects.metaLength(); r++) {
    SimpleTypeInformation<? extends Object> type = objects.meta(r);
    @SuppressWarnings("unchecked")
    final List<Object> column = (List<Object>) objects.getColumn(r);
    // Not supported column (e.g. labels):
    if (!dist.getInputTypeRestriction().isAssignableFromType(type)) {
      bundle.appendColumn(type, column);
      continue;
    }
    // Get the replacement type information
    @SuppressWarnings("unchecked")
    final List<I> castColumn = (List<I>) column;
    NumberVector.Factory<? extends NumberVector> factory = null;
    {
      if (type instanceof VectorFieldTypeInformation) {
        final VectorFieldTypeInformation<?> ctype = (VectorFieldTypeInformation<?>) type;
        // Note two-step cast, to make stricter compilers happy.
        @SuppressWarnings("unchecked")
        final VectorFieldTypeInformation<? extends NumberVector> vtype = (VectorFieldTypeInformation<? extends NumberVector>) ctype;
        factory = FilterUtil.guessFactory(vtype);
      } else {
        factory = DoubleVector.FACTORY;
      }
      bundle.appendColumn(new VectorFieldTypeInformation<>(factory, tdim), castColumn);
    }
    // Compute distance matrix.
    double[][] imat = computeSquaredDistanceMatrix(castColumn, dist);
    doubleCenterSymmetric(imat);
    // Find eigenvectors.
    {
      double[][] evs = new double[tdim][size];
      double[] lambda = new double[tdim];
      findEigenVectors(imat, evs, lambda);
      // Undo squared, unless we were given a squared distance function:
      if (!dist.isSquared()) {
        for (int i = 0; i < tdim; i++) {
          lambda[i] = FastMath.sqrt(Math.abs(lambda[i]));
        }
      }
      // Project each data point to the new coordinates.
      double[] buf = new double[tdim];
      for (int i = 0; i < size; i++) {
        for (int d = 0; d < tdim; d++) {
          buf[d] = lambda[d] * evs[d][i];
        }
        column.set(i, factory.newNumberVector(buf));
      }
    }
  }
  return bundle;
}
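For orientation, here is a minimal usage sketch of the filter above. It assumes an already-configured FastMultidimensionalScalingTransform instance named mds (its target dimensionality and distance function are set up elsewhere and not shown on this page); the variable names and the 3-dimensional toy data are illustrative only, and only API calls that also appear in the snippets on this page are used.

// Hedged sketch: build a bundle with a single 3-dimensional vector column
// and run it through an assumed, pre-configured filter instance "mds".
List<DoubleVector> vectors = new ArrayList<>();
vectors.add(DoubleVector.FACTORY.newNumberVector(new double[] { 1.0, 2.0, 3.0 }));
vectors.add(DoubleVector.FACTORY.newNumberVector(new double[] { 4.0, 5.0, 6.0 }));
vectors.add(DoubleVector.FACTORY.newNumberVector(new double[] { 7.0, 8.0, 9.0 }));
MultipleObjectsBundle input = new MultipleObjectsBundle();
input.appendColumn(new VectorFieldTypeInformation<>(DoubleVector.FACTORY, 3), vectors);
// Columns whose type matches the distance function's input type restriction
// are replaced by tdim-dimensional MDS projections; other columns (e.g. labels)
// are passed through unchanged.
MultipleObjectsBundle projected = mds.filter(input);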
Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
The class StaticArrayDatabase, method initialize:
/**
* Initialize the database by getting the initial data from the database
* connection.
*/
@Override
public void initialize() {
  if (databaseConnection != null) {
    if (LOG.isDebugging()) {
      LOG.debugFine("Loading data from database connection.");
    }
    MultipleObjectsBundle bundle = databaseConnection.loadData();
    // Run at most once.
    databaseConnection = null;
    // Find DBIDs for bundle
    {
      DBIDs bids = bundle.getDBIDs();
      if (bids instanceof ArrayStaticDBIDs) {
        this.ids = (ArrayStaticDBIDs) bids;
      } else if (bids == null) {
        this.ids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
      } else {
        this.ids = (ArrayStaticDBIDs) DBIDUtil.makeUnmodifiable(bids);
      }
    }
    // Replace id representation (it would be nicer if we would not need
    // DBIDView at all)
    this.idrep = new DBIDView(this.ids);
    relations.add(this.idrep);
    getHierarchy().add(this, idrep);
    DBIDArrayIter it = this.ids.iter();
    int numrel = bundle.metaLength();
    for (int i = 0; i < numrel; i++) {
      SimpleTypeInformation<?> meta = bundle.meta(i);
      @SuppressWarnings("unchecked")
      SimpleTypeInformation<Object> ometa = (SimpleTypeInformation<Object>) meta;
      WritableDataStore<Object> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, ometa.getRestrictionClass());
      for (it.seek(0); it.valid(); it.advance()) {
        store.put(it, bundle.data(it.getOffset(), i));
      }
      Relation<?> relation = new MaterializedRelation<>(ometa, ids, null, store);
      relations.add(relation);
      getHierarchy().add(this, relation);
      // Try to add indexes where appropriate
      for (IndexFactory<?, ?> factory : indexFactories) {
        if (factory.getInputTypeRestriction().isAssignableFromType(ometa)) {
          @SuppressWarnings("unchecked")
          final IndexFactory<Object, ?> ofact = (IndexFactory<Object, ?>) factory;
          @SuppressWarnings("unchecked")
          final Relation<Object> orep = (Relation<Object>) relation;
          final Index index = ofact.instantiate(orep);
          Duration duration = LOG.isStatistics() ? LOG.newDuration(index.getClass().getName() + ".construction").begin() : null;
          index.initialize();
          if (duration != null) {
            LOG.statistics(duration.end());
          }
          getHierarchy().add(relation, index);
        }
      }
    }
    // fire insertion event
    eventManager.fireObjectsInserted(ids);
  }
}
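As a usage note, the initialize() call above is what turns a configured StaticArrayDatabase into queryable relations. Below is a hedged sketch of driving it; the two-argument constructor (data source connection, index factories), the Relation retrieval via getRelation, and the TypeUtil.NUMBER_VECTOR_FIELD constant are assumptions that may differ between ELKI versions, and conn stands for an arbitrary DatabaseConnection that is not defined here.

// Hedged sketch: "conn" is an assumed DatabaseConnection; the constructor
// signature and the TypeUtil constant are assumptions, not taken from this page.
StaticArrayDatabase db = new StaticArrayDatabase(conn, Collections.<IndexFactory<?, ?>>emptyList());
db.initialize(); // runs the bundle loading, DBID assignment and index construction shown above
Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);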
Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
The class RangeQueryBenchmarkAlgorithm, method run:
/**
* Run the algorithm, with a separate query set.
*
* @param database Database
* @param relation Relation
* @return Null result
*/
public Result run(Database database, Relation<O> relation) {
  if (queries == null) {
    throw new AbortException("A query set is required for this 'run' method.");
  }
  // Get a distance and kNN query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
  NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
  int dim = RelationUtil.dimensionality(relation);
  // Separate query set.
  TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
  MultipleObjectsBundle bundle = queries.loadData();
  int col = -1;
  for (int i = 0; i < bundle.metaLength(); i++) {
    if (res.isAssignableFromType(bundle.meta(i))) {
      col = i;
      break;
    }
  }
  if (col < 0) {
    StringBuilder buf = new StringBuilder();
    buf.append("No compatible data type in query input was found. Expected: ");
    buf.append(res.toString());
    buf.append(" have: ");
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (i > 0) {
        buf.append(' ');
      }
      buf.append(bundle.meta(i).toString());
    }
    throw new IncompatibleDataException(buf.toString());
  }
  // Random sampling is a bit of hack, sorry.
  // But currently, we don't (yet) have an "integer random sample" function.
  DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
  final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
  FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
  int hash = 0;
  MeanVariance mv = new MeanVariance();
  double[] buf = new double[dim];
  for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
    int off = sids.binarySearch(iditer);
    assert (off >= 0);
    NumberVector o = (NumberVector) bundle.data(off, col);
    for (int i = 0; i < dim; i++) {
      buf[i] = o.doubleValue(i);
    }
    O v = ofactory.newNumberVector(buf);
    double r = o.doubleValue(dim);
    DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
    int ichecksum = 0;
    for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
      ichecksum += DBIDUtil.asInteger(it);
    }
    hash = Util.mixHashCodes(hash, ichecksum);
    mv.put(rres.size());
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics("Result hashcode: " + hash);
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
  }
  return null;
}
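Note the query format this method expects: the type request asks for (dim + 1)-dimensional vectors, where the first dim components are the query point and the last component is the query radius (read above as o.doubleValue(dim)). Below is a hedged sketch of assembling such a query bundle by hand for a 2-dimensional relation; the values and variable names are illustrative only, and only API calls that already appear on this page are used.

// Hedged sketch: two range queries for a 2-dimensional relation. The third
// component of each vector is the query radius, matching the extraction above.
List<DoubleVector> queryData = new ArrayList<>();
queryData.add(DoubleVector.FACTORY.newNumberVector(new double[] { 0.5, 0.5, 0.10 }));
queryData.add(DoubleVector.FACTORY.newNumberVector(new double[] { 2.0, 1.0, 0.25 }));
MultipleObjectsBundle querySet = new MultipleObjectsBundle();
querySet.appendColumn(new VectorFieldTypeInformation<>(DoubleVector.FACTORY, 3), queryData);
// A DatabaseConnection wrapping "querySet" would then be supplied as the
// "queries" parameter of the benchmark; that wiring is not shown on this page.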
Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
The class ValidateApproximativeKNNIndex, method run:
/**
* Run the algorithm.
*
* @param database Database
* @param relation Relation
* @return Null result
*/
public Result run(Database database, Relation<O> relation) {
  // Get a distance and kNN query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Approximate query:
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
  if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
    throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
  }
  // Exact query:
  KNNQuery<O> truekNNQuery;
  if (forcelinear) {
    truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
  } else {
    truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
  }
  if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
    LOG.warning("Query classes are the same. This experiment may be invalid!");
  }
  // No query set - use original database.
  if (queries == null || pattern != null) {
    // Relation to filter on
    Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
    MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
    int misses = 0;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
        // Query index:
        KNNList knns = knnQuery.getKNNForDBID(iditer, k);
        // Query reference:
        KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
        // Put adjusted knn size:
        mv.put(knns.size() * k / (double) trueknns.size());
        // Put recall:
        mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
        if (knns.size() >= k) {
          double kdist = knns.getKNNDistance();
          final double tdist = trueknns.getKNNDistance();
          if (tdist > 0.0) {
            mvdist.put(kdist);
            mvdaerr.put(kdist - tdist);
            mvdrerr.put(kdist / tdist);
          }
        } else {
          // Less than k objects.
          misses++;
        }
      }
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
      LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
      LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
      if (mvdist.getCount() > 0) {
        LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
        LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
        LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
      }
      if (misses > 0) {
        LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
      }
    }
  } else {
    // Separate query set.
    TypeInformation res = getDistanceFunction().getInputTypeRestriction();
    MultipleObjectsBundle bundle = queries.loadData();
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (res.isAssignableFromType(bundle.meta(i))) {
        col = i;
        break;
      }
    }
    if (col < 0) {
      throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
    MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
    int misses = 0;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      int off = sids.binarySearch(iditer);
      assert (off >= 0);
      @SuppressWarnings("unchecked")
      O o = (O) bundle.data(off, col);
      // Query index:
      KNNList knns = knnQuery.getKNNForObject(o, k);
      // Query reference:
      KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
      // Put adjusted knn size:
      mv.put(knns.size() * k / (double) trueknns.size());
      // Put recall:
      mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
      if (knns.size() >= k) {
        double kdist = knns.getKNNDistance();
        final double tdist = trueknns.getKNNDistance();
        if (tdist > 0.0) {
          mvdist.put(kdist);
          mvdaerr.put(kdist - tdist);
          mvdrerr.put(kdist / tdist);
        }
      } else {
        // Less than k objects.
        misses++;
      }
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
      LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
      LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
      if (mvdist.getCount() > 0) {
        LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
        LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
      }
      if (misses > 0) {
        LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
      }
    }
  }
  return null;
}
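The core of the validation above is the pairing of an index-backed kNN query with an exact baseline. Below is a hedged sketch of that pattern in isolation, written as if inside a method with the same type parameter O; distQuery, k and someObject are assumed to exist, and only calls that already appear in this snippet are used.

// Hedged sketch: compare an accelerated kNN query against a linear-scan baseline.
KNNQuery<O> approx = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
KNNQuery<O> exact = QueryUtil.getLinearScanKNNQuery(distQuery);
if (approx == null || approx instanceof LinearScanQuery) {
  throw new AbortException("No accelerated query available -- is an index configured?");
}
KNNList aknns = approx.getKNNForObject(someObject, k);
KNNList eknns = exact.getKNNForObject(someObject, k);
// Recall of the approximate result, computed as for mvrec above:
double recall = DBIDUtil.intersectionSize(aknns, eknns) / (double) eknns.size();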
Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
The class HashmapDatabase, method delete:
/**
* Removes the objects from the database (by calling
* {@link #doDelete(DBIDRef)} for each object) and indexes and fires a
* deletion event.
*
* {@inheritDoc}
*/
@Override
public MultipleObjectsBundle delete(DBIDs ids) {
  // Prepare bundle to return
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  for (Relation<?> relation : relations) {
    ArrayList<Object> data = new ArrayList<>(ids.size());
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      data.add(relation.get(iter));
    }
    bundle.appendColumn(relation.getDataTypeInformation(), data);
  }
  // remove from db
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    doDelete(iter);
  }
  // fire deletion event
  eventManager.fireObjectsRemoved(ids);
  return bundle;
}
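A usage note on the return value: the bundle mirrors the deleted objects column by column (one column per relation), so a caller can keep it as an undo buffer. The sketch below assumes that the database's updatable insert(...) method accepts such a bundle; that method is not shown on this page, and database and someIds are assumed, illustrative variables.

// Hedged sketch: capture the deleted objects and (assumed API) restore them later.
MultipleObjectsBundle removed = database.delete(someIds);
// ... later:
database.insert(removed); // insert(...) is an assumption about the updatable database API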