Search in sources :

Example 51 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

The method executeStep of the class EvaluationTabPanel.

/**
 * Execute this step: run the input and algorithm steps where they are still
 * pending, then run the evaluators and remember the result they are based on.
 *
 * @throws AbortException if the input or the algorithm step is not complete
 *         after attempting to run them
 */
@Override
protected void executeStep() {
    // Bring the prerequisite steps up to date, if they are able to run.
    final boolean inputPending = input.canRun() && !input.isComplete();
    if (inputPending) {
        input.execute();
    }
    final boolean algsPending = algs.canRun() && !algs.isComplete();
    if (algsPending) {
        algs.execute();
    }
    // Both prerequisites must have completed by now.
    if (!(input.isComplete() && algs.isComplete())) {
        throw new AbortException("Input data not available.");
    }
    // Fetch the database and the algorithm result, then run the evaluators.
    final Database db = input.getInputStep().getDatabase();
    final Result algorithmResult = algs.getAlgorithmStep().getResult();
    evals.runEvaluators(db.getHierarchy(), db);
    // Only a weak reference to the result is kept.
    basedOnResult = new WeakReference<Object>(algorithmResult);
}
Also used : Database(de.lmu.ifi.dbs.elki.database.Database) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) Result(de.lmu.ifi.dbs.elki.result.Result)

Example 52 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

The method run of the class RangeQueryBenchmarkAlgorithm.

/**
 * Run the benchmark on a random sample of the relation, reading the radius
 * of each range query from a separate radius relation.
 *
 * For every sampled object, a range query is executed; the ids of the results
 * are folded into a hash code, and the result sizes are aggregated. Both are
 * written to the statistics log.
 *
 * @param database Database
 * @param relation Relation
 * @param radrel Radius relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation, Relation<NumberVector> radrel) {
    // This variant must not be combined with an explicit query set.
    if (queries != null) {
        throw new AbortException("This 'run' method will not use the given query set!");
    }
    // Get a distance query, and a range query based on it.
    final DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    final RangeQuery<O> rq = database.getRangeQuery(dq);
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress progress = null;
    if (LOG.isVeryVerbose()) {
        progress = new FiniteProgress("kNN queries", sample.size(), LOG);
    }
    int hash = 0;
    final MeanVariance resultSizes = new MeanVariance();
    DBIDIter query = sample.iter();
    while (query.valid()) {
        // The query radius is the first component of the radius vector.
        final double radius = radrel.get(query).doubleValue(0);
        final DoubleDBIDList neighbors = rq.getRangeForDBID(query, radius);
        // Sum up the integer ids of the results as a per-query checksum.
        int checksum = 0;
        for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            checksum += DBIDUtil.asInteger(neighbor);
        }
        hash = Util.mixHashCodes(hash, checksum);
        resultSizes.put(neighbors.size());
        LOG.incrementProcessed(progress);
        query.advance();
    }
    LOG.ensureCompleted(progress);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + resultSizes.getMean() + " +- " + resultSizes.getNaiveStddev());
    }
    return null;
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 53 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

The method run of the class RangeQueryBenchmarkAlgorithm.

/**
 * Run the benchmark using a separate query set.
 *
 * The query set must contain a number vector column with one dimension more
 * than the relation: the leading dimensions are used as query vector, the
 * additional last dimension as query radius.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
    if (queries == null) {
        throw new AbortException("A query set is required for this 'run' method.");
    }
    // Get a distance query, and a range query based on it.
    final DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    final RangeQuery<O> rq = database.getRangeQuery(dq);
    final NumberVector.Factory<O> factory = RelationUtil.getNumberVectorFactory(relation);
    final int dim = RelationUtil.dimensionality(relation);
    // Query vectors carry exactly one extra component (the radius).
    final TypeInformation wanted = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
    final MultipleObjectsBundle bundle = queries.loadData();
    // Locate the first column of the bundle with a compatible type.
    int col = -1;
    for (int c = 0; col < 0 && c < bundle.metaLength(); c++) {
        if (wanted.isAssignableFromType(bundle.meta(c))) {
            col = c;
        }
    }
    if (col < 0) {
        // Report the expected type along with all types that were offered.
        final StringBuilder msg = new StringBuilder();
        msg.append("No compatible data type in query input was found. Expected: ").append(wanted.toString()).append(" have: ");
        for (int c = 0; c < bundle.metaLength(); c++) {
            if (c > 0) {
                msg.append(' ');
            }
            msg.append(bundle.meta(c).toString());
        }
        throw new IncompatibleDataException(msg.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    final DBIDRange offsets = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(offsets, sampling, random);
    FiniteProgress progress = null;
    if (LOG.isVeryVerbose()) {
        progress = new FiniteProgress("kNN queries", sample.size(), LOG);
    }
    int hash = 0;
    final MeanVariance resultSizes = new MeanVariance();
    final double[] coords = new double[dim];
    DBIDIter query = sample.iter();
    while (query.valid()) {
        // Translate the sampled id back to a row offset in the bundle.
        final int row = offsets.binarySearch(query);
        assert (row >= 0);
        final NumberVector raw = (NumberVector) bundle.data(row, col);
        // Leading components: the query vector.
        int d = 0;
        while (d < dim) {
            coords[d] = raw.doubleValue(d);
            d++;
        }
        final O center = factory.newNumberVector(coords);
        // Last component: the query radius.
        final double radius = raw.doubleValue(dim);
        final DoubleDBIDList neighbors = rq.getRangeForObject(center, radius);
        // Sum up the integer ids of the results as a per-query checksum.
        int checksum = 0;
        for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            checksum += DBIDUtil.asInteger(neighbor);
        }
        hash = Util.mixHashCodes(hash, checksum);
        resultSizes.put(neighbors.size());
        LOG.incrementProcessed(progress);
        query.advance();
    }
    LOG.ensureCompleted(progress);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + resultSizes.getMean() + " +- " + resultSizes.getNaiveStddev());
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 54 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

The method run of the class ValidateApproximativeKNNIndex.

/**
 * Run the algorithm: compare the results of an accelerated kNN query with
 * those of a reference kNN query, and log quality statistics.
 *
 * If no separate query set is configured, or a label pattern is given, the
 * query objects are sampled from the relation itself (and filtered by the
 * pattern, if any). Otherwise, they are sampled from the query set.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no accelerated kNN query is available, or if
 *         the query set contains no column of a compatible data type
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // Approximate query:
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
    if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
        throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
    }
    // Exact query:
    KNNQuery<O> truekNNQuery;
    if (forcelinear) {
        truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
    } else {
        truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
    }
    if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
        LOG.warning("Query classes are the same. This experiment may be invalid!");
    }
    // Statistics shared by both query modes, so that they are recorded and
    // reported in exactly the same way.
    final MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
    final MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
    int misses = 0;
    // No query set - use original database.
    if (queries == null || pattern != null) {
        // Relation to filter on
        Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
                // Query index:
                KNNList knns = knnQuery.getKNNForDBID(iditer, k);
                // Query reference:
                KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
                if (recordQuality(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
                    misses++;
                }
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    } else {
        // Separate query set.
        TypeInformation res = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        int col = -1;
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (res.isAssignableFromType(bundle.meta(i))) {
                col = i;
                break;
            }
        }
        if (col < 0) {
            throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
        }
        // Random sampling is a bit of hack, sorry.
        // But currently, we don't (yet) have an "integer random sample" function.
        DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            // Translate the sampled id back to a row offset in the bundle.
            int off = sids.binarySearch(iditer);
            assert (off >= 0);
            @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
            // Query index:
            KNNList knns = knnQuery.getKNNForObject(o, k);
            // Query reference:
            KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
            if (recordQuality(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
                misses++;
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    }
    logQualityStatistics(mv, mvrec, mvdist, mvdaerr, mvdrerr, misses);
    return null;
}

/**
 * Record the quality of one approximate kNN result against the reference
 * result.
 *
 * @param knns Result of the query under test
 * @param trueknns Result of the reference query
 * @param mv Aggregate of the adjusted result sizes
 * @param mvrec Aggregate of the recall values
 * @param mvdist Aggregate of the k-distances of the query under test
 * @param mvdaerr Aggregate of the absolute k-distance errors
 * @param mvdrerr Aggregate of the relative k-distance errors
 * @return {@code true} if the query under test returned less than k objects
 */
private boolean recordQuality(KNNList knns, KNNList trueknns, MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr) {
    // Put adjusted knn size:
    mv.put(knns.size() * k / (double) trueknns.size());
    // Put recall:
    mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
    if (knns.size() < k) {
        // Less than k objects.
        return true;
    }
    final double kdist = knns.getKNNDistance();
    final double tdist = trueknns.getKNNDistance();
    // A reference distance of zero is skipped; the relative error would
    // divide by zero.
    if (tdist > 0.0) {
        mvdist.put(kdist);
        mvdaerr.put(kdist - tdist);
        mvdrerr.put(kdist / tdist);
    }
    return false;
}

/**
 * Write the collected quality statistics to the statistics log, if enabled.
 *
 * @param mv Aggregate of the adjusted result sizes
 * @param mvrec Aggregate of the recall values
 * @param mvdist Aggregate of the k-distances of the query under test
 * @param mvdaerr Aggregate of the absolute k-distance errors
 * @param mvdrerr Aggregate of the relative k-distance errors
 * @param misses Number of queries that returned less than k objects
 */
private void logQualityStatistics(MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr, int misses) {
    if (!LOG.isStatistics()) {
        return;
    }
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
    // Distance statistics are only meaningful if any were recorded.
    if (mvdist.getCount() > 0) {
        LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
        LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
        LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
    }
    if (misses > 0) {
        LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) LinearScanQuery(de.lmu.ifi.dbs.elki.database.query.LinearScanQuery) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 55 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

The method dependence of the class HiCSDependenceMeasure.

/**
 * Estimate the dependence of two attributes as the average deviation
 * between the full distribution of one attribute, and its distribution
 * within a random slice of the other attribute.
 *
 * A slice is a window of {@code (int) (len * alphasqrt)} consecutive objects
 * in the sort order of the slicing attribute. For the first {@code m / 2}
 * rounds, the first attribute is tested while slicing on the second; for the
 * remaining rounds the roles are swapped.
 *
 * @param adapter1 Adapter to access the first data array
 * @param data1 First data array
 * @param adapter2 Adapter to access the second data array
 * @param data2 Second data array
 * @return Sum of the deviations over all rounds, divided by {@code m}
 * @throws AbortException if either data array contains a NaN value
 */
@Override
public <A, B> double dependence(final NumberArrayAdapter<?, A> adapter1, final A data1, final NumberArrayAdapter<?, B> adapter2, final B data2) {
    final int len = size(adapter1, data1, adapter2, data2);
    final int windowsize = (int) (len * alphasqrt);
    final Random random = rnd.getSingleThreadedRandom();
    // Index permutations, ordering the objects by each attribute. A window of
    // consecutive entries in s1 (s2) is a value-range slice on attribute 1 (2).
    int[] s1 = MathUtil.sequence(0, len), s2 = MathUtil.sequence(0, len);
    IntegerArrayQuickSort.sort(s1, new IntegerComparator() {

        @Override
        public int compare(int x, int y) {
            return Double.compare(adapter1.getDouble(data1, x), adapter1.getDouble(data1, y));
        }
    });
    IntegerArrayQuickSort.sort(s2, new IntegerComparator() {

        @Override
        public int compare(int x, int y) {
            return Double.compare(adapter2.getDouble(data2, x), adapter2.getDouble(data2, y));
        }
    });
    // Distributions for testing
    double[] fullValues = new double[len];
    double[] sampleValues = new double[windowsize];
    double deviationSum = 0.;
    // For the first half, we use the first dimension as reference
    for (int i = 0; i < len; i++) {
        fullValues[i] = adapter1.getDouble(data1, i);
        // x != x holds only for NaN.
        if (fullValues[i] != fullValues[i]) {
            throw new AbortException("NaN values are not allowed by this implementation!");
        }
    }
    // TODO: remove bias?
    int half = m >> 1;
    for (int i = 0; i < half; ++i) {
        // Build the sample: values of the first dimension for the objects in
        // a random slice of the second dimension.
        // NOTE(review): nextInt throws IllegalArgumentException when
        // windowsize >= len, and the exclusive bound never selects the
        // topmost window -- confirm whether this is intended.
        for (int j = random.nextInt(len - windowsize), k = 0; k < windowsize; ++k, ++j) {
            sampleValues[k] = adapter1.getDouble(data1, s2[j]);
        }
        double contrast = statTest.deviation(fullValues, sampleValues);
        if (Double.isNaN(contrast)) {
            // Retry.
            // NOTE(review): this does not terminate if the test yields NaN
            // for every slice.
            --i;
            continue;
        }
        deviationSum += contrast;
    }
    // For the second half, we use the second dimension as reference
    for (int i = 0; i < len; i++) {
        fullValues[i] = adapter2.getDouble(data2, i);
        if (fullValues[i] != fullValues[i]) {
            throw new AbortException("NaN values are not allowed by this implementation!");
        }
    }
    for (int i = half; i < m; ++i) {
        // Build the sample: values of the second dimension for the objects in
        // a random slice of the first dimension.
        for (int j = random.nextInt(len - windowsize), k = 0; k < windowsize; ++k, ++j) {
            sampleValues[k] = adapter2.getDouble(data2, s1[j]);
        }
        double contrast = statTest.deviation(fullValues, sampleValues);
        if (Double.isNaN(contrast)) {
            // Retry.
            --i;
            continue;
        }
        deviationSum += contrast;
    }
    return deviationSum / m;
}
Also used : Random(java.util.Random) IntegerComparator(de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)99 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)25 IOException (java.io.IOException)24 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 ArrayList (java.util.ArrayList)16 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)9 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)9 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)8 Model (de.lmu.ifi.dbs.elki.data.model.Model)8 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)8 Database (de.lmu.ifi.dbs.elki.database.Database)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)8 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)8 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)8 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)6 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)5 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)5