Search in sources :

Example 71 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class RandomlyChosenInitialMeans method chooseInitialMeans.

/**
 * Choose initial means by drawing a random sample of {@code k} object ids
 * from the relation and copying the vectors stored for them.
 *
 * @param database Database (not used by this implementation)
 * @param relation Relation to draw the sample from
 * @param k Number of means to choose
 * @param distanceFunction Distance function (not used by this implementation)
 * @return Array of {@code k} initial means
 * @throws AbortException if the sample contains fewer than {@code k} ids
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
    if (sample.size() < k) {
        throw new AbortException("Could not choose k means.");
    }
    final double[][] seeds = new double[k][];
    final DBIDIter cursor = sample.iter();
    int filled = 0;
    while (filled < k) {
        // Copy the vector of the current sample member into the next free slot.
        seeds[filled] = relation.get(cursor).toArray();
        filled++;
        cursor.advance();
    }
    return seeds;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 72 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class ExternalClustering method run.

/**
 * Run the algorithm: read cluster assignments from the external file, one
 * clustering per non-comment line, and attach each to the first relation of
 * the database whose size equals the number of assignments on that line.
 *
 * Tokens that parse as base-10 integers are cluster assignments; every
 * other token on the line is collected as part of the clustering name.
 *
 * @param database Database to use
 * @return always {@code null}; each clustering read is handed to
 *         {@code attachToRelation} instead of being returned
 * @throws AbortException if the file cannot be read, or if a line has no
 *         relation of matching size
 */
@Override
public Clustering<? extends Model> run(Database database) {
    // The raw file stream is a resource of its own, so that it is closed even
    // when wrapping it in FileUtil.tryGzipInput fails with an exception.
    try (FileInputStream fis = new FileInputStream(file);
        InputStream in = FileUtil.tryGzipInput(fis);
        TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
        Tokenizer tokenizer = reader.getTokenizer();
        reader.reset(in);
        IntArrayList assignment = new IntArrayList(database.getRelation(TypeUtil.DBID).size());
        ArrayList<String> name = new ArrayList<>();
        line: while (reader.nextLineExceptComments()) {
            // The tokenizer is positioned by nextLineExceptComments.
            for (; tokenizer.valid(); tokenizer.advance()) {
                try {
                    assignment.add(tokenizer.getIntBase10());
                } catch (NumberFormatException e) {
                    // Not an integer: treat the token as part of the label.
                    name.add(tokenizer.getSubstring());
                }
            }
            if (LOG.isDebuggingFinest()) {
                LOG.debugFinest("Read " + assignment.size() + " assignments and " + name.size() + " labels.");
            }
            for (Relation<?> r : database.getRelations()) {
                if (r.size() == assignment.size()) {
                    attachToRelation(database, r, assignment, name);
                    // Reset the buffers for the next line of the file.
                    assignment.clear();
                    name.clear();
                    continue line;
                }
            }
            throw new AbortException("No relation found to match with clustering of size " + assignment.size());
        }
    } catch (IOException e) {
        // This class loads clusterings; the message previously said "outlier scores".
        throw new AbortException("Could not load clustering: " + e.getMessage() + " when loading " + file, e);
    }
    // Results are attached to the database as a side effect; nothing is returned.
    return null;
}
Also used : FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) TokenizedReader(de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) Tokenizer(de.lmu.ifi.dbs.elki.utilities.io.Tokenizer) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 73 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class ExternalClustering method attachToRelation.

/**
 * Build a clustering from one line of the file and add it to the database
 * hierarchy below the given relation.
 *
 * The i-th assignment is applied to the i-th id of the relation. Clusters
 * with a negative cluster number are flagged as noise.
 *
 * @param database Database
 * @param r Relation to attach the clustering to
 * @param assignment Cluster assignment, one cluster number per object
 * @param name Name tokens; joined with spaces to form the clustering name
 * @throws AbortException if the ids of the relation are not array-based
 */
private void attachToRelation(Database database, Relation<?> r, IntArrayList assignment, ArrayList<String> name) {
    DBIDs ids = r.getDBIDs();
    if (!(ids instanceof ArrayDBIDs)) {
        throw new AbortException("External clusterings can only be used with static DBIDs.");
    }
    // First pass: count the members of each cluster number.
    Int2IntOpenHashMap sizes = new Int2IntOpenHashMap();
    for (IntListIterator it = assignment.iterator(); it.hasNext(); ) {
        sizes.addTo(it.nextInt(), 1);
    }
    // Allocate one id array per cluster number, using the counted size.
    Int2ObjectOpenHashMap<ArrayModifiableDBIDs> cids = new Int2ObjectOpenHashMap<>(sizes.size());
    for (ObjectIterator<Int2IntMap.Entry> it = sizes.int2IntEntrySet().fastIterator(); it.hasNext(); ) {
        Int2IntMap.Entry entry = it.next();
        cids.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
    }
    // Second pass: put the id at offset i into the cluster assigned at index i.
    {
        DBIDArrayIter it = ((ArrayDBIDs) ids).iter();
        for (int i = 0; i < assignment.size(); i++) {
            cids.get(assignment.getInt(i)).add(it.seek(i));
        }
    }
    String nam = FormatUtil.format(name, " ");
    // Locale.ROOT keeps the derived short name independent of the default
    // locale (e.g. the Turkish dotless i).
    String snam = nam.toLowerCase(java.util.Locale.ROOT).replace(' ', '-');
    Clustering<ClusterModel> result = new Clustering<>(nam, snam);
    for (ObjectIterator<Int2ObjectMap.Entry<ArrayModifiableDBIDs>> it = cids.int2ObjectEntrySet().fastIterator(); it.hasNext(); ) {
        Int2ObjectMap.Entry<ArrayModifiableDBIDs> entry = it.next();
        // Negative cluster numbers denote noise.
        boolean noise = entry.getIntKey() < 0;
        result.addToplevelCluster(new Cluster<>(entry.getValue(), noise, ClusterModel.CLUSTER));
    }
    database.getHierarchy().add(r, result);
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) IntListIterator(it.unimi.dsi.fastutil.ints.IntListIterator) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 74 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class EvaluateConcordantPairs method evaluateClustering.

/**
 * Evaluate a single clustering.
 *
 * Counts, for every between-cluster distance, how many within-cluster
 * distances are smaller (concordant) and how many are larger (discordant),
 * derives the Gamma and Tau measures from these counts, and records both in
 * the evaluation result attached to the clustering.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Gamma index
 * @throws AbortException if the number of within-cluster pairs does not fit
 *         into an {@code int}
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    int ignorednoise = 0;
    // Accumulate in long: the product size * (size - 1) wraps around in int
    // arithmetic for large clusters, which a sign check cannot reliably detect.
    long withinTotal = 0L;
    for (Cluster<?> cluster : clusters) {
        if ((cluster.size() <= 1 || cluster.isNoise())) {
            switch(noiseHandling) {
                case IGNORE_NOISE:
                    ignorednoise += cluster.size();
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // No concordant distances.
                    continue;
                case MERGE_NOISE:
                    // Treat like a cluster below.
                    break;
            }
        }
        final long csize = cluster.size();
        withinTotal += (csize * (csize - 1)) >>> 1;
        if (withinTotal > Integer.MAX_VALUE) {
            throw new AbortException("Integer overflow - clusters too large to compute pairwise distances.");
        }
    }
    // Safe narrowing: bounded by Integer.MAX_VALUE above.
    final int withinPairs = (int) withinTotal;
    // Materialize within-cluster distances (sorted):
    double[] withinDistances = computeWithinDistances(rel, clusters, withinPairs);
    int[] withinTies = new int[withinDistances.length];
    // Count ties within
    countTies(withinDistances, withinTies);
    long concordantPairs = 0, discordantPairs = 0, betweenPairs = 0;
    // Step two, compute discordant distances:
    for (int i = 0; i < clusters.size(); i++) {
        Cluster<?> ocluster1 = clusters.get(i);
        if ((ocluster1.size() <= 1 || ocluster1.isNoise()) && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
            continue;
        }
        for (int j = i + 1; j < clusters.size(); j++) {
            Cluster<?> ocluster2 = clusters.get(j);
            if ((ocluster2.size() <= 1 || ocluster2.isNoise()) && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
                continue;
            }
            // Widen before multiplying, so that the product cannot overflow int.
            betweenPairs += (long) ocluster1.size() * ocluster2.size();
            for (DBIDIter oit1 = ocluster1.getIDs().iter(); oit1.valid(); oit1.advance()) {
                NumberVector obj = rel.get(oit1);
                for (DBIDIter oit2 = ocluster2.getIDs().iter(); oit2.valid(); oit2.advance()) {
                    double dist = distanceFunction.distance(obj, rel.get(oit2));
                    int p = Arrays.binarySearch(withinDistances, dist);
                    if (p >= 0) {
                        // Tied distances: move to the first within distance equal to dist.
                        while (p > 0 && withinDistances[p - 1] >= dist) {
                            --p;
                        }
                        // Within distances before p are smaller than dist.
                        concordantPairs += p;
                        // Exclude the ties recorded for position p from the larger ones.
                        discordantPairs += withinDistances.length - p - withinTies[p];
                        continue;
                    }
                    // Not found: binarySearch returned -(insertion point) - 1.
                    p = -p - 1;
                    concordantPairs += p;
                    discordantPairs += withinDistances.length - p;
                }
            }
        }
    }
    // Total number of pairs possible:
    final long t = ((rel.size() - ignorednoise) * (long) (rel.size() - ignorednoise - 1)) >>> 1;
    final long tt = (t * (t - 1)) >>> 1;
    double gamma = (concordantPairs - discordantPairs) / (double) (concordantPairs + discordantPairs);
    double tau = computeTau(concordantPairs, discordantPairs, tt, withinDistances.length, betweenPairs);
    // Avoid NaN when everything is in a single cluster:
    // NOTE(review): this also replaces negative values by 0, although the
    // measures below are registered with a minimum of -1 - confirm intent.
    gamma = gamma > 0. ? gamma : 0.;
    tau = tau > 0. ? tau : 0.;
    if (LOG.isStatistics()) {
        // NOTE(review): the ".pbm." key prefix looks copied from another
        // evaluation class; kept unchanged because consumers may rely on it.
        LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".gamma", gamma));
        LOG.statistics(new DoubleStatistic(key + ".tau", tau));
    }
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Concordance-based Evaluation");
    g.addMeasure("Gamma", gamma, -1., 1., 0., false);
    g.addMeasure("Tau", tau, -1., +1., 0., false);
    db.getHierarchy().resultChanged(ev);
    return gamma;
}
Also used : MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 75 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class PrecomputedSimilarityMatrix method initialize.

/**
 * Precompute the similarity matrix.
 *
 * Requires the relation ids to be a {@link DBIDRange} of at most 65536
 * objects. The similarities of all pairs with y &lt; x are stored row by
 * row in a flat array of {@code triangleSize(size)} entries.
 *
 * @throws AbortException if the ids are not a DBID range, or if there are
 *         more than 65536 objects
 */
@Override
public void initialize() {
    final DBIDs candidate = relation.getDBIDs();
    if (!(candidate instanceof DBIDRange)) {
        throw new AbortException("Similarity matrixes are currently only supported for DBID ranges (as used by static databases) for performance reasons (Patches welcome).");
    }
    ids = (DBIDRange) candidate;
    size = ids.size();
    if (size > 65536) {
        throw new AbortException("Similarity matrixes currently have a limit of 65536 objects (~16 GB). After this, the array size exceeds the Java integer range, and a different data structure needs to be used.");
    }
    similarityQuery = similarityFunction.instantiate(relation);
    final int total = triangleSize(size);
    matrix = new double[total];
    final DBIDArrayIter row = ids.iter();
    final DBIDArrayIter col = ids.iter();
    final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Precomputing similarity matrix", total, LOG) : null;
    int next = 0;
    row.seek(0);
    while (row.valid()) {
        // y < x -- must match {@link #getOffset}!
        col.seek(0);
        while (col.getOffset() < row.getOffset()) {
            matrix[next] = similarityQuery.similarity(row, col);
            next++;
            col.advance();
        }
        if (progress != null) {
            // The row at offset x contributed x entries.
            progress.setProcessed(progress.getProcessed() + row.getOffset(), LOG);
        }
        row.advance();
    }
    LOG.ensureCompleted(progress);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)99 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)25 IOException (java.io.IOException)24 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 ArrayList (java.util.ArrayList)16 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)9 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)9 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)8 Model (de.lmu.ifi.dbs.elki.data.model.Model)8 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)8 Database (de.lmu.ifi.dbs.elki.database.Database)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)8 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)8 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)8 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)6 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)5 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)5