Example 6 with Int2ObjectOpenHashMap

Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in the project elki by elki-project.

From the class ArffParser, method loadSparseInstance:

private Object[] loadSparseInstance(StreamTokenizer tokenizer, int[] targ, int[] dimsize, TypeInformation[] elkitypes, int metaLength) throws IOException {
    Int2ObjectOpenHashMap<Object> map = new Int2ObjectOpenHashMap<>();
    while (true) {
        nextToken(tokenizer);
        assert (tokenizer.ttype != StreamTokenizer.TT_EOF && tokenizer.ttype != StreamTokenizer.TT_EOL);
        if (tokenizer.ttype == '}') {
            nextToken(tokenizer);
            assert (tokenizer.ttype == StreamTokenizer.TT_EOF || tokenizer.ttype == StreamTokenizer.TT_EOL);
            break;
        } else {
            // sparse token
            if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                throw new AbortException("Unexpected token type encountered: " + tokenizer.toString() + " type: " + tokenizer.ttype);
            }
            int dim = ParseUtil.parseIntBase10(tokenizer.sval);
            if (map.containsKey(dim)) {
                throw new AbortException("Duplicate key in sparse vector: " + tokenizer.toString());
            }
            nextToken(tokenizer);
            if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
                map.put(dim, TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[targ[dim]])
                        ? (Double) ParseUtil.parseDouble(tokenizer.sval) : tokenizer.sval);
            } else {
                throw new AbortException("Unexpected token type encountered: " + tokenizer.toString());
            }
        }
    }
    Object[] data = new Object[metaLength];
    for (int out = 0; out < metaLength; out++) {
        // Find the first index
        int s = -1;
        for (int i = 0; i < targ.length; i++) {
            if (targ[i] == out && s < 0) {
                s = i;
                break;
            }
        }
        assert (s >= 0);
        if (TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[out])) {
            Int2DoubleOpenHashMap f = new Int2DoubleOpenHashMap(dimsize[out]);
            for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
                Int2ObjectMap.Entry<Object> entry = iter.next();
                int i = entry.getIntKey();
                if (i < s || i >= s + dimsize[out]) {
                    continue;
                }
                double v = ((Double) entry.getValue()).doubleValue();
                f.put(i - s, v);
            }
            data[out] = new SparseDoubleVector(f, dimsize[out]);
        } else if (TypeUtil.LABELLIST.equals(elkitypes[out])) {
            // Build a label list out of successive labels
            labels.clear();
            for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
                Int2ObjectMap.Entry<Object> entry = iter.next();
                int i = entry.getIntKey();
                if (i < s) {
                    continue;
                }
                if (i >= s + dimsize[out]) {
                    break;
                }
                if (labels.size() < i - s) {
                    LOG.warning("Sparse consecutive labels are currently not correctly supported.");
                }
                labels.add((String) entry.getValue());
            }
            data[out] = LabelList.make(labels);
        } else if (TypeUtil.EXTERNALID.equals(elkitypes[out])) {
            String val = (String) map.get(s);
            if (val == null) {
                throw new AbortException("External ID column not set in sparse instance." + tokenizer.toString());
            }
            data[out] = new ExternalID(val);
        } else if (TypeUtil.CLASSLABEL.equals(elkitypes[out])) {
            Object val = map.get(s);
            if (val == null) {
                throw new AbortException("Class label column not set in sparse instance." + tokenizer.toString());
            }
            // TODO: support other class label types.
            ClassLabel lbl = new SimpleClassLabel(String.valueOf(val));
            data[out] = lbl;
        } else {
            throw new AbortException("Unsupported type for column " + "->" + out + ": " + ((elkitypes[out] != null) ? elkitypes[out].toString() : "null"));
        }
    }
    return data;
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) ExternalID(de.lmu.ifi.dbs.elki.data.ExternalID) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) SparseDoubleVector(de.lmu.ifi.dbs.elki.data.SparseDoubleVector) ObjectIterator(it.unimi.dsi.fastutil.objects.ObjectIterator) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)
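
A note on the iteration idiom above: int2ObjectEntrySet().fastIterator() reuses a single Entry instance per call to next(), so no entry object is allocated per element. Below is a minimal, self-contained sketch of that idiom, independent of ELKI; the map contents are made up for illustration.

import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;

public class FastIteratorSketch {

    public static void main(String[] args) {
        Int2ObjectOpenHashMap<String> map = new Int2ObjectOpenHashMap<>();
        map.put(3, "x3");
        map.put(7, "x7");
        // The fast iterator reuses one Entry object, so do not keep a reference
        // to the entry beyond the current iteration step.
        for (ObjectIterator<Int2ObjectMap.Entry<String>> it = map.int2ObjectEntrySet().fastIterator(); it.hasNext(); ) {
            Int2ObjectMap.Entry<String> e = it.next();
            System.out.println(e.getIntKey() + " -> " + e.getValue());
        }
    }
}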

Example 7 with Int2ObjectOpenHashMap

Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in the project elki by elki-project.

From the class MiniMaxAnderberg, method run:

/**
 * Run the algorithm
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    DistanceQuery<O> dq = DatabaseUtil.precomputedDistanceQuery(db, relation, getDistanceFunction(), LOG);
    final DBIDs ids = relation.getDBIDs();
    final int size = ids.size();
    // Initialize space for result:
    PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());
    Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>();
    // Compute the initial (lower triangular) distance matrix.
    MatrixParadigm mat = new MatrixParadigm(ids);
    ArrayModifiableDBIDs prots = DBIDUtil.newArray(MatrixParadigm.triangleSize(size));
    DBIDArrayMIter protiter = prots.iter();
    MiniMax.initializeMatrices(mat, prots, dq);
    // Arrays used for caching:
    double[] bestd = new double[size];
    int[] besti = new int[size];
    initializeNNCache(mat.matrix, bestd, besti);
    // Repeat until everything merged into 1 cluster
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
    DBIDArrayIter ix = mat.ix;
    for (int i = 1, end = size; i < size; i++) {
        end = AGNES.shrinkActiveSet(ix, builder, end,
                findMerge(end, mat, protiter, builder, clusters, bestd, besti, dq));
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return (PointerPrototypeHierarchyRepresentationResult) builder.complete();
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)
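
The clusters map above starts empty and is filled lazily inside findMerge (not shown here). The following is a minimal, generic sketch of that get-or-create pattern with a primitive int key; it uses plain java.util lists and a hypothetical assignment array instead of ELKI's ModifiableDBIDs.

import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;

import java.util.ArrayList;
import java.util.List;

public class ClusterMapSketch {

    public static void main(String[] args) {
        int[] assignment = { 0, 2, 2, 0, 1 }; // hypothetical: object index -> cluster id
        Int2ObjectOpenHashMap<List<Integer>> clusters = new Int2ObjectOpenHashMap<>();
        for (int obj = 0; obj < assignment.length; obj++) {
            List<Integer> members = clusters.get(assignment[obj]);
            if (members == null) { // the default return value for missing keys is null
                members = new ArrayList<>();
                clusters.put(assignment[obj], members);
            }
            members.add(obj);
        }
        // Iteration order of a hash map is unspecified.
        System.out.println(clusters);
    }
}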

Example 8 with Int2ObjectOpenHashMap

Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in the project elki by elki-project.

From the class MiniMax, method run:

/**
 * Run the algorithm on a database.
 *
 * @param db Database
 * @param relation Relation to process.
 * @return Hierarchical result
 */
public PointerPrototypeHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    DistanceQuery<O> dq = DatabaseUtil.precomputedDistanceQuery(db, relation, getDistanceFunction(), LOG);
    final DBIDs ids = relation.getDBIDs();
    final int size = ids.size();
    // Initialize space for result:
    PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());
    Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(size);
    // Allocate working space:
    MatrixParadigm mat = new MatrixParadigm(ids);
    ArrayModifiableDBIDs prots = DBIDUtil.newArray(MatrixParadigm.triangleSize(size));
    initializeMatrices(mat, prots, dq);
    DBIDArrayMIter protiter = prots.iter();
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("MiniMax clustering", size - 1, LOG) : null;
    DBIDArrayIter ix = mat.ix;
    for (int i = 1, end = size; i < size; i++) {
        end = AGNES.shrinkActiveSet(ix, builder, end,
                findMerge(end, mat, protiter, builder, clusters, dq));
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    return (PointerPrototypeHierarchyRepresentationResult) builder.complete();
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)
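
Compared to Example 7, this variant pre-sizes the map with the number of objects. Giving Int2ObjectOpenHashMap its expected number of entries up front avoids intermediate rehashing while it grows; a load factor can also be supplied. A minimal sketch with an arbitrary, made-up expected size:

import it.unimi.dsi.fastutil.Hash;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;

public class PresizedMapSketch {

    public static void main(String[] args) {
        int expected = 10_000; // hypothetical number of entries to be inserted
        // Pre-sizing avoids rehashes as the map grows; the two-argument constructor
        // also takes a load factor, e.g. Hash.FAST_LOAD_FACTOR trades memory for speed.
        Int2ObjectOpenHashMap<double[]> byIndex = new Int2ObjectOpenHashMap<>(expected, Hash.DEFAULT_LOAD_FACTOR);
        for (int i = 0; i < expected; i++) {
            byIndex.put(i, new double[] { 0.5 * i });
        }
        System.out.println(byIndex.size());
    }
}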

Example 9 with Int2ObjectOpenHashMap

Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in the project elki by elki-project.

From the class AffinityPropagationClusteringAlgorithm, method run:

/**
 * Perform affinity propagation clustering.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<MedoidModel> run(Database db, Relation<O> relation) {
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int size = ids.size();
    int[] assignment = new int[size];
    double[][] s = initialization.getSimilarityMatrix(db, relation, ids);
    double[][] r = new double[size][size];
    double[][] a = new double[size][size];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Affinity Propagation Iteration", LOG) : null;
    MutableProgress aprog = LOG.isVerbose() ? new MutableProgress("Stable assignments", size + 1, LOG) : null;
    int inactive = 0;
    for (int iteration = 0; iteration < maxiter && inactive < convergence; iteration++) {
        // Update responsibility matrix:
        for (int i = 0; i < size; i++) {
            double[] ai = a[i], ri = r[i], si = s[i];
            // Find the two largest values (as initially maxk == i)
            double max1 = Double.NEGATIVE_INFINITY, max2 = Double.NEGATIVE_INFINITY;
            int maxk = -1;
            for (int k = 0; k < size; k++) {
                double val = ai[k] + si[k];
                if (val > max1) {
                    max2 = max1;
                    max1 = val;
                    maxk = k;
                } else if (val > max2) {
                    max2 = val;
                }
            }
            // With the maximum value known, update r:
            for (int k = 0; k < size; k++) {
                double val = si[k] - ((k != maxk) ? max1 : max2);
                ri[k] = ri[k] * lambda + val * (1. - lambda);
            }
        }
        // Update availability matrix
        for (int k = 0; k < size; k++) {
            // Compute sum of max(0, r_ik) for all i.
            // For r_kk, don't apply the max.
            double colposum = 0.;
            for (int i = 0; i < size; i++) {
                if (i == k || r[i][k] > 0.) {
                    colposum += r[i][k];
                }
            }
            for (int i = 0; i < size; i++) {
                double val = colposum;
                // Adjust column sum by the one extra term.
                if (i == k || r[i][k] > 0.) {
                    val -= r[i][k];
                }
                if (i != k && val > 0.) {
                    // min
                    val = 0.;
                }
                a[i][k] = a[i][k] * lambda + val * (1 - lambda);
            }
        }
        int changed = 0;
        for (int i = 0; i < size; i++) {
            double[] ai = a[i], ri = r[i];
            double max = Double.NEGATIVE_INFINITY;
            int maxj = -1;
            for (int j = 0; j < size; j++) {
                double v = ai[j] + ri[j];
                if (v > max || (i == j && v >= max)) {
                    max = v;
                    maxj = j;
                }
            }
            if (assignment[i] != maxj) {
                changed += 1;
                assignment[i] = maxj;
            }
        }
        inactive = (changed > 0) ? 0 : (inactive + 1);
        LOG.incrementProcessed(prog);
        if (aprog != null) {
            aprog.setProcessed(size - changed, LOG);
        }
    }
    if (aprog != null) {
        aprog.setProcessed(aprog.getTotal(), LOG);
    }
    LOG.setCompleted(prog);
    // Cluster map, by lead object
    Int2ObjectOpenHashMap<ModifiableDBIDs> map = new Int2ObjectOpenHashMap<>();
    DBIDArrayIter i1 = ids.iter();
    for (int i = 0; i1.valid(); i1.advance(), i++) {
        int c = assignment[i];
        // Add to cluster members:
        ModifiableDBIDs cids = map.get(c);
        if (cids == null) {
            cids = DBIDUtil.newArray();
            map.put(c, cids);
        }
        cids.add(i1);
    }
    // If we stopped early, the cluster lead might be in a different cluster.
    for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
        Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
        final int key = entry.getIntKey();
        int targetkey = key;
        ModifiableDBIDs tids = null;
        // Chase arrows:
        while (tids == null && assignment[targetkey] != targetkey) {
            targetkey = assignment[targetkey];
            tids = map.get(targetkey);
        }
        if (tids != null && targetkey != key) {
            tids.addDBIDs(entry.getValue());
            iter.remove();
        }
    }
    Clustering<MedoidModel> clustering = new Clustering<>("Affinity Propagation Clustering", "ap-clustering");
    ModifiableDBIDs noise = DBIDUtil.newArray();
    for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
        Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
        i1.seek(entry.getIntKey());
        if (entry.getValue().size() > 1) {
            MedoidModel mod = new MedoidModel(DBIDUtil.deref(i1));
            clustering.addToplevelCluster(new Cluster<>(entry.getValue(), mod));
        } else {
            noise.add(i1);
        }
    }
    if (noise.size() > 0) {
        MedoidModel mod = new MedoidModel(DBIDUtil.deref(noise.iter()));
        clustering.addToplevelCluster(new Cluster<>(noise, true, mod));
    }
    return clustering;
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) MedoidModel(de.lmu.ifi.dbs.elki.data.model.MedoidModel) MutableProgress(de.lmu.ifi.dbs.elki.logging.progress.MutableProgress) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)
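
The cluster-merging pass above removes entries through the fast iterator itself (iter.remove()) while walking the map, which avoids collecting keys to delete in a second pass. A minimal, ELKI-independent sketch of removing entries during fast iteration, with made-up map contents:

import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;

public class RemoveWhileIteratingSketch {

    public static void main(String[] args) {
        Int2ObjectOpenHashMap<String> map = new Int2ObjectOpenHashMap<>();
        map.put(1, "keep");
        map.put(2, "drop");
        map.put(3, "keep");
        // The fast iterator supports remove(), so entries can be dropped in place.
        for (ObjectIterator<Int2ObjectMap.Entry<String>> it = map.int2ObjectEntrySet().fastIterator(); it.hasNext(); ) {
            if ("drop".equals(it.next().getValue())) {
                it.remove();
            }
        }
        System.out.println(map.size()); // 2
    }
}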

Example 10 with Int2ObjectOpenHashMap

Use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in the project elki by elki-project.

From the class ClusteringVectorParser, method nextEvent:

@Override
public Event nextEvent() {
    if (nextevent != null) {
        Event ret = nextevent;
        nextevent = null;
        return ret;
    }
    try {
        while (reader.nextLineExceptComments()) {
            buf1.clear();
            lbl.clear();
            Int2IntOpenHashMap csize = new Int2IntOpenHashMap();
            String name = null;
            for (/* initialized by nextLineExceptComments() */; tokenizer.valid(); tokenizer.advance()) {
                try {
                    int cnum = tokenizer.getIntBase10();
                    buf1.add(cnum);
                    // Update cluster sizes:
                    csize.addTo(cnum, 1);
                } catch (NumberFormatException e) {
                    final String label = tokenizer.getSubstring();
                    lbl.add(label);
                    if (name == null) {
                        name = label;
                    }
                }
            }
            if (name == null) {
                name = "Cluster";
            }
            // Update meta on first record:
            boolean metaupdate = (range == null);
            if (range == null) {
                range = DBIDUtil.generateStaticDBIDRange(buf1.size());
            }
            if (buf1.size() != range.size()) {
                throw new AbortException("Clusterings do not contain the same number of elements!");
            }
            // Build clustering to store in the relation.
            Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(csize.size());
            curclu = new Clustering<>(name, name);
            for (ObjectIterator<Int2IntMap.Entry> iter = csize.int2IntEntrySet().fastIterator(); iter.hasNext(); ) {
                Int2IntMap.Entry entry = iter.next();
                if (entry.getIntValue() > 0) {
                    clusters.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
                }
            }
            DBIDArrayIter iter = range.iter();
            for (int i = 0; i < buf1.size(); i++) {
                clusters.get(buf1.getInt(i)).add(iter.seek(i));
            }
            for (ModifiableDBIDs cids : clusters.values()) {
                curclu.addToplevelCluster(new Cluster<Model>(cids, ClusterModel.CLUSTER));
            }
            // Label handling.
            if (!haslbl && !lbl.isEmpty()) {
                haslbl = true;
                metaupdate = true;
            }
            curlbl = LabelList.make(lbl);
            if (metaupdate) {
                // Force a meta update.
                nextevent = Event.NEXT_OBJECT;
                return Event.META_CHANGED;
            }
            return Event.NEXT_OBJECT;
        }
        return Event.END_OF_STREAM;
    } catch (IOException e) {
        throw new IllegalArgumentException("Error while parsing line " + reader.getLineNumber() + ".");
    }
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IOException(java.io.IOException) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)
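
The csize histogram above relies on Int2IntOpenHashMap.addTo, which increments a primitive counter in place; missing keys start from the map's default return value of 0. A minimal counting sketch with hypothetical input:

import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;

public class AddToSketch {

    public static void main(String[] args) {
        int[] clusterIds = { 4, 4, 7, 4, 7, 1 }; // hypothetical cluster assignments
        Int2IntOpenHashMap counts = new Int2IntOpenHashMap();
        for (int c : clusterIds) {
            counts.addTo(c, 1); // missing keys start at the default return value 0
        }
        for (ObjectIterator<Int2IntMap.Entry> it = counts.int2IntEntrySet().fastIterator(); it.hasNext(); ) {
            Int2IntMap.Entry e = it.next();
            System.out.println("cluster " + e.getIntKey() + " has " + e.getIntValue() + " members");
        }
    }
}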

Aggregations

Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap): 29 usages
Int2ObjectMap (it.unimi.dsi.fastutil.ints.Int2ObjectMap): 8 usages
List (java.util.List): 7 usages
IOException (java.io.IOException): 6 usages
ArrayList (java.util.ArrayList): 6 usages
HashSet (java.util.HashSet): 6 usages
Int2IntOpenHashMap (it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap): 5 usages
HashMap (java.util.HashMap): 5 usages
JsonProperty (com.fasterxml.jackson.annotation.JsonProperty): 2 usages
Preconditions (com.google.common.base.Preconditions): 2 usages
Stopwatch (com.google.common.base.Stopwatch): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
Maps (com.google.common.collect.Maps): 2 usages
DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 2 usages
ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 2 usages
FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 2 usages
AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 2 usages
ObjectIterator (it.unimi.dsi.fastutil.objects.ObjectIterator): 2 usages
LinkedHashMap (java.util.LinkedHashMap): 2 usages
LinkedHashSet (java.util.LinkedHashSet): 2 usages