Search in sources:

Example 41 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class ClusteringVectorParser method nextEvent.

/**
 * Parse the next line of cluster assignments and report the resulting stream event.
 * <p>
 * If an event was queued by the previous call, it is delivered first. Otherwise one
 * line is read via {@code reader.nextLineExceptComments()}: each token that parses as
 * a base-10 integer is taken as the cluster number of the object at that position,
 * any other token is collected as a label (the first such token also names the
 * clustering; the fallback name is "Cluster"). The parsed line is stored in
 * {@code curclu} and {@code curlbl}.
 *
 * @return {@code META_CHANGED} when the id range was just created or labels appeared
 *         for the first time (with {@code NEXT_OBJECT} queued for the following call),
 *         {@code NEXT_OBJECT} for any other parsed line, or {@code END_OF_STREAM} when
 *         the reader has no further line
 * @throws AbortException if a line does not hold as many assignments as the id range
 * @throws IllegalArgumentException if reading the input fails with an I/O error
 */
@Override
public Event nextEvent() {
    // Deliver an event queued by the previous call first.
    if (nextevent != null) {
        Event ret = nextevent;
        nextevent = null;
        return ret;
    }
    try {
        // Every call consumes at most one line, so a plain guard is sufficient.
        if (!reader.nextLineExceptComments()) {
            return Event.END_OF_STREAM;
        }
        buf1.clear();
        lbl.clear();
        Int2IntOpenHashMap csize = new Int2IntOpenHashMap();
        String name = null;
        // The tokenizer was initialized by nextLineExceptComments().
        for (; tokenizer.valid(); tokenizer.advance()) {
            try {
                int cnum = tokenizer.getIntBase10();
                buf1.add(cnum);
                // Update cluster sizes:
                csize.addTo(cnum, 1);
            } catch (NumberFormatException e) {
                // Not a number: treat the token as a label instead.
                final String label = tokenizer.getSubstring();
                lbl.add(label);
                if (name == null) {
                    name = label;
                }
            }
        }
        if (name == null) {
            name = "Cluster";
        }
        // Update meta on first record:
        boolean metaupdate = (range == null);
        if (range == null) {
            range = DBIDUtil.generateStaticDBIDRange(buf1.size());
        }
        if (buf1.size() != range.size()) {
            throw new AbortException("Clusterings do not contain the same number of elements!");
        }
        // Build clustering to store in the relation.
        Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(csize.size());
        curclu = new Clustering<>(name, name);
        // Preallocate one id array per cluster, sized by its member count.
        for (ObjectIterator<Int2IntMap.Entry> sizes = csize.int2IntEntrySet().fastIterator(); sizes.hasNext(); ) {
            Int2IntMap.Entry entry = sizes.next();
            if (entry.getIntValue() > 0) {
                clusters.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
            }
        }
        // Assign the i-th id of the range to the cluster given at position i.
        DBIDArrayIter iter = range.iter();
        for (int i = 0; i < buf1.size(); i++) {
            clusters.get(buf1.getInt(i)).add(iter.seek(i));
        }
        for (ModifiableDBIDs cids : clusters.values()) {
            curclu.addToplevelCluster(new Cluster<Model>(cids, ClusterModel.CLUSTER));
        }
        // Label handling.
        if (!haslbl && !lbl.isEmpty()) {
            haslbl = true;
            metaupdate = true;
        }
        curlbl = LabelList.make(lbl);
        if (metaupdate) {
            // Force a meta update.
            nextevent = Event.NEXT_OBJECT;
            return Event.META_CHANGED;
        }
        return Event.NEXT_OBJECT;
    } catch (IOException e) {
        // Keep the original exception as cause so the underlying I/O error is not lost.
        throw new IllegalArgumentException("Error while parsing line " + reader.getLineNumber() + ".", e);
    }
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IOException(java.io.IOException) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 42 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class LinearWeightedExtendedNeighborhood method getWeightedNeighbors.

/**
 * Collect the neighbors reachable from a reference object within {@code steps}
 * expansions of the inner neighborhood, each paired with the weight of the step at
 * which it was first discovered.
 * <p>
 * The reference object itself is reported with {@code computeWeight(0)}. Every object
 * is reported at most once: ids that were already discovered are skipped.
 *
 * @param reference Object to start the expansion from
 * @return Weighted neighbors, including the reference object
 */
@Override
public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) {
    ModifiableDBIDs seen = DBIDUtil.newHashSet();
    List<DoubleDBIDPair> result = new ArrayList<>();
    // Add starting object
    result.add(DBIDUtil.newPair(computeWeight(0), reference));
    seen.add(reference);
    // Extend.
    DBIDs cur = DBIDUtil.deref(reference);
    for (int i = 1; i <= steps; i++) {
        final double weight = computeWeight(i);
        // Collect newly discovered IDs
        ModifiableDBIDs add = DBIDUtil.newHashSet();
        for (DBIDIter iter = cur.iter(); iter.valid(); iter.advance()) {
            for (DBIDIter iter2 = inner.getNeighborDBIDs(iter).iter(); iter2.valid(); iter2.advance()) {
                // Seen before?
                if (seen.contains(iter2)) {
                    continue;
                }
                // Remember the id, so that it is neither reported twice within this
                // step nor rediscovered (with a different weight) in a later step.
                seen.add(iter2);
                add.add(iter2);
                result.add(DBIDUtil.newPair(weight, iter2));
            }
        }
        // Nothing new was reached, further steps cannot discover anything either.
        if (add.size() == 0) {
            break;
        }
        cur = add;
    }
    return result;
}
Also used : DoubleDBIDPair(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 43 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class ComputeOutlierHistogram method evaluateOutlierResult.

/**
 * Build a histogram of the scaled scores of one outlier result.
 * <p>
 * Each bin holds two accumulated weights: the first for objects that are not matched
 * by the positive class label, the second for objects that are.
 *
 * @param database Database to process
 * @param or Outlier result
 * @return Histogram result with one {@code {center, first, second}} row per bin
 */
public HistogramResult evaluateOutlierResult(Database database, OutlierResult or) {
    if (scaling instanceof OutlierScalingFunction) {
        ((OutlierScalingFunction) scaling).prepare(or);
    }
    ModifiableDBIDs ids = DBIDUtil.newHashSet(or.getScores().getDBIDs());
    DBIDs outlierIds = DatabaseUtil.getObjectsByLabelMatch(database, positiveClassName);
    // Bins store a pair: first component for the remaining objects, second for the
    // objects matched by the positive class label.
    final double lower = scaling.getMin();
    final double upper = scaling.getMax();
    // Only finite bounds are usable for a fixed binning.
    final boolean bounded = !(Double.isInfinite(lower) || Double.isNaN(lower) || Double.isInfinite(upper) || Double.isNaN(upper));
    final ObjHistogram<DoubleDoublePair> hist;
    if (bounded) {
        hist = new AbstractObjStaticHistogram<DoubleDoublePair>(bins, lower, upper) {

            @Override
            protected DoubleDoublePair makeObject() {
                return new DoubleDoublePair(0., 0.);
            }

            @Override
            public void putData(double coord, DoubleDoublePair data) {
                // Accumulate into the bin responsible for this coordinate.
                DoubleDoublePair bin = get(coord);
                bin.first += data.first;
                bin.second += data.second;
            }
        };
    } else {
        hist = new AbstractObjDynamicHistogram<DoubleDoublePair>(bins) {

            @Override
            public DoubleDoublePair aggregate(DoubleDoublePair first, DoubleDoublePair second) {
                first.first += second.first;
                first.second += second.second;
                return first;
            }

            @Override
            protected DoubleDoublePair makeObject() {
                return new DoubleDoublePair(0., 0.);
            }

            @Override
            protected DoubleDoublePair cloneForCache(DoubleDoublePair data) {
                return new DoubleDoublePair(data.first, data.second);
            }

            @Override
            protected DoubleDoublePair downsample(Object[] data, int start, int end, int size) {
                // Sum up all non-empty entries in [start, end).
                DoubleDoublePair total = new DoubleDoublePair(0, 0);
                for (int pos = start; pos < end; pos++) {
                    DoubleDoublePair cur = (DoubleDoublePair) data[pos];
                    if (cur == null) {
                        continue;
                    }
                    total.first += cur.first;
                    total.second += cur.second;
                }
                return total;
            }
        };
    }
    // Per-object weights; the sizes are taken before the outliers are removed below.
    final DoubleDoublePair negative, positive;
    if (splitfreq) {
        negative = new DoubleDoublePair(1. / (ids.size() - outlierIds.size()), 0);
        positive = new DoubleDoublePair(0, 1. / outlierIds.size());
    } else {
        negative = new DoubleDoublePair(1. / ids.size(), 0);
        positive = new DoubleDoublePair(0, 1. / ids.size());
    }
    ids.removeDBIDs(outlierIds);
    // Remaining objects contribute to the first component ...
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final double scaled = scaling.getScaled(or.getScores().doubleValue(it));
        // Skip NaN and infinite scores.
        if (!Double.isNaN(scaled) && !Double.isInfinite(scaled)) {
            hist.putData(scaled, negative);
        }
    }
    // ... and the label-matched objects to the second component.
    for (DBIDIter it = outlierIds.iter(); it.valid(); it.advance()) {
        final double scaled = scaling.getScaled(or.getScores().doubleValue(it));
        if (!Double.isNaN(scaled) && !Double.isInfinite(scaled)) {
            hist.putData(scaled, positive);
        }
    }
    // Flatten the histogram into rows of {bin center, first, second}.
    Collection<double[]> rows = new ArrayList<>(hist.getNumBins());
    for (ObjHistogram.Iter<DoubleDoublePair> bin = hist.iter(); bin.valid(); bin.advance()) {
        DoubleDoublePair value = bin.getValue();
        rows.add(new double[] { bin.getCenter(), value.first, value.second });
    }
    return new HistogramResult("Outlier Score Histogram", "outlier-histogram", rows);
}
Also used : ObjHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram) HistogramResult(de.lmu.ifi.dbs.elki.result.HistogramResult) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleDoublePair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair)

Example 44 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class MkCoPTree method insertAll.

/**
 * Insert a batch of entries, then run a batch nearest-neighbor query for the inserted
 * objects and adjust the approximated kNN distances accordingly.
 *
 * @param entries Entries to insert; an empty list is a no-op
 */
@Override
public void insertAll(List<MkCoPEntry> entries) {
    // Nothing to do for an empty batch.
    if (entries.isEmpty()) {
        return;
    }
    if (LOG.isDebugging()) {
        LOG.debugFine(new StringBuilder("insert ").append(entries).append("\n").toString());
    }
    // The first entry is used to initialize the tree, if this has not happened yet.
    if (!initialized) {
        initialize(entries.get(0));
    }
    // Insert every entry, remembering its routing object id for the kNN queries.
    ModifiableDBIDs inserted = DBIDUtil.newArray(entries.size());
    for (MkCoPEntry e : entries) {
        inserted.add(e.getRoutingObjectID());
        super.insert(e, false);
    }
    // Nearest-neighbor lists of the inserted objects.
    Map<DBID, KNNList> neighbors = batchNN(getRoot(), inserted, settings.kmax);
    // Adjust the knn distances, starting at the root entry.
    adjustApproximatedKNNDistances(getRootEntry(), neighbors);
    if (EXTRA_INTEGRITY_CHECKS) {
        getRoot().integrityCheck(this, getRootEntry());
    }
}
Also used : KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 45 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class CASH method initHeap.

/**
 * Initializes the heap with the root intervals.
 * <p>
 * The range between the minimum and maximum function value is cut into consecutive
 * distance intervals; for each of them that collects at least {@code minPts} ids, a
 * root interval covering the full alpha range {@code [0, PI]} per angle is added to
 * the heap.
 *
 * @param heap the heap to be initialized
 * @param relation the database storing the parameterization functions
 * @param dim the dimensionality of the database
 * @param ids the ids of the database
 */
private void initHeap(ObjectHeap<IntegerPriorityObject<CASHInterval>> heap, Relation<ParameterizationFunction> relation, int dim, DBIDs ids) {
    CASHIntervalSplit split = new CASHIntervalSplit(relation, minPts);
    // determine minimum and maximum function value of all functions
    double[] minMax = determineMinMaxDistance(relation, dim);
    double d_min = minMax[0], d_max = minMax[1];
    double dIntervalLength = d_max - d_min;
    // Number of intervals such that none is wider than jitter.
    int numDIntervals = (int) FastMath.ceil(dIntervalLength / jitter);
    double dIntervalSize = dIntervalLength / numDIntervals;
    double[] d_mins = new double[numDIntervals], d_maxs = new double[numDIntervals];
    if (LOG.isVerbose()) {
        LOG.verbose(new StringBuilder() //
            .append("d_min ").append(d_min) //
            .append("\nd_max ").append(d_max) //
            .append("\nnumDIntervals ").append(numDIntervals) //
            .append("\ndIntervalSize ").append(dIntervalSize).toString());
    }
    // alpha intervals
    double[] alphaMin = new double[dim - 1], alphaMax = new double[dim - 1];
    Arrays.fill(alphaMax, Math.PI);
    for (int i = 0; i < numDIntervals; i++) {
        // Each interval starts where the previous one ended.
        d_mins[i] = (i == 0) ? d_min : d_maxs[i - 1];
        // The last interval ends exactly at the global maximum. The previous value,
        // d_max - d_mins[i], was the interval's length rather than its upper bound.
        d_maxs[i] = (i < numDIntervals - 1) ? d_mins[i] + dIntervalSize : d_max;
        HyperBoundingBox alphaInterval = new HyperBoundingBox(alphaMin, alphaMax);
        ModifiableDBIDs intervalIDs = split.determineIDs(ids, alphaInterval, d_mins[i], d_maxs[i]);
        // Only intervals with enough objects are worth exploring.
        if (intervalIDs != null && intervalIDs.size() >= minPts) {
            CASHInterval rootInterval = new CASHInterval(alphaMin, alphaMax, split, intervalIDs, -1, 0, d_mins[i], d_maxs[i]);
            heap.add(new IntegerPriorityObject<>(rootInterval.priority(), rootInterval));
        }
    }
    if (LOG.isDebuggingFiner()) {
        LOG.debugFiner(new StringBuilder().append("heap.size: ").append(heap.size()).toString());
    }
}
Also used : CASHInterval(de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.CASHInterval) CASHIntervalSplit(de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.CASHIntervalSplit) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Aggregations

ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)44 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)30 ArrayList (java.util.ArrayList)30 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)28 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)18 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)15 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)14 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)12 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)12 Model (de.lmu.ifi.dbs.elki.data.model.Model)11 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)11 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)10 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)9 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)9 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)8 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)7