Search in sources :

Example 46 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class APRIORI method run.

/**
 * Performs the APRIORI algorithm on the given database.
 *
 * Frequent 1-itemsets are built first, then frequent 2-itemsets, and then
 * longer itemsets are generated level by level until {@code maxlength} is
 * reached or too few candidates remain. After each level from 2 onwards, the
 * set of transactions is replaced by the survivors reported by that level.
 * Only itemsets of at least {@code minlength} items are added to the result.
 *
 * @param relation the Relation to process
 * @return the frequent itemsets found, wrapped in a FrequentItemsetsResult
 */
public FrequentItemsetsResult run(Relation<BitVector> relation) {
    DBIDs ids = relation.getDBIDs();
    List<Itemset> solution = new ArrayList<>();
    final int size = ids.size();
    final int needed = getMinimumSupport(size);
    // TODO: we don't strictly require a vector field.
    // We could work with knowing just the maximum dimensionality beforehand.
    VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
    if (size > 0) {
        final int dim = meta.getDimensionality();
        Duration timeone = LOG.newDuration(STAT + "1-items.time").begin();
        List<OneItemset> oneitems = buildFrequentOneItemsets(relation, dim, needed);
        LOG.statistics(timeone.end());
        if (LOG.isStatistics()) {
            LOG.statistics(new LongStatistic(STAT + "1-items.frequent", oneitems.size()));
            LOG.statistics(new LongStatistic(STAT + "1-items.transactions", ids.size()));
        }
        if (LOG.isDebuggingFine()) {
            LOG.debugFine(debugDumpCandidates(new StringBuilder(), oneitems, meta));
        }
        if (minlength <= 1) {
            solution.addAll(oneitems);
        }
        if (oneitems.size() >= 2 && maxlength >= 2) {
            Duration timetwo = LOG.newDuration(STAT + "2-items.time").begin();
            ArrayModifiableDBIDs survivors = DBIDUtil.newArray(ids.size());
            List<? extends Itemset> candidates = buildFrequentTwoItemsets(oneitems, relation, dim, needed, ids, survivors);
            // Continue with reduced set of transactions.
            ids = survivors;
            LOG.statistics(timetwo.end());
            if (LOG.isStatistics()) {
                LOG.statistics(new LongStatistic(STAT + "2-items.frequent", candidates.size()));
                LOG.statistics(new LongStatistic(STAT + "2-items.transactions", ids.size()));
            }
            if (LOG.isDebuggingFine()) {
                LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
            }
            if (minlength <= 2) {
                solution.addAll(candidates);
            }
            for (int length = 3; length <= maxlength && candidates.size() >= length; length++) {
                Duration timel = LOG.newDuration(STAT + length + "-items.time").begin();
                // Join to get the new candidates
                candidates = aprioriGenerate(candidates, length, dim);
                if (LOG.isDebuggingFinest()) {
                    LOG.debugFinest(debugDumpCandidates(new StringBuilder().append("Before pruning: "), candidates, meta));
                }
                survivors = DBIDUtil.newArray(ids.size());
                candidates = frequentItemsets(candidates, relation, needed, ids, survivors, length);
                // Continue with reduced set of transactions.
                ids = survivors;
                LOG.statistics(timel.end());
                if (LOG.isStatistics()) {
                    LOG.statistics(new LongStatistic(STAT + length + "-items.frequent", candidates.size()));
                    LOG.statistics(new LongStatistic(STAT + length + "-items.transactions", ids.size()));
                }
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
                }
                // Apply the same minimum-length filter as for lengths 1 and 2;
                // shorter levels are still computed because they seed the next join.
                if (minlength <= length) {
                    solution.addAll(candidates);
                }
            }
        }
    }
    return new FrequentItemsetsResult("APRIORI", "apriori", solution, meta, size);
}
Also used : BitVector(de.lmu.ifi.dbs.elki.data.BitVector) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) FrequentItemsetsResult(de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 47 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class MultipleObjectsBundle method fromStream.

/**
 * Drains an object stream into a static bundle.
 *
 * Events are consumed until the stream reports its end. On a meta change the
 * bundle meta is rebuilt and columns are appended for any newly announced
 * entries; on each object, one value per column is appended. DBIDs are only
 * attached to the bundle when every object supplied one.
 *
 * @param source Object stream
 * @return Static bundle
 */
public static MultipleObjectsBundle fromStream(BundleStreamSource source) {
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    DBIDVar current = null;
    ArrayModifiableDBIDs collected = null;
    int objects = 0;
    events: while (true) {
        final BundleStreamSource.Event event = source.nextEvent();
        switch(event) {
            case END_OF_STREAM:
                break events;
            case META_CHANGED: {
                final BundleMeta streamMeta = source.getMeta();
                // Rebuild the bundle meta from the entries of existing columns.
                result.meta = new BundleMeta();
                int c = 0;
                while (c < result.columns.size()) {
                    result.meta.add(streamMeta.get(c));
                    c++;
                }
                // Append a fresh column for every entry not covered yet.
                c = result.metaLength();
                while (c < streamMeta.size()) {
                    List<Object> column = new ArrayList<>(result.dataLength() + 1);
                    result.appendColumn(streamMeta.get(c), column);
                    c++;
                }
                // Start collecting DBIDs once the source announces them.
                if (current == null && source.hasDBIDs()) {
                    current = DBIDUtil.newVar();
                    collected = DBIDUtil.newArray();
                }
                break;
            }
            case NEXT_OBJECT: {
                if (current != null && source.assignDBID(current)) {
                    collected.add(current);
                }
                final int width = result.metaLength();
                for (int c = 0; c < width; c++) {
                    @SuppressWarnings("unchecked") final List<Object> column = (List<Object>) result.columns.get(c);
                    column.add(source.data(c));
                }
                objects++;
                break;
            }
            default:
                LoggingUtil.warning("Unknown event: " + event);
                break;
        }
    }
    if (collected == null) {
        return result;
    }
    if (objects == collected.size()) {
        result.setDBIDs(collected);
    } else {
        LOG.warning("Not every object had an DBID - discarding DBIDs: " + objects + " != " + collected.size());
    }
    return result;
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayList(java.util.ArrayList) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) List(java.util.List) ArrayList(java.util.ArrayList)

Example 48 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class MaterializeKNNAndRKNNPreprocessor method updateKNNsAndRkNNs.

/**
 * Updates the kNNs and RkNNs after insertion of the specified ids.
 *
 * For every previously existing object, each newly inserted object whose
 * distance does not exceed the current kNN distance is inserted into a heap
 * created from the old kNN list. If any such object was found, the stored kNN
 * list is replaced, and the difference between old and new list is applied to
 * the materialized RkNN sets.
 *
 * @param ids the ids of newly inserted objects causing a change of
 *        materialized kNNs and RkNNs
 * @return the RkNNs of the specified ids, i.e. the kNNs which have been
 *         updated
 */
private ArrayDBIDs updateKNNsAndRkNNs(DBIDs ids) {
    ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray();
    DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids);
    for (DBIDIter id = oldids.iter(); id.valid(); id.advance()) {
        KNNList oldkNNs = storage.get(id);
        double knnDist = oldkNNs.getKNNDistance();
        // look for new kNNs
        KNNHeap heap = null;
        for (DBIDIter newid = ids.iter(); newid.valid(); newid.advance()) {
            double dist = distanceQuery.distance(id, newid);
            if (dist <= knnDist) {
                // New id changes the kNNs of oldid.
                if (heap == null) {
                    heap = DBIDUtil.newHeap(oldkNNs);
                }
                heap.insert(dist, newid);
            }
        }
        // kNNs for oldid have changed:
        if (heap != null) {
            KNNList newkNNs = heap.toKNNList();
            storage.put(id, newkNNs);
            // get the difference
            int i = 0;
            int j = 0;
            ModifiableDoubleDBIDList added = DBIDUtil.newDistanceDBIDList();
            ModifiableDoubleDBIDList removed = DBIDUtil.newDistanceDBIDList();
            // TODO: use iterators.
            while (i < oldkNNs.size() && j < newkNNs.size()) {
                DoubleDBIDPair drp1 = oldkNNs.get(i);
                DoubleDBIDPair drp2 = newkNNs.get(j);
                // NOTE: we assume that on ties they are ordered the same way!
                if (!DBIDUtil.equal(drp1, drp2)) {
                    // Entry only present in the new list at this position.
                    added.add(drp2);
                    j++;
                } else {
                    i++;
                    j++;
                }
            }
            // Whatever is left of the old list was dropped from the kNNs.
            for (; i < oldkNNs.size(); i++) {
                removed.add(oldkNNs.get(i));
            }
            // Whatever is left of the new list was appended behind the common
            // part. This loop must advance and index with j; it also has to run
            // when i == j, e.g. when the new list is the old list plus extra
            // entries at the end.
            for (; j < newkNNs.size(); j++) {
                added.add(newkNNs.get(j));
            }
            // add new RkNN
            for (DoubleDBIDListIter newnn = added.iter(); newnn.valid(); newnn.advance()) {
                TreeSet<DoubleDBIDPair> rknns = materialized_RkNN.get(newnn);
                rknns.add(makePair(newnn, id));
            }
            // remove old RkNN
            for (DoubleDBIDListIter oldnn = removed.iter(); oldnn.valid(); oldnn.advance()) {
                TreeSet<DoubleDBIDPair> rknns = materialized_RkNN.get(oldnn);
                rknns.remove(makePair(oldnn, id));
            }
            rkNN_ids.add(id);
        }
    }
    return rkNN_ids;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDPair(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 49 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class MaterializeKNNPreprocessor method updateKNNsAfterInsertion.

/**
 * Refreshes the stored kNN lists of existing objects after new objects have
 * been inserted.
 *
 * An existing object is affected when at least one inserted object lies
 * within its current kNN distance; its kNN list is then rebuilt and stored.
 *
 * @param ids the ids of newly inserted objects causing a change of
 *        materialized kNNs
 * @return the RkNNs of the specified ids, i.e. the kNNs which have been
 *         updated
 */
private ArrayDBIDs updateKNNsAfterInsertion(DBIDs ids) {
    final ArrayModifiableDBIDs affected = DBIDUtil.newArray();
    final DBIDs existing = DBIDUtil.difference(relation.getDBIDs(), ids);
    for (DBIDIter old = existing.iter(); old.valid(); old.advance()) {
        final KNNList current = storage.get(old);
        final double threshold = current.getKNNDistance();
        // The heap is created lazily, only once a qualifying new object shows up.
        KNNHeap updated = null;
        for (DBIDIter inserted = ids.iter(); inserted.valid(); inserted.advance()) {
            final double d = distanceQuery.distance(old, inserted);
            if (!(d <= threshold)) {
                continue;
            }
            if (updated == null) {
                updated = DBIDUtil.newHeap(current);
            }
            updated.insert(d, inserted);
        }
        if (updated == null) {
            // No inserted object came close enough; nothing to store.
            continue;
        }
        storage.put(old, updated.toKNNList());
        affected.add(old);
    }
    return affected;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)49 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)23 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)12 ArrayList (java.util.ArrayList)11 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)10 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)9 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)8 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)7 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)6 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)5 DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)5 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)5 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)4 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)4 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)4 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)4 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)4 SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)4 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)4