Search in sources :

Example 1 with BitVector

use of de.lmu.ifi.dbs.elki.data.BitVector in project elki by elki-project.

the class BitVectorLabelParser method parseLineInternal.

/**
 * Parses the tokens of the current line into a bit vector and a label list.
 * <p>
 * Each token that parses as a base-10 integer occupies one dimension; its bit
 * is set when the parsed value is positive. Tokens that do not parse as an
 * integer are collected as labels and do not consume a dimension.
 *
 * @return {@code true} if at least one numeric token was read, in which case
 *         {@code curvec} and {@code curlbl} hold the result and the working
 *         buffers were cleared; {@code false} if no numeric token was found
 *         (the collected labels are then left in {@code labels})
 */
@Override
protected boolean parseLineInternal() {
    int dims = 0;
    while (tokenizer.valid()) {
        try {
            // 64 bits per long: upper bits select the word, lower 6 the bit.
            final int wordIndex = dims >>> 6;
            final int bitIndex = dims & 0x3F;
            // Grow the word buffer lazily, before the token is parsed.
            if (buf.size() <= wordIndex) {
                buf.add(0L);
            }
            // Throws NumberFormatException for non-numeric tokens, in which
            // case the dimension counter below is not advanced.
            final boolean positive = tokenizer.getIntBase10() > 0;
            if (positive) {
                final long mask = 1L << bitIndex;
                buf.set(wordIndex, buf.getLong(wordIndex) | mask);
            }
            dims++;
        } catch (NumberFormatException e) {
            // Not a number: keep the raw token text as a label.
            labels.add(tokenizer.getSubstring());
        }
        tokenizer.advance();
    }
    if (dims == 0) {
        // No numeric column at all; maybe a label row.
        return false;
    }
    curvec = new BitVector(buf.toLongArray(), dims);
    curlbl = LabelList.make(labels);
    // Reset the working buffers for the next line.
    buf.clear();
    labels.clear();
    return true;
}
Also used : BitVector(de.lmu.ifi.dbs.elki.data.BitVector)

Example 2 with BitVector

use of de.lmu.ifi.dbs.elki.data.BitVector in project elki by elki-project.

the class APRIORI method frequentItemsetsSparse.

/**
 * Returns the frequent itemsets among the given candidates with respect to the
 * given database. Variant used for {@code SparseItemset} candidates.
 * <p>
 * Support is counted with an exact containment test per candidate and
 * transaction, so every candidate is incremented at most once per transaction
 * and the result agrees with {@code frequentItemsets}.
 * <p>
 * NOTE(review): a binary search over the (presumably sorted) candidate list
 * could replace the containment scan for speed, but it must replace it, not
 * run in addition to it; otherwise supports are counted twice. Such a search
 * also has to accept a hit at index 0 and restart its interval for every
 * transaction.
 *
 * @param candidates the candidates to be evaluated
 * @param relation the database to evaluate the candidates on
 * @param needed Minimum support needed
 * @param ids Objects to process
 * @param survivors Output: ids of the transactions that contained more than
 *        {@code length} of the candidates
 * @param length Itemset length
 * @return Itemsets with sufficient support
 */
protected List<SparseItemset> frequentItemsetsSparse(List<SparseItemset> candidates, Relation<BitVector> relation, int needed, DBIDs ids, ArrayModifiableDBIDs survivors, int length) {
    // Scratch buffers required by initializeSearchItemset; they are only used
    // to decide whether a transaction is worth examining at all.
    final int[] scratchi = new int[length], iters = new int[length];
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
        final BitVector bv = relation.get(iditer);
        // Presumably fails when the transaction has fewer than `length` items
        // and thus cannot contain any candidate -- TODO confirm against helper.
        if (!initializeSearchItemset(bv, scratchi, iters)) {
            continue;
        }
        // Number of candidates contained in this transaction.
        int lives = 0;
        for (Itemset candidate : candidates) {
            if (candidate.containedIn(bv)) {
                candidate.increaseSupport();
                ++lives;
            }
        }
        if (lives > length) {
            survivors.add(iditer);
        }
    }
    // Retain only those with minimum support:
    final List<SparseItemset> frequent = new ArrayList<>(candidates.size());
    for (SparseItemset candidate : candidates) {
        if (candidate.getSupport() >= needed) {
            frequent.add(candidate);
        }
    }
    return frequent;
}
Also used : BitVector(de.lmu.ifi.dbs.elki.data.BitVector) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with BitVector

use of de.lmu.ifi.dbs.elki.data.BitVector in project elki by elki-project.

the class DiSHPreferenceVectorIndex method determinePreferenceVectorByApriori.

/**
 * Determines the preference vector with the apriori strategy.
 * <p>
 * Every object of the relation becomes a transaction whose items are the
 * dimensions in which the object appears in {@code neighborIDs}; objects that
 * appear in no dimension are left out. APRIORI is run on these transactions
 * with {@code minpts}, and the frequent itemset with the largest cardinality
 * is returned as a bit set. Among itemsets of equal cardinality the one with
 * the larger support wins.
 *
 * @param relation the database storing the objects
 * @param neighborIDs the list of ids of the neighbors in each dimension
 * @param msg a string buffer for debug messages, may be {@code null} to
 *        disable debug output
 * @return the preference vector; all bits are zero if no frequent itemset was
 *         found
 */
private long[] determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) {
    final int dimensionality = neighborIDs.length;
    // database for apriori
    UpdatableDatabase apriori_db = new HashmapDatabase();
    SimpleTypeInformation<?> bitmeta = VectorFieldTypeInformation.typeRequest(BitVector.class, dimensionality, dimensionality);
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        long[] bits = BitsUtil.zero(dimensionality);
        boolean allFalse = true;
        for (int d = 0; d < dimensionality; d++) {
            if (neighborIDs[d].contains(it)) {
                BitsUtil.setI(bits, d);
                allFalse = false;
            }
        }
        // Only objects with at least one set dimension become transactions.
        if (!allFalse) {
            SingleObjectBundle oaa = new SingleObjectBundle();
            oaa.append(bitmeta, new BitVector(bits, dimensionality));
            apriori_db.insert(oaa);
        }
    }
    APRIORI apriori = new APRIORI(minpts);
    FrequentItemsetsResult aprioriResult = apriori.run(apriori_db);
    // result of apriori
    List<Itemset> frequentItemsets = aprioriResult.getItemsets();
    if (msg != null) {
        msg.append("\n Frequent itemsets: ").append(frequentItemsets);
    }
    int maxSupport = 0;
    int maxCardinality = 0;
    long[] preferenceVector = BitsUtil.zero(dimensionality);
    for (Itemset itemset : frequentItemsets) {
        final int cardinality = itemset.length();
        final int support = itemset.getSupport();
        // Prefer larger itemsets; break cardinality ties by strictly larger
        // support so that the best-supported itemset is kept.
        if (cardinality > maxCardinality || (cardinality == maxCardinality && support > maxSupport)) {
            preferenceVector = Itemset.toBitset(itemset, BitsUtil.zero(dimensionality));
            maxCardinality = cardinality;
            maxSupport = support;
        }
    }
    if (msg != null) {
        msg.append("\n preference ").append(BitsUtil.toStringLow(preferenceVector, dimensionality)).append('\n');
        LOG.debugFine(msg.toString());
    }
    return preferenceVector;
}
Also used : UpdatableDatabase(de.lmu.ifi.dbs.elki.database.UpdatableDatabase) BitVector(de.lmu.ifi.dbs.elki.data.BitVector) HashmapDatabase(de.lmu.ifi.dbs.elki.database.HashmapDatabase) SingleObjectBundle(de.lmu.ifi.dbs.elki.datasource.bundle.SingleObjectBundle) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) FrequentItemsetsResult(de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult) Itemset(de.lmu.ifi.dbs.elki.algorithm.itemsetmining.Itemset) APRIORI(de.lmu.ifi.dbs.elki.algorithm.itemsetmining.APRIORI)

Example 4 with BitVector

use of de.lmu.ifi.dbs.elki.data.BitVector in project elki by elki-project.

the class APRIORI method frequentItemsets.

/**
 * Returns the frequent itemsets among the given candidates with respect to the
 * given database.
 * <p>
 * Large candidate lists of {@code SparseItemset}s are delegated to
 * {@code frequentItemsetsSparse}; all other inputs are counted here with one
 * containment test per candidate and transaction.
 *
 * @param candidates the candidates to be evaluated
 * @param relation the database to evaluate the candidates on
 * @param needed Minimum support needed
 * @param ids Objects to process
 * @param survivors Output: ids of the transactions that contained more than
 *        {@code length} of the candidates
 * @param length Itemset length
 * @return Itemsets with sufficient support
 */
protected List<? extends Itemset> frequentItemsets(List<? extends Itemset> candidates, Relation<BitVector> relation, int needed, DBIDs ids, ArrayModifiableDBIDs survivors, int length) {
    if (candidates.isEmpty()) {
        return Collections.emptyList();
    }
    // Heuristic switch to the sparse code path: taken only when the candidate
    // list is large relative to the itemset length and holds sparse itemsets.
    final boolean firstIsSparse = candidates.get(0) instanceof SparseItemset;
    final boolean manyCandidates = candidates.size() > length * length * length * 100;
    if (manyCandidates && firstIsSparse) {
        // Only the first element was checked; assume that all itemsets are
        // sparse itemsets!
        @SuppressWarnings("unchecked") List<SparseItemset> sparseCandidates = (List<SparseItemset>) candidates;
        return frequentItemsetsSparse(sparseCandidates, relation, needed, ids, survivors, length);
    }
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
        final BitVector transaction = relation.get(iditer);
        // TODO: exploit that the candidate set is sorted?
        int matches = 0;
        for (Itemset cand : candidates) {
            if (!cand.containedIn(transaction)) {
                continue;
            }
            cand.increaseSupport();
            matches++;
        }
        if (matches > length) {
            survivors.add(iditer);
        }
    }
    // Keep only the candidates that reached the minimum support.
    final List<Itemset> result = new ArrayList<>(candidates.size());
    for (Itemset cand : candidates) {
        if (cand.getSupport() >= needed) {
            result.add(cand);
        }
    }
    return result;
}
Also used : BitVector(de.lmu.ifi.dbs.elki.data.BitVector) ArrayList(java.util.ArrayList) List(java.util.List) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 5 with BitVector

use of de.lmu.ifi.dbs.elki.data.BitVector in project elki by elki-project.

the class APRIORI method run.

/**
 * Performs the APRIORI algorithm on the given database.
 * <p>
 * Frequent 1-itemsets and 2-itemsets are built by dedicated helpers; longer
 * itemsets are produced level by level via {@code aprioriGenerate} followed by
 * support counting in {@code frequentItemsets}. After each level, processing
 * continues with the reduced set of transactions reported as survivors. Only
 * itemsets whose length is at least {@code minlength} are added to the result;
 * levels beyond {@code maxlength} are not computed.
 *
 * @param relation the Relation to process
 * @return the AprioriResult learned by this APRIORI
 */
public FrequentItemsetsResult run(Relation<BitVector> relation) {
    DBIDs ids = relation.getDBIDs();
    List<Itemset> solution = new ArrayList<>();
    final int size = ids.size();
    final int needed = getMinimumSupport(size);
    // TODO: we don't strictly require a vector field.
    // We could work with knowing just the maximum dimensionality beforehand.
    VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
    if (size > 0) {
        final int dim = meta.getDimensionality();
        Duration timeone = LOG.newDuration(STAT + "1-items.time").begin();
        List<OneItemset> oneitems = buildFrequentOneItemsets(relation, dim, needed);
        LOG.statistics(timeone.end());
        if (LOG.isStatistics()) {
            LOG.statistics(new LongStatistic(STAT + "1-items.frequent", oneitems.size()));
            LOG.statistics(new LongStatistic(STAT + "1-items.transactions", ids.size()));
        }
        if (LOG.isDebuggingFine()) {
            LOG.debugFine(debugDumpCandidates(new StringBuilder(), oneitems, meta));
        }
        if (minlength <= 1) {
            solution.addAll(oneitems);
        }
        if (oneitems.size() >= 2 && maxlength >= 2) {
            Duration timetwo = LOG.newDuration(STAT + "2-items.time").begin();
            ArrayModifiableDBIDs survivors = DBIDUtil.newArray(ids.size());
            List<? extends Itemset> candidates = buildFrequentTwoItemsets(oneitems, relation, dim, needed, ids, survivors);
            // Continue with reduced set of transactions.
            ids = survivors;
            LOG.statistics(timetwo.end());
            if (LOG.isStatistics()) {
                LOG.statistics(new LongStatistic(STAT + "2-items.frequent", candidates.size()));
                LOG.statistics(new LongStatistic(STAT + "2-items.transactions", ids.size()));
            }
            if (LOG.isDebuggingFine()) {
                LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
            }
            if (minlength <= 2) {
                solution.addAll(candidates);
            }
            // Stop once fewer frequent itemsets remain than the next length.
            for (int length = 3; length <= maxlength && candidates.size() >= length; length++) {
                Duration timel = LOG.newDuration(STAT + length + "-items.time").begin();
                // Join to get the new candidates
                candidates = aprioriGenerate(candidates, length, dim);
                if (LOG.isDebuggingFinest()) {
                    LOG.debugFinest(debugDumpCandidates(new StringBuilder().append("Before pruning: "), candidates, meta));
                }
                survivors = DBIDUtil.newArray(ids.size());
                candidates = frequentItemsets(candidates, relation, needed, ids, survivors, length);
                // Continue with reduced set of transactions.
                ids = survivors;
                LOG.statistics(timel.end());
                if (LOG.isStatistics()) {
                    LOG.statistics(new LongStatistic(STAT + length + "-items.frequent", candidates.size()));
                    LOG.statistics(new LongStatistic(STAT + length + "-items.transactions", ids.size()));
                }
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
                }
                // Honor the minimum length here as well, as for lengths 1 and 2;
                // shorter levels are still computed to generate the next level.
                if (minlength <= length) {
                    solution.addAll(candidates);
                }
            }
        }
    }
    return new FrequentItemsetsResult("APRIORI", "apriori", solution, meta, size);
}
Also used : BitVector(de.lmu.ifi.dbs.elki.data.BitVector) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) FrequentItemsetsResult(de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Aggregations

BitVector (de.lmu.ifi.dbs.elki.data.BitVector)8 ArrayList (java.util.ArrayList)5 FrequentItemsetsResult (de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)3 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)3 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)3 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)2 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)2 APRIORI (de.lmu.ifi.dbs.elki.algorithm.itemsetmining.APRIORI)1 Itemset (de.lmu.ifi.dbs.elki.algorithm.itemsetmining.Itemset)1 HashmapDatabase (de.lmu.ifi.dbs.elki.database.HashmapDatabase)1 UpdatableDatabase (de.lmu.ifi.dbs.elki.database.UpdatableDatabase)1 HashSetDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetDBIDs)1 BundleMeta (de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta)1 SingleObjectBundle (de.lmu.ifi.dbs.elki.datasource.bundle.SingleObjectBundle)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)1 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)1 Object2IntMap (it.unimi.dsi.fastutil.objects.Object2IntMap)1 IOException (java.io.IOException)1