Example use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the ELKI project (elki-project/elki): class APRIORI, method run.
/**
 * Performs the APRIORI algorithm on the given database.
 *
 * Frequent itemsets are mined level-wise: first frequent 1-itemsets, then
 * 2-itemsets, then candidate generation + counting for each larger size,
 * shrinking the transaction set to the surviving DBIDs after each level.
 *
 * @param relation the Relation to process
 * @return the AprioriResult learned by this APRIORI
 */
public FrequentItemsetsResult run(Relation<BitVector> relation) {
  DBIDs transactions = relation.getDBIDs();
  List<Itemset> found = new ArrayList<>();
  final int total = transactions.size();
  final int needed = getMinimumSupport(total);
  // TODO: we don't strictly require a vector field.
  // We could work with knowing just the maximum dimensionality beforehand.
  VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
  if (total > 0) {
    final int dim = meta.getDimensionality();
    // Level 1: frequent single items.
    Duration d1 = LOG.newDuration(STAT + "1-items.time").begin();
    List<OneItemset> frequent1 = buildFrequentOneItemsets(relation, dim, needed);
    LOG.statistics(d1.end());
    if (LOG.isStatistics()) {
      LOG.statistics(new LongStatistic(STAT + "1-items.frequent", frequent1.size()));
      LOG.statistics(new LongStatistic(STAT + "1-items.transactions", transactions.size()));
    }
    if (LOG.isDebuggingFine()) {
      LOG.debugFine(debugDumpCandidates(new StringBuilder(), frequent1, meta));
    }
    if (minlength <= 1) {
      found.addAll(frequent1);
    }
    // Level 2 and above only make sense with at least two frequent items.
    if (frequent1.size() >= 2 && maxlength >= 2) {
      Duration d2 = LOG.newDuration(STAT + "2-items.time").begin();
      ArrayModifiableDBIDs survivors = DBIDUtil.newArray(transactions.size());
      List<? extends Itemset> candidates = buildFrequentTwoItemsets(frequent1, relation, dim, needed, transactions, survivors);
      // Continue with reduced set of transactions.
      transactions = survivors;
      LOG.statistics(d2.end());
      if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(STAT + "2-items.frequent", candidates.size()));
        LOG.statistics(new LongStatistic(STAT + "2-items.transactions", transactions.size()));
      }
      if (LOG.isDebuggingFine()) {
        LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
      }
      if (minlength <= 2) {
        found.addAll(candidates);
      }
      // Levels 3..maxlength: generate, prune, count, repeat.
      for (int length = 3; length <= maxlength && candidates.size() >= length; length++) {
        Duration dl = LOG.newDuration(STAT + length + "-items.time").begin();
        // Join to get the new candidates
        candidates = aprioriGenerate(candidates, length, dim);
        if (LOG.isDebuggingFinest()) {
          LOG.debugFinest(debugDumpCandidates(new StringBuilder().append("Before pruning: "), candidates, meta));
        }
        survivors = DBIDUtil.newArray(transactions.size());
        candidates = frequentItemsets(candidates, relation, needed, transactions, survivors, length);
        // Continue with reduced set of transactions.
        transactions = survivors;
        LOG.statistics(dl.end());
        if (LOG.isStatistics()) {
          LOG.statistics(new LongStatistic(STAT + length + "-items.frequent", candidates.size()));
          LOG.statistics(new LongStatistic(STAT + length + "-items.transactions", transactions.size()));
        }
        if (LOG.isDebuggingFine()) {
          LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
        }
        found.addAll(candidates);
      }
    }
  }
  return new FrequentItemsetsResult("APRIORI", "apriori", found, meta, total);
}
Example use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the ELKI project (elki-project/elki): class MultipleObjectsBundle, method fromStream.
/**
 * Convert an object stream to a bundle.
 *
 * Consumes events from the stream until END_OF_STREAM, rebuilding column
 * metadata on META_CHANGED and copying row data on NEXT_OBJECT. DBIDs are
 * collected when the source provides them, but are discarded (with a
 * warning) unless every object carried one.
 *
 * @param source Object stream
 * @return Static bundle
 */
public static MultipleObjectsBundle fromStream(BundleStreamSource source) {
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  DBIDVar idvar = null;
  ArrayModifiableDBIDs collected = null;
  int count = 0;
  loop: while (true) {
    final BundleStreamSource.Event ev = source.nextEvent();
    switch(ev) {
    case END_OF_STREAM:
      break loop;
    case META_CHANGED: {
      BundleMeta smeta = source.getMeta();
      // Rebuild the bundle metadata to match the existing columns:
      bundle.meta = new BundleMeta();
      for (int i = 0; i < bundle.columns.size(); i++) {
        bundle.meta.add(smeta.get(i));
      }
      // Append storage for any newly introduced columns:
      for (int i = bundle.metaLength(); i < smeta.size(); i++) {
        List<Object> data = new ArrayList<>(bundle.dataLength() + 1);
        bundle.appendColumn(smeta.get(i), data);
      }
      // Lazily start collecting DBIDs once the source announces them:
      if (idvar == null && source.hasDBIDs()) {
        idvar = DBIDUtil.newVar();
        collected = DBIDUtil.newArray();
      }
      break;
    }
    case NEXT_OBJECT:
      if (idvar != null && source.assignDBID(idvar)) {
        collected.add(idvar);
      }
      // Copy one value per column for the current object:
      for (int i = 0; i < bundle.metaLength(); i++) {
        @SuppressWarnings("unchecked") final List<Object> col = (List<Object>) bundle.columns.get(i);
        col.add(source.data(i));
      }
      ++count;
      break;
    default:
      LoggingUtil.warning("Unknown event: " + ev);
      break;
    }
  }
  if (collected != null) {
    // Only keep the DBIDs if every object had one assigned.
    if (count != collected.size()) {
      LOG.warning("Not every object had an DBID - discarding DBIDs: " + count + " != " + collected.size());
    } else {
      bundle.setDBIDs(collected);
    }
  }
  return bundle;
}
Example use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the ELKI project (elki-project/elki): class MaterializeKNNAndRKNNPreprocessor, method updateKNNsAndRkNNs.
/**
 * Updates the kNNs and RkNNs after insertion of the specified ids.
 *
 * For every pre-existing object whose kNN distance could be affected, the
 * new objects are tested; if the kNN list changes, the materialized RkNN
 * sets are updated incrementally from the diff of old vs. new kNN list.
 *
 * @param ids the ids of newly inserted objects causing a change of
 *        materialized kNNs and RkNNs
 * @return the RkNNs of the specified ids, i.e. the kNNs which have been
 *         updated
 */
private ArrayDBIDs updateKNNsAndRkNNs(DBIDs ids) {
  ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray();
  DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids);
  for (DBIDIter id = oldids.iter(); id.valid(); id.advance()) {
    KNNList oldkNNs = storage.get(id);
    double knnDist = oldkNNs.getKNNDistance();
    // look for new kNNs
    KNNHeap heap = null;
    for (DBIDIter newid = ids.iter(); newid.valid(); newid.advance()) {
      double dist = distanceQuery.distance(id, newid);
      if (dist <= knnDist) {
        // New id changes the kNNs of oldid.
        if (heap == null) {
          // Lazily copy the old kNNs into a heap only when needed.
          heap = DBIDUtil.newHeap(oldkNNs);
        }
        heap.insert(dist, newid);
      }
    }
    // kNNs for oldid have changed:
    if (heap != null) {
      KNNList newkNNs = heap.toKNNList();
      storage.put(id, newkNNs);
      // Compute the diff between old and new kNN lists.
      int i = 0;
      int j = 0;
      ModifiableDoubleDBIDList added = DBIDUtil.newDistanceDBIDList();
      ModifiableDoubleDBIDList removed = DBIDUtil.newDistanceDBIDList();
      // TODO: use iterators.
      while (i < oldkNNs.size() && j < newkNNs.size()) {
        DoubleDBIDPair drp1 = oldkNNs.get(i);
        DoubleDBIDPair drp2 = newkNNs.get(j);
        // NOTE: we assume that on ties they are ordered the same way!
        if (!DBIDUtil.equal(drp1, drp2)) {
          // Entries only present in the new list were added by insertion;
          // old entries that dropped off are collected below.
          added.add(drp2);
          j++;
        } else {
          i++;
          j++;
        }
      }
      // Drain the remainders unconditionally. (BUGFIX: this was previously
      // guarded by "if (i != j)", silently skipping leftovers when both
      // cursors happened to be equal.)
      for (; i < oldkNNs.size(); i++) {
        removed.add(oldkNNs.get(i));
      }
      // BUGFIX: this loop used to advance and index i instead of j, which
      // never terminated (j unchanged) and read the wrong list positions.
      for (; j < newkNNs.size(); j++) {
        added.add(newkNNs.get(j));
      }
      // add new RkNN
      for (DoubleDBIDListIter newnn = added.iter(); newnn.valid(); newnn.advance()) {
        TreeSet<DoubleDBIDPair> rknns = materialized_RkNN.get(newnn);
        rknns.add(makePair(newnn, id));
      }
      // remove old RkNN
      for (DoubleDBIDListIter oldnn = removed.iter(); oldnn.valid(); oldnn.advance()) {
        TreeSet<DoubleDBIDPair> rknns = materialized_RkNN.get(oldnn);
        rknns.remove(makePair(oldnn, id));
      }
      rkNN_ids.add(id);
    }
  }
  return rkNN_ids;
}
Example use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the ELKI project (elki-project/elki): class MaterializeKNNPreprocessor, method updateKNNsAfterInsertion.
/**
 * Updates the kNNs of the RkNNs of the specified ids.
 *
 * For each pre-existing object, every newly inserted object is checked
 * against the current kNN distance; when at least one insertion qualifies,
 * the stored kNN list is rebuilt via a heap and the object is reported.
 *
 * @param ids the ids of newly inserted objects causing a change of
 *        materialized kNNs
 * @return the RkNNs of the specified ids, i.e. the kNNs which have been
 *         updated
 */
private ArrayDBIDs updateKNNsAfterInsertion(DBIDs ids) {
  ArrayModifiableDBIDs updated = DBIDUtil.newArray();
  DBIDs unaffected = DBIDUtil.difference(relation.getDBIDs(), ids);
  for (DBIDIter existing = unaffected.iter(); existing.valid(); existing.advance()) {
    KNNList knns = storage.get(existing);
    final double kdist = knns.getKNNDistance();
    // Scan insertions for candidates closer than the current kNN distance.
    KNNHeap heap = null;
    for (DBIDIter inserted = ids.iter(); inserted.valid(); inserted.advance()) {
      final double dist = distanceQuery.distance(existing, inserted);
      if (dist <= kdist) {
        if (heap == null) {
          // Lazily materialize a heap seeded with the old kNNs.
          heap = DBIDUtil.newHeap(knns);
        }
        heap.insert(dist, inserted);
      }
    }
    // A non-null heap means the kNN list changed; persist and report it.
    if (heap != null) {
      knns = heap.toKNNList();
      storage.put(existing, knns);
      updated.add(existing);
    }
  }
  return updated;
}
End of aggregated usage examples.