Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki (elki-project): class ClusteringVectorParser, method nextEvent().
/**
 * Produce the next stream event: parses one line of cluster assignments into
 * a {@link Clustering}, emitting {@code META_CHANGED} (with the matching
 * {@code NEXT_OBJECT} buffered) whenever the metadata changes.
 *
 * @return the next stream event, or {@code END_OF_STREAM} when input is exhausted
 */
@Override
public Event nextEvent() {
  // Deliver an event buffered by a previous call: META_CHANGED is returned
  // first, and the corresponding NEXT_OBJECT is stored in nextevent.
  if(nextevent != null) {
    Event ret = nextevent;
    nextevent = null;
    return ret;
  }
  try {
    while(reader.nextLineExceptComments()) {
      buf1.clear();
      lbl.clear();
      // Per-cluster member counts, used to presize the ID sets below.
      Int2IntOpenHashMap csize = new Int2IntOpenHashMap();
      String name = null;
      for(; /* initialized by nextLineExceptComments() */
      tokenizer.valid(); tokenizer.advance()) {
        try {
          int cnum = tokenizer.getIntBase10();
          buf1.add(cnum);
          // Update cluster sizes:
          csize.addTo(cnum, 1);
        }
        catch(NumberFormatException e) {
          // Non-numeric token: treat it as a label. The first label seen
          // becomes the clustering name.
          final String label = tokenizer.getSubstring();
          lbl.add(label);
          if(name == null) {
            name = label;
          }
        }
      }
      if(name == null) {
        name = "Cluster";
      }
      // Update meta on first record:
      boolean metaupdate = (range == null);
      if(range == null) {
        range = DBIDUtil.generateStaticDBIDRange(buf1.size());
      }
      if(buf1.size() != range.size()) {
        throw new AbortException("Clusterings do not contain the same number of elements!");
      }
      // Build clustering to store in the relation.
      Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(csize.size());
      curclu = new Clustering<>(name, name);
      // Allocate the ID set of each non-empty cluster, presized via csize.
      for(ObjectIterator<Int2IntMap.Entry> iter = csize.int2IntEntrySet().fastIterator(); iter.hasNext();) {
        Int2IntMap.Entry entry = iter.next();
        if(entry.getIntValue() > 0) {
          clusters.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
        }
      }
      // Assign the i-th DBID of the range to the cluster given in column i.
      DBIDArrayIter iter = range.iter();
      for(int i = 0; i < buf1.size(); i++) {
        clusters.get(buf1.getInt(i)).add(iter.seek(i));
      }
      for(ModifiableDBIDs cids : clusters.values()) {
        curclu.addToplevelCluster(new Cluster<Model>(cids, ClusterModel.CLUSTER));
      }
      // Label handling: the first record carrying labels forces a meta update.
      if(!haslbl && !lbl.isEmpty()) {
        haslbl = true;
        metaupdate = true;
      }
      curlbl = LabelList.make(lbl);
      if(metaupdate) {
        // Force a meta update; deliver the object on the following call.
        nextevent = Event.NEXT_OBJECT;
        return Event.META_CHANGED;
      }
      return Event.NEXT_OBJECT;
    }
    return Event.END_OF_STREAM;
  }
  catch(IOException e) {
    // Preserve the cause so parse failures remain diagnosable (the original
    // rethrow dropped the underlying exception entirely).
    throw new IllegalArgumentException("Error while parsing line " + reader.getLineNumber() + ".", e);
  }
}
Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki (elki-project): class LinearWeightedExtendedNeighborhood, method getWeightedNeighbors().
/**
 * Collect the neighborhood of a reference object, extended over up to
 * {@code steps} hops, with a per-hop weight from {@code computeWeight(i)}.
 *
 * @param reference starting object
 * @return weighted neighbors, including the reference itself at weight computeWeight(0)
 */
@Override
public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) {
  ModifiableDBIDs seen = DBIDUtil.newHashSet();
  List<DoubleDBIDPair> result = new ArrayList<>();
  // Add starting object
  result.add(DBIDUtil.newPair(computeWeight(0), reference));
  seen.add(reference);
  // Extend.
  DBIDs cur = DBIDUtil.deref(reference);
  for(int i = 1; i <= steps; i++) {
    final double weight = computeWeight(i);
    // Collect newly discovered IDs
    ModifiableDBIDs add = DBIDUtil.newHashSet();
    for(DBIDIter iter = cur.iter(); iter.valid(); iter.advance()) {
      for(DBIDIter iter2 = inner.getNeighborDBIDs(iter).iter(); iter2.valid(); iter2.advance()) {
        // Skip already-seen objects. Marking the ID as seen right here (add
        // returns false on duplicates) prevents the same object from being
        // appended to result twice: once more in a later step with a
        // different weight, or twice within one step via two frontier nodes.
        if(!seen.add(iter2)) {
          continue;
        }
        add.add(iter2);
        result.add(DBIDUtil.newPair(weight, iter2));
      }
    }
    if(add.size() == 0) {
      // Frontier exhausted: no further extension possible.
      break;
    }
    cur = add;
  }
  return result;
}
Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki (elki-project): class ComputeOutlierHistogram, method evaluateOutlierResult().
/**
 * Evaluate a single outlier result as histogram.
 *
 * Each bin holds a pair of relative frequencies: {@code first} accumulates
 * non-outlier ("negative") objects, {@code second} accumulates objects whose
 * label matches the positive class ("outliers"). With {@code splitfreq} set,
 * the two groups are normalized separately; otherwise both by the total count.
 *
 * @param database Database to process
 * @param or Outlier result
 * @return Result
 */
public HistogramResult evaluateOutlierResult(Database database, OutlierResult or) {
// Let the scaling function analyze the full score distribution first.
if (scaling instanceof OutlierScalingFunction) {
OutlierScalingFunction oscaling = (OutlierScalingFunction) scaling;
oscaling.prepare(or);
}
ModifiableDBIDs ids = DBIDUtil.newHashSet(or.getScores().getDBIDs());
// Objects labeled with positiveClassName are the ground-truth outliers.
DBIDs outlierIds = DatabaseUtil.getObjectsByLabelMatch(database, positiveClassName);
// first value for outliers, second for each object
// If we have useful (finite) min/max, use these for binning.
double min = scaling.getMin();
double max = scaling.getMax();
final ObjHistogram<DoubleDoublePair> hist;
if (Double.isInfinite(min) || Double.isNaN(min) || Double.isInfinite(max) || Double.isNaN(max)) {
// No usable fixed range: use a dynamic histogram that adapts its bins.
hist = new AbstractObjDynamicHistogram<DoubleDoublePair>(bins) {
@Override
public DoubleDoublePair aggregate(DoubleDoublePair first, DoubleDoublePair second) {
// Merge bins by component-wise addition; frequencies are additive.
first.first += second.first;
first.second += second.second;
return first;
}
@Override
protected DoubleDoublePair makeObject() {
// Empty bin: zero frequency for both groups.
return new DoubleDoublePair(0., 0.);
}
@Override
protected DoubleDoublePair cloneForCache(DoubleDoublePair data) {
// Defensive copy, since bin contents are mutated in place above.
return new DoubleDoublePair(data.first, data.second);
}
@Override
protected DoubleDoublePair downsample(Object[] data, int start, int end, int size) {
// Collapse bins [start, end) into one by summing the pairs;
// null slots are empty bins and contribute nothing.
DoubleDoublePair sum = new DoubleDoublePair(0, 0);
for (int i = start; i < end; i++) {
DoubleDoublePair p = (DoubleDoublePair) data[i];
if (p != null) {
sum.first += p.first;
sum.second += p.second;
}
}
return sum;
}
};
} else {
// Finite scaling bounds: a fixed-width histogram over [min, max] suffices.
hist = new AbstractObjStaticHistogram<DoubleDoublePair>(bins, min, max) {
@Override
protected DoubleDoublePair makeObject() {
return new DoubleDoublePair(0., 0.);
}
@Override
public void putData(double coord, DoubleDoublePair data) {
// Accumulate into the existing bin rather than replacing it.
DoubleDoublePair exist = get(coord);
exist.first += data.first;
exist.second += data.second;
}
};
}
// Per-object frequency increments for the negative and positive group.
// splitfreq normalizes each group by its own size instead of the total.
DoubleDoublePair negative, positive;
if (!splitfreq) {
negative = new DoubleDoublePair(1. / ids.size(), 0);
positive = new DoubleDoublePair(0, 1. / ids.size());
} else {
// NOTE(review): if outlierIds is empty (or equals ids) this divides by
// zero, producing infinite increments - confirm inputs guarantee both
// groups are non-empty when splitfreq is set.
negative = new DoubleDoublePair(1. / (ids.size() - outlierIds.size()), 0);
positive = new DoubleDoublePair(0, 1. / outlierIds.size());
}
// Remove outliers from ids so each object is counted in exactly one loop.
ids.removeDBIDs(outlierIds);
// fill histogram with values of each object
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double result = or.getScores().doubleValue(iter);
result = scaling.getScaled(result);
// Skip non-finite scaled scores; they cannot be assigned to a bin.
if (result > Double.NEGATIVE_INFINITY && result < Double.POSITIVE_INFINITY) {
hist.putData(result, negative);
}
}
for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
double result = or.getScores().doubleValue(iter);
result = scaling.getScaled(result);
if (result > Double.NEGATIVE_INFINITY && result < Double.POSITIVE_INFINITY) {
hist.putData(result, positive);
}
}
// Serialize the histogram as [bin center, negative freq, positive freq].
Collection<double[]> collHist = new ArrayList<>(hist.getNumBins());
for (ObjHistogram.Iter<DoubleDoublePair> iter = hist.iter(); iter.valid(); iter.advance()) {
DoubleDoublePair data = iter.getValue();
collHist.add(new double[] { iter.getCenter(), data.first, data.second });
}
return new HistogramResult("Outlier Score Histogram", "outlier-histogram", collHist);
}
Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki (elki-project): class MkCoPTree, method insertAll().
/**
 * Bulk-insert a batch of entries, then recompute the approximated kNN
 * distances affected by the insertions.
 *
 * @param entries entries to insert; no-op when empty
 */
@Override
public void insertAll(List<MkCoPEntry> entries) {
  if (entries.isEmpty()) {
    return;
  }
  if (LOG.isDebugging()) {
    LOG.debugFine("insert " + entries + "\n");
  }
  // Lazily set up the tree from the first entry if needed.
  if (!initialized) {
    initialize(entries.get(0));
  }
  // Insert every entry, collecting its routing object id along the way.
  ModifiableDBIDs ids = DBIDUtil.newArray(entries.size());
  for (int i = 0; i < entries.size(); i++) {
    MkCoPEntry e = entries.get(i);
    ids.add(e.getRoutingObjectID());
    super.insert(e, false);
  }
  // Batch kNN queries for all inserted routing objects...
  Map<DBID, KNNList> knnLists = batchNN(getRoot(), ids, settings.kmax);
  // ...then propagate the updated approximations through the tree.
  adjustApproximatedKNNDistances(getRootEntry(), knnLists);
  if (EXTRA_INTEGRITY_CHECKS) {
    getRoot().integrityCheck(this, getRootEntry());
  }
}
Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki (elki-project): class CASH, method initHeap().
/**
 * Initializes the heap with the root intervals.
 *
 * Partitions the distance range [d_min, d_max] into equal-width intervals
 * (width controlled by the jitter parameter) and pushes one root CASH
 * interval per partition that contains at least minPts objects.
 *
 * @param heap the heap to be initialized
 * @param relation the database storing the parameterization functions
 * @param dim the dimensionality of the database
 * @param ids the ids of the database
 */
private void initHeap(ObjectHeap<IntegerPriorityObject<CASHInterval>> heap, Relation<ParameterizationFunction> relation, int dim, DBIDs ids) {
  CASHIntervalSplit split = new CASHIntervalSplit(relation, minPts);
  // determine minimum and maximum function value of all functions
  double[] minMax = determineMinMaxDistance(relation, dim);
  double d_min = minMax[0], d_max = minMax[1];
  double dIntervalLength = d_max - d_min;
  int numDIntervals = (int) FastMath.ceil(dIntervalLength / jitter);
  double dIntervalSize = dIntervalLength / numDIntervals;
  double[] d_mins = new double[numDIntervals], d_maxs = new double[numDIntervals];
  if(LOG.isVerbose()) {
    LOG.verbose(//
    new StringBuilder().append("d_min ").append(d_min).append("\nd_max ").append(//
    d_max).append("\nnumDIntervals ").append(//
    numDIntervals).append("\ndIntervalSize ").append(dIntervalSize).toString());
  }
  // alpha intervals: full angular range [0, pi] in each of dim-1 dimensions.
  double[] alphaMin = new double[dim - 1], alphaMax = new double[dim - 1];
  Arrays.fill(alphaMax, Math.PI);
  for(int i = 0; i < numDIntervals; i++) {
    // Intervals are contiguous: each starts where the previous one ended.
    d_mins[i] = (i == 0) ? d_min : d_maxs[i - 1];
    // BUGFIX: the last interval must end exactly at d_max. The previous
    // code used "d_max - d_mins[i]", which is the remaining *width*
    // (approximately dIntervalSize), not the upper endpoint.
    d_maxs[i] = (i < numDIntervals - 1) ? d_mins[i] + dIntervalSize : d_max;
    HyperBoundingBox alphaInterval = new HyperBoundingBox(alphaMin, alphaMax);
    ModifiableDBIDs intervalIDs = split.determineIDs(ids, alphaInterval, d_mins[i], d_maxs[i]);
    // Only keep intervals dense enough to possibly contain a cluster.
    if(intervalIDs != null && intervalIDs.size() >= minPts) {
      CASHInterval rootInterval = new CASHInterval(alphaMin, alphaMax, split, intervalIDs, -1, 0, d_mins[i], d_maxs[i]);
      heap.add(new IntegerPriorityObject<>(rootInterval.priority(), rootInterval));
    }
  }
  if(LOG.isDebuggingFiner()) {
    LOG.debugFiner(new StringBuilder().append("heap.size: ").append(heap.size()).toString());
  }
}
Aggregations