Usage example of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the ELKI project (elki-project/elki):
the processNewResult method of the class JudgeOutlierScores.
@Override
public void processNewResult(ResultHierarchy hier, Result result) {
  // Collect every outlier result in the hierarchy; nothing to judge if there are none.
  List<OutlierResult> outlierResults = ResultUtil.filterResults(hier, OutlierResult.class);
  if (outlierResults == null || outlierResults.isEmpty()) {
    // logger.warning("No results found for "+JudgeOutlierScores.class.getSimpleName());
    return;
  }
  Database db = ResultUtil.findDatabase(hier);
  // Objects whose label matches the positive class name are the true outliers.
  DBIDs positives = DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName);
  // All scored ids minus the positives gives the inlier set.
  ModifiableDBIDs inliers = DBIDUtil.newHashSet(outlierResults.iterator().next().getScores().getDBIDs());
  inliers.removeDBIDs(positives);
  // Attach a score evaluation to each outlier result.
  for (OutlierResult outres : outlierResults) {
    db.getHierarchy().add(outres, computeScore(inliers, positives, outres));
  }
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the ELKI project (elki-project/elki):
the run method of the class CTLuRandomWalkEC.
/**
 * Run the algorithm.
 *
 * Builds a pairwise relation matrix (the "EC graph") from attribute
 * differences and spatial distances, normalizes it into a random-walk
 * style matrix, inverts it to obtain a similarity vector per object, and
 * finally scores each object by the geometric mean of the vector angles
 * to its k nearest spatial neighbors.
 *
 * @param spatial Spatial neighborhood relation
 * @param relation Attribute value relation (only dimension 0 is read)
 * @return Outlier result
 */
public OutlierResult run(Relation<P> spatial, Relation<? extends NumberVector> relation) {
  DistanceQuery<P> distFunc = getDistanceFunction().instantiate(spatial);
  // Per-object column of the inverted matrix, used as a similarity signature below.
  WritableDataStore<double[]> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
  // k nearest spatial neighbors of each object, collected during matrix construction.
  WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
  // Make a static IDs array for matrix column indexing
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  // construct the relation Matrix of the ec-graph
  double[][] E = new double[ids.size()][ids.size()];
  // Reused for every object; drained into an array at the end of each inner loop.
  KNNHeap heap = DBIDUtil.newHeap(k);
  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      final double val = relation.get(id).doubleValue(0);
      // Heap must have been fully drained by the previous iteration.
      assert (heap.size() == 0);
      int j = 0;
      for (DBIDIter n = ids.iter(); n.valid(); n.advance(), j++) {
        if (i == j) {
          // No self-edges in the graph.
          continue;
        }
        final double e;
        final double distance = distFunc.distance(id, n);
        // NOTE(review): the neighbor is inserted into the kNN heap even when
        // distance == 0, i.e. the "skipped" pair below still counts as a
        // neighbor candidate — confirm this is intended.
        heap.insert(distance, n);
        if (distance == 0) {
          LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
          e = 0;
        } else {
          // Edge weight grows with attribute difference, shrinks with distance.
          double diff = Math.abs(val - relation.get(n).doubleValue(0));
          double exp = FastMath.exp(FastMath.pow(diff, alpha));
          // Implementation note: not inverting exp worked a lot better.
          // Therefore we diverge from the article here.
          e = exp / distance;
        }
        // Stored transposed: column i collects the edges of object i.
        E[j][i] = e;
      }
      // Convert kNN Heap into DBID array
      ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
      while (heap.size() > 0) {
        nids.add(heap.poll());
      }
      neighbors.put(id, nids);
    }
  }
  // Normalize each column to sum 1 and multiply by -c in a single pass,
  // preparing the matrix (I - c*P) that is inverted below.
  // Also do the -c multiplication in this process.
  for (int i = 0; i < E[0].length; i++) {
    double sum = 0.0;
    for (int j = 0; j < E.length; j++) {
      sum += E[j][i];
    }
    if (sum == 0) {
      // Guard against division by zero for columns with no edge weight.
      sum = 1.0;
    }
    for (int j = 0; j < E.length; j++) {
      E[j][i] = -c * E[j][i] / sum;
    }
  }
  // Add identity matrix. The diagonal should still be 0s, so this is trivial.
  assert (E.length == E[0].length);
  for (int col = 0; col < E[0].length; col++) {
    assert (E[col][col] == 0.0);
    E[col][col] = 1.0;
  }
  // E := (1 - c) * (I - c*P)^-1 — the random-walk similarity matrix.
  E = timesEquals(inverse(E), 1 - c);
  // Split the matrix into columns
  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      // Note: matrix times ith unit vector = ith column
      double[] sim = getCol(E, i);
      similarityVectors.put(id, sim);
    }
  }
  // Drop the (potentially huge) matrix so it can be garbage collected.
  E = null;
  // compute the relevance scores between specified Object and its neighbors
  DoubleMinMax minmax = new DoubleMinMax();
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    // Geometric mean of the vector angles to the k nearest neighbors.
    double gmean = 1.0;
    int cnt = 0;
    for (DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
      if (DBIDUtil.equal(id, iter)) {
        // Skip self-comparison.
        continue;
      }
      double sim = VMath.angle(similarityVectors.get(id), similarityVectors.get(iter));
      gmean *= sim;
      cnt++;
    }
    // NOTE(review): if cnt == 0 (object had only itself as neighbor) this is
    // pow(1, Infinity) = NaN — confirm every object has a non-self neighbor.
    final double score = FastMath.pow(gmean, 1.0 / cnt);
    minmax.put(score);
    scores.putDouble(id, score);
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
  // Scores are unbounded above; 0 is the theoretical minimum / baseline.
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
  return new OutlierResult(scoreMeta, scoreResult);
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the ELKI project (elki-project/elki):
the handleDelete method of the class SelectionTableWindow.
/**
 * Handle delete. <br>
 * Remove the rows currently selected in the table from the database.
 */
protected void handleDelete() {
  // Partition the known ids: selected rows go to "deletions", the rest stay selected.
  ModifiableDBIDs deletions = DBIDUtil.newHashSet();
  ModifiableDBIDs remaining = DBIDUtil.newHashSet(dbids);
  DBIDArrayIter pos = dbids.iter();
  for (int row : table.getSelectedRows()) {
    pos.seek(row);
    deletions.add(pos);
    remaining.remove(pos);
  }
  // Update the selection first, so listeners never observe soon-to-be-deleted ids.
  context.setSelection(new DBIDSelection(remaining));
  // Now remove the deselected objects from the database.
  for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
    database.delete(iter);
  }
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the ELKI project (elki-project/elki):
the refineResult method of the class SameSizeKMeansAlgorithm.
/**
 * Perform k-means style iterations to improve the clustering result.
 *
 * Each iteration sorts objects by the gain a transfer would bring, then
 * moves objects between clusters either as paired swaps (which keep the
 * cluster sizes unchanged) or as single moves when the size bounds
 * [minsize, maxsize] allow it. Iterates until no transfer happens or
 * maxiter is reached.
 *
 * @param relation Data relation
 * @param means Means list
 * @param clusters Cluster list
 * @param metas Metadata storage
 * @param tids DBIDs array
 * @return final means
 */
protected double[][] refineResult(Relation<V> relation, double[][] means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
  NumberVectorDistanceFunction<? super V> df = getDistanceFunction();
  // Our desired cluster size:
  // rounded down
  final int minsize = tids.size() / k;
  // rounded up
  final int maxsize = (tids.size() + k - 1) / k;
  // Comparator: sort by largest gain by transfer
  final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {
    @Override
    public int compare(DBIDRef o1, DBIDRef o2) {
      Meta c1 = metas.get(o1), c2 = metas.get(o2);
      return Double.compare(c1.priority(), c2.priority());
    }
  };
  // List for sorting cluster preferences
  final int[] preferences = MathUtil.sequence(0, k);
  // Comparator for this list.
  final PreferenceComparator pcomp = new PreferenceComparator();
  // Initialize transfer lists:
  ArrayModifiableDBIDs[] transfers = new ArrayModifiableDBIDs[k];
  for (int i = 0; i < k; i++) {
    transfers[i] = DBIDUtil.newArray();
  }
  DBIDArrayIter id = tids.iter();
  // maxiter <= 0 means: iterate until convergence (active == 0).
  for (int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
    updateDistances(relation, means, metas, df);
    tids.sort(comp);
    // Track if anything has changed
    int active = 0;
    for (id.seek(0); id.valid(); id.advance()) {
      Meta c = metas.get(id);
      // Order candidate clusters by this object's preference.
      IntegerArrayQuickSort.sort(preferences, pcomp.select(c));
      ModifiableDBIDs source = clusters.get(c.primary);
      assert (source.contains(id));
      tloop: for (int i : preferences) {
        if (i == c.primary) {
          // Already assigned here
          continue;
        }
        ModifiableDBIDs dest = clusters.get(i);
        // Can we pair this transfer?
        final double gain = c.gain(i);
        // Look for a waiting object in the destination that wants to swap here:
        // a paired swap keeps both cluster sizes unchanged.
        for (DBIDMIter other = transfers[i].iter(); other.valid(); other.advance()) {
          Meta c2 = metas.get(other);
          if (gain + c2.gain(c.primary) > 0) {
            transfer(metas, c2, dest, source, other, c.primary);
            transfer(metas, c, source, dest, id, i);
            active += 2;
            // last, as this invalidates the reference!
            other.remove();
            // We are assigned here now.
            source = dest;
            // Can try another transfer, with next cluster.
            continue tloop;
          }
        }
        // If cluster sizes allow, move a single object.
        if (gain > 0 && (dest.size() < maxsize && source.size() > minsize)) {
          transfer(metas, c, source, dest, id, i);
          active += 1;
          // We are assigned here now.
          source = dest;
          continue tloop;
        }
      }
      // If the object would rather be in its top-preference cluster, queue it
      // on its current cluster's transfer list for later paired swaps.
      if (c.primary != preferences[0] && c.dists[c.primary] > c.dists[preferences[0]]) {
        transfers[c.primary].add(id);
      }
    }
    // TODO: try to get more transfers out of the transfer lists done by
    // considering more than one object?
    int pending = 0;
    // Clear transfer lists for next iteration.
    for (int i = 0; i < k; i++) {
      pending += transfers[i].size();
      transfers[i].clear();
    }
    if (LOG.isDebuggingFine()) {
      LOG.debugFine("Iteration #" + iter + ": performed " + active + " transfers skipped " + pending);
    }
    // Converged: no object moved in this pass.
    if (active <= 0) {
      break;
    }
    // Recompute means after reassignment
    means = means(clusters, means, relation);
  }
  return means;
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the ELKI project (elki-project/elki):
the run method of the class SameSizeKMeansAlgorithm.
/**
 * Run k-means with cluster size constraints.
 *
 * @param database Database
 * @param relation relation to use
 * @return result
 */
@Override
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
  // The objects to be clustered.
  final DBIDs ids = relation.getDBIDs();
  // Pick k starting means via the configured initializer.
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
  // One modifiable id set per cluster, presized for balanced cluster sizes.
  final int presize = relation.size() / k + 2;
  List<ModifiableDBIDs> clusters = new ArrayList<>(k);
  for (int c = 0; c < k; c++) {
    clusters.add(DBIDUtil.newHashSet(presize));
  }
  // Per-object metadata (distances, assignment) used by the refinement step.
  final WritableDataStore<Meta> metas = initializeMeta(relation, means);
  // Assign every object once, respecting the size constraints.
  ArrayModifiableDBIDs tids = initialAssignment(clusters, metas, ids);
  // Update means to match the initial assignment, then refine iteratively.
  means = means(clusters, means, relation);
  means = refineResult(relation, means, clusters, metas, tids);
  // Package the clusters into the result object.
  Clustering<MeanModel> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering");
  for (int c = 0; c < clusters.size(); c++) {
    result.addToplevelCluster(new Cluster<>(clusters.get(c), new MeanModel(means[c])));
  }
  return result;
}
Aggregations