use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class LinearScanRKNNQuery method getRKNNForDBID.
/**
 * Finds the reverse k-nearest neighbors of a database object by a linear
 * scan.
 * <p>
 * The kNN lists of all objects of the relation are requested in bulk. Every
 * object whose kNN list contains {@code id} is added to the result, together
 * with the distance stored in the matching list entry. The result list is
 * sorted before it is returned.
 *
 * @param id query object reference
 * @param k number of neighbors requested from the kNN query
 * @return the reverse neighbors found, with their distances
 */
@Override
public DoubleDBIDList getRKNNForDBID(DBIDRef id, int k) {
  final ArrayDBIDs candidates = DBIDUtil.ensureArray(relation.getDBIDs());
  final List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(candidates, k);
  final ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
  // pos tracks the position of the current candidate within the bulk result.
  int pos = 0;
  for (DBIDIter cand = candidates.iter(); cand.valid(); cand.advance(), pos++) {
    DoubleDBIDListIter entry = neighborhoods.get(pos).iter();
    while (entry.valid()) {
      // The candidate has the query object among its neighbors.
      if (DBIDUtil.equal(entry, id)) {
        result.add(entry.doubleValue(), cand);
      }
      entry.advance();
    }
  }
  result.sort();
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class LinearScanRKNNQuery method getRKNNForObject.
/**
 * Finds the reverse k-nearest neighbors of an arbitrary query object by a
 * linear scan.
 * <p>
 * The kNN lists of all objects of the relation are requested in bulk. An
 * object is reported when its kNN list holds fewer than {@code k} entries, or
 * when its distance to {@code obj} does not exceed the distance of the last
 * considered entry of its kNN list. The result list is sorted before it is
 * returned.
 *
 * @param obj query object
 * @param k number of neighbors requested from the kNN query
 * @return the reverse neighbors found, with their distances to {@code obj}
 */
@Override
public DoubleDBIDList getRKNNForObject(O obj, int k) {
  final ArrayDBIDs candidates = DBIDUtil.ensureArray(relation.getDBIDs());
  final List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(candidates, k);
  final ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
  // pos tracks the position of the current candidate within the bulk result.
  int pos = 0;
  for (DBIDIter cand = candidates.iter(); cand.valid(); cand.advance(), pos++) {
    final KNNList knn = neighborhoods.get(pos);
    final int last = Math.min(k - 1, knn.size() - 1);
    final double dist = distanceQuery.distance(obj, cand);
    // A list with fewer than k entries always has room for the query object;
    // this check must come first so that get(last) is only evaluated for
    // complete lists.
    final boolean incomplete = last < k - 1;
    if (incomplete || dist <= knn.get(last).doubleValue()) {
      result.add(dist, cand);
    }
  }
  result.sort();
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class HiCS method run.
/**
 * Runs HiCS on the given relation.
 * <p>
 * The high-contrast subspaces are computed first. The configured outlier
 * detection algorithm is then run on a projected view of the data for each
 * subspace, and the resulting scores are summed per object; NaN scores are
 * left out of the sum.
 *
 * @param relation the data relation
 * @return the aggregated scores that were assigned by the given outlier
 *         detection algorithm
 */
public OutlierResult run(Relation<V> relation) {
  final DBIDs ids = relation.getDBIDs();
  ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
  Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getSingleThreadedRandom());
  if (LOG.isVerbose()) {
    LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
  }

  // Phase 1: run the outlier algorithm once per subspace and keep its scores.
  List<DoubleRelation> perSubspace = new ArrayList<>();
  FiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG);
  }
  for (HiCSSubspace dimset : subspaces) {
    if (LOG.isVerbose()) {
      LOG.verbose("Performing outlier detection in subspace " + dimset);
    }
    // The algorithm sees only a view of the data restricted to this subspace.
    ProjectedView<V, V> projection = new ProjectedView<>(relation, new NumericalFeatureSelection<V>(dimset));
    ProxyDatabase pdb = new ProxyDatabase(ids);
    pdb.addRelation(projection);
    perSubspace.add(outlierAlgorithm.run(pdb).getScores());
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);

  // Phase 2: sum the per-subspace scores for every object.
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double total = 0.0;
    for (DoubleRelation partial : perSubspace) {
      final double s = partial.doubleValue(iditer);
      if (Double.isNaN(s)) {
        continue; // undefined score in this subspace: skip it
      }
      total += s;
    }
    scores.putDouble(iditer, total);
    minmax.put(total);
  }

  DoubleRelation scoreres = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
  return new OutlierResult(meta, scoreres);
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class HiCS method calculateContrast.
/**
 * Calculates the actual contrast of a given subspace.
 * <p>
 * Performs {@code m} Monte-Carlo iterations. In each iteration one dimension
 * of the subspace is chosen at random, and a conditional sample is built by
 * intersecting, for every other dimension of the subspace, a randomly placed
 * block of {@code windowsize} consecutive entries of that dimension's sorted
 * index. The values of the chosen dimension in the conditional sample are
 * then compared against the values of the full data set using the
 * statistical test. The mean of the deviations is stored in
 * {@code subspace.contrast}.
 * <p>
 * Samples with fewer than 10 objects are redrawn until {@code MAX_RETRIES}
 * retries have been used; after that they are accepted as they are.
 * Iterations whose deviation is NaN are redrawn under a separate budget of
 * {@code MAX_RETRIES}; once that budget is used up, a NaN deviation is
 * counted as zero so that the loop is guaranteed to terminate.
 *
 * @param relation Relation to process
 * @param subspace Subspace
 * @param subspaceIndex Subspace indexes
 * @param random Random generator used to choose dimensions and index blocks
 */
private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
  final int card = subspace.cardinality();
  final double alpha1 = FastMath.pow(alpha, (1.0 / card));
  final int windowsize = (int) (relation.size() * alpha1);
  // Exclusive upper bound for the start offset of an index block.
  final int offsetBound = relation.size() - windowsize;
  final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;

  int retries = 0;
  int nanRetries = 0;
  double deviationSum = 0.0;
  for (int i = 0; i < m; i++) {
    // Choose a random set bit: step to the (tmp+1)-th set bit of the subspace.
    int chosen = -1;
    for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
      chosen = subspace.nextSetBit(chosen + 1);
    }
    // initialize sample
    DBIDs conditionalSample = relation.getDBIDs();
    for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
      if (j == chosen) {
        continue;
      }
      ArrayDBIDs sortedIndices = subspaceIndex.get(j);
      ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
      // initialize index block
      DBIDArrayIter iter = sortedIndices.iter();
      // Random.nextInt rejects a non-positive bound; when the window covers
      // the whole relation the only possible start offset is 0.
      iter.seek(offsetBound > 0 ? random.nextInt(offsetBound) : 0);
      for (int k = 0; k < windowsize; k++, iter.advance()) {
        // select index block
        indexBlock.add(iter);
      }
      conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
    }
    if (conditionalSample.size() < 10) {
      retries++;
      if (LOG.isDebugging()) {
        LOG.debug("Sample size very small. Retry no. " + retries);
      }
      if (retries >= MAX_RETRIES) {
        // Retry budget used up: continue with the small sample.
        LOG.warning("Too many retries, for small samples: " + retries);
      } else {
        i--;
        continue;
      }
    }
    // Project conditional set
    double[] sampleValues = new double[conditionalSample.size()];
    {
      int l = 0;
      for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
        sampleValues[l] = relation.get(iter).doubleValue(chosen);
        l++;
      }
    }
    // Project full set
    double[] fullValues = new double[relation.size()];
    {
      int l = 0;
      for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
        fullValues[l] = relation.get(iter).doubleValue(chosen);
        l++;
      }
    }
    double contrast = statTest.deviation(fullValues, sampleValues);
    if (Double.isNaN(contrast)) {
      LOG.warning("Contrast was NaN");
      nanRetries++;
      if (nanRetries < MAX_RETRIES) {
        i--;
        continue;
      }
      // Retrying without a bound would never terminate if the test yields
      // NaN for every draw; count the iteration as zero deviation instead.
      contrast = 0.0;
    }
    deviationSum += contrast;
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  subspace.contrast = deviationSum / m;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class CTLuRandomWalkEC method run.
/**
 * Run the algorithm.
 * <p>
 * Builds a pairwise weight matrix from the spatial distances and the
 * differences of the first attribute value, normalizes its columns, inverts
 * the resulting matrix, and scores every object by the geometric mean of
 * {@code VMath.angle} between its own matrix column and the columns of its
 * nearest spatial neighbors.
 *
 * @param spatial Spatial neighborhood relation
 * @param relation Attribute value relation
 * @return Outlier result
 */
public OutlierResult run(Relation<P> spatial, Relation<? extends NumberVector> relation) {
  DistanceQuery<P> distFunc = getDistanceFunction().instantiate(spatial);
  WritableDataStore<double[]> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
  WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);

  // Static array of IDs: the position of an ID is its matrix row/column.
  final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int size = ids.size();

  // Step 1: fill the relation matrix of the ec-graph and collect, per
  // object, its nearest neighbors by spatial distance.
  double[][] E = new double[size][size];
  KNNHeap heap = DBIDUtil.newHeap(k);
  int col = 0;
  for (DBIDIter id = ids.iter(); id.valid(); id.advance(), col++) {
    final double val = relation.get(id).doubleValue(0);
    assert (heap.size() == 0);
    int row = 0;
    for (DBIDIter n = ids.iter(); n.valid(); n.advance(), row++) {
      if (row != col) {
        final double distance = distFunc.distance(id, n);
        heap.insert(distance, n);
        final double weight;
        if (distance != 0) {
          final double diff = Math.abs(val - relation.get(n).doubleValue(0));
          final double exp = FastMath.exp(FastMath.pow(diff, alpha));
          // Implementation note: not inverting exp worked a lot better.
          // Therefore we diverge from the article here.
          weight = exp / distance;
        } else {
          LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
          weight = 0;
        }
        E[row][col] = weight;
      }
    }
    // Drain the kNN heap into a DBID array; this also empties it for the
    // next object.
    ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
    while (heap.size() > 0) {
      nids.add(heap.poll());
    }
    neighbors.put(id, nids);
  }

  // Step 2: normalize every column by its sum, and do the -c multiplication
  // in the same pass.
  final int columns = E[0].length;
  for (int c0 = 0; c0 < columns; c0++) {
    double sum = 0.0;
    for (int r = 0; r < E.length; r++) {
      sum += E[r][c0];
    }
    if (sum == 0) {
      sum = 1.0; // avoid dividing by zero for all-zero columns
    }
    for (int r = 0; r < E.length; r++) {
      E[r][c0] = -c * E[r][c0] / sum;
    }
  }

  // Step 3: add the identity matrix. The diagonal was never written above,
  // so it is still zero and can simply be set to one.
  assert (E.length == E[0].length);
  for (int d = 0; d < E[0].length; d++) {
    assert (E[d][d] == 0.0);
    E[d][d] = 1.0;
  }

  // Step 4: invert and scale by (1 - c).
  E = timesEquals(inverse(E), 1 - c);

  // Step 5: split the matrix into columns, one similarity vector per object.
  // Note: matrix times ith unit vector = ith column
  int pos = 0;
  for (DBIDIter id = ids.iter(); id.valid(); id.advance(), pos++) {
    similarityVectors.put(id, getCol(E, pos));
  }
  E = null; // the matrix is no longer needed

  // Step 6: compute the relevance scores between each object and its
  // neighbors.
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    final double[] own = similarityVectors.get(id);
    double gmean = 1.0;
    int cnt = 0;
    for (DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
      if (!DBIDUtil.equal(id, iter)) {
        gmean *= VMath.angle(own, similarityVectors.get(iter));
        cnt++;
      }
    }
    // cnt-th root of the product
    final double score = FastMath.pow(gmean, 1.0 / cnt);
    minmax.put(score);
    scores.putDouble(id, score);
  }

  DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
  return new OutlierResult(scoreMeta, scoreResult);
}
Aggregations