Usage of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the project elki (elki-project):
the run method of the class KNNKernelDensityMinimaClustering.
/**
 * Run the clustering algorithm on a data relation.
 *
 * Operates in two passes: (1) estimate a kernel density for every object,
 * using the distance to its k nearest neighbors in the sorted dimension as
 * bandwidth; (2) scan the densities with a sliding window for local minima,
 * and split clusters at those minima.
 *
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<ClusterModel> run(Relation<V> relation) {
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
  final int size = ids.size();
  // Sort by the sole dimension
  ids.sort(new VectorUtil.SortDBIDsBySingleDimension(relation, dim));
  // Density storage.
  WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  DBIDArrayIter iter = ids.iter(), iter2 = ids.iter();
  StepProgress sprog = LOG.isVerbose() ? new StepProgress("Clustering steps", 2) : null;
  LOG.beginStep(sprog, 1, "Kernel density estimation.");
  {
    // Holds one-dimensional distances to up to k predecessors and k successors.
    double[] scratch = new double[2 * k];
    iter.seek(0);
    for (int i = 0; i < size; i++, iter.advance()) {
      // Current value.
      final double curv = relation.get(iter).doubleValue(dim);
      // Window of up to k neighbors on each side, clipped at the array ends.
      final int pre = Math.max(i - k, 0), prek = i - pre;
      final int pos = Math.min(i + k, size - 1), posk = pos - i;
      // Distances to preceding neighbors (non-negative, data is sorted):
      iter2.seek(pre);
      for (int j = 0; j < prek; j++, iter2.advance()) {
        scratch[j] = curv - relation.get(iter2).doubleValue(dim);
      }
      assert (iter2.getOffset() == i);
      // Skip the current object itself.
      iter2.advance();
      // Distances to following neighbors:
      for (int j = 0; j < posk; j++, iter2.advance()) {
        scratch[prek + j] = relation.get(iter2).doubleValue(dim) - curv;
      }
      assert (prek + posk >= k);
      // k-nearest-neighbor distance, used as the kernel bandwidth.
      double kdist = QuickSelect.quickSelect(scratch, 0, prek + posk, k);
      switch(mode) {
      case BALLOON:
      {
        // Balloon estimator: density at the current point, with its own
        // kNN distance as bandwidth.
        double dens = 0.;
        if (kdist > 0.) {
          for (int j = 0; j < prek + posk; j++) {
            dens += kernel.density(scratch[j] / kdist);
          }
        } else {
          // kdist == 0 implies duplicate coordinates: infinite density.
          dens = Double.POSITIVE_INFINITY;
        }
        assert (iter.getOffset() == i);
        density.putDouble(iter, dens);
        break;
      }
      case SAMPLE:
      {
        // Sample-point estimator: the current object contributes to the
        // density of each of its neighbors, using its own bandwidth.
        if (kdist > 0.) {
          iter2.seek(pre);
          for (int j = 0; j < prek; j++, iter2.advance()) {
            double delta = curv - relation.get(iter2).doubleValue(dim);
            density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
          }
          assert (iter2.getOffset() == i);
          iter2.advance();
          for (int j = 0; j < posk; j++, iter2.advance()) {
            double delta = relation.get(iter2).doubleValue(dim) - curv;
            density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
          }
        } else {
          // Zero bandwidth: only exact duplicates (delta == 0) are affected;
          // they receive infinite density.
          iter2.seek(pre);
          for (int j = 0; j < prek; j++, iter2.advance()) {
            double delta = curv - relation.get(iter2).doubleValue(dim);
            if (!(delta > 0.)) {
              density.putDouble(iter2, Double.POSITIVE_INFINITY);
            }
          }
          assert (iter2.getOffset() == i);
          iter2.advance();
          for (int j = 0; j < posk; j++, iter2.advance()) {
            double delta = relation.get(iter2).doubleValue(dim) - curv;
            if (!(delta > 0.)) {
              density.putDouble(iter2, Double.POSITIVE_INFINITY);
            }
          }
        }
        break;
      }
      default:
        throw new UnsupportedOperationException("Unknown mode specified.");
      }
    }
  }
  LOG.beginStep(sprog, 2, "Local minima detection.");
  Clustering<ClusterModel> clustering = new Clustering<>("onedimensional-kde-clustering", "One-Dimensional clustering using kernel density estimation.");
  {
    // Ring buffer over the densities of the last 2*minwindow+1 objects.
    double[] scratch = new double[2 * minwindow + 1];
    int begin = 0;
    int halfw = (minwindow + 1) >> 1;
    iter.seek(0);
    // Fill initial buffer.
    for (int i = 0; i < size; i++, iter.advance()) {
      // m: write position in the ring buffer; t: candidate minimum position
      // (the object minwindow+1 steps behind the current one).
      final int m = i % scratch.length, t = (i - minwindow - 1) % scratch.length;
      scratch[m] = density.doubleValue(iter);
      // NOTE(review): this skips the iteration i == scratch.length; possibly
      // ">=" was intended here — confirm against the published algorithm.
      if (i > scratch.length) {
        // Candidate is a local minimum if it is strictly below every other
        // density in the window.
        double min = Double.POSITIVE_INFINITY;
        for (int j = 0; j < scratch.length; j++) {
          if (j != t && scratch[j] < min) {
            min = scratch[j];
          }
        }
        // Local minimum:
        if (scratch[t] < min) {
          int end = i - minwindow + 1;
          {
            // Test on which side the kNN is; assign the boundary object to
            // the side its neighbors are closer to.
            iter2.seek(end);
            double curv = relation.get(iter2).doubleValue(dim);
            iter2.seek(end - halfw);
            double left = relation.get(iter2).doubleValue(dim) - curv;
            iter2.seek(end + halfw);
            double right = curv - relation.get(iter2).doubleValue(dim);
            if (left < right) {
              end++;
            }
          }
          // Emit the cluster [begin, end) and start a new one.
          iter2.seek(begin);
          ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
          for (int j = 0; j < end - begin; j++, iter2.advance()) {
            cids.add(iter2);
          }
          clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
          begin = end;
        }
      }
    }
    // Extract last cluster
    int end = size;
    iter2.seek(begin);
    ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
    for (int j = 0; j < end - begin; j++, iter2.advance()) {
      cids.add(iter2);
    }
    clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
  }
  LOG.ensureCompleted(sprog);
  return clustering;
}
Usage of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the project elki (elki-project):
the chooseInitialMedoids method of the class FarthestSumPointsInitialMeans.
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  @SuppressWarnings("unchecked")
  final Relation<O> relation = (Relation<O>) distQ.getRelation();
  // Running sum of distances from each object to the medoids picked so far.
  WritableDoubleDataStore distsum = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  ArrayModifiableDBIDs medoids = DBIDUtil.newArray(k);
  // Seed with a random object; it is discarded again when dropfirst is set.
  DBIDRef first = DBIDUtil.randomSample(ids, rnd);
  medoids.add(first);
  DBIDVar lastAdded = DBIDUtil.newVar(first);
  DBIDVar farthest = DBIDUtil.newVar(first);
  for (int i = dropfirst ? 0 : 1; i < k; i++) {
    // Select the object with the largest accumulated distance to all
    // previously chosen medoids.
    double bestScore = Double.NEGATIVE_INFINITY;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      final double known = distsum.doubleValue(it);
      if (known != known) {
        // NaN marks objects that were already selected as medoids.
        continue;
      }
      final double score = known + distQ.distance(lastAdded, it);
      // Don't store distance to first mean, when it will be dropped below.
      if (i > 0) {
        distsum.putDouble(it, score);
      }
      if (score > bestScore) {
        bestScore = score;
        farthest.set(it);
      }
    }
    // Add new mean:
    if (i == 0) {
      // Discard the temporary random seed.
      medoids.clear();
    }
    // Mark as chosen, so it cannot be selected twice.
    distsum.putDouble(farthest, Double.NaN);
    lastAdded.set(farthest);
    medoids.add(farthest);
  }
  distsum.destroy();
  return medoids;
}
Usage of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the project elki (elki-project):
the computeOutlierScores method of the class DBOutlierScore.
@Override
protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d) {
  DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  RangeQuery<O> rangeQuery = database.getRangeQuery(distanceQuery);
  final Relation<O> rel = distanceQuery.getRelation();
  // Total object count, as double for exact fractional division below.
  final double size = rel.size();
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(rel.getDBIDs(), DataStoreFactory.HINT_STATIC);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("DBOutlier scores", rel.size(), LOG) : null;
  // TODO: use bulk when implemented.
  for (DBIDIter iter = rel.iterDBIDs(); iter.valid(); iter.advance()) {
    // Fraction of all objects falling into the d-neighborhood of this object.
    final double fraction = rangeQuery.getRangeForDBID(iter, d).size() / size;
    // Few neighbors = high outlier score.
    scores.putDouble(iter, 1.0 - fraction);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  return scores;
}
Usage of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the project elki (elki-project):
the run method of the class KNNDD.
/**
 * Runs the algorithm in the timed evaluation part.
 *
 * Scores each object by the ratio of its own kNN distance to the kNN
 * distance of its k-th nearest neighbor (kNN data description). A score
 * well above 1 indicates an outlier.
 *
 * @param relation Data relation
 * @return Outlier result with scores in [0, +inf)
 */
public OutlierResult run(Relation<O> relation) {
  final DistanceQuery<O> distanceQuery = relation.getDistanceQuery(getDistanceFunction());
  final KNNQuery<O> knnQuery = relation.getKNNQuery(distanceQuery, k);
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
  // Temporary stores: kNN distance, and the k-th (farthest) neighbor of each object.
  WritableDoubleDataStore knnDist = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  WritableDBIDDataStore neighbor = DataStoreUtil.makeDBIDStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  DBIDVar var = DBIDUtil.newVar();
  // Find nearest neighbors, and store the distances.
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    final KNNList knn = knnQuery.getKNNForDBID(it, k);
    knnDist.putDouble(it, knn.getKNNDistance());
    // Remember the last (k-th) entry of the kNN list.
    neighbor.put(it, knn.assignVar(knn.size() - 1, var));
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  prog = LOG.isVerbose() ? new FiniteProgress("kNN distance descriptor", relation.size(), LOG) : null;
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    // Distance
    double d = knnDist.doubleValue(it);
    // Distance of neighbor
    double nd = knnDist.doubleValue(neighbor.assignVar(it, var));
    // Edge cases: nd == 0 but d > 0 yields infinity; d == nd == 0
    // (duplicate points) yields the neutral score 1.
    double knndd = nd > 0 ? d / nd : d > 0 ? Double.POSITIVE_INFINITY : 1.;
    // Use the primitive putDouble (consistent with above; avoids the
    // boxing/deprecated put(DBIDRef, double) variant).
    scores.putDouble(it, knndd);
    minmax.put(knndd);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Data Descriptor", "knndd-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
  return new OutlierResult(meta, scoreres);
}
Usage of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the project elki (elki-project):
the run method of the class GaussianModel.
/**
 * Run the algorithm
 *
 * Fits a single multivariate Gaussian to the data and scores each object by
 * its probability density under that model.
 *
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
  DoubleMinMax minmax = new DoubleMinMax();
  // Storage for the resulting scores.
  WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
  // Estimate the model: mean vector and population covariance matrix.
  CovarianceMatrix cmat = CovarianceMatrix.make(relation);
  double[] mean = cmat.getMeanVector(relation).toArray();
  double[][] covariance = cmat.destroyToPopulationMatrix();
  // Inverse covariance matrix, for the Mahalanobis distance.
  double[][] icov = inverse(covariance);
  // Normalization constant of the multivariate Gaussian PDF.
  double det = new LUDecomposition(covariance).det();
  final double norm = 1.0 / FastMath.sqrt(MathUtil.powi(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * det);
  // Score each object by its Gaussian probability density.
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double[] centered = minusEquals(relation.get(iditer).toArray(), mean);
    // Squared Mahalanobis distance, then the Gaussian PDF value.
    final double mahalanobisSq = transposeTimesTimes(centered, icov, centered);
    final double prob = norm * FastMath.exp(-mahalanobisSq * .5);
    minmax.put(prob);
    oscores.putDouble(iditer, prob);
  }
  final OutlierScoreMeta meta;
  if (!invert) {
    // Raw densities: low density means outlier, hence inverted semantics.
    meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
  } else {
    // Rescale so 1 is the strongest outlier and 0 the strongest inlier.
    double max = minmax.getMax() != 0 ? minmax.getMax() : 1.;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      oscores.putDouble(iditer, (max - oscores.doubleValue(iditer)) / max);
    }
    meta = new BasicOutlierScoreMeta(0.0, 1.0);
  }
  DoubleRelation res = new MaterializedDoubleRelation("Gaussian Model Outlier Score", "gaussian-model-outlier", oscores, relation.getDBIDs());
  return new OutlierResult(meta, res);
}
Aggregations