Use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
The class LibSVMOneClassOutlierDetection, method run.
/**
* Run one-class SVM.
*
* @param relation Data relation
* @return Outlier result.
*/
public OutlierResult run(Relation<V> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  svm.svm_set_print_string_function(LOG_HELPER);

  svm_parameter param = new svm_parameter();
  param.svm_type = svm_parameter.ONE_CLASS;
  param.kernel_type = svm_parameter.LINEAR;
  param.degree = 3;
  switch(kernel) {
  case LINEAR:
    param.kernel_type = svm_parameter.LINEAR;
    break;
  case QUADRATIC:
    param.kernel_type = svm_parameter.POLY;
    param.degree = 2;
    break;
  case CUBIC:
    param.kernel_type = svm_parameter.POLY;
    param.degree = 3;
    break;
  case RBF:
    param.kernel_type = svm_parameter.RBF;
    break;
  case SIGMOID:
    param.kernel_type = svm_parameter.SIGMOID;
    break;
  default:
    throw new AbortException("Invalid kernel parameter: " + kernel);
  }
  // TODO: expose additional parameters to the end user!
  param.nu = nu;
  param.coef0 = 0.;
  param.cache_size = 10000;
  param.C = 1;
  param.eps = 1e-4; // not used by one-class?
  param.p = 0.1; // not used by one-class?
  param.shrinking = 0;
  param.probability = 0;
  param.nr_weight = 0;
  param.weight_label = new int[0];
  param.weight = new double[0];
  param.gamma = 1. / dim;

  // Transform data:
  svm_problem prob = new svm_problem();
  prob.l = relation.size();
  prob.x = new svm_node[prob.l][];
  prob.y = new double[prob.l];
  {
    DBIDIter iter = ids.iter();
    for (int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
      V vec = relation.get(iter);
      // TODO: support compact sparse vectors, too!
      svm_node[] x = new svm_node[dim];
      for (int d = 0; d < dim; d++) {
        x[d] = new svm_node();
        x[d].index = d + 1;
        x[d].value = vec.doubleValue(d);
      }
      prob.x[i] = x;
      prob.y[i] = +1;
    }
  }

  if (LOG.isVerbose()) {
    LOG.verbose("Training one-class SVM...");
  }
  String err = svm.svm_check_parameter(prob, param);
  if (err != null) {
    LOG.warning("svm_check_parameter: " + err);
  }
  svm_model model = svm.svm_train(prob, param);

  if (LOG.isVerbose()) {
    LOG.verbose("Predicting...");
  }
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
  DoubleMinMax mm = new DoubleMinMax();
  {
    DBIDIter iter = ids.iter();
    double[] buf = new double[svm.svm_get_nr_class(model)];
    for (int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
      V vec = relation.get(iter);
      svm_node[] x = new svm_node[dim];
      for (int d = 0; d < dim; d++) {
        x[d] = new svm_node();
        x[d].index = d + 1;
        x[d].value = vec.doubleValue(d);
      }
      svm.svm_predict_values(model, x, buf);
      double score = -buf[0]; // / param.gamma; // Heuristic rescaling, sorry.
      // Unfortunately, libsvm one-class currently yields a binary decision.
      scores.putDouble(iter, score);
      mm.put(score);
    }
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("One-Class SVM Decision", "svm-outlier", scores, ids);
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.);
  return new OutlierResult(scoreMeta, scoreResult);
}
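The tail of this method shows the standard ELKI scoring pattern: allocate a WritableDoubleDataStore over the relation's DBIDs, write one primitive double per object, and track the observed value range in a DoubleMinMax for the score metadata. A minimal self-contained sketch of just that pattern; the static ID range and the random scores are illustrative stand-ins, not part of the algorithm above:

import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;

public class ScoreStorePattern {
  public static void main(String[] args) {
    // Illustrative standalone IDs, just to exercise the DataStore API:
    DBIDRange ids = DBIDUtil.generateStaticDBIDRange(100);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      double score = Math.random(); // dummy score; the method above uses -buf[0]
      scores.putDouble(iter, score); // primitive double per object, no boxing
      mm.put(score); // track min/max for the outlier score metadata
    }
    System.out.println("score range: [" + mm.getMin() + ", " + mm.getMax() + "]");
  }
}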
Use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
The class KMeansElkan, method run.
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("k-Means Clustering", "kmeans-clustering");
  }
  // Choose initial means
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
  }
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

  // Setup cluster assignment store
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int i = 0; i < k; i++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  // Elkan bounds
  WritableDoubleDataStore upper = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.POSITIVE_INFINITY);
  WritableDataStore<double[]> lower = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, double[].class);
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    lower.put(it, new double[k]); // Filled with 0.
  }
  // Storage for updated means:
  final int dim = means[0].length;
  double[][] sums = new double[k][dim];
  // Cluster separation
  double[] sep = new double[k];
  // Cluster distances
  double[][] cdist = new double[k][k];

  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
  LongStatistic rstat = LOG.isStatistics() ? new LongStatistic(this.getClass().getName() + ".reassignments") : null;
  int iteration = 0;
  for (; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    int changed;
    if (iteration == 0) {
      changed = initialAssignToNearestCluster(relation, means, sums, clusters, assignment, upper, lower);
    } else {
      recomputeSeperation(means, sep, cdist); // #1
      changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, cdist, upper, lower);
    }
    if (rstat != null) {
      rstat.setLong(changed);
      LOG.statistics(rstat);
    }
    // Stop if no cluster assignment changed.
    if (changed == 0) {
      break;
    }
    // Recompute means.
    for (int i = 0; i < k; i++) {
      final int s = clusters.get(i).size();
      timesEquals(sums[i], s > 0 ? 1. / s : 1.);
    }
    maxMoved(means, sums, sep); // Overwrites sep
    updateBounds(relation, assignment, upper, lower, sep);
    for (int i = 0; i < k; i++) {
      final int s = clusters.get(i).size();
      System.arraycopy(sums[i], 0, means[i], 0, dim);
      // Restore to sum for next iteration
      timesEquals(sums[i], s > 0 ? s : 1.);
    }
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }
  upper.destroy();
  lower.destroy();

  // Wrap result
  double totalvariance = 0.;
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int i = 0; i < clusters.size(); i++) {
    DBIDs ids = clusters.get(i);
    if (ids.size() == 0) {
      continue;
    }
    double[] mean = means[i];
    double varsum = 0.;
    if (varstat) {
      DoubleVector mvec = DoubleVector.wrap(mean);
      for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        varsum += distanceFunction.distance(mvec, relation.get(it));
      }
      totalvariance += varsum;
    }
    KMeansModel model = new KMeansModel(mean, varsum);
    result.addToplevelCluster(new Cluster<>(ids, model));
  }
  if (LOG.isStatistics() && varstat) {
    LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance));
  }
  return result;
}
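The upper and lower stores hold Elkan's triangle-inequality bounds: when center i moves by sep[i], the true distance from any point to that center changes by at most sep[i], so the assigned center's upper bound must grow and every lower bound must shrink by that amount. The updateBounds helper is not shown above; a plausible sketch under that reasoning, using the same store types as above (the actual ELKI implementation may differ in detail):

// Plausible sketch of updateBounds (not shown above): widen each point's
// bounds by how far the centers moved, per the triangle inequality.
protected void updateBounds(Relation<V> relation, WritableIntegerDataStore assignment,
    WritableDoubleDataStore upper, WritableDataStore<double[]> lower, double[] move) {
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    // Upper bound to the assigned center may grow by that center's movement:
    upper.increment(it, move[assignment.intValue(it)]);
    // Lower bounds to all centers may shrink by each center's movement:
    double[] l = lower.get(it);
    for (int i = 0; i < k; i++) {
      l[i] -= move[i];
    }
  }
}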
Use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
The class LSDBC, method run.
/**
* Run the LSDBC algorithm
*
* @param database Database to process
* @param relation Data relation
* @return Clustering result
*/
public Clustering<Model> run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LSDBC", 3) : null;
  final int dim = RelationUtil.dimensionality(relation);
  final double factor = FastMath.pow(2., alpha / dim);
  final DBIDs ids = relation.getDBIDs();

  LOG.beginStep(stepprog, 1, "Materializing kNN neighborhoods");
  KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

  LOG.beginStep(stepprog, 2, "Sorting by density");
  WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  fillDensities(knnq, ids, dens);
  ArrayModifiableDBIDs sids = DBIDUtil.newArray(ids);
  sids.sort(new DataStoreUtil.AscendingByDoubleDataStore(dens));

  LOG.beginStep(stepprog, 3, "Computing clusters");
  // Setup progress logging
  final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("LSDBC Clustering", ids.size(), LOG) : null;
  final IndefiniteProgress clusprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters found", LOG) : null;
  // (Temporary) store the cluster ID assigned.
  final WritableIntegerDataStore clusterids = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, UNPROCESSED);
  // Note: these are not exact, as objects may be stolen from noise.
  final IntArrayList clustersizes = new IntArrayList();
  clustersizes.add(0); // Unprocessed dummy value.
  clustersizes.add(0); // Noise counter.

  // Implementation Note: using Integer objects should result in
  // reduced memory use in the HashMap!
  int clusterid = NOISE + 1;
  // Iterate over all objects in the database.
  for (DBIDIter id = sids.iter(); id.valid(); id.advance()) {
    // Skip already processed ids.
    if (clusterids.intValue(id) != UNPROCESSED) {
      continue;
    }
    // Evaluate Neighborhood predicate
    final KNNList neighbors = knnq.getKNNForDBID(id, k);
    // Evaluate Core-Point predicate:
    if (isLocalMaximum(neighbors.getKNNDistance(), neighbors, dens)) {
      double mindens = factor * neighbors.getKNNDistance();
      clusterids.putInt(id, clusterid);
      clustersizes.add(expandCluster(clusterid, clusterids, knnq, neighbors, mindens, progress));
      // start next cluster on next iteration.
      ++clusterid;
      if (clusprogress != null) {
        clusprogress.setProcessed(clusterid, LOG);
      }
    } else {
      // otherwise, it's a noise point
      clusterids.putInt(id, NOISE);
      clustersizes.set(NOISE, clustersizes.getInt(NOISE) + 1);
    }
    // We've completed this element
    LOG.incrementProcessed(progress);
  }
  // Finish progress logging.
  LOG.ensureCompleted(progress);
  LOG.setCompleted(clusprogress);
  LOG.setCompleted(stepprog);

  // Transform cluster ID mapping into a clustering result:
  ArrayList<ArrayModifiableDBIDs> clusterlists = new ArrayList<>(clusterid);
  // add storage containers for clusters
  for (int i = 0; i < clustersizes.size(); i++) {
    clusterlists.add(DBIDUtil.newArray(clustersizes.getInt(i)));
  }
  // do the actual inversion
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    // Negative values are non-core points:
    int cid = clusterids.intValue(id);
    int cluster = Math.abs(cid);
    clusterlists.get(cluster).add(id);
  }
  clusterids.destroy();

  Clustering<Model> result = new Clustering<>("LSDBC", "lsdbc-clustering");
  for (int cid = NOISE; cid < clusterlists.size(); cid++) {
    boolean isNoise = (cid == NOISE);
    Cluster<Model> c = new Cluster<Model>(clusterlists.get(cid), isNoise, ClusterModel.CLUSTER);
    result.addToplevelCluster(c);
  }
  return result;
}
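fillDensities is not shown above. A plausible minimal version, assuming the sort key is simply each object's kNN distance (a small kNN distance means a dense neighborhood, so the ascending sort processes the densest points first):

// Plausible sketch of fillDensities (not shown above): store each object's
// kNN distance; AscendingByDoubleDataStore then sorts densest-first.
private void fillDensities(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore dens) {
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final KNNList neighbors = knnq.getKNNForDBID(iter, k);
    dens.putDouble(iter, neighbors.getKNNDistance());
  }
}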
Use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
The class HDBSCANLinearMemory, method run.
/**
* Run the algorithm
*
* @param db Database
* @param relation Relation
* @return Clustering hierarchy
*/
public PointerDensityHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  final DistanceQuery<O> distQ = db.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnQ = db.getKNNQuery(distQ, minPts);
  // We need array addressing later.
  final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());

  // 1. Compute the core distances
  // minPts + 1: ignore query point.
  final WritableDoubleDataStore coredists = computeCoreDists(ids, knnQ, minPts);
  final int numedges = ids.size() - 1;
  DoubleLongHeap heap = new DoubleLongMinHeap(numedges);

  // 2. Build spanning tree.
  FiniteProgress mprog = LOG.isVerbose() ? new FiniteProgress("Computing minimum spanning tree (n-1 edges)", numedges, LOG) : null;
  PrimsMinimumSpanningTree.processDense(ids, //
      new HDBSCANAdapter(ids, coredists, distQ), //
      new HeapMSTCollector(heap, mprog, LOG));
  LOG.ensureCompleted(mprog);

  // Storage for pointer representation:
  WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY);
  convertToPointerRepresentation(ids, heap, pi, lambda);
  return new PointerDensityHierarchyRepresentationResult(ids, pi, lambda, distQ.getDistanceFunction().isSquared(), coredists);
}
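computeCoreDists is inherited from the abstract base class and not shown here. A plausible sketch, assuming the HDBSCAN core distance is the kNN distance at k = minPts; exactly how the query point itself is counted may differ in the actual implementation, per the "minPts + 1" remark above:

// Plausible sketch of computeCoreDists (not shown above): the core distance
// of each object is its distance to the minPts-th nearest neighbor.
protected WritableDoubleDataStore computeCoreDists(DBIDs ids, KNNQuery<O> knnQ, int minPts) {
  WritableDoubleDataStore coredists = DataStoreUtil.makeDoubleStorage(ids,
      DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    coredists.putDouble(iter, knnQ.getKNNForDBID(iter, minPts).getKNNDistance());
  }
  return coredists;
}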
Use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
The class SLINK, method run.
/**
* Performs the SLINK algorithm on the given database.
*
* @param database Database to process
* @param relation Data relation to use
* @return Pointer representation of the computed hierarchy
*/
public PointerHierarchyRepresentationResult run(Database database, Relation<O> relation) {
  DBIDs ids = relation.getDBIDs();
  WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY);
  // Temporary storage for m.
  WritableDoubleDataStore m = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);

  // To allow CLINK logger override
  final Logging log = getLogger();
  FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Running SLINK", ids.size(), log) : null;

  ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
  // First element is trivial/special:
  DBIDArrayIter id = aids.iter(), it = aids.iter();
  // Step 1: initialize
  for (; id.valid(); id.advance()) {
    // P(n+1) = n+1:
    pi.put(id, id);
    // L(n+1) = infinity already.
  }
  // First element is finished already (start at seek(1) below!)
  log.incrementProcessed(progress);

  // Optimized branch
  if (getDistanceFunction() instanceof PrimitiveDistanceFunction) {
    PrimitiveDistanceFunction<? super O> distf = (PrimitiveDistanceFunction<? super O>) getDistanceFunction();
    for (id.seek(1); id.valid(); id.advance()) {
      step2primitive(id, it, id.getOffset(), relation, distf, m);
      process(id, aids, it, id.getOffset(), pi, lambda, m); // SLINK or CLINK
      log.incrementProcessed(progress);
    }
  } else {
    // Fallback branch
    DistanceQuery<O> distQ = database.getDistanceQuery(relation, getDistanceFunction());
    for (id.seek(1); id.valid(); id.advance()) {
      step2(id, it, id.getOffset(), distQ, m);
      process(id, aids, it, id.getOffset(), pi, lambda, m); // SLINK or CLINK
      log.incrementProcessed(progress);
    }
  }
  log.ensureCompleted(progress);
  // We don't need m anymore.
  m.destroy();
  m = null;
  return new PointerHierarchyRepresentationResult(ids, pi, lambda, getDistanceFunction().isSquared());
}
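The returned pi/lambda pair is Sibson's pointer representation of the dendrogram: each object points to a parent (pi) and records the height (lambda) at which it merges into that parent. A minimal sketch of consuming the result; slink, database, and relation are assumed to be set up elsewhere:

// Minimal sketch of reading the pointer representation returned above.
PointerHierarchyRepresentationResult result = slink.run(database, relation);
DBIDDataStore pi = result.getParentStore(); // parent pointer per object
DoubleDataStore lambda = result.getParentDistanceStore(); // merge height per object
DBIDVar parent = DBIDUtil.newVar();
for (DBIDIter it = result.getDBIDs().iter(); it.valid(); it.advance()) {
  pi.assignVar(it, parent); // parent of this object in the dendrogram
  double height = lambda.doubleValue(it); // distance at which the merge happens
  // e.g., cut the dendrogram: objects with height above a threshold start new clusters
}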