Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
The class LibSVMOneClassOutlierDetection, method run.
/**
* Run one-class SVM.
*
* @param relation Data relation
* @return Outlier result.
*/
public OutlierResult run(Relation<V> relation) {
final int dim = RelationUtil.dimensionality(relation);
final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
svm.svm_set_print_string_function(LOG_HELPER);
svm_parameter param = new svm_parameter();
param.svm_type = svm_parameter.ONE_CLASS;
param.kernel_type = svm_parameter.LINEAR;
param.degree = 3;
switch(kernel) {
case LINEAR:
param.kernel_type = svm_parameter.LINEAR;
break;
case QUADRATIC:
param.kernel_type = svm_parameter.POLY;
param.degree = 2;
break;
case CUBIC:
param.kernel_type = svm_parameter.POLY;
param.degree = 3;
break;
case RBF:
param.kernel_type = svm_parameter.RBF;
break;
case SIGMOID:
param.kernel_type = svm_parameter.SIGMOID;
break;
default:
throw new AbortException("Invalid kernel parameter: " + kernel);
}
// TODO: expose additional parameters to the end user!
param.nu = nu;
param.coef0 = 0.;
param.cache_size = 10000;
param.C = 1;
// not used by one-class?
param.eps = 1e-4;
// not used by one-class?
param.p = 0.1;
param.shrinking = 0;
param.probability = 0;
param.nr_weight = 0;
param.weight_label = new int[0];
param.weight = new double[0];
param.gamma = 1. / dim;
// Transform data:
svm_problem prob = new svm_problem();
prob.l = relation.size();
prob.x = new svm_node[prob.l][];
prob.y = new double[prob.l];
{
DBIDIter iter = ids.iter();
for (int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
V vec = relation.get(iter);
// TODO: support compact sparse vectors, too!
svm_node[] x = new svm_node[dim];
for (int d = 0; d < dim; d++) {
x[d] = new svm_node();
x[d].index = d + 1;
x[d].value = vec.doubleValue(d);
}
prob.x[i] = x;
prob.y[i] = +1;
}
}
if (LOG.isVerbose()) {
LOG.verbose("Training one-class SVM...");
}
String err = svm.svm_check_parameter(prob, param);
if (err != null) {
LOG.warning("svm_check_parameter: " + err);
}
svm_model model = svm.svm_train(prob, param);
if (LOG.isVerbose()) {
LOG.verbose("Predicting...");
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
DoubleMinMax mm = new DoubleMinMax();
{
DBIDIter iter = ids.iter();
double[] buf = new double[svm.svm_get_nr_class(model)];
for (int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
V vec = relation.get(iter);
svm_node[] x = new svm_node[dim];
for (int d = 0; d < dim; d++) {
x[d] = new svm_node();
x[d].index = d + 1;
x[d].value = vec.doubleValue(d);
}
svm.svm_predict_values(model, x, buf);
double score = -buf[0]; // / param.gamma; // Heuristic rescaling, sorry.
// Unfortunately, libsvm one-class currently yields a binary decision.
scores.putDouble(iter, score);
mm.put(score);
}
}
DoubleRelation scoreResult = new MaterializedDoubleRelation("One-Class SVM Decision", "svm-outlier", scores, ids);
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.);
return new OutlierResult(scoreMeta, scoreResult);
}
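Both the training loop and the prediction loop above repeat the same dense-vector-to-svm_node[] conversion. A minimal sketch of how that loop could be factored into a helper; the class and method names here are hypothetical, not part of ELKI or libsvm:

import libsvm.svm_node;

import de.lmu.ifi.dbs.elki.data.NumberVector;

final class SvmNodeConversion {
  /**
   * Convert a dense vector into the 1-indexed libsvm node array used above.
   */
  static svm_node[] toSvmNodes(NumberVector vec, int dim) {
    svm_node[] x = new svm_node[dim];
    for (int d = 0; d < dim; d++) {
      x[d] = new svm_node();
      // libsvm feature indices start at 1.
      x[d].index = d + 1;
      x[d].value = vec.doubleValue(d);
    }
    return x;
  }
}

With such a helper, the TODO about supporting compact sparse vectors would only need to be addressed in one place.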
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
The class NaiveAgglomerativeHierarchicalClustering4, method run.
/**
* Run the algorithm
*
* @param db Database
* @param relation Relation
* @return Clustering hierarchy
*/
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int size = ids.size();
if (size > 0x10000) {
throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
}
if (Linkage.SINGLE.equals(linkage)) {
LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
}
// Compute the initial (lower triangular) distance matrix.
double[] scratch = new double[triangleSize(size)];
DBIDArrayIter ix = ids.iter(), iy = ids.iter(), ij = ids.iter();
// Position counter - must agree with triangleSize()!
int pos = 0;
boolean square = Linkage.WARD.equals(linkage) && !getDistanceFunction().isSquared();
for (int x = 0; ix.valid(); x++, ix.advance()) {
iy.seek(0);
for (int y = 0; y < x; y++, iy.advance()) {
scratch[pos] = dq.distance(ix, iy);
// Ward uses variances -- i.e. squared values
if (square) {
scratch[pos] *= scratch[pos];
}
pos++;
}
}
// Initialize space for result:
WritableDBIDDataStore parent = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
WritableDoubleDataStore height = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
parent.put(it, it);
height.put(it, Double.POSITIVE_INFINITY);
csize.put(it, 1);
}
// Repeat until everything has been merged into a single cluster:
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
for (int i = 1; i < size; i++) {
double min = Double.POSITIVE_INFINITY;
int minx = -1, miny = -1;
for (ix.seek(0); ix.valid(); ix.advance()) {
if (height.doubleValue(ix) < Double.POSITIVE_INFINITY) {
continue;
}
final int xbase = triangleSize(ix.getOffset());
for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
if (height.doubleValue(iy) < Double.POSITIVE_INFINITY) {
continue;
}
final int idx = xbase + iy.getOffset();
if (scratch[idx] <= min) {
min = scratch[idx];
minx = ix.getOffset();
miny = iy.getOffset();
}
}
}
assert (minx >= 0 && miny >= 0);
// Avoid allocating memory, by reusing existing iterators:
ix.seek(minx);
iy.seek(miny);
// Perform merge in data structure: x -> y
// Since y < x, prefer keeping y, dropping x.
int sizex = csize.intValue(ix), sizey = csize.intValue(iy);
height.put(ix, min);
parent.put(ix, iy);
csize.put(iy, sizex + sizey);
// Update distance matrix. Note: miny < minx
final int xbase = triangleSize(minx), ybase = triangleSize(miny);
// Write to (y, j), with j < y
for (ij.seek(0); ij.getOffset() < miny; ij.advance()) {
if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
continue;
}
final int sizej = csize.intValue(ij);
scratch[ybase + ij.getOffset()] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[ybase + ij.getOffset()], sizej, min);
}
// Write to (j, y), with y < j < x
for (ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) {
if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
continue;
}
final int jbase = triangleSize(ij.getOffset());
final int sizej = csize.intValue(ij);
scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + miny], sizej, min);
}
// Write to (j, y), with y < x < j
for (ij.seek(minx + 1); ij.valid(); ij.advance()) {
if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
continue;
}
final int jbase = triangleSize(ij.getOffset());
final int sizej = csize.intValue(ij);
scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min);
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
return new PointerHierarchyRepresentationResult(ids, parent, height, dq.getDistanceFunction().isSquared());
}
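The scratch array stores the pairwise distances as a packed lower-triangular matrix, and triangleSize() (not shown in this snippet) supplies the row offsets. A small sketch of the indexing convention implied by the fill loop above; ELKI's actual helper may differ in name and detail:

final class TriangleIndex {
  /**
   * Number of entries strictly below the diagonal in the first x rows,
   * i.e. the offset at which row x starts in the packed array.
   */
  static int triangleSize(int x) {
    return (x * (x - 1)) >>> 1;
  }

  /** Flat index of the matrix entry (x, y) with y < x. */
  static int index(int x, int y) {
    assert y < x;
    return triangleSize(x) + y;
  }
}

This also explains the 0x10000 limit in the abort message: at roughly 2^16 instances, triangleSize(size) approaches 2^31 entries and the int index overflows.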
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
The class HDBSCANLinearMemory, method run.
/**
* Run the algorithm
*
* @param db Database
* @param relation Relation
* @return Clustering hierarchy
*/
public PointerDensityHierarchyRepresentationResult run(Database db, Relation<O> relation) {
final DistanceQuery<O> distQ = db.getDistanceQuery(relation, getDistanceFunction());
final KNNQuery<O> knnQ = db.getKNNQuery(distQ, minPts);
// We need array addressing later.
final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
// 1. Compute the core distances
// minPts + 1: ignore query point.
final WritableDoubleDataStore coredists = computeCoreDists(ids, knnQ, minPts);
final int numedges = ids.size() - 1;
DoubleLongHeap heap = new DoubleLongMinHeap(numedges);
// 2. Build spanning tree.
FiniteProgress mprog = LOG.isVerbose() ? new FiniteProgress("Computing minimum spanning tree (n-1 edges)", numedges, LOG) : null;
PrimsMinimumSpanningTree.processDense(ids, new HDBSCANAdapter(ids, coredists, distQ), new HeapMSTCollector(heap, mprog, LOG));
LOG.ensureCompleted(mprog);
// Storage for pointer representation:
WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY);
convertToPointerRepresentation(ids, heap, pi, lambda);
return new PointerDensityHierarchyRepresentationResult(ids, pi, lambda, distQ.getDistanceFunction().isSquared(), coredists);
}
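The HDBSCANAdapter above supplies the edge weights for Prim's algorithm. In HDBSCAN these are mutual reachability distances, i.e. the raw distance capped from below by both core distances; a minimal sketch of that quantity (the helper name is illustrative only):

final class MutualReachability {
  /**
   * Mutual reachability distance of HDBSCAN:
   * max(coreDistance(a), coreDistance(b), distance(a, b)).
   */
  static double distance(double coreA, double coreB, double distAB) {
    return Math.max(distAB, Math.max(coreA, coreB));
  }
}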
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
The class SLINK, method run.
/**
* Performs the SLINK algorithm on the given database.
*
* @param database Database to process
* @param relation Data relation to use
* @return Pointer representation of the computed clustering hierarchy
*/
public PointerHierarchyRepresentationResult run(Database database, Relation<O> relation) {
DBIDs ids = relation.getDBIDs();
WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY);
// Temporary storage for m.
WritableDoubleDataStore m = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// To allow CLINK logger override
final Logging log = getLogger();
FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Running SLINK", ids.size(), log) : null;
ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
// First element is trivial/special:
DBIDArrayIter id = aids.iter(), it = aids.iter();
// Step 1: initialize
for (; id.valid(); id.advance()) {
// P(n+1) = n+1:
pi.put(id, id);
// L(n+1) = infinity already.
}
// First element is finished already (start at seek(1) below!)
log.incrementProcessed(progress);
// Optimized branch
if (getDistanceFunction() instanceof PrimitiveDistanceFunction) {
PrimitiveDistanceFunction<? super O> distf = (PrimitiveDistanceFunction<? super O>) getDistanceFunction();
for (id.seek(1); id.valid(); id.advance()) {
step2primitive(id, it, id.getOffset(), relation, distf, m);
// SLINK or CLINK
process(id, aids, it, id.getOffset(), pi, lambda, m);
log.incrementProcessed(progress);
}
} else {
// Fallback branch
DistanceQuery<O> distQ = database.getDistanceQuery(relation, getDistanceFunction());
for (id.seek(1); id.valid(); id.advance()) {
step2(id, it, id.getOffset(), distQ, m);
// SLINK or CLINK
process(id, aids, it, id.getOffset(), pi, lambda, m);
log.incrementProcessed(progress);
}
}
log.ensureCompleted(progress);
// We don't need m anymore.
m.destroy();
m = null;
return new PointerHierarchyRepresentationResult(ids, pi, lambda, getDistanceFunction().isSquared());
}
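The step2()/step2primitive() helpers are not shown in this snippet; in SLINK, step 2 simply fills the temporary store m with the distances from the newly inserted object to all previously processed objects. A sketch of that step using the generic distance query, assuming this is what the call above performs (the class name is hypothetical):

import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;

final class SlinkStep2Sketch {
  /** SLINK step 2: m(j) = d(i, j) for all already processed objects, i.e. j < n. */
  static void step2(DBIDRef i, DBIDArrayIter it, int n, DistanceQuery<?> distQ, WritableDoubleDataStore m) {
    for (it.seek(0); it.getOffset() < n; it.advance()) {
      m.putDouble(it, distQ.distance(it, i));
    }
  }
}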
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
The class OPTICSXi, method extractClusters.
/**
* Extract clusters from a cluster order result.
*
* @param clusterOrderResult cluster order result
* @param relation Relation
* @param ixi Parameter 1 - Xi
* @param minpts Parameter minPts
* @return Extracted clustering
*/
private Clustering<OPTICSModel> extractClusters(ClusterOrder clusterOrderResult, Relation<?> relation, double ixi, int minpts) {
ArrayDBIDs clusterOrder = clusterOrderResult.ids;
DoubleDataStore reach = clusterOrderResult.reachability;
DBIDArrayIter tmp = clusterOrder.iter();
DBIDVar tmp2 = DBIDUtil.newVar();
double mib = 0.0;
List<SteepArea> salist = keepsteep ? new ArrayList<SteepArea>() : null;
List<SteepDownArea> sdaset = new ArrayList<>();
final Clustering<OPTICSModel> clustering = new Clustering<>("OPTICS Xi-Clusters", "optics");
HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<>();
HashSetModifiableDBIDs unclaimedids = DBIDUtil.newHashSet(relation.getDBIDs());
FiniteProgress scanprog = LOG.isVerbose() ? new FiniteProgress("OPTICS Xi cluster extraction", clusterOrder.size(), LOG) : null;
for (SteepScanPosition scan = new SteepScanPosition(clusterOrderResult); scan.hasNext(); ) {
if (scanprog != null) {
scanprog.setProcessed(scan.index, LOG);
}
// Update the maximum in-between value
mib = MathUtil.max(mib, scan.getReachability());
// The last point cannot be the start of a steep area.
if (!scan.next.valid()) {
break;
}
// Xi-steep down area
if (scan.steepDown(ixi)) {
// Update mib values with current mib and filter
updateFilterSDASet(mib, sdaset, ixi);
final double startval = scan.getReachability();
mib = 0.;
int startsteep = scan.index, endsteep = scan.index;
for (scan.next(); scan.hasNext(); scan.next()) {
// still steep - continue.
if (scan.steepDown(ixi)) {
endsteep = scan.index;
continue;
}
// Always stop looking after minpts "flat" steps.
if (!scan.steepDown(1.0) || scan.index - endsteep > minpts) {
break;
}
}
final SteepDownArea sda = new SteepDownArea(startsteep, endsteep, startval, 0);
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("New steep down area: " + sda.toString());
}
sdaset.add(sda);
if (salist != null) {
salist.add(sda);
}
continue;
}
// Xi-steep up area
if (scan.steepUp(ixi)) {
// Update mib values with current mib and filter
updateFilterSDASet(mib, sdaset, ixi);
final SteepUpArea sua;
// Compute steep-up area
{
int startsteep = scan.index, endsteep = scan.index;
mib = scan.getReachability();
double esuccr = scan.getNextReachability();
// Find end of steep-up-area, possibly updating mib again
while (!Double.isInfinite(esuccr) && scan.hasNext()) {
scan.next();
// still steep - continue.
if (scan.steepUp(ixi)) {
endsteep = scan.index;
mib = scan.getReachability();
esuccr = scan.getNextReachability();
continue;
}
// Stop looking after minpts non-up steps.
if (!scan.steepUp(1.0) || scan.index - endsteep > minpts) {
break;
}
}
if (Double.isInfinite(esuccr)) {
scan.next();
}
sua = new SteepUpArea(startsteep, endsteep, esuccr);
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("New steep up area: " + sua.toString());
}
if (salist != null) {
salist.add(sua);
}
}
// Validate and compute clusters
// LOG.debug("SDA size:"+sdaset.size()+" "+sdaset);
ListIterator<SteepDownArea> sdaiter = sdaset.listIterator(sdaset.size());
// Iterate backwards for correct hierarchy generation.
while (sdaiter.hasPrevious()) {
SteepDownArea sda = sdaiter.previous();
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("Comparing: eU=" + mib + " SDA: " + sda.toString());
}
// Condition 3b: end-of-steep-up > maximum-in-between lower
if (mib * ixi < sda.getMib()) {
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("mib * ixi = " + mib * ixi + " >= sda.getMib() = " + sda.getMib());
}
continue;
}
// By default, clusters cover both the steep up and steep down area
int cstart = sda.getStartIndex(), cend = MathUtil.min(sua.getEndIndex(), clusterOrder.size() - 1);
// However, we sometimes have to adjust this (Condition 4):
{
// Case b)
if (sda.getMaximum() * ixi >= sua.getMaximum()) {
while (cstart < cend && reach.doubleValue(tmp.seek(cstart + 1)) > sua.getMaximum()) {
cstart++;
}
} else if (sua.getMaximum() * ixi >= sda.getMaximum()) {
// Case c)
while (cend > cstart && reach.doubleValue(tmp.seek(cend - 1)) > sda.getMaximum()) {
cend--;
}
}
// Case a) is the default
}
// removes common artifacts from the Xi method
if (!nocorrect) {
simplify: while (cend > cstart) {
clusterOrderResult.predecessor.assignVar(tmp.seek(cend), tmp2);
for (int i = cstart; i < cend; i++) {
if (DBIDUtil.equal(tmp2, tmp.seek(i))) {
break simplify;
}
}
// Not found.
--cend;
}
}
// Condition 3a: obey minpts
if (cend - cstart + 1 < minpts) {
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("MinPts not satisfied.");
}
continue;
}
// Build the cluster
ModifiableDBIDs dbids = DBIDUtil.newArray();
for (int idx = cstart; idx <= cend; idx++) {
tmp.seek(idx);
// Collect only unclaimed IDs.
if (unclaimedids.remove(tmp)) {
dbids.add(tmp);
}
}
if (LOG.isDebuggingFine()) {
LOG.debugFine("Found cluster with " + dbids.size() + " new objects, length " + (cend - cstart + 1));
}
OPTICSModel model = new OPTICSModel(cstart, cend);
Cluster<OPTICSModel> cluster = new Cluster<>("Cluster_" + cstart + "_" + cend, dbids, model);
// Build the hierarchy
{
Iterator<Cluster<OPTICSModel>> iter = curclusters.iterator();
while (iter.hasNext()) {
Cluster<OPTICSModel> clus = iter.next();
OPTICSModel omodel = clus.getModel();
if (model.getStartIndex() <= omodel.getStartIndex() && omodel.getEndIndex() <= model.getEndIndex()) {
clustering.addChildCluster(cluster, clus);
iter.remove();
}
}
}
curclusters.add(cluster);
}
continue;
}
// Flat - advance anyway.
scan.next();
}
if (scanprog != null) {
scanprog.setProcessed(clusterOrder.size(), LOG);
}
if (!unclaimedids.isEmpty()) {
boolean noise = reach.doubleValue(tmp.seek(clusterOrder.size() - 1)) >= Double.POSITIVE_INFINITY;
Cluster<OPTICSModel> allcluster = new Cluster<>(noise ? "Noise" : "Cluster", unclaimedids, noise, new OPTICSModel(0, clusterOrder.size() - 1));
for (Cluster<OPTICSModel> cluster : curclusters) {
clustering.addChildCluster(allcluster, cluster);
}
clustering.addToplevelCluster(allcluster);
} else {
for (Cluster<OPTICSModel> cluster : curclusters) {
clustering.addToplevelCluster(cluster);
}
}
clustering.addChildResult(clusterOrderResult);
if (salist != null) {
clusterOrderResult.addChildResult(new SteepAreaResult(salist));
}
return clustering;
}
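The extraction loop above relies on SteepScanPosition.steepDown()/steepUp(). In the OPTICS-Xi definition, with ixi = 1 - Xi as in the Javadoc, a point is xi-steep downward when the next reachability drops to at most ixi times the current one, and xi-steep upward in the symmetric case. A minimal sketch of those tests, assuming this is what the scan position checks (class and method names illustrative, not ELKI API):

final class XiSteepness {
  /** Steep downward at index i: r(i+1) <= r(i) * ixi, with ixi = 1 - xi. */
  static boolean steepDown(double current, double next, double ixi) {
    return current * ixi >= next;
  }

  /** Steep upward at index i: r(i) <= r(i+1) * ixi, with ixi = 1 - xi. */
  static boolean steepUp(double current, double next, double ixi) {
    return current <= next * ixi;
  }
}

This also explains the scan.steepDown(1.0) and scan.steepUp(1.0) calls above: with ixi = 1 they degenerate into plain non-increasing and non-decreasing checks, which the code uses to count the "flat" steps.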