Use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
The class DocumentReferences, method main.
/**
 * @param args Command line arguments
 */
public static void main(String[] args) {
  if (args.length < 1 || args.length > 2) {
    LoggingUtil.warning("I need one or two file names to operate!");
    System.exit(1);
  }
  if (!args[0].endsWith(".html") || (args.length > 1 && !args[1].endsWith(".trac"))) {
    LoggingUtil.warning("File name doesn't end in expected extension!");
    System.exit(1);
  }
  List<Pair<Reference, TreeSet<Object>>> refs = sortedReferences();
  File references = new File(args[0]);
  try (FileOutputStream reffo = new FileOutputStream(references);
      OutputStream refstream = new BufferedOutputStream(reffo)) {
    Document refdoc = documentReferences(refs);
    HTMLUtil.writeXHTML(refdoc, refstream);
  } catch (IOException e) {
    LoggingUtil.exception("IO Exception writing HTML output.", e);
    System.exit(1);
  }
  if (args.length > 1) {
    File refwiki = new File(args[1]);
    try (FileOutputStream reffow = new FileOutputStream(refwiki);
        PrintStream refstreamW = new PrintStream(reffow, false, "UTF-8")) {
      documentReferencesWiki(refs, refstreamW);
    } catch (IOException e) {
      LoggingUtil.exception("IO Exception writing Wiki output.", e);
      System.exit(1);
    }
  }
}
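Here Pair serves as a simple record type: each entry binds one Reference to the TreeSet of classes citing it. A minimal standalone sketch of the same pattern, using plain JDK types in place of ELKI's Reference (the class name and data below are hypothetical):

import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;

import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;

public class ReferenceListing {
  public static void main(String[] args) {
    // Each Pair binds one citation key to the sorted set of classes using it.
    List<Pair<String, TreeSet<String>>> refs = new ArrayList<>();
    TreeSet<String> users = new TreeSet<>();
    users.add("LOF");
    users.add("FeatureBagging");
    refs.add(new Pair<>("Breunig et al. 2000", users));
    for (Pair<String, TreeSet<String>> ref : refs) {
      System.out.println(ref.first + " is cited by " + ref.second);
    }
  }
}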
Use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
The class CTLuGLSBackwardSearchAlgorithm, method run.
/**
 * Run the algorithm.
 *
 * @param database Database to process
 * @param relationx Spatial relation
 * @param relationy Attribute relation
 * @return Algorithm result
 */
public OutlierResult run(Database database, Relation<V> relationx, Relation<? extends NumberVector> relationy) {
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax mm = new DoubleMinMax(0.0, 0.0);
  // Outlier detection loop
  {
    ModifiableDBIDs idview = DBIDUtil.newHashSet(relationx.getDBIDs());
    ProxyView<V> proxy = new ProxyView<>(idview, relationx);
    double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha * .5);
    // Detect outliers while significant.
    while (true) {
      Pair<DBIDVar, Double> candidate = singleIteration(proxy, relationy);
      if (candidate.second < phialpha) {
        break;
      }
      scores.putDouble(candidate.first, candidate.second);
      if (!Double.isNaN(candidate.second)) {
        mm.put(candidate.second);
      }
      idview.remove(candidate.first);
    }
    // Remaining objects are inliers
    for (DBIDIter iter = idview.iter(); iter.valid(); iter.advance()) {
      scores.putDouble(iter, 0.0);
    }
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("GLSSODBackward", "GLSSODbackward-outlier", scores, relationx.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0, Double.POSITIVE_INFINITY, 0);
  return new OutlierResult(scoreMeta, scoreResult);
}
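The backward search relies on Pair as a lightweight two-value return: singleIteration() hands back the candidate's ID together with its test statistic in a single object. A simplified sketch of that contract, with a hypothetical stand-in for singleIteration() that works on plain residuals rather than an ELKI relation:

import java.util.Arrays;
import java.util.List;

import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;

public class BackwardSearchSketch {
  // Hypothetical stand-in for singleIteration(): return the index of the
  // most deviating residual together with its absolute value.
  static Pair<Integer, Double> singleIteration(List<Double> residuals) {
    int best = 0;
    for (int i = 1; i < residuals.size(); i++) {
      if (Math.abs(residuals.get(i)) > Math.abs(residuals.get(best))) {
        best = i;
      }
    }
    return new Pair<>(best, Math.abs(residuals.get(best)));
  }

  public static void main(String[] args) {
    List<Double> residuals = Arrays.asList(0.1, -3.5, 0.4);
    double phialpha = 1.96; // significance cutoff, as in the algorithm above
    Pair<Integer, Double> candidate = singleIteration(residuals);
    if (candidate.second >= phialpha) {
      System.out.println("Outlier: index " + candidate.first + ", score " + candidate.second);
    }
  }
}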
Use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
The class FeatureBagging, method run.
/**
 * Run the algorithm on a data set.
 *
 * @param database Database context
 * @param relation Relation to use
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<NumberVector> relation) {
  final int dbdim = RelationUtil.dimensionality(relation);
  final int mindim = dbdim >> 1;
  final int maxdim = dbdim - 1;
  final Random rand = rnd.getSingleThreadedRandom();
  ArrayList<OutlierResult> results = new ArrayList<>(num);
  {
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
    for (int i = 0; i < num; i++) {
      long[] dimset = randomSubspace(dbdim, mindim, maxdim, rand);
      SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
      LOF<NumberVector> lof = new LOF<>(k, df);
      // run LOF and collect the result
      OutlierResult result = lof.run(database, relation);
      results.add(result);
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  }
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmax = new DoubleMinMax();
  if (breadth) {
    FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
    @SuppressWarnings("unchecked")
    Pair<DBIDIter, DoubleRelation>[] IDVectorOntoScoreVector = (Pair<DBIDIter, DoubleRelation>[]) new Pair[results.size()];
    // Mapping score-sorted DBID iterators onto their corresponding scores.
    // We need to initialize them now to be able to iterate them "in parallel".
    {
      int i = 0;
      for (OutlierResult r : results) {
        IDVectorOntoScoreVector[i] = new Pair<DBIDIter, DoubleRelation>(r.getOrdering().order(relation.getDBIDs()).iter(), r.getScores());
        i++;
      }
    }
    // Iterating over the *lines* of the AS_t(i)-matrix.
    for (int i = 0; i < relation.size(); i++) {
      // Iterating over the elements of a line (breadth-first).
      for (Pair<DBIDIter, DoubleRelation> pair : IDVectorOntoScoreVector) {
        DBIDIter iter = pair.first;
        // The iterator stays valid as long as the result provides a score
        // for every DBID.
        if (iter.valid()) {
          double score = pair.second.doubleValue(iter);
          if (Double.isNaN(scores.doubleValue(iter))) {
            scores.putDouble(iter, score);
            minmax.put(score);
          }
          iter.advance();
        } else {
          LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
        }
      }
      // Progress does not take the initial mapping into account.
      LOG.incrementProcessed(cprog);
    }
    LOG.ensureCompleted(cprog);
  } else {
    FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
      double sum = 0.0;
      for (OutlierResult r : results) {
        final double s = r.getScores().doubleValue(iter);
        if (!Double.isNaN(s)) {
          sum += s;
        }
      }
      scores.putDouble(iter, sum);
      minmax.put(sum);
      LOG.incrementProcessed(cprog);
    }
    LOG.ensureCompleted(cprog);
  }
  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
  DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs());
  return new OutlierResult(meta, scoreres);
}
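The unchecked cast above is forced by Java's generics: an array of a parameterized type such as Pair<DBIDIter, DoubleRelation>[] cannot be instantiated directly, so the code allocates a raw new Pair[n] and casts. A minimal sketch of the same idiom outside ELKI (the zip() helper and its inputs are illustrative, not ELKI API):

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;

public class ParallelIteration {
  // Pair each ranking's iterator with a label so several rankings can be
  // walked "in parallel", breadth-first, as in the method above.
  @SuppressWarnings("unchecked")
  static Pair<Iterator<Double>, String>[] zip(List<Iterator<Double>> iters, List<String> labels) {
    // "new Pair<Iterator<Double>, String>[n]" would not compile; hence the cast.
    Pair<Iterator<Double>, String>[] pairs = (Pair<Iterator<Double>, String>[]) new Pair[iters.size()];
    for (int i = 0; i < pairs.length; i++) {
      pairs[i] = new Pair<>(iters.get(i), labels.get(i));
    }
    return pairs;
  }

  public static void main(String[] args) {
    List<Iterator<Double>> its = Arrays.asList(Arrays.asList(0.9, 0.5).iterator(), Arrays.asList(0.8, 0.7).iterator());
    for (Pair<Iterator<Double>, String> p : zip(its, Arrays.asList("run-1", "run-2"))) {
      System.out.println(p.second + " top score: " + p.first.next());
    }
  }
}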
Use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
The class NaiveMeanShiftClustering, method run.
/**
 * Run the mean-shift clustering algorithm.
 *
 * @param database Database
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
  final DistanceQuery<V> distq = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<V> rangeq = database.getRangeQuery(distq);
  final NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
  final int dim = RelationUtil.dimensionality(relation);
  // Stopping threshold
  final double threshold = bandwidth * 1E-10;
  // Result store:
  ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
  ModifiableDBIDs noise = DBIDUtil.newArray();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Mean-shift clustering", relation.size(), LOG) : null;
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    // Initial position:
    V position = relation.get(iter);
    iterations: for (int j = 1; j <= MAXITER; j++) {
      // Compute new position:
      V newvec = null;
      {
        DoubleDBIDList neigh = rangeq.getRangeForObject(position, bandwidth);
        boolean okay = (neigh.size() > 1) || (neigh.size() >= 1 && j > 1);
        if (okay) {
          Centroid newpos = new Centroid(dim);
          for (DoubleDBIDListIter niter = neigh.iter(); niter.valid(); niter.advance()) {
            final double weight = kernel.density(niter.doubleValue() / bandwidth);
            newpos.put(relation.get(niter), weight);
          }
          newvec = factory.newNumberVector(newpos.getArrayRef());
          // TODO: detect 0 weight!
        }
        if (!okay) {
          noise.add(iter);
          break iterations;
        }
      }
      // Test if we are close to one of the known clusters:
      double bestd = Double.POSITIVE_INFINITY;
      Pair<V, ModifiableDBIDs> bestp = null;
      for (Pair<V, ModifiableDBIDs> pair : clusters) {
        final double merged = distq.distance(newvec, pair.first);
        if (merged < bestd) {
          bestd = merged;
          bestp = pair;
        }
      }
      // Check for convergence:
      double delta = distq.distance(position, newvec);
      if (bestd < 10 * threshold || bestd * 2 < delta) {
        bestp.second.add(iter);
        break iterations;
      }
      if (j == MAXITER) {
        LOG.warning("No convergence after " + MAXITER + " iterations. Distance: " + delta);
      }
      if (Double.isNaN(delta)) {
        LOG.warning("Encountered NaN distance. Invalid center vector? " + newvec.toString());
        break iterations;
      }
      if (j == MAXITER || delta < threshold) {
        if (LOG.isDebuggingFine()) {
          LOG.debugFine("New cluster:" + newvec + " delta: " + delta + " threshold: " + threshold + " bestd: " + bestd);
        }
        ArrayModifiableDBIDs cids = DBIDUtil.newArray();
        cids.add(iter);
        clusters.add(new Pair<V, ModifiableDBIDs>(newvec, cids));
        break iterations;
      }
      position = newvec;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  ArrayList<Cluster<MeanModel>> cs = new ArrayList<>(clusters.size());
  for (Pair<V, ModifiableDBIDs> pair : clusters) {
    cs.add(new Cluster<>(pair.second, new MeanModel(pair.first.toArray())));
  }
  if (noise.size() > 0) {
    cs.add(new Cluster<MeanModel>(noise, true));
  }
  Clustering<MeanModel> c = new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
  return c;
}
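Each Pair here couples a cluster mode (the mean vector) with the modifiable set of IDs assigned to it, and the nearest mode is found by a linear scan. A reduced sketch of that scan with plain JDK collections standing in for ELKI's DBID types (one-dimensional data for brevity; all names below are hypothetical):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;

public class NearestModeSketch {
  // Linear scan for the cluster mode closest to a shifted position,
  // mirroring the "known clusters" loop above (1-d Euclidean distance).
  static Pair<double[], List<Integer>> nearest(List<Pair<double[], List<Integer>>> clusters, double[] pos) {
    double bestd = Double.POSITIVE_INFINITY;
    Pair<double[], List<Integer>> bestp = null;
    for (Pair<double[], List<Integer>> pair : clusters) {
      double d = Math.abs(pair.first[0] - pos[0]);
      if (d < bestd) {
        bestd = d;
        bestp = pair;
      }
    }
    return bestp;
  }

  public static void main(String[] args) {
    List<Pair<double[], List<Integer>>> clusters = new ArrayList<>();
    clusters.add(new Pair<>(new double[] { 1.0 }, new ArrayList<>(Arrays.asList(0))));
    nearest(clusters, new double[] { 1.1 }).second.add(7); // assign point 7 to the nearest mode
    System.out.println(clusters.get(0).second); // [0, 7]
  }
}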
Use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
The class PROCLUS, method findDimensions.
/**
 * Refinement step that determines the set of correlated dimensions for each
 * cluster centroid.
 *
 * @param clusters the list of clusters
 * @param database the database containing the objects
 * @return the set of correlated dimensions for each specified cluster
 *         centroid
 */
private List<Pair<double[], long[]>> findDimensions(ArrayList<PROCLUSCluster> clusters, Relation<V> database) {
  // compute x_ij = avg distance from points in c_i to c_i.centroid
  final int dim = RelationUtil.dimensionality(database);
  final int numc = clusters.size();
  double[][] averageDistances = new double[numc][];
  for (int i = 0; i < numc; i++) {
    PROCLUSCluster c_i = clusters.get(i);
    double[] x_i = new double[dim];
    for (DBIDIter iter = c_i.objectIDs.iter(); iter.valid(); iter.advance()) {
      V o = database.get(iter);
      for (int d = 0; d < dim; d++) {
        x_i[d] += Math.abs(c_i.centroid[d] - o.doubleValue(d));
      }
    }
    for (int d = 0; d < dim; d++) {
      x_i[d] /= c_i.objectIDs.size();
    }
    averageDistances[i] = x_i;
  }
  List<DoubleIntInt> z_ijs = computeZijs(averageDistances, dim);
  long[][] dimensionMap = computeDimensionMap(z_ijs, dim, numc);
  // mapping cluster -> dimensions
  List<Pair<double[], long[]>> result = new ArrayList<>(numc);
  for (int i = 0; i < numc; i++) {
    long[] dims_i = dimensionMap[i];
    if (dims_i == null) {
      continue;
    }
    result.add(new Pair<>(clusters.get(i).centroid, dims_i));
  }
  return result;
}
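The returned Pair couples each cluster centroid with its correlated dimensions, where the long[] acts as a bitmask over dimensions (64 per word, as produced by computeDimensionMap). A small sketch decoding such a pair, with the bit test written out by hand (the report() helper and its toy data are illustrative):

import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;

public class SubspaceReport {
  // Print which dimensions a cluster's centroid is correlated in,
  // decoding the long[] bitmask by hand (word d / 64, bit d % 64).
  static void report(Pair<double[], long[]> cluster) {
    double[] centroid = cluster.first;
    long[] dims = cluster.second;
    for (int d = 0; d < centroid.length; d++) {
      if ((dims[d >>> 6] & (1L << d)) != 0) {
        System.out.println("dim " + d + ": centroid " + centroid[d]);
      }
    }
  }

  public static void main(String[] args) {
    // Toy cluster: 3-d centroid, dimensions 0 and 2 selected.
    report(new Pair<>(new double[] { 0.5, 1.5, 2.5 }, new long[] { 0b101L }));
  }
}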