Usage example of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in the elki project (elki-project): class RandomlyChosenInitialMeans, method chooseInitialMeans.
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  // Draw k object ids uniformly at random; a smaller sample means we failed.
  DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
  if (sample.size() < k) {
    throw new AbortException("Could not choose k means.");
  }
  // Materialize the sampled vectors as the initial means.
  final double[][] initial = new double[k][];
  int pos = 0;
  for (DBIDIter it = sample.iter(); pos < k; it.advance(), pos++) {
    initial[pos] = relation.get(it).toArray();
  }
  return initial;
}
Usage example of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in the elki project (elki-project): class ExternalClustering, method run.
/**
 * Run the algorithm.
 *
 * Reads the external clustering file (transparently ungzipping if needed),
 * collecting integer tokens as cluster assignments and non-integer tokens as
 * name labels; each completed line's assignment vector is attached to the
 * first relation whose size matches the number of assignments read.
 *
 * @param database Database to use
 * @return Result (always {@code null}; the parsed clusterings are attached
 *         to the matching relations as a side effect)
 */
@Override
public Clustering<? extends Model> run(Database database) {
// Never assigned below; results are attached via attachToRelation instead.
Clustering<? extends Model> m = null;
try (//
InputStream in = FileUtil.tryGzipInput(new FileInputStream(file));
TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
Tokenizer tokenizer = reader.getTokenizer();
reader.reset(in);
// Presized with the DBID relation size as a capacity hint.
IntArrayList assignment = new IntArrayList(database.getRelation(TypeUtil.DBID).size());
ArrayList<String> name = new ArrayList<>();
// One clustering per non-comment line of the input file.
line: while (reader.nextLineExceptComments()) {
for (; /* initialized by nextLineExceptComments */
tokenizer.valid(); tokenizer.advance()) {
try {
// Integer tokens are cluster assignments ...
assignment.add(tokenizer.getIntBase10());
} catch (NumberFormatException e) {
// ... anything else is collected as a label for the clustering name.
name.add(tokenizer.getSubstring());
}
}
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("Read " + assignment.size() + " assignments and " + name.size() + " labels.");
}
// Attach to the first relation of matching cardinality, then reuse the
// buffers for the next line.
for (Relation<?> r : database.getRelations()) {
if (r.size() == assignment.size()) {
attachToRelation(database, r, assignment, name);
assignment.clear();
name.clear();
continue line;
}
}
throw new AbortException("No relation found to match with clustering of size " + assignment.size());
}
} catch (IOException e) {
throw new AbortException("Could not load outlier scores: " + e.getMessage() + " when loading " + file, e);
}
return m;
}
Usage example of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in the elki project (elki-project): class ExternalClustering, method attachToRelation.
/**
 * Build a clustering from the file result.
 *
 * Groups the positionally-aligned DBIDs of the relation by their integer
 * cluster assignment and attaches the resulting clustering to the relation.
 * Negative cluster ids are interpreted as noise clusters.
 *
 * @param database Database
 * @param r Result to attach to
 * @param assignment Cluster assignment
 * @param name Name
 */
private void attachToRelation(Database database, Relation<?> r, IntArrayList assignment, ArrayList<String> name) {
  DBIDs ids = r.getDBIDs();
  // Positional assignment only works when DBIDs support stable offsets.
  if (!(ids instanceof ArrayDBIDs)) {
    throw new AbortException("External clusterings can only be used with static DBIDs.");
  }
  // First pass: count cluster sizes, so the id arrays can be presized exactly.
  Int2IntOpenHashMap sizes = new Int2IntOpenHashMap();
  for (IntListIterator it = assignment.iterator(); it.hasNext();) {
    sizes.addTo(it.nextInt(), 1);
  }
  // Allocate one DBID array per distinct cluster id.
  Int2ObjectOpenHashMap<ArrayModifiableDBIDs> cids = new Int2ObjectOpenHashMap<>(sizes.size());
  for (ObjectIterator<Int2IntMap.Entry> it = sizes.int2IntEntrySet().fastIterator(); it.hasNext();) {
    Int2IntMap.Entry entry = it.next();
    cids.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
  }
  // Second pass: distribute the i-th DBID into its assigned cluster.
  {
    DBIDArrayIter it = ((ArrayDBIDs) ids).iter();
    for (int i = 0; i < assignment.size(); i++) {
      cids.get(assignment.getInt(i)).add(it.seek(i));
    }
  }
  String nam = FormatUtil.format(name, " ");
  // FIX: use a locale-independent lowercase conversion, so the derived short
  // name does not change under locale-sensitive case rules (e.g. Turkish
  // dotless i with the default locale).
  String snam = nam.toLowerCase(java.util.Locale.ROOT).replace(' ', '-');
  Clustering<ClusterModel> result = new Clustering<>(nam, snam);
  for (ObjectIterator<Int2ObjectMap.Entry<ArrayModifiableDBIDs>> it = cids.int2ObjectEntrySet().fastIterator(); it.hasNext();) {
    Int2ObjectMap.Entry<ArrayModifiableDBIDs> entry = it.next();
    // Negative cluster numbers mark noise.
    boolean noise = entry.getIntKey() < 0;
    result.addToplevelCluster(new Cluster<>(entry.getValue(), noise, ClusterModel.CLUSTER));
  }
  database.getHierarchy().add(r, result);
}
Usage example of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in the elki project (elki-project): class EvaluateConcordantPairs, method evaluateClustering.
/**
 * Evaluate a single clustering.
 *
 * Computes concordance-based indexes (Gamma and Tau) by comparing
 * within-cluster distances against between-cluster distances, logs them as
 * statistics, and records them in the evaluation result.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Gamma index
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
  List<? extends Cluster<?>> clusters = c.getAllClusters();
  int ignorednoise = 0, withinPairs = 0;
  for (Cluster<?> cluster : clusters) {
    if ((cluster.size() <= 1 || cluster.isNoise())) {
      switch(noiseHandling) {
      case IGNORE_NOISE:
        ignorednoise += cluster.size();
        continue;
      case TREAT_NOISE_AS_SINGLETONS:
        // No concordant distances.
        continue;
      case MERGE_NOISE:
        // Treat like a cluster below.
        break;
      }
    }
    // FIX: compute the pair count in long arithmetic and check BEFORE adding.
    // The previous "withinPairs < 0" test after an int multiplication could
    // miss overflow happening inside the multiplication itself.
    final long pairs = ((long) cluster.size() * (cluster.size() - 1)) >>> 1;
    if (pairs > Integer.MAX_VALUE - withinPairs) {
      throw new AbortException("Integer overflow - clusters too large to compute pairwise distances.");
    }
    withinPairs += pairs;
  }
  // Materialize within-cluster distances (sorted):
  double[] withinDistances = computeWithinDistances(rel, clusters, withinPairs);
  int[] withinTies = new int[withinDistances.length];
  // Count ties within
  countTies(withinDistances, withinTies);
  long concordantPairs = 0, discordantPairs = 0, betweenPairs = 0;
  // Step two, compute discordant distances:
  for (int i = 0; i < clusters.size(); i++) {
    Cluster<?> ocluster1 = clusters.get(i);
    if ((ocluster1.size() <= 1 || ocluster1.isNoise()) //
        && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
      continue;
    }
    for (int j = i + 1; j < clusters.size(); j++) {
      Cluster<?> ocluster2 = clusters.get(j);
      if ((ocluster2.size() <= 1 || ocluster2.isNoise()) //
          && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
        continue;
      }
      // FIX: widen to long before multiplying; the int product could overflow
      // for large clusters before being added to the long accumulator.
      betweenPairs += (long) ocluster1.size() * ocluster2.size();
      for (DBIDIter oit1 = ocluster1.getIDs().iter(); oit1.valid(); oit1.advance()) {
        NumberVector obj = rel.get(oit1);
        for (DBIDIter oit2 = ocluster2.getIDs().iter(); oit2.valid(); oit2.advance()) {
          double dist = distanceFunction.distance(obj, rel.get(oit2));
          int p = Arrays.binarySearch(withinDistances, dist);
          if (p >= 0) {
            // Tied distances: rewind to the first occurrence of this value.
            while (p > 0 && withinDistances[p - 1] >= dist) {
              --p;
            }
            // Ties are neither concordant nor discordant.
            concordantPairs += p;
            discordantPairs += withinDistances.length - p - withinTies[p];
            continue;
          }
          p = -p - 1;
          concordantPairs += p;
          discordantPairs += withinDistances.length - p;
        }
      }
    }
  }
  // Total number of pairs possible:
  final long t = ((rel.size() - ignorednoise) * (long) (rel.size() - ignorednoise - 1)) >>> 1;
  final long tt = (t * (t - 1)) >>> 1;
  double gamma = (concordantPairs - discordantPairs) / (double) (concordantPairs + discordantPairs);
  double tau = computeTau(concordantPairs, discordantPairs, tt, withinDistances.length, betweenPairs);
  // Avoid NaN when everything is in a single cluster:
  gamma = gamma > 0. ? gamma : 0.;
  tau = tau > 0. ? tau : 0.;
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString()));
    if (ignorednoise > 0) {
      LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise));
    }
    LOG.statistics(new DoubleStatistic(key + ".gamma", gamma));
    LOG.statistics(new DoubleStatistic(key + ".tau", tau));
  }
  EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
  MeasurementGroup g = ev.findOrCreateGroup("Concordance-based Evaluation");
  g.addMeasure("Gamma", gamma, -1., 1., 0., false);
  g.addMeasure("Tau", tau, -1., +1., 0., false);
  db.getHierarchy().resultChanged(ev);
  return gamma;
}
Usage example of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in the elki project (elki-project): class PrecomputedSimilarityMatrix, method initialize.
@Override
public void initialize() {
  // Precompute a packed lower-triangular similarity matrix for the relation.
  final DBIDs candidates = relation.getDBIDs();
  if (!(candidates instanceof DBIDRange)) {
    throw new AbortException("Similarity matrixes are currently only supported for DBID ranges (as used by static databases) for performance reasons (Patches welcome).");
  }
  ids = (DBIDRange) candidates;
  size = ids.size();
  if (size > 65536) {
    throw new AbortException("Similarity matrixes currently have a limit of 65536 objects (~16 GB). After this, the array size exceeds the Java integer range, and a different data structure needs to be used.");
  }
  similarityQuery = similarityFunction.instantiate(relation);
  final int entries = triangleSize(size);
  matrix = new double[entries];
  final FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Precomputing similarity matrix", entries, LOG) : null;
  final DBIDArrayIter outer = ids.iter(), inner = ids.iter();
  int off = 0;
  outer.seek(0);
  while (outer.valid()) {
    // Fill one row: all pairs with inner offset < outer offset
    // -- must match {@link #getOffset}!
    inner.seek(0);
    while (inner.getOffset() < outer.getOffset()) {
      matrix[off++] = similarityQuery.similarity(outer, inner);
      inner.advance();
    }
    if (prog != null) {
      prog.setProcessed(prog.getProcessed() + outer.getOffset(), LOG);
    }
    outer.advance();
  }
  LOG.ensureCompleted(prog);
}
Aggregations