use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class GaussianAffinityMatrixBuilder method computeAffinityMatrix.
/**
 * Computes a dense affinity matrix for all objects of the relation.
 *
 * The pairwise distance matrix is built with the configured distance
 * function and handed to {@code computePij} together with {@code sigma}
 * and the initial scale.
 *
 * @param relation Relation to process
 * @param initialScale Initial scale, passed through to {@code computePij}
 * @return Affinity matrix over the relation's object ids
 */
@Override
public <T extends O> AffinityMatrix computeAffinityMatrix(Relation<T> relation, double initialScale) {
  final DistanceQuery<T> distanceQuery = relation.getDistanceQuery(distanceFunction);
  final ArrayDBIDs objectIds = DBIDUtil.ensureArray(relation.getDBIDs());
  // Distances first, then the desired affinities derived from them.
  return new DenseAffinityMatrix(computePij(buildDistanceMatrix(objectIds, distanceQuery), sigma, initialScale), objectIds);
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class AbstractBiclustering method defineBicluster.
/**
 * Builds the bicluster described by a row bitset and a column bitset.
 *
 * @param rows the rows included in the Bicluster
 * @param cols the columns included in the Bicluster
 * @return A Bicluster as given by the included rows and columns
 */
protected Cluster<BiclusterModel> defineBicluster(long[] rows, long[] cols) {
  // Rows become the cluster members, columns become the model.
  final ArrayDBIDs members = rowsBitsetToIDs(rows);
  final BiclusterModel model = new BiclusterModel(colsBitsetToIDs(cols));
  return new Cluster<>(members, model);
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class AbstractBiclustering method defineBicluster.
/**
 * Builds the bicluster described by a row {@link BitSet} and a column
 * {@link BitSet}.
 *
 * @param rows the rows included in the Bicluster
 * @param cols the columns included in the Bicluster
 * @return a Bicluster as given by the included rows and columns
 */
protected Cluster<BiclusterModel> defineBicluster(BitSet rows, BitSet cols) {
  // Rows become the cluster members, columns become the model.
  final ArrayDBIDs members = rowsBitsetToIDs(rows);
  final BiclusterModel model = new BiclusterModel(colsBitsetToIDs(cols));
  return new Cluster<>(members, model);
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class AffinityPropagationClusteringAlgorithm method run.
/**
 * Perform affinity propagation clustering.
 *
 * Responsibilities {@code r} and availabilities {@code a} are updated
 * alternately; every update blends the previous value (weight
 * {@code lambda}) with the new estimate (weight {@code 1 - lambda}). The
 * loop ends after {@code maxiter} iterations, or once the assignment of
 * objects to leads has not changed for {@code convergence} consecutive
 * iterations.
 *
 * Afterwards objects are grouped by their lead. A group whose lead is itself
 * assigned to a different lead is merged into that lead's group. Groups with
 * more than one member become clusters; for the remaining groups the lead
 * object is collected into one additional cluster.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<MedoidModel> run(Database db, Relation<O> relation) {
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int size = ids.size();
  // assignment[i] is the array offset of the current lead of object i.
  int[] assignment = new int[size];
  double[][] s = initialization.getSimilarityMatrix(db, relation, ids);
  double[][] r = new double[size][size];
  double[][] a = new double[size][size];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Affinity Propagation Iteration", LOG) : null;
  MutableProgress aprog = LOG.isVerbose() ? new MutableProgress("Stable assignments", size + 1, LOG) : null;
  // Number of consecutive iterations without any assignment change.
  int inactive = 0;
  for (int iteration = 0; iteration < maxiter && inactive < convergence; iteration++) {
    // Update responsibility matrix:
    for (int i = 0; i < size; i++) {
      double[] ai = a[i], ri = r[i], si = s[i];
      // Find the two largest values of a_ik + s_ik, and the position of the
      // largest one.
      double max1 = Double.NEGATIVE_INFINITY, max2 = Double.NEGATIVE_INFINITY;
      int maxk = -1;
      for (int k = 0; k < size; k++) {
        double val = ai[k] + si[k];
        if (val > max1) {
          max2 = max1;
          max1 = val;
          maxk = k;
        } else if (val > max2) {
          max2 = val;
        }
      }
      // With the maximum value known, update r. The maximum position itself
      // is compared against the runner-up instead.
      for (int k = 0; k < size; k++) {
        double val = si[k] - ((k != maxk) ? max1 : max2);
        ri[k] = ri[k] * lambda + val * (1. - lambda);
      }
    }
    // Update availability matrix
    for (int k = 0; k < size; k++) {
      // Compute sum of max(0, r_ik) for all i.
      // For r_kk, don't apply the max.
      double colposum = 0.;
      for (int i = 0; i < size; i++) {
        if (i == k || r[i][k] > 0.) {
          colposum += r[i][k];
        }
      }
      for (int i = 0; i < size; i++) {
        double val = colposum;
        // Adjust column sum by the one extra term.
        if (i == k || r[i][k] > 0.) {
          val -= r[i][k];
        }
        if (i != k && val > 0.) {
          // min(0, val) for off-diagonal entries
          val = 0.;
        }
        a[i][k] = a[i][k] * lambda + val * (1 - lambda);
      }
    }
    // Recompute the lead of every object and count how many changed.
    int changed = 0;
    for (int i = 0; i < size; i++) {
      double[] ai = a[i], ri = r[i];
      double max = Double.NEGATIVE_INFINITY;
      int maxj = -1;
      for (int j = 0; j < size; j++) {
        double v = ai[j] + ri[j];
        // On ties, prefer the object itself as lead.
        if (v > max || (i == j && v >= max)) {
          max = v;
          maxj = j;
        }
      }
      if (assignment[i] != maxj) {
        changed += 1;
        assignment[i] = maxj;
      }
    }
    inactive = (changed > 0) ? 0 : (inactive + 1);
    LOG.incrementProcessed(prog);
    if (aprog != null) {
      aprog.setProcessed(size - changed, LOG);
    }
  }
  if (aprog != null) {
    aprog.setProcessed(aprog.getTotal(), LOG);
  }
  LOG.setCompleted(prog);
  // Cluster map, by lead object
  Int2ObjectOpenHashMap<ModifiableDBIDs> map = new Int2ObjectOpenHashMap<>();
  DBIDArrayIter i1 = ids.iter();
  for (int i = 0; i1.valid(); i1.advance(), i++) {
    int c = assignment[i];
    // Add to cluster members:
    ModifiableDBIDs cids = map.get(c);
    if (cids == null) {
      cids = DBIDUtil.newArray();
      map.put(c, cids);
    }
    cids.add(i1);
  }
  // If we stopped early, the cluster lead might be in a different cluster.
  for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
    Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
    final int key = entry.getIntKey();
    int targetkey = key;
    ModifiableDBIDs tids = null;
    // Chase arrows: follow the assignment chain until we reach a lead that
    // still owns a cluster in the map, or an object that is its own lead.
    // (This previously tested "ids == null", which is never true here, so the
    // chain was never followed and no cluster was ever merged.)
    while (tids == null && assignment[targetkey] != targetkey) {
      targetkey = assignment[targetkey];
      tids = map.get(targetkey);
    }
    if (tids != null && targetkey != key) {
      // Fold this cluster into the one its lead belongs to.
      tids.addDBIDs(entry.getValue());
      iter.remove();
    }
  }
  Clustering<MedoidModel> clustering = new Clustering<>("Affinity Propagation Clustering", "ap-clustering");
  ModifiableDBIDs noise = DBIDUtil.newArray();
  for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
    Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
    // Position the iterator on the lead object of this cluster.
    i1.seek(entry.getIntKey());
    if (entry.getValue().size() > 1) {
      MedoidModel mod = new MedoidModel(DBIDUtil.deref(i1));
      clustering.addToplevelCluster(new Cluster<>(entry.getValue(), mod));
    } else {
      // Single-member groups: collect the lead object separately.
      noise.add(i1);
    }
  }
  if (noise.size() > 0) {
    // NOTE(review): the boolean flag presumably marks this cluster as noise
    // - confirm against the Cluster constructor.
    MedoidModel mod = new MedoidModel(DBIDUtil.deref(noise.iter()));
    clustering.addToplevelCluster(new Cluster<>(noise, true, mod));
  }
  return clustering;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class PROCLUS method run.
/**
 * Runs PROCLUS on the given relation in three phases: initialization
 * (sampling and greedy medoid selection), the iterative phase (medoid sets
 * are replaced until the objective has not improved for ten rounds), and
 * the refinement phase.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Clustering with one top-level cluster per refined PROCLUS cluster
 * @throws IllegalStateException if the data dimensionality is below
 *         parameter {@code l}
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
  if (RelationUtil.dimensionality(relation) < l) {
    throw new IllegalStateException("Dimensionality of data < parameter l! (" + RelationUtil.dimensionality(relation) + " < " + l + ")");
  }
  DistanceQuery<V> distanceQuery = database.getDistanceQuery(relation, SquaredEuclideanDistanceFunction.STATIC);
  RangeQuery<V> neighborQuery = database.getRangeQuery(distanceQuery);
  final Random random = rnd.getSingleThreadedRandom();

  // ---- Phase 1: initialization ----
  if (LOG.isVerbose()) {
    LOG.verbose("1. Initialization phase...");
  }
  int sampleSize = Math.min(relation.size(), k_i * k);
  DBIDs sampleSet = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random);
  int medoidSize = Math.min(relation.size(), m_i * k);
  ArrayDBIDs medoids = greedy(distanceQuery, sampleSet, medoidSize, random);
  if (LOG.isDebugging()) {
    LOG.debugFine("sampleSize " + sampleSize + '\n' + "sampleSet " + sampleSet + '\n' + "medoidSize " + medoidSize + '\n' + "m " + medoids);
  }

  // ---- Phase 2: iterative improvement of the medoid set ----
  if (LOG.isVerbose()) {
    LOG.verbose("2. Iterative phase...");
  }
  double bestObjective = Double.POSITIVE_INFINITY;
  ArrayDBIDs bestMedoids = null;
  DBIDs badMedoids = null;
  ArrayDBIDs currentMedoids = initialSet(medoids, k, random);
  if (LOG.isDebugging()) {
    LOG.debugFine("m_c " + currentMedoids);
  }
  IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;
  ArrayList<PROCLUSCluster> clusters = null;
  // Rounds since the objective last improved.
  int roundsWithoutGain = 0;
  while (roundsWithoutGain < 10) {
    long[][] subspaces = findDimensions(currentMedoids, relation, distanceQuery, neighborQuery);
    clusters = assignPoints(currentMedoids, subspaces, relation);
    double objective = evaluateClusters(clusters, subspaces, relation);
    if (objective < bestObjective) {
      // Improvement: remember this medoid set and restart the counter.
      roundsWithoutGain = 0;
      bestObjective = objective;
      bestMedoids = currentMedoids;
      badMedoids = computeBadMedoids(currentMedoids, clusters, (int) (relation.size() * 0.1 / k));
    }
    currentMedoids = computeM_current(medoids, bestMedoids, badMedoids, random);
    roundsWithoutGain++;
    if (cprogress != null) {
      cprogress.setProcessed(clusters.size(), LOG);
    }
  }
  LOG.setCompleted(cprogress);

  // ---- Phase 3: refinement ----
  if (LOG.isVerbose()) {
    LOG.verbose("3. Refinement phase...");
  }
  List<Pair<double[], long[]>> refinedDimensions = findDimensions(clusters, relation);
  List<PROCLUSCluster> finalClusters = finalAssignment(refinedDimensions, relation);

  // ---- Assemble the result; clusters are numbered starting at 1 ----
  Clustering<SubspaceModel> result = new Clustering<>("ProClus clustering", "proclus-clustering");
  int clusterNumber = 0;
  for (PROCLUSCluster pc : finalClusters) {
    clusterNumber++;
    Cluster<SubspaceModel> cluster = new Cluster<>(pc.objectIDs);
    cluster.setModel(new SubspaceModel(new Subspace(pc.getDimensions()), pc.centroid));
    cluster.setName("cluster_" + clusterNumber);
    result.addToplevelCluster(cluster);
  }
  return result;
}
Aggregations