use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class ERiC method run.
/**
 * Performs the ERiC algorithm on the given database.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<CorrelationModel> run(Database database, Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
  // Run Generalized DBSCAN
  LOG.beginStep(stepprog, 1, "Preprocessing local correlation dimensionalities and partitioning data");
  // FIXME: how to ensure we are running on the same relation?
  ERiCNeighborPredicate<V>.Instance npred = new ERiCNeighborPredicate<V>(settings).instantiate(database, relation);
  CorePredicate.Instance<DBIDs> cpred = new MinPtsCorePredicate(settings.minpts).instantiate(database);
  Clustering<Model> copacResult = new GeneralizedDBSCAN.Instance<>(npred, cpred, false).run();
  // extract correlation clusters
  LOG.beginStep(stepprog, 2, "Extract correlation clusters");
  List<List<Cluster<CorrelationModel>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality, npred);
  if (LOG.isDebugging()) {
    StringBuilder msg = new StringBuilder("Step 2: Extract correlation clusters...");
    for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
      List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(corrDim);
      msg.append("\n\ncorrDim ").append(corrDim);
      for (Cluster<CorrelationModel> cluster : correlationClusters) {
        msg.append("\n cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
        // .append(", level: ").append(cluster.getLevel()).append(", index: ").append(cluster.getLevelIndex());
        // msg.append("\n basis " + cluster.getPCA().getWeakEigenvectors().toString(" ", NF) + " ids " + cluster.getIDs().size());
      }
    }
    LOG.debugFine(msg.toString());
  }
  if (LOG.isVerbose()) {
    int clusters = 0;
    for (List<Cluster<CorrelationModel>> correlationClusters : clusterMap) {
      clusters += correlationClusters.size();
    }
    LOG.verbose(clusters + " clusters extracted.");
  }
  // build hierarchy
  LOG.beginStep(stepprog, 3, "Building hierarchy");
  Clustering<CorrelationModel> clustering = new Clustering<>("ERiC clustering", "eric-clustering");
  buildHierarchy(clustering, clusterMap, npred);
  if (LOG.isDebugging()) {
    StringBuilder msg = new StringBuilder("Step 3: Build hierarchy");
    for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
      List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(corrDim);
      for (Cluster<CorrelationModel> cluster : correlationClusters) {
        msg.append("\n cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
        // .append(", level: ").append(cluster.getLevel()).append(", index: ").append(cluster.getLevelIndex());
        for (It<Cluster<CorrelationModel>> iter = clustering.getClusterHierarchy().iterParents(cluster); iter.valid(); iter.advance()) {
          msg.append("\n parent ").append(iter.get());
        }
        for (It<Cluster<CorrelationModel>> iter = clustering.getClusterHierarchy().iterChildren(cluster); iter.valid(); iter.advance()) {
          msg.append("\n child ").append(iter.get());
        }
      }
    }
    LOG.debugFine(msg.toString());
  }
  LOG.setCompleted(stepprog);
  for (Cluster<CorrelationModel> rc : clusterMap.get(clusterMap.size() - 1)) {
    clustering.addToplevelCluster(rc);
  }
  return clustering;
}
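The method above uses ELKI's null-guarded progress idiom: the StepProgress is only allocated when verbose logging is enabled, and the beginStep and setCompleted helpers on the logger accept null and then do nothing. A minimal sketch of that idiom in isolation (the class name StepProgressIdiom is made up for illustration; the Logging calls are the ones visible above):

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;

public class StepProgressIdiom {
  private static final Logging LOG = Logging.getLogger(StepProgressIdiom.class);

  public void run() {
    // Allocate the progress only when verbose logging is enabled;
    // the helpers below accept null and then simply do nothing.
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress(2) : null;
    LOG.beginStep(stepprog, 1, "First phase");
    // ... first phase ...
    LOG.beginStep(stepprog, 2, "Second phase");
    // ... second phase ...
    LOG.setCompleted(stepprog);
  }
}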
use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class ClassicMultidimensionalScalingTransform method filter.
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  if (size == 0) {
    return objects;
  }
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  for (int r = 0; r < objects.metaLength(); r++) {
    @SuppressWarnings("unchecked")
    SimpleTypeInformation<Object> type = (SimpleTypeInformation<Object>) objects.meta(r);
    @SuppressWarnings("unchecked")
    final List<Object> column = (List<Object>) objects.getColumn(r);
    if (!dist.getInputTypeRestriction().isAssignableFromType(type)) {
      bundle.appendColumn(type, column);
      continue;
    }
    // Get the replacement type information
    @SuppressWarnings("unchecked")
    final List<I> castColumn = (List<I>) column;
    bundle.appendColumn(new VectorFieldTypeInformation<>(factory, tdim), castColumn);
    StepProgress prog = LOG.isVerbose() ? new StepProgress("Classic MDS", 2) : null;
    // Compute distance matrix.
    LOG.beginStep(prog, 1, "Computing distance matrix");
    double[][] mat = computeSquaredDistanceMatrix(castColumn, dist);
    doubleCenterSymmetric(mat);
    // Find eigenvectors.
    {
      LOG.beginStep(prog, 2, "Computing singular value decomposition");
      SingularValueDecomposition svd = new SingularValueDecomposition(mat);
      double[][] u = svd.getU();
      double[] lambda = svd.getSingularValues();
      // Undo squared, unless we were given a squared distance function:
      if (!dist.isSquared()) {
        for (int i = 0; i < tdim; i++) {
          lambda[i] = FastMath.sqrt(Math.abs(lambda[i]));
        }
      }
      double[] buf = new double[tdim];
      for (int i = 0; i < size; i++) {
        double[] row = u[i];
        for (int x = 0; x < buf.length; x++) {
          buf[x] = lambda[x] * row[x];
        }
        column.set(i, factory.newNumberVector(buf));
      }
    }
    LOG.setCompleted(prog);
  }
  return bundle;
}
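The doubleCenterSymmetric call is the classical MDS centering step that turns squared distances D2 into a Gram matrix B = -1/2 * J * D2 * J with J = I - (1/n) 1 1^T; the subsequent SVD of B then yields the embedding coordinates. A sketch of what such a helper presumably computes, as a stand-alone method (the name doubleCenter and the in-place contract are assumptions, not the ELKI API):

public class DoubleCentering {
  /**
   * Double-center a symmetric matrix of squared distances in place:
   * b_ij = -0.5 * (d2_ij - rowmean_i - colmean_j + totalmean),
   * which equals -1/2 * J * D2 * J for J = I - (1/n) 1 1^T.
   */
  public static void doubleCenter(double[][] d2) {
    final int n = d2.length;
    double[] mean = new double[n];
    double total = 0.;
    for (int i = 0; i < n; i++) {
      double s = 0.;
      for (int j = 0; j < n; j++) {
        s += d2[i][j];
      }
      mean[i] = s / n;
      total += s;
    }
    total /= (double) n * n;
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < n; j++) {
        d2[i][j] = -.5 * (d2[i][j] - mean[i] - mean[j] + total);
      }
    }
  }
}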
use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class P3C method run.
/**
 * Performs the P3C algorithm on the given Database.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  // Overall progress.
  StepProgress stepProgress = LOG.isVerbose() ? new StepProgress(10) : null;
  if (stepProgress != null) {
    stepProgress.beginStep(1, "Grid-partitioning data.", LOG);
  }
  // Desired number of bins, as per Sturges' rule:
  final int binCount = (int) Math.ceil(1 + MathUtil.log2(relation.size()));
  // Perform 1-dimensional projections, and split into bins.
  SetDBIDs[][] partitions = partitionData(relation, binCount);
  if (stepProgress != null) {
    stepProgress.beginStep(2, "Searching for non-uniform bins in support histograms.", LOG);
  }
  // Set markers for each attribute until they're all deemed uniform.
  final long[][] markers = new long[dim][];
  for (int d = 0; d < dim; d++) {
    final SetDBIDs[] parts = partitions[d];
    if (parts == null) {
      // Never mark any on constant dimensions.
      continue;
    }
    final long[] marked = markers[d] = BitsUtil.zero(binCount);
    int card = 0;
    while (card < dim - 1) {
      // Find the bin with the largest support; test only the bins that were
      // not previously marked.
      int bestBin = chiSquaredUniformTest(parts, marked, card);
      if (bestBin < 0) {
        // Uniform
        break;
      }
      BitsUtil.setI(marked, bestBin);
      card++;
    }
    if (LOG.isDebugging()) {
      LOG.debug("Marked bins in dim " + d + ": " + BitsUtil.toString(marked, binCount));
    }
  }
  if (stepProgress != null) {
    stepProgress.beginStep(3, "Merging marked bins to 1-signatures.", LOG);
  }
  ArrayList<Signature> signatures = constructOneSignatures(partitions, markers);
  if (stepProgress != null) {
    stepProgress.beginStep(4, "Computing cluster cores from merged p-signatures.", LOG);
  }
  ArrayList<Signature> clusterCores = mergeClusterCores(binCount, signatures);
  if (stepProgress != null) {
    stepProgress.beginStep(5, "Pruning redundant cluster cores.", LOG);
  }
  clusterCores = pruneRedundantClusterCores(clusterCores);
  if (LOG.isVerbose()) {
    LOG.verbose("Number of cluster cores found: " + clusterCores.size());
  }
  if (clusterCores.isEmpty()) {
    LOG.setCompleted(stepProgress);
    Clustering<SubspaceModel> c = new Clustering<>("P3C", "P3C");
    c.addToplevelCluster(new Cluster<SubspaceModel>(relation.getDBIDs(), true));
    return c;
  }
  if (stepProgress != null) {
    stepProgress.beginStep(6, "Refining cluster cores to clusters via EM.", LOG);
  }
  // Track objects not assigned to any cluster:
  ModifiableDBIDs noise = DBIDUtil.newHashSet();
  WritableDataStore<double[]> probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
  int k = clusterCores.size();
  List<MultivariateGaussianModel> models = new ArrayList<>(k);
  computeFuzzyMembership(relation, clusterCores, noise, probClusterIGivenX, models, dim);
  // Initial estimate of covariances, to assign noise objects
  EM.recomputeCovarianceMatrices(relation, probClusterIGivenX, models, 0.);
  assignUnassigned(relation, probClusterIGivenX, models, noise);
  double emNew = EM.assignProbabilitiesToInstances(relation, models, probClusterIGivenX);
  for (int it = 1; it <= maxEmIterations || maxEmIterations < 0; it++) {
    final double emOld = emNew;
    EM.recomputeCovarianceMatrices(relation, probClusterIGivenX, models, 0.);
    // reassign probabilities
    emNew = EM.assignProbabilitiesToInstances(relation, models, probClusterIGivenX);
    if (LOG.isVerbose()) {
      LOG.verbose("iteration " + it + " - expectation value: " + emNew);
    }
    if ((emNew - emOld) <= emDelta) {
      break;
    }
  }
  if (stepProgress != null) {
    stepProgress.beginStep(7, "Generating hard clustering.", LOG);
  }
  // Create a hard clustering, making sure each data point is part of only one
  // cluster, based on the best match from the membership matrix.
  ArrayList<ClusterCandidate> clusterCandidates = hardClustering(probClusterIGivenX, clusterCores, relation.getDBIDs());
  if (stepProgress != null) {
    stepProgress.beginStep(8, "Looking for outliers and moving them to the noise set.", LOG);
  }
  // Outlier detection. Remove points from clusters that have a Mahalanobis
  // distance larger than the critical value of the ChiSquare distribution.
  findOutliers(relation, models, clusterCandidates, noise);
  if (stepProgress != null) {
    stepProgress.beginStep(9, "Removing empty clusters.", LOG);
  }
  // Remove near-empty clusters.
  for (Iterator<ClusterCandidate> it = clusterCandidates.iterator(); it.hasNext();) {
    ClusterCandidate cand = it.next();
    final int size = cand.ids.size();
    if (size < minClusterSize) {
      if (size > 0) {
        noise.addDBIDs(cand.ids);
      }
      it.remove();
    }
  }
  if (LOG.isVerbose()) {
    LOG.verbose("Number of clusters remaining: " + clusterCandidates.size());
  }
  if (stepProgress != null) {
    stepProgress.beginStep(10, "Generating final result.", LOG);
  }
  // Generate final output.
  Clustering<SubspaceModel> result = new Clustering<>("P3C", "P3C");
  for (int cluster = 0; cluster < clusterCandidates.size(); ++cluster) {
    ClusterCandidate candidate = clusterCandidates.get(cluster);
    CovarianceMatrix cvm = CovarianceMatrix.make(relation, candidate.ids);
    result.addToplevelCluster(new Cluster<>(candidate.ids, new SubspaceModel(new Subspace(candidate.dimensions), cvm.getMeanVector())));
  }
  if (LOG.isVerbose()) {
    LOG.verbose("Noise size: " + noise.size());
  }
  if (noise.size() > 0) {
    result.addToplevelCluster(new Cluster<SubspaceModel>(noise, true));
  }
  LOG.ensureCompleted(stepProgress);
  return result;
}
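The bin count in step 1 is Sturges' rule, ceil(1 + log2(n)); for n = 1000 this gives ceil(1 + 9.97) = 11 bins. Spelled out without the MathUtil dependency (sturgesBins is a hypothetical helper, shown only to make the formula concrete):

public class SturgesRule {
  /** Sturges' rule: number of histogram bins for n observations. */
  public static int sturgesBins(int n) {
    return (int) Math.ceil(1 + Math.log(n) / Math.log(2));
  }

  public static void main(String[] args) {
    System.out.println(sturgesBins(1000)); // ceil(1 + 9.97) = 11
  }
}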
use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class CLIQUE method run.
/**
* Performs the CLIQUE algorithm on the given database.
*
* @param relation Data relation to process
* @return Clustering result
*/
public Clustering<SubspaceModel> run(Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  StepProgress step = new StepProgress(2);
  // 1. Identification of subspaces that contain clusters
  step.beginStep(1, "Identification of subspaces that contain clusters", LOG);
  ArrayList<List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new ArrayList<>(dimensionality);
  List<CLIQUESubspace<V>> denseSubspaces = findOneDimensionalDenseSubspaces(relation);
  dimensionToDenseSubspaces.add(denseSubspaces);
  if (LOG.isVerbose()) {
    LOG.verbose("1-dimensional dense subspaces: " + denseSubspaces.size());
  }
  if (LOG.isDebugging()) {
    for (CLIQUESubspace<V> s : denseSubspaces) {
      LOG.debug(s.toString(" "));
    }
  }
  for (int k = 2; k <= dimensionality && !denseSubspaces.isEmpty(); k++) {
    denseSubspaces = findDenseSubspaces(relation, denseSubspaces);
    assert (dimensionToDenseSubspaces.size() == k - 1);
    dimensionToDenseSubspaces.add(denseSubspaces);
    if (LOG.isVerbose()) {
      LOG.verbose(k + "-dimensional dense subspaces: " + denseSubspaces.size());
    }
    if (LOG.isDebugging()) {
      for (CLIQUESubspace<V> s : denseSubspaces) {
        LOG.debug(s.toString(" "));
      }
    }
  }
  // 2. Identification of clusters
  step.beginStep(2, "Identification of clusters", LOG);
  // build result
  Clustering<SubspaceModel> result = new Clustering<>("CLIQUE clustering", "clique-clustering");
  for (int dim = 0; dim < dimensionToDenseSubspaces.size(); dim++) {
    List<CLIQUESubspace<V>> subspaces = dimensionToDenseSubspaces.get(dim);
    List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
    if (LOG.isVerbose()) {
      LOG.verbose((dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
    }
    for (Pair<Subspace, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
      Cluster<SubspaceModel> newCluster = new Cluster<>(modelAndCluster.second);
      newCluster.setModel(new SubspaceModel(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).getArrayRef()));
      result.addToplevelCluster(newCluster);
    }
  }
  return result;
}
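The subspace search above is bottom-up in the Apriori style: the k-dimensional candidates are generated only from the (k-1)-dimensional dense subspaces, so the loop can stop at the first empty level. Reduced to its skeleton (LevelStep and levelwise are illustrative stand-ins, not part of ELKI):

import java.util.ArrayList;
import java.util.List;

public class LevelwiseSearch {
  /** Stand-in for one candidate-generation-and-pruning step, such as findDenseSubspaces. */
  public interface LevelStep<T> {
    List<T> next(List<T> previous);
  }

  /** Build level k from level k - 1, stopping at the first empty level or at maxLevel. */
  public static <T> List<List<T>> levelwise(List<T> level1, int maxLevel, LevelStep<T> step) {
    List<List<T>> levels = new ArrayList<>();
    levels.add(level1);
    List<T> current = level1;
    for (int k = 2; k <= maxLevel && !current.isEmpty(); k++) {
      current = step.next(current);
      levels.add(current);
    }
    return levels;
  }
}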
use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class LogPanel method publish.
/**
* Publish a logging record.
*
* @param record Log record to publish
*/
protected void publish(final LogRecord record) {
  if (record instanceof ProgressLogRecord) {
    ProgressLogRecord preg = (ProgressLogRecord) record;
    Progress prog = preg.getProgress();
    JProgressBar pbar = getOrCreateProgressBar(prog);
    updateProgressBar(prog, pbar);
    if (prog.isComplete()) {
      removeProgressBar(prog, pbar);
    }
    if (prog.isComplete() || prog instanceof StepProgress) {
      publishTextRecord(record);
    }
  } else {
    publishTextRecord(record);
  }
}
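The branch above encodes a simple policy: every progress record moves its progress bar, but only completed progresses and StepProgress step titles are echoed into the text log, since intermediate updates of an ordinary counting progress would just produce noise there. The same policy as a stand-alone predicate (echoAsText is illustrative, not LogPanel API):

import de.lmu.ifi.dbs.elki.logging.progress.Progress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;

public class ProgressTextPolicy {
  /** Echo a progress record into the text log only when it is final or a step headline. */
  public static boolean echoAsText(Progress prog) {
    return prog.isComplete() || prog instanceof StepProgress;
  }
}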