Usage of de.lmu.ifi.dbs.elki.data.model.CorrelationModel in the ELKI project (elki-project):
class ERiC, method extractCorrelationClusters.
/**
 * Extracts the correlation clusters and noise from the Generalized DBSCAN
 * (COPAC-style) result and returns, for each correlation dimensionality, the
 * list of clusters found with that dimensionality. Each cluster carries a
 * {@link CorrelationModel} holding the filtered PCA result (the basis vectors
 * of the subspace in which the cluster appears) and the cluster centroid.
 *
 * @param dbscanResult clustering produced by Generalized DBSCAN
 * @param relation the database relation containing the objects
 * @param dimensionality the dimensionality of the feature space
 * @param npred ERiC predicate, queried for the local correlation
 *        dimensionality of each cluster
 * @return a list of clusters for each dimensionality (list index = correlation
 *         dimensionality; trailing empty dimensionalities are removed)
 */
private List<List<Cluster<CorrelationModel>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> relation, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
  // One result bucket per correlation dimensionality, 0 through dimensionality.
  List<List<Cluster<CorrelationModel>>> clusterMap = new ArrayList<>();
  for (int i = 0; i <= dimensionality; i++) {
    clusterMap.add(new ArrayList<Cluster<CorrelationModel>>());
  }
  // Noise cluster collecting all noise objects over all partitions.
  Cluster<Model> noise = null;
  // Iterate over the DBSCAN clusters, sorting them by correlation dimension.
  for (Cluster<Model> clus : dbscanResult.getAllClusters()) {
    DBIDs group = clus.getIDs();
    // Full-dimensional clusters are treated like noise below.
    int dim = clus.isNoise() ? dimensionality : npred.dimensionality(group.iter());
    if (dim < dimensionality) {
      List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dim);
      // Name encodes dimensionality and running index within that dimensionality.
      correlationClusters.add(makeCorrelationCluster("[" + dim + "_" + correlationClusters.size() + "]", group, dim, relation));
    } else if (noise == null) {
      // First noise partition becomes the noise cluster.
      noise = clus;
    } else {
      // Merge subsequent noise partitions into the existing noise cluster.
      ModifiableDBIDs merged = DBIDUtil.newHashSet(noise.getIDs());
      merged.addDBIDs(clus.getIDs());
      noise.setIDs(merged);
    }
  }
  if (noise != null && noise.size() > 0) {
    // Noise gets a full-dimensional model and goes into the last bucket.
    clusterMap.get(dimensionality).add(makeCorrelationCluster("[noise]", noise.getIDs(), dimensionality, relation));
  }
  // Delete trailing dimensionalities with no clusters (index 0 is always kept).
  for (int i = dimensionality; i > 0; i--) {
    if (!clusterMap.get(i).isEmpty()) {
      break;
    }
    clusterMap.remove(i);
  }
  return clusterMap;
}

/**
 * Build a correlation cluster: run PCA on the member objects, keep the first
 * {@code dim} eigenpairs as strong, and attach the centroid.
 *
 * @param name cluster label
 * @param ids member object ids
 * @param dim number of leading eigenpairs to mark as strong
 * @param relation the database relation containing the objects
 * @return cluster with a {@link CorrelationModel}
 */
private Cluster<CorrelationModel> makeCorrelationCluster(String name, DBIDs ids, int dim, Relation<V> relation) {
  EigenPairFilter filter = new FirstNEigenPairFilter(dim);
  SortedEigenPairs epairs = settings.pca.processIds(ids, relation).getEigenPairs();
  int numstrong = filter.filter(epairs.eigenValues());
  PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
  double[] centroid = Centroid.make(relation, ids).getArrayRef();
  return new Cluster<>(name, ids, new CorrelationModel(pcares, centroid));
}
Usage of de.lmu.ifi.dbs.elki.data.model.CorrelationModel in the ELKI project (elki-project):
class ERiC, method run.
/**
 * Performs the ERiC algorithm on the given database.
 *
 * @param database Database to run on
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<CorrelationModel> run(Database database, Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
  // Step 1: Generalized DBSCAN with the ERiC neighbor predicate.
  LOG.beginStep(stepprog, 1, "Preprocessing local correlation dimensionalities and partitioning data");
  // FIXME: how to ensure we are running on the same relation?
  ERiCNeighborPredicate<V>.Instance npred = new ERiCNeighborPredicate<V>(settings).instantiate(database, relation);
  CorePredicate.Instance<DBIDs> cpred = new MinPtsCorePredicate(settings.minpts).instantiate(database);
  Clustering<Model> copacResult = new GeneralizedDBSCAN.Instance<>(npred, cpred, false).run();
  // Step 2: group the DBSCAN clusters by correlation dimensionality.
  LOG.beginStep(stepprog, 2, "Extract correlation clusters");
  List<List<Cluster<CorrelationModel>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality, npred);
  if (LOG.isDebugging()) {
    StringBuilder buf = new StringBuilder("Step 2: Extract correlation clusters...");
    for (int dim = 0; dim < clusterMap.size(); dim++) {
      buf.append("\n\ncorrDim ").append(dim);
      for (Cluster<CorrelationModel> c : clusterMap.get(dim)) {
        buf.append("\n cluster ").append(c).append(", ids: ").append(c.getIDs().size());
      }
    }
    LOG.debugFine(buf.toString());
  }
  if (LOG.isVerbose()) {
    int total = 0;
    for (List<Cluster<CorrelationModel>> perDim : clusterMap) {
      total += perDim.size();
    }
    LOG.verbose(total + " clusters extracted.");
  }
  // Step 3: connect the clusters into a subspace hierarchy.
  LOG.beginStep(stepprog, 3, "Building hierarchy");
  Clustering<CorrelationModel> clustering = new Clustering<>("ERiC clustering", "eric-clustering");
  buildHierarchy(clustering, clusterMap, npred);
  if (LOG.isDebugging()) {
    StringBuilder buf = new StringBuilder("Step 3: Build hierarchy");
    for (List<Cluster<CorrelationModel>> perDim : clusterMap) {
      for (Cluster<CorrelationModel> c : perDim) {
        buf.append("\n cluster ").append(c).append(", ids: ").append(c.getIDs().size());
        for (It<Cluster<CorrelationModel>> it = clustering.getClusterHierarchy().iterParents(c); it.valid(); it.advance()) {
          buf.append("\n parent ").append(it.get());
        }
        for (It<Cluster<CorrelationModel>> it = clustering.getClusterHierarchy().iterChildren(c); it.valid(); it.advance()) {
          buf.append("\n child ").append(it.get());
        }
      }
    }
    LOG.debugFine(buf.toString());
  }
  LOG.setCompleted(stepprog);
  // The highest dimensionality found holds the top-level clusters.
  for (Cluster<CorrelationModel> top : clusterMap.get(clusterMap.size() - 1)) {
    clustering.addToplevelCluster(top);
  }
  return clustering;
}
Usage of de.lmu.ifi.dbs.elki.data.model.CorrelationModel in the ELKI project (elki-project):
class ERiCTest, method testERiCResults.
/**
 * Run ERiC with fixed parameters and compare the result to a golden standard.
 */
@Test
public void testERiCResults() {
  Database db = makeSimpleDatabase(UNITTEST + "hierarchical-3d2d1d.csv", 600);
  // One parameter per line, rather than one long chained expression.
  ELKIBuilder<ERiC<DoubleVector>> builder = new ELKIBuilder<ERiC<DoubleVector>>(ERiC.class) //
      .with(DBSCAN.Parameterizer.MINPTS_ID, 30) //
      .with(ERiC.Parameterizer.DELTA_ID, 0.20) //
      .with(ERiC.Parameterizer.TAU_ID, 0.04) //
      .with(ERiC.Parameterizer.K_ID, 50) //
      .with(PCARunner.Parameterizer.PCA_COVARIANCE_MATRIX, WeightedCovarianceMatrixBuilder.class) //
      .with(WeightedCovarianceMatrixBuilder.Parameterizer.WEIGHT_ID, ErfcWeight.class) //
      .with(EigenPairFilter.PCA_EIGENPAIR_FILTER, RelativeEigenPairFilter.class) //
      .with(RelativeEigenPairFilter.Parameterizer.EIGENPAIR_FILTER_RALPHA, 1.60);
  Clustering<CorrelationModel> result = builder.build().run(db);
  // Hierarchical pairs scored: 0.9204825
  testFMeasure(db, result, 0.728074);
  testClusterSizes(result, new int[] { 109, 188, 303 });
}
Usage of de.lmu.ifi.dbs.elki.data.model.CorrelationModel in the ELKI project (elki-project):
class ERiCTest, method testERiCOverlap.
/**
 * Run ERiC with fixed parameters and compare the result to a golden standard.
 */
@Test
public void testERiCOverlap() {
  Database db = makeSimpleDatabase(UNITTEST + "correlation-overlap-3-5d.ascii", 650);
  // One parameter per line, rather than one long chained expression.
  ELKIBuilder<ERiC<DoubleVector>> builder = new ELKIBuilder<ERiC<DoubleVector>>(ERiC.class) //
      .with(DBSCAN.Parameterizer.MINPTS_ID, 15) //
      .with(ERiC.Parameterizer.DELTA_ID, 1.0) //
      .with(ERiC.Parameterizer.TAU_ID, 1.0) //
      .with(ERiC.Parameterizer.K_ID, 45) //
      .with(PCARunner.Parameterizer.PCA_COVARIANCE_MATRIX, WeightedCovarianceMatrixBuilder.class) //
      .with(WeightedCovarianceMatrixBuilder.Parameterizer.WEIGHT_ID, ErfcWeight.class) //
      .with(EigenPairFilter.PCA_EIGENPAIR_FILTER, PercentageEigenPairFilter.class) //
      .with(PercentageEigenPairFilter.Parameterizer.ALPHA_ID, 0.6);
  Clustering<CorrelationModel> result = builder.build().run(db);
  testFMeasure(db, result, 0.831136946);
  testClusterSizes(result, new int[] { 29, 189, 207, 225 });
}
Aggregations