Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class BarnesHutTSNE, method run.
public Relation<DoubleVector> run(Database database, Relation<O> relation) {
  AffinityMatrix neighbors = affinity.computeAffinityMatrix(relation, EARLY_EXAGGERATION);
  double[][] solution = randomInitialSolution(neighbors.size(), dim, random.getSingleThreadedRandom());
  projectedDistances.setLong(0L);
  optimizetSNE(neighbors, solution);
  LOG.statistics(projectedDistances);
  // Remove the original (unprojected) data unless configured otherwise.
  removePreviousRelation(relation);
  DBIDs ids = relation.getDBIDs();
  WritableDataStore<DoubleVector> proj = DataStoreFactory.FACTORY.makeStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_SORTED, DoubleVector.class);
  VectorFieldTypeInformation<DoubleVector> otype = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  for (DBIDArrayIter it = neighbors.iterDBIDs(); it.valid(); it.advance()) {
    proj.put(it, DoubleVector.wrap(solution[it.getOffset()]));
  }
  return new MaterializedRelation<>("tSNE", "t-SNE", otype, proj, ids);
}
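The relation returned above stores one dim-dimensional DoubleVector per input object. A minimal consumer sketch, assuming result is a hypothetical variable holding the Relation<DoubleVector> returned by run() and that dim is 2:

// Hedged sketch: reading the embedding back out of the returned relation.
for (DBIDIter it = result.iterDBIDs(); it.valid(); it.advance()) {
  DoubleVector v = result.get(it);
  double x = v.doubleValue(0), y = v.doubleValue(1);
  // ... hand (x, y) to plotting or export code of your choice
}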
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class DependencyDerivator, method run.
/**
 * Quantitatively computes linear dependencies among the attributes of the
 * given database, based on a linear correlation PCA.
 *
 * @param database the database to run this DependencyDerivator on
 * @param relation the relation to use
 * @return the CorrelationAnalysisSolution computed by this DependencyDerivator
 */
public CorrelationAnalysisSolution<V> run(Database database, Relation<V> relation) {
  if (LOG.isVerbose()) {
    LOG.verbose("retrieving database objects...");
  }
  Centroid centroid = Centroid.make(relation, relation.getDBIDs());
  NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
  V centroidDV = factory.newNumberVector(centroid.getArrayRef());
  DBIDs ids;
  if (this.sampleSize > 0) {
    if (randomsample) {
      ids = DBIDUtil.randomSample(relation.getDBIDs(), this.sampleSize, RandomFactory.DEFAULT);
    } else {
      DistanceQuery<V> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
      KNNList queryResults = database.getKNNQuery(distanceQuery, this.sampleSize).getKNNForObject(centroidDV, this.sampleSize);
      ids = DBIDUtil.newHashSet(queryResults);
    }
  } else {
    ids = relation.getDBIDs();
  }
  return generateModel(relation, ids, centroid.getArrayRef());
}
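Of the two sampling branches above, the random-sample branch is the simplest to reuse on its own. A short sketch, assuming relation and sampleSize as in the method above:

// Hedged sketch: a uniform random sample of object IDs, as in the randomsample branch.
DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, RandomFactory.DEFAULT);
for (DBIDIter it = sample.iter(); it.valid(); it.advance()) {
  V vec = relation.get(it); // access each sampled object
}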
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class ERiC, method extractCorrelationClusters.
/**
 * Extracts the correlation clusters and noise from the COPAC result and
 * returns a mapping of correlation dimension to maps of clusters within this
 * correlation dimension. Each cluster is defined by the basis vectors
 * defining the subspace in which the cluster appears.
 *
 * @param dbscanResult the clustering obtained from the Generalized DBSCAN run
 * @param relation the relation containing the objects
 * @param dimensionality the dimensionality of the feature space
 * @param npred ERiC neighbor predicate
 * @return a list of clusters for each dimensionality
 */
private List<List<Cluster<CorrelationModel>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> relation, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
  // result
  List<List<Cluster<CorrelationModel>>> clusterMap = new ArrayList<>();
  for (int i = 0; i <= dimensionality; i++) {
    clusterMap.add(new ArrayList<Cluster<CorrelationModel>>());
  }
  // noise cluster containing all noise objects over all partitions
  Cluster<Model> noise = null;
  // iterate over correlation dimensions
  for (Cluster<Model> clus : dbscanResult.getAllClusters()) {
    DBIDs group = clus.getIDs();
    int dim = clus.isNoise() ? dimensionality : npred.dimensionality(clus.getIDs().iter());
    if (dim < dimensionality) {
      EigenPairFilter filter = new FirstNEigenPairFilter(dim);
      // get cluster list for this dimension.
      List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dim);
      SortedEigenPairs epairs = settings.pca.processIds(group, relation).getEigenPairs();
      int numstrong = filter.filter(epairs.eigenValues());
      PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
      double[] centroid = Centroid.make(relation, group).getArrayRef();
      Cluster<CorrelationModel> correlationCluster = new Cluster<>("[" + dim + "_" + correlationClusters.size() + "]", group, new CorrelationModel(pcares, centroid));
      correlationClusters.add(correlationCluster);
    } else {
      // partition containing noise
      if (noise == null) {
        noise = clus;
      } else {
        ModifiableDBIDs merged = DBIDUtil.newHashSet(noise.getIDs());
        merged.addDBIDs(clus.getIDs());
        noise.setIDs(merged);
      }
    }
  }
  if (noise != null && noise.size() > 0) {
    // get cluster list for the full dimensionality (holds the noise cluster).
    List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dimensionality);
    EigenPairFilter filter = new FirstNEigenPairFilter(dimensionality);
    SortedEigenPairs epairs = settings.pca.processIds(noise.getIDs(), relation).getEigenPairs();
    int numstrong = filter.filter(epairs.eigenValues());
    PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
    double[] centroid = Centroid.make(relation, noise.getIDs()).getArrayRef();
    Cluster<CorrelationModel> correlationCluster = new Cluster<>("[noise]", noise.getIDs(), new CorrelationModel(pcares, centroid));
    correlationClusters.add(correlationCluster);
  }
  // Delete trailing dimensionalities for which no clusters were found.
  for (int i = dimensionality; i > 0; i--) {
    if (!clusterMap.get(i).isEmpty()) {
      break;
    }
    clusterMap.remove(i);
  }
  return clusterMap;
}
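The returned list is indexed by correlation dimension: entry d holds the clusters whose subspace has dimensionality d, and the entry at the full dimensionality collects the noise. A sketch of inspecting the mapping, assuming clusterMap holds the return value:

// Hedged sketch: report every extracted cluster per correlation dimension.
for (int d = 0; d < clusterMap.size(); d++) {
  for (Cluster<CorrelationModel> c : clusterMap.get(d)) {
    System.out.println("corrDim " + d + ": " + c.getName() + ", " + c.size() + " objects");
  }
}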
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class ERiC, method run.
/**
 * Performs the ERiC algorithm on the given database.
 *
 * @param database Database to use
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<CorrelationModel> run(Database database, Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
  // Run Generalized DBSCAN
  LOG.beginStep(stepprog, 1, "Preprocessing local correlation dimensionalities and partitioning data");
  // FIXME: how to ensure we are running on the same relation?
  ERiCNeighborPredicate<V>.Instance npred = new ERiCNeighborPredicate<V>(settings).instantiate(database, relation);
  CorePredicate.Instance<DBIDs> cpred = new MinPtsCorePredicate(settings.minpts).instantiate(database);
  Clustering<Model> copacResult = new GeneralizedDBSCAN.Instance<>(npred, cpred, false).run();
  // extract correlation clusters
  LOG.beginStep(stepprog, 2, "Extract correlation clusters");
  List<List<Cluster<CorrelationModel>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality, npred);
  if (LOG.isDebugging()) {
    StringBuilder msg = new StringBuilder("Step 2: Extract correlation clusters...");
    for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
      List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(corrDim);
      msg.append("\n\ncorrDim ").append(corrDim);
      for (Cluster<CorrelationModel> cluster : correlationClusters) {
        msg.append("\n cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
      }
    }
    LOG.debugFine(msg.toString());
  }
  if (LOG.isVerbose()) {
    int clusters = 0;
    for (List<Cluster<CorrelationModel>> correlationClusters : clusterMap) {
      clusters += correlationClusters.size();
    }
    LOG.verbose(clusters + " clusters extracted.");
  }
  // build hierarchy
  LOG.beginStep(stepprog, 3, "Building hierarchy");
  Clustering<CorrelationModel> clustering = new Clustering<>("ERiC clustering", "eric-clustering");
  buildHierarchy(clustering, clusterMap, npred);
  if (LOG.isDebugging()) {
    StringBuilder msg = new StringBuilder("Step 3: Build hierarchy");
    for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
      List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(corrDim);
      for (Cluster<CorrelationModel> cluster : correlationClusters) {
        msg.append("\n cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
        for (It<Cluster<CorrelationModel>> iter = clustering.getClusterHierarchy().iterParents(cluster); iter.valid(); iter.advance()) {
          msg.append("\n parent ").append(iter.get());
        }
        for (It<Cluster<CorrelationModel>> iter = clustering.getClusterHierarchy().iterChildren(cluster); iter.valid(); iter.advance()) {
          msg.append("\n child ").append(iter.get());
        }
      }
    }
    LOG.debugFine(msg.toString());
  }
  LOG.setCompleted(stepprog);
  // The clusters of the highest correlation dimension form the root of the hierarchy.
  for (Cluster<CorrelationModel> rc : clusterMap.get(clusterMap.size() - 1)) {
    clustering.addToplevelCluster(rc);
  }
  return clustering;
}
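Because buildHierarchy links clusters across correlation dimensionalities, the result is best consumed via the cluster hierarchy. A sketch using the same iterator API as the debug output above, assuming clustering is the returned Clustering<CorrelationModel>:

// Hedged sketch: top-down traversal of the ERiC cluster hierarchy.
for (Cluster<CorrelationModel> top : clustering.getToplevelClusters()) {
  System.out.println("top-level: " + top);
  for (It<Cluster<CorrelationModel>> it = clustering.getClusterHierarchy().iterChildren(top); it.valid(); it.advance()) {
    System.out.println("  child: " + it.get() + " (" + it.get().size() + " objects)");
  }
}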
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class UKMeans, method run.
/**
 * Run the clustering.
 *
 * @param database the Database
 * @param relation the Relation
 * @return Clustering result
 */
public Clustering<?> run(final Database database, final Relation<DiscreteUncertainObject> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  }
  // Choose initial means randomly
  DBIDs sampleids = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
  List<double[]> means = new ArrayList<>(k);
  for (DBIDIter iter = sampleids.iter(); iter.valid(); iter.advance()) {
    means.add(ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(iter).getCenterOfMass()));
  }
  // Setup cluster assignment store
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int i = 0; i < k; i++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("UK-Means iteration", LOG) : null;
  DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
  int iteration = 0;
  for (; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum);
    logVarstat(varstat, varsum);
    // Stop if no cluster assignment changed.
    if (!changed) {
      break;
    }
    // Recompute means.
    means = means(clusters, means, relation);
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }
  // Wrap result
  Clustering<KMeansModel> result = new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  for (int i = 0; i < clusters.size(); i++) {
    DBIDs ids = clusters.get(i);
    if (ids.isEmpty()) {
      continue;
    }
    result.addToplevelCluster(new Cluster<>(ids, new KMeansModel(means.get(i), varsum[i])));
  }
  return result;
}
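A sketch of reading the result back, assuming the caller keeps the Clustering<KMeansModel> type used inside the method, and assuming KMeansModel.getMean() returns the double[] mean passed to the constructor above:

// Hedged sketch: report size and mean of each non-empty cluster.
for (Cluster<KMeansModel> c : result.getAllClusters()) {
  double[] mean = c.getModel().getMean();
  System.out.println(c.getNameAutomatic() + ": " + c.size() + " objects, mean " + FormatUtil.format(mean));
}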