use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class PROCLUS method run.
/**
 * Runs PROCLUS on the given relation and returns the resulting subspace
 * clustering.
 * <p>
 * The method works in three phases: an initialization phase (random sample,
 * reduced to candidate medoids via {@code greedy}), an iterative phase that
 * keeps exchanging medoids until the objective did not improve for ten
 * consecutive rounds, and a refinement phase that recomputes the dimensions
 * from the clusters and performs the final assignment.
 *
 * @param database Database used to obtain the distance and range queries
 * @param relation Relation to process
 * @return Clustering containing one top-level cluster per final cluster
 * @throws IllegalStateException if the data dimensionality is less than l
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  if (dim < l) {
    throw new IllegalStateException("Dimensionality of data < parameter l! (" + dim + " < " + l + ")");
  }
  final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, SquaredEuclideanDistanceFunction.STATIC);
  final RangeQuery<V> rangeQuery = database.getRangeQuery(distQuery);
  final Random random = rnd.getSingleThreadedRandom();

  // Phase 1: initialization
  if (LOG.isVerbose()) {
    LOG.verbose("1. Initialization phase...");
  }
  // Neither the sample nor the medoid candidates may exceed the data size.
  final int sampleSize = Math.min(relation.size(), k_i * k);
  final DBIDs sampleSet = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random);
  final int medoidSize = Math.min(relation.size(), m_i * k);
  final ArrayDBIDs medoids = greedy(distQuery, sampleSet, medoidSize, random);
  if (LOG.isDebugging()) {
    LOG.debugFine("sampleSize " + sampleSize + '\n' + "sampleSet " + sampleSet + '\n' + "medoidSize " + medoidSize + '\n' + "m " + medoids);
  }

  // Phase 2: iteration
  if (LOG.isVerbose()) {
    LOG.verbose("2. Iterative phase...");
  }
  double bestObjective = Double.POSITIVE_INFINITY;
  ArrayDBIDs bestMedoids = null;
  DBIDs badMedoids = null;
  ArrayDBIDs currentMedoids = initialSet(medoids, k, random);
  if (LOG.isDebugging()) {
    LOG.debugFine("m_c " + currentMedoids);
  }
  IndefiniteProgress clusterProgress = null;
  if (LOG.isVerbose()) {
    clusterProgress = new IndefiniteProgress("Current number of clusters:", LOG);
  }
  ArrayList<PROCLUSCluster> clusters = null;
  // Counts the rounds since the objective last improved; stop after ten.
  int rounds = 0;
  while (rounds < 10) {
    final long[][] medoidDimensions = findDimensions(currentMedoids, relation, distQuery, rangeQuery);
    clusters = assignPoints(currentMedoids, medoidDimensions, relation);
    final double objective = evaluateClusters(clusters, medoidDimensions, relation);
    if (objective < bestObjective) {
      // New best solution: remember it and restart the round counter.
      bestObjective = objective;
      bestMedoids = currentMedoids;
      badMedoids = computeBadMedoids(currentMedoids, clusters, (int) (relation.size() * 0.1 / k));
      rounds = 0;
    }
    currentMedoids = computeM_current(medoids, bestMedoids, badMedoids, random);
    rounds++;
    if (clusterProgress != null) {
      clusterProgress.setProcessed(clusters.size(), LOG);
    }
  }
  LOG.setCompleted(clusterProgress);

  // Phase 3: refinement
  if (LOG.isVerbose()) {
    LOG.verbose("3. Refinement phase...");
  }
  final List<Pair<double[], long[]>> refinedDimensions = findDimensions(clusters, relation);
  final List<PROCLUSCluster> finalClusters = finalAssignment(refinedDimensions, relation);

  // Convert the internal clusters into the result structure; names are
  // numbered starting at 1.
  final Clustering<SubspaceModel> result = new Clustering<>("ProClus clustering", "proclus-clustering");
  int clusterNo = 0;
  for (PROCLUSCluster pc : finalClusters) {
    final Cluster<SubspaceModel> cluster = new Cluster<>(pc.objectIDs);
    cluster.setModel(new SubspaceModel(new Subspace(pc.getDimensions()), pc.centroid));
    cluster.setName("cluster_" + (++clusterNo));
    result.addToplevelCluster(cluster);
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class CLIQUE method run.
/**
 * Performs the CLIQUE algorithm on the given relation.
 * <p>
 * Step 1 finds the one-dimensional dense subspaces and then repeatedly
 * derives the k-dimensional dense subspaces from the (k-1)-dimensional ones,
 * until either the full dimensionality is reached or no dense subspace is
 * left. Step 2 determines the clusters within the dense subspaces of each
 * dimensionality and adds every one of them as a top-level cluster whose
 * model consists of the subspace and the centroid of its members.
 *
 * @param relation Data relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  StepProgress step = new StepProgress(2);

  // 1. Identification of subspaces that contain clusters
  step.beginStep(1, "Identification of subspaces that contain clusters", LOG);
  // Entry i holds the dense subspaces of dimensionality i + 1.
  ArrayList<List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new ArrayList<>(dimensionality);
  List<CLIQUESubspace<V>> denseSubspaces = findOneDimensionalDenseSubspaces(relation);
  dimensionToDenseSubspaces.add(denseSubspaces);
  if (LOG.isVerbose()) {
    LOG.verbose("1-dimensional dense subspaces: " + denseSubspaces.size());
  }
  if (LOG.isDebugging()) {
    for (CLIQUESubspace<V> s : denseSubspaces) {
      LOG.debug(s.toString(" "));
    }
  }

  // Bottom-up: stop early once a level yields no dense subspace at all.
  for (int k = 2; k <= dimensionality && !denseSubspaces.isEmpty(); k++) {
    denseSubspaces = findDenseSubspaces(relation, denseSubspaces);
    assert (dimensionToDenseSubspaces.size() == k - 1);
    dimensionToDenseSubspaces.add(denseSubspaces);
    if (LOG.isVerbose()) {
      LOG.verbose(k + "-dimensional dense subspaces: " + denseSubspaces.size());
    }
    if (LOG.isDebugging()) {
      for (CLIQUESubspace<V> s : denseSubspaces) {
        LOG.debug(s.toString(" "));
      }
    }
  }

  // 2. Identification of clusters
  step.beginStep(2, "Identification of clusters", LOG);
  // build result
  Clustering<SubspaceModel> result = new Clustering<>("CLIQUE clustering", "clique-clustering");
  for (int dim = 0; dim < dimensionToDenseSubspaces.size(); dim++) {
    List<CLIQUESubspace<V>> subspaces = dimensionToDenseSubspaces.get(dim);
    List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
    if (LOG.isVerbose()) {
      LOG.verbose((dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
    }
    for (Pair<Subspace, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
      Cluster<SubspaceModel> newCluster = new Cluster<>(modelAndCluster.second);
      newCluster.setModel(new SubspaceModel(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).getArrayRef()));
      result.addToplevelCluster(newCluster);
    }
  }
  // Without this the progress would stay at step 2 of 2 and never be
  // reported as finished.
  step.setCompleted(LOG);
  return result;
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class DocumentParameters method makeByOptOverviewHTML.
/**
 * Builds the HTML overview of the command line parameters, grouped by
 * option.
 * <p>
 * The document contains one definition-list entry per option ID (in
 * {@code SortByOption} order). Each entry describes the option using the
 * first registered parameter and then lists all classes using the option,
 * noting where a class deviates in its class restriction or default value.
 *
 * @param byopt Map from option ID to the (parameter, owning class) pairs
 *        using this option; the lists are sorted in place
 * @return the generated HTML document
 * @throws IOException if the XML document builder cannot be configured
 */
private static Document makeByOptOverviewHTML(Map<OptionID, List<Pair<Parameter<?>, Class<?>>>> byopt) throws IOException {
  final DocumentBuilder docBuilder;
  try {
    docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
  } catch (ParserConfigurationException e1) {
    throw new IOException(e1);
  }
  final Document htmldoc = docBuilder.getDOMImplementation().createDocument(HTMLUtil.HTML_NAMESPACE, HTMLUtil.HTML_HTML_TAG, null);
  final Element root = htmldoc.getDocumentElement();

  // Skeleton: <head> and <body>, each starting with the modification warning
  final Element head = htmldoc.createElement(HTMLUtil.HTML_HEAD_TAG);
  root.appendChild(head);
  final Element body = htmldoc.createElement(HTMLUtil.HTML_BODY_TAG);
  root.appendChild(body);
  head.appendChild(htmldoc.createComment(MODIFICATION_WARNING));
  body.appendChild(htmldoc.createComment(MODIFICATION_WARNING));

  // <meta> carrying the charset information
  final Element meta = htmldoc.createElement(HTMLUtil.HTML_META_TAG);
  meta.setAttribute(HTMLUtil.HTML_HTTP_EQUIV_ATTRIBUTE, HTMLUtil.HTML_HTTP_EQUIV_CONTENT_TYPE);
  meta.setAttribute(HTMLUtil.HTML_CONTENT_ATTRIBUTE, HTMLUtil.CONTENT_TYPE_HTML_UTF8);
  head.appendChild(meta);

  // Stylesheet link
  final Element css = htmldoc.createElement(HTMLUtil.HTML_LINK_TAG);
  css.setAttribute(HTMLUtil.HTML_REL_ATTRIBUTE, HTMLUtil.HTML_REL_STYLESHEET);
  css.setAttribute(HTMLUtil.HTML_TYPE_ATTRIBUTE, HTMLUtil.CONTENT_TYPE_CSS);
  css.setAttribute(HTMLUtil.HTML_HREF_ATTRIBUTE, CSSFILE);
  head.appendChild(css);

  // Page title
  final Element title = htmldoc.createElement(HTMLUtil.HTML_TITLE_TAG);
  title.setTextContent("Command line parameter overview - by option");
  head.appendChild(title);

  // Visible heading
  final Element h1 = htmldoc.createElement(HTMLUtil.HTML_H1_TAG);
  h1.setTextContent("ELKI command line parameter overview:");
  body.appendChild(h1);

  // Main definition list, one DT/DD pair per option
  final Element maindl = htmldoc.createElement(HTMLUtil.HTML_DL_TAG);
  body.appendChild(maindl);

  final Comparator<OptionID> osort = new SortByOption();
  final Comparator<Class<?>> csort = new ELKIServiceScanner.ClassSorter();
  // Order usages by option first and by owning class second.
  final Comparator<Pair<Parameter<?>, Class<?>>> psort = new Comparator<Pair<Parameter<?>, Class<?>>>() {
    @Override
    public int compare(Pair<Parameter<?>, Class<?>> a, Pair<Parameter<?>, Class<?>> b) {
      final int byOption = osort.compare(a.getFirst().getOptionID(), b.getFirst().getOptionID());
      if (byOption != 0) {
        return byOption;
      }
      return csort.compare(a.getSecond(), b.getSecond());
    }
  };

  final List<OptionID> opts = new ArrayList<>(byopt.keySet());
  Collections.sort(opts, osort);
  for (OptionID oid : opts) {
    // The first registered parameter serves as the reference description.
    final Parameter<?> firstopt = byopt.get(oid).get(0).getFirst();
    final String optname = firstopt.getOptionID().getName();

    // DT = definition term: anchor for references, followed by the syntax
    final Element optdt = htmldoc.createElement(HTMLUtil.HTML_DT_TAG);
    final Element anchor = htmldoc.createElement(HTMLUtil.HTML_A_TAG);
    anchor.setAttribute(HTMLUtil.HTML_NAME_ATTRIBUTE, optname);
    optdt.appendChild(anchor);
    final Element syntax = htmldoc.createElement(HTMLUtil.HTML_TT_TAG);
    syntax.setTextContent(SerializedParameterization.OPTION_PREFIX + firstopt.getOptionID().getName() + " " + firstopt.getSyntax());
    optdt.appendChild(syntax);
    maindl.appendChild(optdt);

    // DD = definition description: short description first
    final Element optdd = htmldoc.createElement(HTMLUtil.HTML_DD_TAG);
    final Element description = htmldoc.createElement(HTMLUtil.HTML_P_TAG);
    HTMLUtil.appendMultilineText(htmldoc, description, firstopt.getShortDescription());
    optdd.appendChild(description);

    // Class restriction, default value and known implementations
    final Class<?> superclass = getRestrictionClass(oid, firstopt, byopt);
    final boolean restricted = superclass != null;
    if (restricted) {
      appendClassRestriction(htmldoc, superclass, optdd);
    }
    appendDefaultValueIfSet(htmldoc, firstopt, optdd);
    if (restricted) {
      appendKnownImplementationsIfNonempty(htmldoc, superclass, optdd);
    }
    maindl.appendChild(optdd);

    // Nested list of the classes that use this option
    final Element classesul = htmldoc.createElement(HTMLUtil.HTML_UL_TAG);
    final Element usedBy = htmldoc.createElement(HTMLUtil.HTML_P_TAG);
    usedBy.appendChild(htmldoc.createTextNode(HEADER_PARAMETER_FOR));
    optdd.appendChild(usedBy);
    optdd.appendChild(classesul);

    final List<Pair<Parameter<?>, Class<?>>> usages = byopt.get(oid);
    Collections.sort(usages, psort);
    for (Pair<Parameter<?>, Class<?>> usage : usages) {
      final Parameter<?> param = usage.getFirst();
      final String classname = usage.getSecond().getName();

      // List item with a link back to the owning class
      final Element classli = htmldoc.createElement(HTMLUtil.HTML_LI_TAG);
      final Element classlink = htmldoc.createElement(HTMLUtil.HTML_A_TAG);
      classlink.setAttribute(HTMLUtil.HTML_HREF_ATTRIBUTE, linkForClassName(classname));
      classlink.setTextContent(classname);
      classli.appendChild(classlink);

      // Report a class restriction that differs from the common one
      if (param instanceof ClassParameter<?> && firstopt instanceof ClassParameter<?>) {
        final Class<?> restriction = ((ClassParameter<?>) param).getRestrictionClass();
        if (restriction == null) {
          appendNoClassRestriction(htmldoc, classli);
        } else if (!restriction.equals(superclass)) {
          // TODO: if it is null, it could still be different!
          appendClassRestriction(htmldoc, restriction, classli);
        }
      }

      // Report a default value that differs from the reference parameter
      final Object ownDefault = param.getDefaultValue();
      if (ownDefault == null) {
        if (firstopt.getDefaultValue() != null) {
          appendNoDefaultValue(htmldoc, classli);
        }
      } else if (!ownDefault.equals(firstopt.getDefaultValue())) {
        appendDefaultValueIfSet(htmldoc, param, classli);
      }
      classesul.appendChild(classli);
    }
  }
  return htmldoc;
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class DocumentParameters method buildParameterIndex.
/**
 * Collects the parameters of all known implementations and indexes them by
 * owning class and by option ID.
 * <p>
 * Every candidate class is handed to an {@code Instancer} on a worker
 * thread, with a time limit of one second per class. The parameters tracked
 * during these attempts are then added to the two output maps, keeping only
 * one entry per option ID within a class, and one entry per class within an
 * option ID.
 *
 * @param byclass Output map: class to the parameters it uses
 * @param byopt Output map: option ID to (parameter, class) pairs
 * @throws RuntimeException if instantiating a class exceeds the time limit
 */
private static void buildParameterIndex(Map<Class<?>, List<Parameter<?>>> byclass, Map<OptionID, List<Pair<Parameter<?>, Class<?>>>> byopt) {
  final ArrayList<TrackedParameter> options = new ArrayList<>();
  List<Class<?>> objs = ELKIServiceRegistry.findAllImplementations(Object.class, false, true);
  Collections.sort(objs, new ELKIServiceScanner.ClassSorter());
  Class<?> appc = appBaseClass();
  ExecutorService es = Executors.newSingleThreadExecutor();
  try {
    for (final Class<?> cls : objs) {
      // Doesn't have a proper name?
      if (cls.getCanonicalName() == null) {
        continue;
      }
      // Skip application classes: their constructors / parameterizers may
      // start AWT threads.
      if (appc != null && appc.isAssignableFrom(cls)) {
        continue;
      }
      UnParameterization config = new UnParameterization();
      TrackParameters track = new TrackParameters(config, cls);
      try {
        // Wait up to one second.
        es.submit(new FutureTask<Object>(new Instancer(cls, track, options), null)).get(1L, TimeUnit.SECONDS);
      } catch (TimeoutException e) {
        // The worker is stuck; abort (the executor is stopped below).
        LOG.warning("Timeout on instantiating " + cls.getName());
        throw new RuntimeException(e);
      } catch (Exception e) {
        // Best effort: report the failure and go on with the next class.
        LOG.warning("Error instantiating " + cls.getName(), e.getCause());
      }
    }
  } finally {
    // Always release the worker thread. Previously the executor was only
    // shut down on timeout, leaking a non-daemon thread otherwise.
    es.shutdownNow();
  }
  LOG.debug("Documenting " + options.size() + " parameter instances.");
  for (TrackedParameter pp : options) {
    if (pp.getOwner() == null || pp.getParameter() == null) {
      LOG.debugFiner("Null: " + pp.getOwner() + " " + pp.getParameter());
      continue;
    }
    // The owner is either a class already, or an instance of the class.
    Class<?> c = (Class<?>) ((pp.getOwner() instanceof Class) ? pp.getOwner() : pp.getOwner().getClass());
    Parameter<?> o = pp.getParameter();

    // Index by class: just collect unique occurrences of each option ID
    List<Parameter<?>> byc = byclass.get(c);
    if (byc == null) {
      byc = new ArrayList<>();
      byclass.put(c, byc);
    }
    boolean knownOption = false;
    for (Parameter<?> par : byc) {
      // Identity comparison, as in the original code.
      // NOTE(review): presumably option IDs are shared instances - confirm.
      if (par.getOptionID() == o.getOptionID()) {
        knownOption = true;
        break;
      }
    }
    if (!knownOption) {
      byc.add(o);
    }

    // Index by option: at most one entry per owning class
    List<Pair<Parameter<?>, Class<?>>> byo = byopt.get(o.getOptionID());
    if (byo == null) {
      byo = new ArrayList<>();
      byopt.put(o.getOptionID(), byo);
    }
    boolean knownClass = false;
    for (Pair<Parameter<?>, Class<?>> pair : byo) {
      if (pair.second.equals(c)) {
        knownClass = true;
        break;
      }
    }
    if (!knownClass) {
      byo.add(new Pair<Parameter<?>, Class<?>>(o, c));
    }
  }
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class DiSH method extractClusters.
/**
* Extracts the clusters from the cluster order.
*
* The objects are processed in cluster order. Each object is put into a
* cluster that has the same preference vector, and whose projected centroid
* is compatible with the object (matching subspace dimensionality, weighted
* distance of at most 2 * epsilon); if there is no such cluster, a new one is
* started. Afterwards, the predecessor of the first object of each cluster
* may be moved into that cluster.
*
* @param relation the database storing the objects
* @param clusterOrder the cluster order to extract the clusters from
* @return the extracted clusters, as map from preference vector to the list
*         of (parallel) clusters having this preference vector
*/
private Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> extractClusters(Relation<V> relation, DiSHClusterOrder clusterOrder) {
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", relation.size(), LOG) : null;
// Result map; the keys are long[] preference vectors, hashed with the
// BitsUtil hash strategy.
Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> clustersMap = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);
// Note clusterOrder currently contains DBID objects anyway.
// Per-object record of the (preference vector, cluster) it was assigned to.
WritableDataStore<Pair<long[], ArrayModifiableDBIDs>> entryToClusterMap = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Pair.class);
for (DBIDIter iter = clusterOrder.iter(); iter.valid(); iter.advance()) {
V object = relation.get(iter);
long[] preferenceVector = clusterOrder.getCommonPreferenceVector(iter);
// get the list of (parallel) clusters for the preference vector
List<ArrayModifiableDBIDs> parallelClusters = clustersMap.get(preferenceVector);
if (parallelClusters == null) {
parallelClusters = new ArrayList<>();
clustersMap.put(preferenceVector, parallelClusters);
}
// look for the proper cluster
ArrayModifiableDBIDs cluster = null;
for (ArrayModifiableDBIDs c : parallelClusters) {
// Centroid of the candidate cluster, projected using the preference vector
NumberVector c_centroid = ProjectedCentroid.make(preferenceVector, relation, c);
// NOTE(review): the preference vector is combined with itself here, and
// passed twice below - presumably because object and candidate cluster
// share the same preference vector; confirm against BitsUtil.andCMin.
long[] commonPreferenceVector = BitsUtil.andCMin(preferenceVector, preferenceVector);
int subspaceDim = subspaceDimensionality(object, c_centroid, preferenceVector, preferenceVector, commonPreferenceVector);
// Accept the first cluster with matching correlation value that is within
// a weighted distance of 2 * epsilon
if (subspaceDim == clusterOrder.getCorrelationValue(iter)) {
double d = weightedDistance(object, c_centroid, commonPreferenceVector);
if (d <= 2 * epsilon) {
cluster = c;
break;
}
}
}
// No suitable cluster found: start a new parallel cluster
if (cluster == null) {
cluster = DBIDUtil.newArray();
parallelClusters.add(cluster);
}
cluster.add(iter);
entryToClusterMap.put(iter, new Pair<>(preferenceVector, cluster));
LOG.incrementProcessed(progress);
}
LOG.ensureCompleted(progress);
// Debug output: preference vector and size of every cluster found so far
if (LOG.isDebuggingFiner()) {
int dim = RelationUtil.dimensionality(relation);
StringBuilder msg = new StringBuilder("Step 0");
for (Map.Entry<long[], List<ArrayModifiableDBIDs>> clusterList : clustersMap.entrySet()) {
for (ArrayModifiableDBIDs c : clusterList.getValue()) {
msg.append('\n').append(BitsUtil.toStringLow(clusterList.getKey(), dim)).append(" ids ").append(c.size());
}
}
LOG.debugFiner(msg.toString());
}
// add the predecessor to the cluster
DBIDVar cur = DBIDUtil.newVar(), pre = DBIDUtil.newVar();
for (long[] pv : clustersMap.keySet()) {
List<ArrayModifiableDBIDs> parallelClusters = clustersMap.get(pv);
for (ArrayModifiableDBIDs cluster : parallelClusters) {
// A cluster may be empty, since objects are removed from clusters below
if (cluster.isEmpty()) {
continue;
}
// cur = first object of the cluster, pre = its predecessor
cluster.assignVar(0, cur);
clusterOrder.getPredecessor(cur, pre);
// Skip if there is no predecessor, or it is the object itself
if (!pre.isSet() || DBIDUtil.equal(pre, cur)) {
continue;
}
// parallel cluster
if (BitsUtil.equal(clusterOrder.getCommonPreferenceVector(pre), clusterOrder.getCommonPreferenceVector(cur))) {
continue;
}
// Skip if the predecessor has a smaller correlation value or a smaller
// reachability than the first object of the cluster
if (//
clusterOrder.getCorrelationValue(pre) < clusterOrder.getCorrelationValue(cur) || clusterOrder.getReachability(pre) < clusterOrder.getReachability(cur)) {
continue;
}
// Move the predecessor from its previous cluster into this cluster, and
// update its record accordingly
Pair<long[], ArrayModifiableDBIDs> oldCluster = entryToClusterMap.get(pre);
oldCluster.second.remove(pre);
cluster.add(pre);
entryToClusterMap.put(pre, new Pair<>(pv, cluster));
}
}
return clustersMap;
}
Aggregations