Search in sources:

Example 1 with CLIQUEUnit

use of de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUEUnit in project elki by elki-project.

Method initOneDimensionalUnits of the class CLIQUE.

/**
 * Builds the grid of one-dimensional units: the value range observed in each
 * dimension is cut into {@code xsi} intervals of equal length and one unit is
 * created per interval and dimension.
 *
 * @param database the relation holding the feature vectors to partition
 * @return the one-dimensional units, created interval by interval and, within
 *         each interval index, dimension by dimension
 */
private Collection<CLIQUEUnit<V>> initOneDimensionalUnits(Relation<V> database) {
    // Debug text is only assembled when the finest debug level is enabled.
    final StringBuilder msg = LOG.isDebuggingFiner() ? new StringBuilder(1000) : null;
    final int dim = RelationUtil.dimensionality(database);

    // Scan the relation once to collect the value range of every dimension.
    final double[] lower = new double[dim];
    final double[] upper = new double[dim];
    Arrays.fill(lower, Double.MAX_VALUE);
    Arrays.fill(upper, -Double.MAX_VALUE);
    DBIDIter iter = database.iterDBIDs();
    while (iter.valid()) {
        updateMinMax(database.get(iter), lower, upper);
        iter.advance();
    }

    // Pad each maximum slightly, then derive the interval width per dimension.
    // NOTE(review): the padding presumably keeps the largest value inside the
    // last interval - confirm against CLIQUEInterval's bound semantics.
    final double[] widths = new double[dim];
    for (int d = 0; d < dim; d++) {
        upper[d] += 0.0001;
        widths[d] = (upper[d] - lower[d]) / xsi;
    }
    if (msg != null) {
        msg.append("   minima: ");
        FormatUtil.formatTo(msg, lower, ", ", FormatUtil.NF2);
        msg.append("\n   maxima: ");
        FormatUtil.formatTo(msg, upper, ", ", FormatUtil.NF2);
        msg.append("\n   unit lengths: ");
        FormatUtil.formatTo(msg, widths, ", ", FormatUtil.NF2);
    }

    // Row x holds the lower bound of interval x in every dimension; the final
    // row holds the (padded) maxima instead of a computed value.
    final double[][] bounds = new double[xsi + 1][dim];
    for (int x = 0; x <= xsi; x++) {
        final double[] row = bounds[x];
        for (int d = 0; d < dim; d++) {
            if (x < xsi) {
                row[d] = lower[d] + x * widths[d];
            }
            else {
                row[d] = upper[d];
            }
        }
    }
    if (msg != null) {
        msg.append("   unit bounds ");
        FormatUtil.formatTo(msg, bounds, "    [", "]\n", ", ", FormatUtil.NF2);
    }

    // One unit per (interval index, dimension) pair, spanning two adjacent rows.
    final List<CLIQUEUnit<V>> result = new ArrayList<>(xsi * dim);
    for (int x = 0; x < xsi; x++) {
        final double[] from = bounds[x];
        final double[] to = bounds[x + 1];
        for (int d = 0; d < dim; d++) {
            CLIQUEInterval interval = new CLIQUEInterval(d, from[d], to[d]);
            result.add(new CLIQUEUnit<V>(interval));
        }
    }
    if (msg != null) {
        msg.append("   total number of 1-dim units: ").append(result.size());
        LOG.debugFiner(msg.toString());
    }
    return result;
}
Also used : CLIQUEInterval(de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUEInterval) ArrayList(java.util.ArrayList) CLIQUEUnit(de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUEUnit) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 2 with CLIQUEUnit

use of de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUEUnit in project elki by elki-project.

Method findOneDimensionalDenseSubspaceCandidates of the class CLIQUE.

/**
 * Makes one pass over the database to assign every feature vector to the
 * one-dimensional units, keeps the units whose selectivity reaches
 * {@code tau}, and groups those dense units into one subspace per dimension.
 *
 * @param database the database to run the algorithm on
 * @return the one-dimensional dense subspace candidates, sorted with
 *         {@code CLIQUESubspace.CoverageComparator} (reverse ordered by their
 *         coverage)
 */
private List<CLIQUESubspace<V>> findOneDimensionalDenseSubspaceCandidates(Relation<V> database) {
    final Collection<CLIQUEUnit<V>> units = initOneDimensionalUnits(database);
    final double total = database.size();

    // Offer every feature vector to every unit.
    DBIDIter iter = database.iterDBIDs();
    while (iter.valid()) {
        final V vector = database.get(iter);
        for (CLIQUEUnit<V> unit : units) {
            unit.addFeatureVector(iter, vector);
        }
        iter.advance();
    }

    // Collect the dense units, grouped by the dimension of their interval.
    int denseCount = 0;
    final Map<Integer, CLIQUESubspace<V>> byDimension = new HashMap<>();
    for (CLIQUEUnit<V> unit : units) {
        // Skip units that are not dense (negated form keeps the exact
        // comparison semantics of ">= tau").
        if (!(unit.selectivity(total) >= tau)) {
            continue;
        }
        denseCount++;
        final int dim = unit.getIntervals().iterator().next().getDimension();
        final Integer key = Integer.valueOf(dim);
        CLIQUESubspace<V> subspace = byDimension.get(key);
        if (subspace == null) {
            subspace = new CLIQUESubspace<>(dim);
            byDimension.put(key, subspace);
        }
        subspace.addDenseUnit(unit);
    }

    if (LOG.isDebugging()) {
        LOG.debugFine("   number of 1-dim dense units: " + denseCount
            + "\n   number of 1-dim dense subspace candidates: " + byDimension.size());
    }

    final List<CLIQUESubspace<V>> candidates = new ArrayList<>(byDimension.values());
    Collections.sort(candidates, new CLIQUESubspace.CoverageComparator());
    return candidates;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CLIQUESubspace(de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUESubspace) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) CLIQUEUnit(de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUEUnit)

Aggregations

CLIQUEUnit (de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUEUnit): 2, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 2, ArrayList (java.util.ArrayList): 2, CLIQUEInterval (de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUEInterval): 1, CLIQUESubspace (de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUESubspace): 1, HashMap (java.util.HashMap): 1