Search in sources :

Example 26 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class CacheDoubleDistanceInOnDiskMatrix method run.

@Override
public void run() {
    database.initialize();
    Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
    DBIDRange ids = DBIDUtil.assertRange(relation.getDBIDs());
    int size = ids.size();
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Precomputing distances", (int) (((size + 1) * (long) size) >>> 1), LOG) : null;
    try (// 
    OnDiskUpperTriangleMatrix matrix = new OnDiskUpperTriangleMatrix(out, DiskCacheBasedDoubleDistanceFunction.DOUBLE_CACHE_MAGIC, 0, ByteArrayUtil.SIZE_DOUBLE, size)) {
        DBIDArrayIter id1 = ids.iter(), id2 = ids.iter();
        for (; id1.valid(); id1.advance()) {
            for (id2.seek(id1.getOffset()); id2.valid(); id2.advance()) {
                double d = distanceQuery.distance(id1, id2);
                if (debugExtraCheckSymmetry) {
                    double d2 = distanceQuery.distance(id2, id1);
                    if (Math.abs(d - d2) > 0.0000001) {
                        LOG.warning("Distance function doesn't appear to be symmetric!");
                    }
                }
                try {
                    matrix.getRecordBuffer(id1.getOffset(), id2.getOffset()).putDouble(d);
                } catch (IOException e) {
                    throw new AbortException("Error writing distance record " + DBIDUtil.toString(id1) + "," + DBIDUtil.toString(id2) + " to matrix.", e);
                }
            }
            if (prog != null) {
                prog.setProcessed(prog.getProcessed() + (size - id1.getOffset()), LOG);
            }
        }
    } catch (IOException e) {
        throw new AbortException("Error precomputing distance matrix.", e);
    }
    LOG.ensureCompleted(prog);
}
Also used : OnDiskUpperTriangleMatrix(de.lmu.ifi.dbs.elki.persistent.OnDiskUpperTriangleMatrix) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IOException(java.io.IOException) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 27 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class CanopyPreClustering method run.

/**
 * Run the algorithm
 *
 * @param database Database
 * @param relation Relation to process
 */
public Clustering<PrototypeModel<O>> run(Database database, Relation<O> relation) {
    if (!(t1 >= t2)) {
        throw new AbortException("T1 must be at least as large as T2.");
    }
    DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    ModifiableDBIDs ids = DBIDUtil.newHashSet(relation.getDBIDs());
    ArrayList<Cluster<PrototypeModel<O>>> clusters = new ArrayList<>();
    final int size = relation.size();
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Canopy clustering", size, LOG) : null;
    DBIDVar first = DBIDUtil.newVar();
    while (!ids.isEmpty()) {
        // Remove first element:
        ids.pop(first);
        // Start a new cluster:
        ModifiableDBIDs cids = DBIDUtil.newArray();
        cids.add(first);
        // Compare to remaining objects:
        for (DBIDMIter iter = ids.iter(); iter.valid(); iter.advance()) {
            double dist = dq.distance(first, iter);
            // Inclusion threshold:
            if (dist > t1) {
                continue;
            }
            cids.add(iter);
            // Removal threshold:
            if (dist <= t2) {
                iter.remove();
            }
        }
        // TODO: remember the central object using a CanopyModel?
        // Construct cluster:
        clusters.add(new Cluster<>(cids, new SimplePrototypeModel<>(relation.get(first))));
        if (prog != null) {
            prog.setProcessed(size - ids.size(), LOG);
        }
    }
    LOG.ensureCompleted(prog);
    return new Clustering<>("Canopy clustering", "canopy-clustering", clusters);
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) SimplePrototypeModel(de.lmu.ifi.dbs.elki.data.model.SimplePrototypeModel) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 28 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class NaiveAgglomerativeHierarchicalClustering3 method run.

/**
 * Run the algorithm
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public Result run(Database db, Relation<O> relation) {
    DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int size = ids.size();
    if (size > 0x10000) {
        throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
    }
    if (Linkage.SINGLE.equals(linkage)) {
        LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }
    // Compute the initial (lower triangular) distance matrix.
    double[] scratch = new double[triangleSize(size)];
    DBIDArrayIter ix = ids.iter(), iy = ids.iter();
    // Position counter - must agree with computeOffset!
    int pos = 0;
    boolean square = Linkage.WARD.equals(linkage) && !getDistanceFunction().isSquared();
    for (int x = 0; ix.valid(); x++, ix.advance()) {
        iy.seek(0);
        for (int y = 0; y < x; y++, iy.advance()) {
            scratch[pos] = dq.distance(ix, iy);
            // Ward uses variances -- i.e. squared values
            if (square) {
                scratch[pos] *= scratch[pos];
            }
            pos++;
        }
    }
    // Initialize space for result:
    double[] height = new double[size];
    Arrays.fill(height, Double.POSITIVE_INFINITY);
    // Parent node, to track merges
    // have every object point to itself initially
    ArrayModifiableDBIDs parent = DBIDUtil.newArray(ids);
    // Active clusters, when not trivial.
    Int2ReferenceMap<ModifiableDBIDs> clusters = new Int2ReferenceOpenHashMap<>();
    // Repeat until everything merged, except the desired number of clusters:
    final int stop = size - numclusters;
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
    for (int i = 0; i < stop; i++) {
        double min = Double.POSITIVE_INFINITY;
        int minx = -1, miny = -1;
        for (int x = 0; x < size; x++) {
            if (height[x] < Double.POSITIVE_INFINITY) {
                continue;
            }
            final int xbase = triangleSize(x);
            for (int y = 0; y < x; y++) {
                if (height[y] < Double.POSITIVE_INFINITY) {
                    continue;
                }
                final int idx = xbase + y;
                if (scratch[idx] < min) {
                    min = scratch[idx];
                    minx = x;
                    miny = y;
                }
            }
        }
        assert (minx >= 0 && miny >= 0);
        // Avoid allocating memory, by reusing existing iterators:
        ix.seek(minx);
        iy.seek(miny);
        // Perform merge in data structure: x -> y
        // Since y < x, prefer keeping y, dropping x.
        height[minx] = min;
        parent.set(minx, iy);
        // Merge into cluster
        ModifiableDBIDs cx = clusters.get(minx);
        ModifiableDBIDs cy = clusters.get(miny);
        // cluster sizes, for averaging
        int sizex = 1, sizey = 1;
        if (cy == null) {
            cy = DBIDUtil.newHashSet();
            cy.add(iy);
        } else {
            sizey = cy.size();
        }
        if (cx == null) {
            cy.add(ix);
        } else {
            sizex = cx.size();
            cy.addDBIDs(cx);
            clusters.remove(minx);
        }
        clusters.put(miny, cy);
        // Update distance matrix. Note: miny < minx
        // Implementation note: most will not need sizej, and could save the
        // hashmap lookup.
        final int xbase = triangleSize(minx), ybase = triangleSize(miny);
        // Write to (y, j), with j < y
        for (int j = 0; j < miny; j++) {
            if (height[j] < Double.POSITIVE_INFINITY) {
                continue;
            }
            final DBIDs idsj = clusters.get(j);
            final int sizej = (idsj == null) ? 1 : idsj.size();
            scratch[ybase + j] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[ybase + j], sizej, min);
        }
        // Write to (j, y), with y < j < x
        for (int j = miny + 1; j < minx; j++) {
            if (height[j] < Double.POSITIVE_INFINITY) {
                continue;
            }
            final int jbase = triangleSize(j);
            final DBIDs idsj = clusters.get(j);
            final int sizej = (idsj == null) ? 1 : idsj.size();
            scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[jbase + miny], sizej, min);
        }
        // Write to (j, y), with y < x < j
        for (int j = minx + 1; j < size; j++) {
            if (height[j] < Double.POSITIVE_INFINITY) {
                continue;
            }
            final DBIDs idsj = clusters.get(j);
            final int sizej = (idsj == null) ? 1 : idsj.size();
            final int jbase = triangleSize(j);
            scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Build the clustering result
    final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
    for (int x = 0; x < size; x++) {
        if (height[x] < Double.POSITIVE_INFINITY) {
            DBIDs cids = clusters.get(x);
            if (cids == null) {
                ix.seek(x);
                cids = DBIDUtil.deref(ix);
            }
            Cluster<Model> cluster = new Cluster<>("Cluster", cids);
            dendrogram.addToplevelCluster(cluster);
        }
    }
    return dendrogram;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) Model(de.lmu.ifi.dbs.elki.data.model.Model) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) Int2ReferenceOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ReferenceOpenHashMap)

Example 29 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class AbstractXTree method initializeCapacities.

@Override
protected void initializeCapacities(SpatialEntry exampleLeaf) {
    /* Simulate the creation of a leaf page to get the page capacity */
    try {
        int cap = 0;
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(baos);
        SpatialPointLeafEntry sl = new SpatialPointLeafEntry(DBIDUtil.importInteger(0), new double[exampleLeaf.getDimensionality()]);
        while (baos.size() <= getPageSize()) {
            sl.writeExternal(oos);
            oos.flush();
            cap++;
        }
        // the last one caused the page to overflow.
        leafCapacity = cap - 1;
    } catch (IOException e) {
        throw new AbortException("Error determining page sizes.", e);
    }
    /* Simulate the creation of a directory page to get the capacity */
    try {
        int cap = 0;
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(baos);
        ModifiableHyperBoundingBox hb = new ModifiableHyperBoundingBox(new double[exampleLeaf.getDimensionality()], new double[exampleLeaf.getDimensionality()]);
        XTreeDirectoryEntry xl = new XTreeDirectoryEntry(0, hb);
        while (baos.size() <= getPageSize()) {
            xl.writeExternal(oos);
            oos.flush();
            cap++;
        }
        dirCapacity = cap - 1;
    } catch (IOException e) {
        throw new AbortException("Error determining page sizes.", e);
    }
    if (dirCapacity <= 1) {
        throw new IllegalArgumentException("Node size of " + getPageSize() + " Bytes is chosen too small!");
    }
    if (dirCapacity < 10) {
        getLogger().warning("Page size is choosen very small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1));
    }
    // minimum entries per directory node
    dirMinimum = (int) Math.round((dirCapacity - 1) * settings.relativeMinEntries);
    if (dirMinimum < 2) {
        dirMinimum = 2;
    }
    // minimum entries per directory node
    settings.min_fanout = (int) Math.round((dirCapacity - 1) * settings.relativeMinFanout);
    if (settings.min_fanout < 2) {
        settings.min_fanout = 2;
    }
    if (leafCapacity <= 1) {
        throw new IllegalArgumentException("Node size of " + getPageSize() + " Bytes is chosen too small!");
    }
    if (leafCapacity < 10) {
        getLogger().warning("Page size is choosen very small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1));
    }
    // minimum entries per leaf node
    leafMinimum = (int) Math.round((leafCapacity - 1) * settings.relativeMinEntries);
    if (leafMinimum < 2) {
        leafMinimum = 2;
    }
    dimensionality = exampleLeaf.getDimensionality();
    if (getLogger().isVerbose()) {
        getLogger().verbose("Directory Capacity:  " + (dirCapacity - 1) + "\nDirectory minimum: " + dirMinimum + "\nLeaf Capacity:     " + (leafCapacity - 1) + "\nLeaf Minimum:      " + leafMinimum + "\nminimum fanout: " + settings.min_fanout);
    }
}
Also used : SpatialPointLeafEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 30 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class WeightedUncertainSplitFilter method filterSingleObject.

@Override
protected WeightedDiscreteUncertainObject filterSingleObject(NumberVector vec) {
    final int dim = vec.getDimensionality();
    if (dim % mod != 0) {
        throw new AbortException("Vector length " + dim + " not divisible by the number of dimensions + 1 (for probability): " + mod);
    }
    final int num = dim / mod;
    final DoubleVector[] samples = new DoubleVector[num];
    final double[] weights = new double[dims];
    final double[] buf = new double[dims];
    for (int i = 0, j = 0, k = 0, l = 0; i < mod; i++) {
        if (l++ == probcol) {
            weights[k] = vec.doubleValue(i);
        } else {
            buf[j++] = vec.doubleValue(i);
        }
        if (l == mod) {
            samples[k] = DoubleVector.copy(buf);
            j = 0;
            l = 0;
            k++;
        }
    }
    return new WeightedDiscreteUncertainObject(samples, weights);
}
Also used : DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) LessConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint) GreaterEqualConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint) WeightedDiscreteUncertainObject(de.lmu.ifi.dbs.elki.data.uncertain.WeightedDiscreteUncertainObject) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)99 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)25 IOException (java.io.IOException)24 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 ArrayList (java.util.ArrayList)16 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)9 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)9 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)8 Model (de.lmu.ifi.dbs.elki.data.model.Model)8 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)8 Database (de.lmu.ifi.dbs.elki.database.Database)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)8 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)8 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)8 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)6 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)5 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)5