Search in sources :

Example 11 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class DocumentReferences method main.

/**
 * Write the sorted reference list as an HTML document and, optionally, as a
 * second "wiki" text file.
 *
 * <p>Exactly one or two file names are accepted: the first must end in
 * {@code .html}, the optional second one in {@code .trac}. On wrong usage or on
 * any I/O failure a message is logged and the JVM exits with status 1.
 *
 * @param args Command line arguments
 */
public static void main(String[] args) {
    if (args.length < 1 || args.length > 2) {
        LoggingUtil.warning("I need exactly one or two file names to operate!");
        System.exit(1);
    }
    if (!args[0].endsWith(".html") || (args.length > 1 && !args[1].endsWith(".trac"))) {
        LoggingUtil.warning("File name doesn't end in expected extension!");
        System.exit(1);
    }
    List<Pair<Reference, TreeSet<Object>>> refs = sortedReferences();
    File references = new File(args[0]);
    try (FileOutputStream reffo = new FileOutputStream(references);
        OutputStream refstream = new BufferedOutputStream(reffo)) {
        Document refdoc = documentReferences(refs);
        HTMLUtil.writeXHTML(refdoc, refstream);
    } catch (IOException e) {
        LoggingUtil.exception("IO Exception writing HTML output.", e);
        System.exit(1);
    }
    if (args.length > 1) {
        File refwiki = new File(args[1]);
        // Buffer the file stream, same as for the HTML output above.
        try (FileOutputStream reffow = new FileOutputStream(refwiki);
            OutputStream refbufW = new BufferedOutputStream(reffow);
            PrintStream refstreamW = new PrintStream(refbufW, false, "UTF-8")) {
            documentReferencesWiki(refs, refstreamW);
            // PrintStream swallows IOExceptions raised while writing and only
            // records an error flag; checkError() flushes and reports it, so a
            // failed write is not silently treated as success.
            if (refstreamW.checkError()) {
                throw new IOException("Error while writing to " + refwiki);
            }
        } catch (IOException e) {
            LoggingUtil.exception("IO Exception writing Wiki output.", e);
            System.exit(1);
        }
    }
}
Also used : PrintStream(java.io.PrintStream) FileOutputStream(java.io.FileOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) Document(org.w3c.dom.Document) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 12 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class CTLuGLSBackwardSearchAlgorithm method run.

/**
 * Run the algorithm.
 *
 * <p>Repeatedly asks {@code singleIteration} for a candidate on the objects
 * still under consideration. While the candidate's value is not below the
 * standard normal quantile at {@code 1 - alpha / 2}, the value is stored as
 * the candidate's score and the candidate is removed from the working set.
 * Every object left over at the end is given the score 0.
 *
 * @param database Database to process
 * @param relationx Spatial relation
 * @param relationy Attribute relation
 * @return Algorithm result
 */
public OutlierResult run(Database database, Relation<V> relationx, Relation<? extends NumberVector> relationy) {
    WritableDoubleDataStore outlierScores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax scoreRange = new DoubleMinMax(0.0, 0.0);

    // Working set of objects not yet flagged, and a view restricted to it.
    ModifiableDBIDs remaining = DBIDUtil.newHashSet(relationx.getDBIDs());
    ProxyView<V> remainingView = new ProxyView<>(remaining, relationx);
    final double cutoff = NormalDistribution.standardNormalQuantile(1.0 - alpha * .5);

    // Peel off one candidate per pass for as long as it stays significant.
    for (;;) {
        Pair<DBIDVar, Double> top = singleIteration(remainingView, relationy);
        final double value = top.second;
        if (value < cutoff) {
            // Not significant any more: stop. (A NaN value does not stop the loop.)
            break;
        }
        outlierScores.putDouble(top.first, value);
        if (!Double.isNaN(value)) {
            scoreRange.put(value);
        }
        remaining.remove(top.first);
    }

    // Whatever was never removed counts as an inlier.
    for (DBIDIter inlier = remaining.iter(); inlier.valid(); inlier.advance()) {
        outlierScores.putDouble(inlier, 0.0);
    }

    DoubleRelation scoreResult = new MaterializedDoubleRelation("GLSSODBackward", "GLSSODbackward-outlier", outlierScores, relationx.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0, Double.POSITIVE_INFINITY, 0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : ProxyView(de.lmu.ifi.dbs.elki.database.relation.ProxyView) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 13 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class FeatureBagging method run.

/**
 * Run the algorithm on a data set.
 *
 * <p>First runs LOF {@code num} times, each time with a subspace Euclidean
 * distance on a dimension set obtained from {@code randomSubspace}. The
 * individual results are then merged in one of two ways:
 * <ul>
 * <li>{@code breadth} set: the orderings of all results are walked in
 * lock-step, and an object receives the score of the first result that
 * reaches it;</li>
 * <li>otherwise: an object's score is the sum of its non-NaN scores over all
 * results.</li>
 * </ul>
 *
 * @param database Database context
 * @param relation Relation to use
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<NumberVector> relation) {
    final int fullDim = RelationUtil.dimensionality(relation);
    final int lowDim = fullDim >> 1;
    final int highDim = fullDim - 1;
    final Random random = rnd.getSingleThreadedRandom();

    // Phase 1: build the ensemble of LOF results.
    ArrayList<OutlierResult> ensemble = new ArrayList<>(num);
    FiniteProgress lofProgress = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
    for (int run = 0; run < num; run++) {
        long[] subspace = randomSubspace(fullDim, lowDim, highDim, random);
        LOF<NumberVector> lof = new LOF<>(k, new SubspaceEuclideanDistanceFunction(subspace));
        ensemble.add(lof.run(database, relation));
        LOG.incrementProcessed(lofProgress);
    }
    LOG.ensureCompleted(lofProgress);

    // Phase 2: merge the ensemble into a single score per object.
    WritableDoubleDataStore combined = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax range = new DoubleMinMax();
    FiniteProgress mergeProgress = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
    if (!breadth) {
        // Cumulative variant: add up all usable scores of each object.
        for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
            double total = 0.0;
            for (OutlierResult member : ensemble) {
                final double score = member.getScores().doubleValue(obj);
                if (Double.isNaN(score)) {
                    continue;
                }
                total += score;
            }
            combined.putDouble(obj, total);
            range.put(total);
            LOG.incrementProcessed(mergeProgress);
        }
    } else {
        // Breadth-first variant: one cursor per ensemble member, positioned on
        // that member's ordering and paired with its scores. They are all
        // created up front so that they can be advanced "in parallel".
        @SuppressWarnings("unchecked") Pair<DBIDIter, DoubleRelation>[] cursors = (Pair<DBIDIter, DoubleRelation>[]) new Pair[ensemble.size()];
        for (int m = 0; m < cursors.length; m++) {
            OutlierResult member = ensemble.get(m);
            cursors[m] = new Pair<DBIDIter, DoubleRelation>(member.getOrdering().order(relation.getDBIDs()).iter(), member.getScores());
        }
        // One round per rank; within a round every cursor moves by one step.
        final int rounds = relation.size();
        for (int round = 0; round < rounds; round++) {
            for (Pair<DBIDIter, DoubleRelation> cursor : cursors) {
                DBIDIter position = cursor.first;
                if (!position.valid()) {
                    LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
                    continue;
                }
                double score = cursor.second.doubleValue(position);
                // Only the first score reaching an object is kept.
                // NOTE(review): relies on unset entries reading as NaN - confirm
                // against the data store's default value.
                if (Double.isNaN(combined.doubleValue(position))) {
                    combined.putDouble(position, score);
                    range.put(score);
                }
                position.advance();
            }
            // Progress does not take the cursor setup into account.
            LOG.incrementProcessed(mergeProgress);
        }
    }
    LOG.ensureCompleted(mergeProgress);

    OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax());
    DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", combined, relation.getDBIDs());
    return new OutlierResult(meta, scoreres);
}
Also used : LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayList(java.util.ArrayList) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Random(java.util.Random) SubspaceEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 14 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class NaiveMeanShiftClustering method run.

/**
 * Run the mean-shift clustering algorithm.
 *
 * <p>Every object is used as a start position and shifted, for at most
 * {@code MAXITER} steps, to the kernel-weighted centroid of the objects within
 * {@code bandwidth} of the current position. A walk ends when
 * <ul>
 * <li>the range query yields too few neighbors (the object becomes noise),</li>
 * <li>the new position is close enough to an already known cluster center
 * (the object joins that cluster),</li>
 * <li>the shift distance is NaN (the object is assigned nowhere), or</li>
 * <li>the shift falls below the threshold or the step limit is hit (the
 * object founds a new cluster at the new position).</li>
 * </ul>
 *
 * @param database Database
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
    final DistanceQuery<V> distq = database.getDistanceQuery(relation, getDistanceFunction());
    final RangeQuery<V> rangeq = database.getRangeQuery(distq);
    final NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
    final int dim = RelationUtil.dimensionality(relation);
    // A shift shorter than this counts as converged.
    final double threshold = bandwidth * 1E-10;

    // Known cluster centers with their members, plus the noise objects.
    ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
    ModifiableDBIDs noise = DBIDUtil.newArray();

    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Mean-shift clustering", relation.size(), LOG) : null;
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        // The walk starts at the object itself.
        V current = relation.get(iter);
        for (int step = 1; step <= MAXITER; step++) {
            DoubleDBIDList neighbors = rangeq.getRangeForObject(current, bandwidth);
            final int found = neighbors.size();
            // More than one neighbor is always enough; a single neighbor is
            // accepted only after the first step.
            final boolean usable = found > 1 || (found >= 1 && step > 1);
            if (!usable) {
                noise.add(iter);
                break;
            }

            // Shifted position: weighted centroid of the neighborhood.
            Centroid mean = new Centroid(dim);
            for (DoubleDBIDListIter n = neighbors.iter(); n.valid(); n.advance()) {
                mean.put(relation.get(n), kernel.density(n.doubleValue() / bandwidth));
            }
            // TODO: detect 0 weight!
            final V shifted = factory.newNumberVector(mean.getArrayRef());

            // Nearest already known cluster center, if there is one.
            Pair<V, ModifiableDBIDs> nearest = null;
            double nearestDist = Double.POSITIVE_INFINITY;
            for (Pair<V, ModifiableDBIDs> known : clusters) {
                final double d = distq.distance(shifted, known.first);
                if (d < nearestDist) {
                    nearestDist = d;
                    nearest = known;
                }
            }

            final double delta = distq.distance(current, shifted);
            // Join the nearest cluster. A finite nearestDist implies that
            // nearest has been assigned, so it is non-null here.
            if (nearestDist < 10 * threshold || nearestDist * 2 < delta) {
                nearest.second.add(iter);
                break;
            }
            if (step == MAXITER) {
                LOG.warning("No convergence after " + MAXITER + " iterations. Distance: " + delta);
            }
            if (Double.isNaN(delta)) {
                LOG.warning("Encountered NaN distance. Invalid center vector? " + shifted.toString());
                break;
            }
            if (step == MAXITER || delta < threshold) {
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine("New cluster:" + shifted + " delta: " + delta + " threshold: " + threshold + " bestd: " + nearestDist);
                }
                ArrayModifiableDBIDs members = DBIDUtil.newArray();
                members.add(iter);
                clusters.add(new Pair<V, ModifiableDBIDs>(shifted, members));
                break;
            }
            current = shifted;
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);

    // Wrap the collected centers and members into the result structure.
    ArrayList<Cluster<MeanModel>> cs = new ArrayList<>(clusters.size());
    for (Pair<V, ModifiableDBIDs> found : clusters) {
        MeanModel model = new MeanModel(found.first.toArray());
        cs.add(new Cluster<>(found.second, model));
    }
    if (noise.size() > 0) {
        cs.add(new Cluster<MeanModel>(noise, true));
    }
    return new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
}
Also used : ArrayList(java.util.ArrayList) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 15 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class PROCLUS method findDimensions.

/**
 * Refinement step that determines the set of correlated dimensions for each
 * cluster centroid.
 *
 * <p>For every cluster the per-dimension mean absolute deviation of its
 * members from the centroid is computed. These averages are handed to
 * {@code computeZijs} and {@code computeDimensionMap}; clusters for which the
 * dimension map has no entry are left out of the result.
 *
 * @param clusters the list of clusters
 * @param database the database containing the objects
 * @return the set of correlated dimensions for each specified cluster
 *         centroid
 */
private List<Pair<double[], long[]>> findDimensions(ArrayList<PROCLUSCluster> clusters, Relation<V> database) {
    final int dim = RelationUtil.dimensionality(database);
    final int numc = clusters.size();

    // avgDist[c][d] = mean of |centroid_c[d] - o[d]| over all members o of c
    double[][] avgDist = new double[numc][dim];
    for (int c = 0; c < numc; c++) {
        final PROCLUSCluster cluster = clusters.get(c);
        final double[] row = avgDist[c];
        for (DBIDIter member = cluster.objectIDs.iter(); member.valid(); member.advance()) {
            final V obj = database.get(member);
            for (int d = 0; d < dim; d++) {
                row[d] += Math.abs(cluster.centroid[d] - obj.doubleValue(d));
            }
        }
        final int members = cluster.objectIDs.size();
        for (int d = 0; d < dim; d++) {
            row[d] /= members;
        }
    }

    List<DoubleIntInt> zijs = computeZijs(avgDist, dim);
    long[][] dimensionMap = computeDimensionMap(zijs, dim, numc);

    // Pair each centroid with its dimensions, skipping unmapped clusters.
    List<Pair<double[], long[]>> result = new ArrayList<>(numc);
    for (int c = 0; c < numc; c++) {
        final long[] dims = dimensionMap[c];
        if (dims != null) {
            result.add(new Pair<>(clusters.get(c).centroid, dims));
        }
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Aggregations

Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)16 ArrayList (java.util.ArrayList)10 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)6 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)6 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)6 List (java.util.List)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)3 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)3 Subspace (de.lmu.ifi.dbs.elki.data.Subspace)3 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)3 IOException (java.io.IOException)3 SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel)2 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)2 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)2 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)2 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)2 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)2 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)2 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)2