Search in sources:

Example 16 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class AbstractLayout3DPC method computeSimilarityMatrix.

/**
 * Build the pairwise dependence matrix between the columns (attributes) of
 * a vector relation.
 *
 * @param sim Dependence measure applied to the columns
 * @param rel Vector relation supplying the data
 * @return Similarity matrix (lower triangular form)
 */
public static double[] computeSimilarityMatrix(DependenceMeasure sim, Relation<? extends NumberVector> rel) {
    final int numCols = RelationUtil.dimensionality(rel);
    final int numRows = rel.size();
    // Transposed copy of the relation: one double[] per attribute.
    // TODO: a copy-free variant would need less memory, but would likely be
    // slower. Maybe offer it as a fallback option?
    final double[][] columns = new double[numCols][numRows];
    DBIDIter iter = rel.iterDBIDs();
    int row = 0;
    while (iter.valid()) {
        final NumberVector vec = rel.get(iter);
        for (int col = 0; col < numCols; col++) {
            columns[col][row] = vec.doubleValue(col);
        }
        iter.advance();
        row++;
    }
    return sim.dependence(DoubleArrayAdapter.STATIC, Arrays.asList(columns));
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 17 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class VisualizePairwiseGainMatrix method run.

/**
 * Compute the pairwise gain obtained by combining two score vectors and show
 * it as a similarity matrix image.
 *
 * <p>
 * The first object of the label relation must carry a label matching
 * {@code .*by.?label.*}; its vector is used as the reference (non-zero
 * entries form the positive set). For every other vector the ROC AUC against
 * that reference is computed, and for every pair of vectors the ROC AUC of
 * their combination via {@code voting}. The gain of a pair is
 * {@code (combinedAUC - best) / (1 - best)}, where {@code best} is the larger
 * of the two individual AUC values.
 *
 * @throws AbortException if the first label does not match the expected
 *         "by label" pattern
 */
@Override
public void run() {
    final Database database = inputstep.getDatabase();
    ResultHierarchy hier = database.getHierarchy();
    Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
    final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
    final String firstlabel = labels.get(firstid);
    if (!firstlabel.matches(".*by.?label.*")) {
        throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
    }
    relation = GreedyEnsembleExperiment.applyPrescaling(prescaling, relation, firstid);
    // Dimensionality and reference vector
    final int dim = RelationUtil.dimensionality(relation);
    final NumberVector refvec = relation.get(firstid);
    // Build the truth vector
    VectorNonZero pos = new VectorNonZero(refvec);
    // All objects except the reference one, in sorted order.
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    ids.remove(firstid);
    ids.sort();
    final int size = ids.size();
    // data[a][a]: AUC of vector a alone; data[a][b]: AUC of the combination
    // of a and b (stored symmetrically).
    double[][] data = new double[size][size];
    // minmax tracks the range of the gain values, commax the range of the
    // combined AUC values.
    DoubleMinMax minmax = new DoubleMinMax(), commax = new DoubleMinMax();
    {
        // One progress step per diagonal entry and per unordered pair.
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ensemble gain.", size * (size + 1) >> 1, LOG) : null;
        // Vote combination buffer.
        double[] buf = new double[2];
        int a = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), a++) {
            final NumberVector veca = relation.get(id);
            // Direct AUC score:
            {
                double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(veca));
                data[a][a] = auc;
                LOG.incrementProcessed(prog);
            }
            // Compare to others, exploiting symmetry
            DBIDArrayIter id2 = ids.iter();
            id2.seek(a + 1);
            for (int b = a + 1; b < size; b++, id2.advance()) {
                final NumberVector vecb = relation.get(id2);
                // Combine the two vectors dimension by dimension.
                double[] combined = new double[dim];
                for (int d = 0; d < dim; d++) {
                    buf[0] = veca.doubleValue(d);
                    buf[1] = vecb.doubleValue(d);
                    combined[d] = voting.combine(buf);
                }
                double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(DoubleVector.wrap(combined)));
                data[a][b] = auc;
                data[b][a] = auc;
                commax.put(data[a][b]);
                LOG.incrementProcessed(prog);
            }
        }
        LOG.ensureCompleted(prog);
    }
    // Turn the combined AUC values into gains relative to the better of the
    // two individual AUC values (which are still on the diagonal here).
    // NOTE(review): if an individual AUC is exactly 1, (1 - ref) is zero and
    // the gain becomes NaN or infinite - confirm whether this needs handling.
    for (int a = 0; a < size; a++) {
        for (int b = a + 1; b < size; b++) {
            double ref = Math.max(data[a][a], data[b][b]);
            data[a][b] = (data[a][b] - ref) / (1 - ref);
            data[b][a] = (data[b][a] - ref) / (1 - ref);
            minmax.put(data[a][b]);
        }
    }
    // The diagonal is no longer needed as reference; an object has no gain
    // over itself.
    for (int a = 0; a < size; a++) {
        data[a][a] = 0;
    }
    LOG.verbose("Gain: " + minmax.toString() + " AUC: " + commax.toString());
    // Fixed color scale with the bounds 0 and 0.5. The observed gain range
    // (minmax) is only logged above and does not influence the scaling.
    final LinearScaling scale = LinearScaling.fromMinMax(0., .5);
    BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    for (int x = 0; x < size; x++) {
        for (int y = x; y < size; y++) {
            double val = data[x][y];
            // Clamp the scaled gain to [-1, 1].
            val = Math.max(-1, Math.min(1., scale.getScaled(val)));
            // Compute color:
            final int col;
            {
                if (val >= 0) {
                    // Non-negative gain: intensity in the green channel.
                    int ival = 0xFF & (int) (255 * val);
                    col = 0xff000000 | (ival << 8);
                } else {
                    // Negative gain: intensity in the red channel.
                    int ival = 0xFF & (int) (255 * -val);
                    col = 0xff000000 | (ival << 16);
                }
            }
            // The matrix is symmetric, so paint both triangles.
            img.setRGB(x, y, col);
            img.setRGB(y, x, col);
        }
    }
    SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
    hier.add(database, smat);
    VisualizerContext context = vispar.newContext(hier, smat);
    // Attach visualizers to results
    SimilarityMatrixVisualizer factory = new SimilarityMatrixVisualizer();
    factory.processNewResult(context, database);
    // Show only the visualization tasks created by our own factory.
    VisualizationTree.findVis(context).filter(VisualizationTask.class).forEach(task -> {
        if (task.getFactory() == factory) {
            showVisualization(context, factory, task);
        }
    });
}
Also used : DecreasingVectorIter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DecreasingVectorIter) SimilarityMatrix(de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage.SimilarityMatrix) BufferedImage(java.awt.image.BufferedImage) SimilarityMatrixVisualizer(de.lmu.ifi.dbs.elki.visualization.visualizers.visunproj.SimilarityMatrixVisualizer) LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) VisualizationTask(de.lmu.ifi.dbs.elki.visualization.VisualizationTask) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Database(de.lmu.ifi.dbs.elki.database.Database) ResultHierarchy(de.lmu.ifi.dbs.elki.result.ResultHierarchy) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VisualizerContext(de.lmu.ifi.dbs.elki.visualization.VisualizerContext) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) VectorNonZero(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.VectorNonZero)

Example 18 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class DiSH method isParent.

/**
 * Tests whether the given parent cluster is a parent of at least one of the
 * child clusters.
 *
 * @param relation the database containing the objects
 * @param parent the candidate parent cluster
 * @param iter iterator over the child clusters to be tested
 * @param db_dim Database dimensionality
 * @return {@code true} as soon as a child is found whose subspace
 *         dimensionality relative to the parent equals {@code db_dim} minus
 *         the dimensionality of the parent subspace, {@code false} if there
 *         is no such child
 */
private boolean isParent(Relation<V> relation, Cluster<SubspaceModel> parent, It<Cluster<SubspaceModel>> iter, int db_dim) {
    final Subspace parentSubspace = parent.getModel().getSubspace();
    final NumberVector parentCentroid = ProjectedCentroid.make(parentSubspace.getDimensions(), relation, parent.getIDs());
    // Value the parent/child comparison has to yield for a match.
    final int expectedDim = db_dim - parentSubspace.dimensionality();
    while (iter.valid()) {
        final Cluster<SubspaceModel> child = iter.get();
        final Subspace childSubspace = child.getModel().getSubspace();
        final NumberVector childCentroid = ProjectedCentroid.make(childSubspace.getDimensions(), relation, child.getIDs());
        final long[] sharedPreferences = BitsUtil.andCMin(parentSubspace.getDimensions(), childSubspace.getDimensions());
        final int candidateDim = subspaceDimensionality(parentCentroid, childCentroid, parentSubspace.getDimensions(), childSubspace.getDimensions(), sharedPreferences);
        if (candidateDim == expectedDim) {
            return true;
        }
        iter.advance();
    }
    return false;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel)

Example 19 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class AddSingleScale method run.

/**
 * Attach one shared linear scale to every dimension of a single vector
 * relation.
 *
 * If {@code minmax} is set, its first two entries define the scale;
 * otherwise the scale spans the minimum and maximum over all non-NaN values
 * of the relation.
 *
 * @param rel Relation
 * @return Scales
 */
private ScalesResult run(Relation<? extends NumberVector> rel) {
    final int dim = RelationUtil.dimensionality(rel);
    final LinearScale[] scales = new LinearScale[dim];
    final LinearScale shared;
    if (minmax != null) {
        // Use predefined.
        shared = new LinearScale(minmax[0], minmax[1]);
    } else {
        // Determine the global value range across all dimensions.
        final DoubleMinMax range = new DoubleMinMax();
        DBIDIter iter = rel.iterDBIDs();
        while (iter.valid()) {
            final NumberVector vec = rel.get(iter);
            for (int d = 0; d < dim; d++) {
                final double val = vec.doubleValue(d);
                // NaN values do not contribute to the range.
                if (!Double.isNaN(val)) {
                    range.put(val);
                }
            }
            iter.advance();
        }
        shared = new LinearScale(range.getMin(), range.getMax());
    }
    // Every dimension references the same scale instance.
    for (int d = 0; d < dim; d++) {
        scales[d] = shared;
    }
    return new ScalesResult(scales);
}
Also used : LinearScale(de.lmu.ifi.dbs.elki.math.scales.LinearScale) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ScalesResult(de.lmu.ifi.dbs.elki.result.ScalesResult) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ListSizeConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListSizeConstraint) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 20 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class PassingDataToELKI method main.

/**
 * Example entry point: cluster a randomly generated in-memory array with
 * k-means and print the resulting clusters.
 *
 * @param args Command line parameters (not supported)
 */
public static void main(String[] args) {
    // Set the logging level to statistics:
    LoggingConfiguration.setStatistics();
    // Generate a random data set.
    // Note: ELKI has a nice data generator class, use that instead.
    final double[][] points = new double[1000][2];
    for (double[] point : points) {
        for (int j = 0; j < point.length; j++) {
            point[j] = Math.random();
        }
    }
    // Adapter to load data from an existing array.
    DatabaseConnection connection = new ArrayAdapterDatabaseConnection(points);
    // Create a database (which may contain multiple relations!)
    Database database = new StaticArrayDatabase(connection, null);
    // Load the data into the database (do NOT forget to initialize...)
    database.initialize();
    // Relation containing the number vectors:
    Relation<NumberVector> vectors = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // We know that the ids must be a continuous range:
    DBIDRange range = (DBIDRange) vectors.getDBIDs();
    // K-means should be used with squared Euclidean (least squares):
    SquaredEuclideanDistanceFunction distance = SquaredEuclideanDistanceFunction.STATIC;
    // Default initialization, using global random:
    // To fix the random seed, use: new RandomFactory(seed);
    RandomlyGeneratedInitialMeans initializer = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);
    // Textbook k-means clustering:
    final int k = 3; // number of partitions
    final int maxIterations = 0; // maximum number of iterations: no limit
    KMeansLloyd<NumberVector> kmeans = new KMeansLloyd<>(distance, k, maxIterations, initializer);
    // K-means will automatically choose a numerical relation from the data set:
    // But we could make it explicit (if there were more than one numeric
    // relation!): kmeans.run(database, vectors);
    Clustering<KMeansModel> clustering = kmeans.run(database);
    // Output all clusters:
    int clusterNo = 0;
    for (Cluster<KMeansModel> cluster : clustering.getAllClusters()) {
        // K-means will name all clusters "Cluster" in lack of noise support:
        System.out.println("#" + clusterNo + ": " + cluster.getNameAutomatic());
        System.out.println("Size: " + cluster.size());
        System.out.println("Center: " + cluster.getModel().getPrototype().toString());
        // Iterate over objects:
        System.out.print("Objects: ");
        DBIDIter member = cluster.getIDs().iter();
        while (member.valid()) {
            // To get the vector use:
            // NumberVector v = vectors.get(member);
            // Offset within our DBID range: "line number"
            // Do NOT rely on using "internalGetIndex()" directly!
            System.out.print(" " + range.getOffset(member));
            member.advance();
        }
        System.out.println();
        clusterNo++;
    }
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) RandomlyGeneratedInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Aggregations

NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 85; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 40; ArrayList (java.util.ArrayList): 16; LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic): 9; DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 8; MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 8; AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 8; Database (de.lmu.ifi.dbs.elki.database.Database): 7; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 7; DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic): 7; Random (java.util.Random): 7; Test (org.junit.Test): 7; VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation): 5; FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 5; MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 5; EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult): 5; MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup): 5; List (java.util.List): 5; SparseNumberVector (de.lmu.ifi.dbs.elki.data.SparseNumberVector): 4; RandomProjectionFamily (de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily): 4