Search in sources :

Example 91 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class GiniIndexTest method testToyExample.

@Test
public void testToyExample() {
    Database db = loadTransactions(UNITTEST + "itemsets/increasing5.txt", 5);
    AssociationRuleResult res = // 
    new ELKIBuilder<>(AssociationRuleGeneration.class).with(FPGrowth.Parameterizer.MINSUPP_ID, // 
    1).with(AssociationRuleGeneration.Parameterizer.MINMEASURE_ID, // 
    0.2).with(AssociationRuleGeneration.Parameterizer.INTERESTMEASURE_ID, // 
    GiniIndex.class).build().run(db);
    assertEquals("Size not as expected.", 18, res.getRules().size());
}
Also used : AssociationRuleResult(de.lmu.ifi.dbs.elki.result.AssociationRuleResult) AssociationRuleGeneration(de.lmu.ifi.dbs.elki.algorithm.itemsetmining.associationrules.AssociationRuleGeneration) Database(de.lmu.ifi.dbs.elki.database.Database) Test(org.junit.Test) AbstractFrequentItemsetAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.itemsetmining.AbstractFrequentItemsetAlgorithmTest)

Example 92 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class VisualizePairwiseGainMatrix method run.

@Override
public void run() {
    final Database database = inputstep.getDatabase();
    ResultHierarchy hier = database.getHierarchy();
    Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
    final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
    final String firstlabel = labels.get(firstid);
    if (!firstlabel.matches(".*by.?label.*")) {
        throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
    }
    relation = GreedyEnsembleExperiment.applyPrescaling(prescaling, relation, firstid);
    // Dimensionality and reference vector
    final int dim = RelationUtil.dimensionality(relation);
    final NumberVector refvec = relation.get(firstid);
    // Build the truth vector
    VectorNonZero pos = new VectorNonZero(refvec);
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    ids.remove(firstid);
    ids.sort();
    final int size = ids.size();
    double[][] data = new double[size][size];
    DoubleMinMax minmax = new DoubleMinMax(), commax = new DoubleMinMax();
    {
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ensemble gain.", size * (size + 1) >> 1, LOG) : null;
        // Vote combination buffer.
        double[] buf = new double[2];
        int a = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), a++) {
            final NumberVector veca = relation.get(id);
            // Direct AUC score:
            {
                double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(veca));
                data[a][a] = auc;
                // minmax.put(auc);
                LOG.incrementProcessed(prog);
            }
            // Compare to others, exploiting symmetry
            DBIDArrayIter id2 = ids.iter();
            id2.seek(a + 1);
            for (int b = a + 1; b < size; b++, id2.advance()) {
                final NumberVector vecb = relation.get(id2);
                double[] combined = new double[dim];
                for (int d = 0; d < dim; d++) {
                    buf[0] = veca.doubleValue(d);
                    buf[1] = vecb.doubleValue(d);
                    combined[d] = voting.combine(buf);
                }
                double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(DoubleVector.wrap(combined)));
                // logger.verbose(auc + " " + labels.get(ids.get(a)) + " " +
                // labels.get(ids.get(b)));
                data[a][b] = auc;
                data[b][a] = auc;
                commax.put(data[a][b]);
                // minmax.put(auc);
                LOG.incrementProcessed(prog);
            }
        }
        LOG.ensureCompleted(prog);
    }
    for (int a = 0; a < size; a++) {
        for (int b = a + 1; b < size; b++) {
            double ref = Math.max(data[a][a], data[b][b]);
            data[a][b] = (data[a][b] - ref) / (1 - ref);
            data[b][a] = (data[b][a] - ref) / (1 - ref);
            // logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " +
            // labels.get(ids.get(b)));
            minmax.put(data[a][b]);
        }
    }
    for (int a = 0; a < size; a++) {
        data[a][a] = 0;
    }
    LOG.verbose("Gain: " + minmax.toString() + " AUC: " + commax.toString());
    boolean hasneg = (minmax.getMin() < -1E-3);
    LinearScaling scale;
    if (!hasneg) {
        scale = LinearScaling.fromMinMax(0., minmax.getMax());
    } else {
        scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
    }
    scale = LinearScaling.fromMinMax(0., .5);
    BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    for (int x = 0; x < size; x++) {
        for (int y = x; y < size; y++) {
            double val = data[x][y];
            val = Math.max(-1, Math.min(1., scale.getScaled(val)));
            // Compute color:
            final int col;
            {
                if (val >= 0) {
                    int ival = 0xFF & (int) (255 * val);
                    col = 0xff000000 | (ival << 8);
                } else {
                    int ival = 0xFF & (int) (255 * -val);
                    col = 0xff000000 | (ival << 16);
                }
            }
            img.setRGB(x, y, col);
            img.setRGB(y, x, col);
        }
    }
    SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
    hier.add(database, smat);
    VisualizerContext context = vispar.newContext(hier, smat);
    // Attach visualizers to results
    SimilarityMatrixVisualizer factory = new SimilarityMatrixVisualizer();
    factory.processNewResult(context, database);
    VisualizationTree.findVis(context).filter(VisualizationTask.class).forEach(task -> {
        if (task.getFactory() == factory) {
            showVisualization(context, factory, task);
        }
    });
}
Also used : DecreasingVectorIter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DecreasingVectorIter) SimilarityMatrix(de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage.SimilarityMatrix) BufferedImage(java.awt.image.BufferedImage) SimilarityMatrixVisualizer(de.lmu.ifi.dbs.elki.visualization.visualizers.visunproj.SimilarityMatrixVisualizer) LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) VisualizationTask(de.lmu.ifi.dbs.elki.visualization.VisualizationTask) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Database(de.lmu.ifi.dbs.elki.database.Database) ResultHierarchy(de.lmu.ifi.dbs.elki.result.ResultHierarchy) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VisualizerContext(de.lmu.ifi.dbs.elki.visualization.VisualizerContext) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) VectorNonZero(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.VectorNonZero)

Example 93 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class SpacefillingKNNPreprocessorTest method testGaussian.

@Test
public void testGaussian() {
    Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
    Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    DistanceQuery<DoubleVector> distanceQuery = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);
    // get linear queries
    LinearScanDistanceKNNQuery<DoubleVector> lin_knn_query = new LinearScanDistanceKNNQuery<>(distanceQuery);
    // get preprocessed queries
    ListParameterization config = new ListParameterization();
    // 
    config.addParameter(// 
    SpacefillingKNNPreprocessor.Factory.Parameterizer.CURVES_ID, // 
    HilbertSpatialSorter.class.getName() + "," + PeanoSpatialSorter.class.getName() + "," + ZCurveSpatialSorter.class.getName() + "," + BinarySplitSpatialSorter.class.getName());
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.DIM_ID, 7);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.PROJECTION_ID, GaussianRandomProjectionFamily.class);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.VARIANTS_ID, 10);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.WINDOW_ID, 5.);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.RANDOM_ID, 0L);
    config.addParameter(GaussianRandomProjectionFamily.Parameterizer.RANDOM_ID, 0L);
    SpacefillingKNNPreprocessor.Factory<DoubleVector> preprocf = ClassGenericsUtil.parameterizeOrAbort(SpacefillingKNNPreprocessor.Factory.class, config);
    SpacefillingKNNPreprocessor<DoubleVector> preproc = preprocf.instantiate(rel);
    preproc.initialize();
    // add as index
    db.getHierarchy().add(rel, preproc);
    KNNQuery<DoubleVector> preproc_knn_query = preproc.getKNNQuery(distanceQuery, k);
    assertFalse("Preprocessor knn query class incorrect.", preproc_knn_query instanceof LinearScanDistanceKNNQuery);
    // test queries
    testKNNQueries(rel, lin_knn_query, preproc_knn_query, k);
    // also test partial queries, forward only
    testKNNQueries(rel, lin_knn_query, preproc_knn_query, k / 2);
}
Also used : SpacefillingKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.SpacefillingKNNPreprocessor) LinearScanDistanceKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) HilbertSpatialSorter(de.lmu.ifi.dbs.elki.math.spacefillingcurves.HilbertSpatialSorter) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) Test(org.junit.Test) AbstractSimpleAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)

Example 94 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class SpacefillingKNNPreprocessorTest method testHenzinger.

@Test
public void testHenzinger() {
    Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
    Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    DistanceQuery<DoubleVector> distanceQuery = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);
    // get linear queries
    LinearScanDistanceKNNQuery<DoubleVector> lin_knn_query = new LinearScanDistanceKNNQuery<>(distanceQuery);
    // get preprocessed queries
    ListParameterization config = new ListParameterization();
    // 
    config.addParameter(// 
    SpacefillingKNNPreprocessor.Factory.Parameterizer.CURVES_ID, // 
    HilbertSpatialSorter.class.getName() + "," + PeanoSpatialSorter.class.getName() + "," + ZCurveSpatialSorter.class.getName() + "," + BinarySplitSpatialSorter.class.getName());
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.DIM_ID, 7);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.PROJECTION_ID, SimplifiedRandomHyperplaneProjectionFamily.class);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.VARIANTS_ID, 10);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.WINDOW_ID, 5.);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.RANDOM_ID, 0L);
    config.addParameter(SimplifiedRandomHyperplaneProjectionFamily.Parameterizer.RANDOM_ID, 1L);
    SpacefillingKNNPreprocessor.Factory<DoubleVector> preprocf = ClassGenericsUtil.parameterizeOrAbort(SpacefillingKNNPreprocessor.Factory.class, config);
    SpacefillingKNNPreprocessor<DoubleVector> preproc = preprocf.instantiate(rel);
    preproc.initialize();
    // add as index
    db.getHierarchy().add(rel, preproc);
    KNNQuery<DoubleVector> preproc_knn_query = preproc.getKNNQuery(distanceQuery, k);
    assertFalse("Preprocessor knn query class incorrect.", preproc_knn_query instanceof LinearScanDistanceKNNQuery);
    // test queries
    testKNNQueries(rel, lin_knn_query, preproc_knn_query, k);
    // also test partial queries, forward only
    testKNNQueries(rel, lin_knn_query, preproc_knn_query, k / 2);
}
Also used : SpacefillingKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.SpacefillingKNNPreprocessor) LinearScanDistanceKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) HilbertSpatialSorter(de.lmu.ifi.dbs.elki.math.spacefillingcurves.HilbertSpatialSorter) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) Test(org.junit.Test) AbstractSimpleAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)

Example 95 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class SpacefillingKNNPreprocessorTest method testAchlioptas.

@Test
public void testAchlioptas() {
    Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
    Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    DistanceQuery<DoubleVector> distanceQuery = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);
    // get linear queries
    LinearScanDistanceKNNQuery<DoubleVector> lin_knn_query = new LinearScanDistanceKNNQuery<>(distanceQuery);
    // get preprocessed queries
    ListParameterization config = new ListParameterization();
    // 
    config.addParameter(// 
    SpacefillingKNNPreprocessor.Factory.Parameterizer.CURVES_ID, // 
    HilbertSpatialSorter.class.getName() + "," + PeanoSpatialSorter.class.getName() + "," + ZCurveSpatialSorter.class.getName() + "," + BinarySplitSpatialSorter.class.getName());
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.DIM_ID, 7);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.PROJECTION_ID, AchlioptasRandomProjectionFamily.class);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.VARIANTS_ID, 10);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.WINDOW_ID, 5.);
    config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.RANDOM_ID, 0L);
    config.addParameter(AchlioptasRandomProjectionFamily.Parameterizer.RANDOM_ID, 0L);
    SpacefillingKNNPreprocessor.Factory<DoubleVector> preprocf = ClassGenericsUtil.parameterizeOrAbort(SpacefillingKNNPreprocessor.Factory.class, config);
    SpacefillingKNNPreprocessor<DoubleVector> preproc = preprocf.instantiate(rel);
    preproc.initialize();
    // add as index
    db.getHierarchy().add(rel, preproc);
    KNNQuery<DoubleVector> preproc_knn_query = preproc.getKNNQuery(distanceQuery, k);
    assertFalse("Preprocessor knn query class incorrect.", preproc_knn_query instanceof LinearScanDistanceKNNQuery);
    // test queries
    testKNNQueries(rel, lin_knn_query, preproc_knn_query, k);
    // also test partial queries, forward only
    testKNNQueries(rel, lin_knn_query, preproc_knn_query, k / 2);
}
Also used : SpacefillingKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.SpacefillingKNNPreprocessor) LinearScanDistanceKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) HilbertSpatialSorter(de.lmu.ifi.dbs.elki.math.spacefillingcurves.HilbertSpatialSorter) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) Test(org.junit.Test) AbstractSimpleAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)

Aggregations

Database (de.lmu.ifi.dbs.elki.database.Database)288 Test (org.junit.Test)240 AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)151 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)102 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)85 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)69 AbstractOutlierAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)50 Model (de.lmu.ifi.dbs.elki.data.model.Model)29 CutDendrogramByNumberOfClusters (de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.CutDendrogramByNumberOfClusters)26 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)14 StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)11 AbstractFrequentItemsetAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.itemsetmining.AbstractFrequentItemsetAlgorithmTest)10 AssociationRuleGeneration (de.lmu.ifi.dbs.elki.algorithm.itemsetmining.associationrules.AssociationRuleGeneration)10 AssociationRuleResult (de.lmu.ifi.dbs.elki.result.AssociationRuleResult)10 ListParameterization (de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization)10 AbstractSimpleAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)9 MedoidModel (de.lmu.ifi.dbs.elki.data.model.MedoidModel)9 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)9 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)8 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)8