use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.
the class GiniIndexTest method testToyExample.
@Test
public void testToyExample() {
Database db = loadTransactions(UNITTEST + "itemsets/increasing5.txt", 5);
AssociationRuleResult res = //
new ELKIBuilder<>(AssociationRuleGeneration.class).with(FPGrowth.Parameterizer.MINSUPP_ID, //
1).with(AssociationRuleGeneration.Parameterizer.MINMEASURE_ID, //
0.2).with(AssociationRuleGeneration.Parameterizer.INTERESTMEASURE_ID, //
GiniIndex.class).build().run(db);
assertEquals("Size not as expected.", 18, res.getRules().size());
}
use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.
the class VisualizePairwiseGainMatrix method run.
@Override
public void run() {
final Database database = inputstep.getDatabase();
ResultHierarchy hier = database.getHierarchy();
Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
final String firstlabel = labels.get(firstid);
if (!firstlabel.matches(".*by.?label.*")) {
throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
}
relation = GreedyEnsembleExperiment.applyPrescaling(prescaling, relation, firstid);
// Dimensionality and reference vector
final int dim = RelationUtil.dimensionality(relation);
final NumberVector refvec = relation.get(firstid);
// Build the truth vector
VectorNonZero pos = new VectorNonZero(refvec);
ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
ids.remove(firstid);
ids.sort();
final int size = ids.size();
double[][] data = new double[size][size];
DoubleMinMax minmax = new DoubleMinMax(), commax = new DoubleMinMax();
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ensemble gain.", size * (size + 1) >> 1, LOG) : null;
// Vote combination buffer.
double[] buf = new double[2];
int a = 0;
for (DBIDIter id = ids.iter(); id.valid(); id.advance(), a++) {
final NumberVector veca = relation.get(id);
// Direct AUC score:
{
double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(veca));
data[a][a] = auc;
// minmax.put(auc);
LOG.incrementProcessed(prog);
}
// Compare to others, exploiting symmetry
DBIDArrayIter id2 = ids.iter();
id2.seek(a + 1);
for (int b = a + 1; b < size; b++, id2.advance()) {
final NumberVector vecb = relation.get(id2);
double[] combined = new double[dim];
for (int d = 0; d < dim; d++) {
buf[0] = veca.doubleValue(d);
buf[1] = vecb.doubleValue(d);
combined[d] = voting.combine(buf);
}
double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(DoubleVector.wrap(combined)));
// logger.verbose(auc + " " + labels.get(ids.get(a)) + " " +
// labels.get(ids.get(b)));
data[a][b] = auc;
data[b][a] = auc;
commax.put(data[a][b]);
// minmax.put(auc);
LOG.incrementProcessed(prog);
}
}
LOG.ensureCompleted(prog);
}
for (int a = 0; a < size; a++) {
for (int b = a + 1; b < size; b++) {
double ref = Math.max(data[a][a], data[b][b]);
data[a][b] = (data[a][b] - ref) / (1 - ref);
data[b][a] = (data[b][a] - ref) / (1 - ref);
// logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " +
// labels.get(ids.get(b)));
minmax.put(data[a][b]);
}
}
for (int a = 0; a < size; a++) {
data[a][a] = 0;
}
LOG.verbose("Gain: " + minmax.toString() + " AUC: " + commax.toString());
boolean hasneg = (minmax.getMin() < -1E-3);
LinearScaling scale;
if (!hasneg) {
scale = LinearScaling.fromMinMax(0., minmax.getMax());
} else {
scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
}
scale = LinearScaling.fromMinMax(0., .5);
BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
for (int x = 0; x < size; x++) {
for (int y = x; y < size; y++) {
double val = data[x][y];
val = Math.max(-1, Math.min(1., scale.getScaled(val)));
// Compute color:
final int col;
{
if (val >= 0) {
int ival = 0xFF & (int) (255 * val);
col = 0xff000000 | (ival << 8);
} else {
int ival = 0xFF & (int) (255 * -val);
col = 0xff000000 | (ival << 16);
}
}
img.setRGB(x, y, col);
img.setRGB(y, x, col);
}
}
SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
hier.add(database, smat);
VisualizerContext context = vispar.newContext(hier, smat);
// Attach visualizers to results
SimilarityMatrixVisualizer factory = new SimilarityMatrixVisualizer();
factory.processNewResult(context, database);
VisualizationTree.findVis(context).filter(VisualizationTask.class).forEach(task -> {
if (task.getFactory() == factory) {
showVisualization(context, factory, task);
}
});
}
use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.
the class SpacefillingKNNPreprocessorTest method testGaussian.
@Test
public void testGaussian() {
Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
DistanceQuery<DoubleVector> distanceQuery = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);
// get linear queries
LinearScanDistanceKNNQuery<DoubleVector> lin_knn_query = new LinearScanDistanceKNNQuery<>(distanceQuery);
// get preprocessed queries
ListParameterization config = new ListParameterization();
//
config.addParameter(//
SpacefillingKNNPreprocessor.Factory.Parameterizer.CURVES_ID, //
HilbertSpatialSorter.class.getName() + "," + PeanoSpatialSorter.class.getName() + "," + ZCurveSpatialSorter.class.getName() + "," + BinarySplitSpatialSorter.class.getName());
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.DIM_ID, 7);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.PROJECTION_ID, GaussianRandomProjectionFamily.class);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.VARIANTS_ID, 10);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.WINDOW_ID, 5.);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.RANDOM_ID, 0L);
config.addParameter(GaussianRandomProjectionFamily.Parameterizer.RANDOM_ID, 0L);
SpacefillingKNNPreprocessor.Factory<DoubleVector> preprocf = ClassGenericsUtil.parameterizeOrAbort(SpacefillingKNNPreprocessor.Factory.class, config);
SpacefillingKNNPreprocessor<DoubleVector> preproc = preprocf.instantiate(rel);
preproc.initialize();
// add as index
db.getHierarchy().add(rel, preproc);
KNNQuery<DoubleVector> preproc_knn_query = preproc.getKNNQuery(distanceQuery, k);
assertFalse("Preprocessor knn query class incorrect.", preproc_knn_query instanceof LinearScanDistanceKNNQuery);
// test queries
testKNNQueries(rel, lin_knn_query, preproc_knn_query, k);
// also test partial queries, forward only
testKNNQueries(rel, lin_knn_query, preproc_knn_query, k / 2);
}
use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.
the class SpacefillingKNNPreprocessorTest method testHenzinger.
@Test
public void testHenzinger() {
Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
DistanceQuery<DoubleVector> distanceQuery = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);
// get linear queries
LinearScanDistanceKNNQuery<DoubleVector> lin_knn_query = new LinearScanDistanceKNNQuery<>(distanceQuery);
// get preprocessed queries
ListParameterization config = new ListParameterization();
//
config.addParameter(//
SpacefillingKNNPreprocessor.Factory.Parameterizer.CURVES_ID, //
HilbertSpatialSorter.class.getName() + "," + PeanoSpatialSorter.class.getName() + "," + ZCurveSpatialSorter.class.getName() + "," + BinarySplitSpatialSorter.class.getName());
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.DIM_ID, 7);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.PROJECTION_ID, SimplifiedRandomHyperplaneProjectionFamily.class);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.VARIANTS_ID, 10);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.WINDOW_ID, 5.);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.RANDOM_ID, 0L);
config.addParameter(SimplifiedRandomHyperplaneProjectionFamily.Parameterizer.RANDOM_ID, 1L);
SpacefillingKNNPreprocessor.Factory<DoubleVector> preprocf = ClassGenericsUtil.parameterizeOrAbort(SpacefillingKNNPreprocessor.Factory.class, config);
SpacefillingKNNPreprocessor<DoubleVector> preproc = preprocf.instantiate(rel);
preproc.initialize();
// add as index
db.getHierarchy().add(rel, preproc);
KNNQuery<DoubleVector> preproc_knn_query = preproc.getKNNQuery(distanceQuery, k);
assertFalse("Preprocessor knn query class incorrect.", preproc_knn_query instanceof LinearScanDistanceKNNQuery);
// test queries
testKNNQueries(rel, lin_knn_query, preproc_knn_query, k);
// also test partial queries, forward only
testKNNQueries(rel, lin_knn_query, preproc_knn_query, k / 2);
}
use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.
the class SpacefillingKNNPreprocessorTest method testAchlioptas.
@Test
public void testAchlioptas() {
Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
DistanceQuery<DoubleVector> distanceQuery = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);
// get linear queries
LinearScanDistanceKNNQuery<DoubleVector> lin_knn_query = new LinearScanDistanceKNNQuery<>(distanceQuery);
// get preprocessed queries
ListParameterization config = new ListParameterization();
//
config.addParameter(//
SpacefillingKNNPreprocessor.Factory.Parameterizer.CURVES_ID, //
HilbertSpatialSorter.class.getName() + "," + PeanoSpatialSorter.class.getName() + "," + ZCurveSpatialSorter.class.getName() + "," + BinarySplitSpatialSorter.class.getName());
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.DIM_ID, 7);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.PROJECTION_ID, AchlioptasRandomProjectionFamily.class);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.VARIANTS_ID, 10);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.WINDOW_ID, 5.);
config.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.RANDOM_ID, 0L);
config.addParameter(AchlioptasRandomProjectionFamily.Parameterizer.RANDOM_ID, 0L);
SpacefillingKNNPreprocessor.Factory<DoubleVector> preprocf = ClassGenericsUtil.parameterizeOrAbort(SpacefillingKNNPreprocessor.Factory.class, config);
SpacefillingKNNPreprocessor<DoubleVector> preproc = preprocf.instantiate(rel);
preproc.initialize();
// add as index
db.getHierarchy().add(rel, preproc);
KNNQuery<DoubleVector> preproc_knn_query = preproc.getKNNQuery(distanceQuery, k);
assertFalse("Preprocessor knn query class incorrect.", preproc_knn_query instanceof LinearScanDistanceKNNQuery);
// test queries
testKNNQueries(rel, lin_knn_query, preproc_knn_query, k);
// also test partial queries, forward only
testKNNQueries(rel, lin_knn_query, preproc_knn_query, k / 2);
}
Aggregations