Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class AbstractLayout3DPC, method computeSimilarityMatrix.
/**
* Compute a column-wise dependency matrix for the given relation.
*
* @param sim Dependence measure
* @param rel Vector relation
* @return Similarity matrix (lower triangular form)
*/
public static double[] computeSimilarityMatrix(DependenceMeasure sim, Relation<? extends NumberVector> rel) {
final int dim = RelationUtil.dimensionality(rel);
final int size = rel.size();
// TODO: we could use less memory (no copy), but this would likely be
// slower. Maybe as a fallback option?
double[][] data = new double[dim][size];
int r = 0;
for (DBIDIter it = rel.iterDBIDs(); it.valid(); it.advance(), r++) {
NumberVector v = rel.get(it);
for (int d = 0; d < dim; d++) {
data[d][r] = v.doubleValue(d);
}
}
return sim.dependence(DoubleArrayAdapter.STATIC, Arrays.asList(data));
}
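A minimal usage sketch (not part of the ELKI sources): assuming a Relation<? extends NumberVector> named rel has already been loaded, and using a correlation-based dependence measure as an illustrative (assumed) choice, the packed matrix could be obtained like this.
// Sketch under assumptions: `rel` is a vector relation obtained elsewhere,
// e.g. via db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); the concrete
// DependenceMeasure used here is illustrative, any implementation works.
DependenceMeasure sim = CorrelationDependenceMeasure.STATIC;
double[] packed = AbstractLayout3DPC.computeSimilarityMatrix(sim, rel);
// `packed` holds one dependence value per pair of dimensions, stored in the
// lower-triangular layout described in the Javadoc above.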
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class VisualizePairwiseGainMatrix, method run.
@Override
public void run() {
final Database database = inputstep.getDatabase();
ResultHierarchy hier = database.getHierarchy();
Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
final String firstlabel = labels.get(firstid);
if (!firstlabel.matches(".*by.?label.*")) {
throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
}
relation = GreedyEnsembleExperiment.applyPrescaling(prescaling, relation, firstid);
// Dimensionality and reference vector
final int dim = RelationUtil.dimensionality(relation);
final NumberVector refvec = relation.get(firstid);
// Build the truth vector
VectorNonZero pos = new VectorNonZero(refvec);
ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
ids.remove(firstid);
ids.sort();
final int size = ids.size();
double[][] data = new double[size][size];
DoubleMinMax minmax = new DoubleMinMax(), commax = new DoubleMinMax();
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ensemble gain.", size * (size + 1) >> 1, LOG) : null;
// Vote combination buffer.
double[] buf = new double[2];
int a = 0;
for (DBIDIter id = ids.iter(); id.valid(); id.advance(), a++) {
final NumberVector veca = relation.get(id);
// Direct AUC score:
{
double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(veca));
data[a][a] = auc;
// minmax.put(auc);
LOG.incrementProcessed(prog);
}
// Compare to others, exploiting symmetry
DBIDArrayIter id2 = ids.iter();
id2.seek(a + 1);
for (int b = a + 1; b < size; b++, id2.advance()) {
final NumberVector vecb = relation.get(id2);
double[] combined = new double[dim];
for (int d = 0; d < dim; d++) {
buf[0] = veca.doubleValue(d);
buf[1] = vecb.doubleValue(d);
combined[d] = voting.combine(buf);
}
double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(DoubleVector.wrap(combined)));
// logger.verbose(auc + " " + labels.get(ids.get(a)) + " " +
// labels.get(ids.get(b)));
data[a][b] = auc;
data[b][a] = auc;
commax.put(data[a][b]);
// minmax.put(auc);
LOG.incrementProcessed(prog);
}
}
LOG.ensureCompleted(prog);
}
for (int a = 0; a < size; a++) {
for (int b = a + 1; b < size; b++) {
double ref = Math.max(data[a][a], data[b][b]);
data[a][b] = (data[a][b] - ref) / (1 - ref);
data[b][a] = (data[b][a] - ref) / (1 - ref);
// logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " +
// labels.get(ids.get(b)));
minmax.put(data[a][b]);
}
}
for (int a = 0; a < size; a++) {
data[a][a] = 0;
}
LOG.verbose("Gain: " + minmax.toString() + " AUC: " + commax.toString());
boolean hasneg = (minmax.getMin() < -1E-3);
LinearScaling scale;
if (!hasneg) {
scale = LinearScaling.fromMinMax(0., minmax.getMax());
} else {
scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
}
// Note: this overrides the scale computed above with a fixed [0, 0.5] range.
scale = LinearScaling.fromMinMax(0., .5);
BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
for (int x = 0; x < size; x++) {
for (int y = x; y < size; y++) {
double val = data[x][y];
val = Math.max(-1, Math.min(1., scale.getScaled(val)));
// Compute color:
final int col;
{
if (val >= 0) {
int ival = 0xFF & (int) (255 * val);
col = 0xff000000 | (ival << 8);
} else {
int ival = 0xFF & (int) (255 * -val);
col = 0xff000000 | (ival << 16);
}
}
img.setRGB(x, y, col);
img.setRGB(y, x, col);
}
}
SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
hier.add(database, smat);
VisualizerContext context = vispar.newContext(hier, smat);
// Attach visualizers to results
SimilarityMatrixVisualizer factory = new SimilarityMatrixVisualizer();
factory.processNewResult(context, database);
VisualizationTree.findVis(context).filter(VisualizationTask.class).forEach(task -> {
if (task.getFactory() == factory) {
showVisualization(context, factory, task);
}
});
}
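The normalization step above can be read in isolation: the combined AUC is compared against the better of the two individual AUC scores and rescaled to the remaining headroom. A standalone sketch (plain Java, the helper name is illustrative and not part of ELKI):
// Illustrative helper: relative gain of a combined ranking over the better of
// its two members. A gain of 0 means no improvement over the better input;
// a gain of 1 means the combination reaches a perfect AUC of 1.
static double relativeGain(double aucA, double aucB, double aucCombined) {
  final double ref = Math.max(aucA, aucB);
  return (aucCombined - ref) / (1 - ref);
}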
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class DiSH, method isParent.
/**
* Returns true, if the specified parent cluster is a parent of one child of
* the children clusters.
*
* @param relation the database containing the objects
* @param parent the parent to be tested
* @param iter the list of children to be tested
* @param db_dim Database dimensionality
* @return true, if the specified parent cluster is a parent of one child of
* the children clusters, false otherwise
*/
private boolean isParent(Relation<V> relation, Cluster<SubspaceModel> parent, It<Cluster<SubspaceModel>> iter, int db_dim) {
Subspace s_p = parent.getModel().getSubspace();
NumberVector parent_centroid = ProjectedCentroid.make(s_p.getDimensions(), relation, parent.getIDs());
int subspaceDim_parent = db_dim - s_p.dimensionality();
for (; iter.valid(); iter.advance()) {
Cluster<SubspaceModel> child = iter.get();
Subspace s_c = child.getModel().getSubspace();
NumberVector child_centroid = ProjectedCentroid.make(s_c.getDimensions(), relation, child.getIDs());
long[] commonPreferenceVector = BitsUtil.andCMin(s_p.getDimensions(), s_c.getDimensions());
int subspaceDim = subspaceDimensionality(parent_centroid, child_centroid, s_p.getDimensions(), s_c.getDimensions(), commonPreferenceVector);
if (subspaceDim == subspaceDim_parent) {
return true;
}
}
return false;
}
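The call to BitsUtil.andCMin above intersects the dimension bitmasks of the parent and child subspaces. Conceptually, using java.util.BitSet instead of ELKI's long[] bitmask representation and with hypothetical dimension sets, the common preference vector is simply:
// Conceptual sketch only; ELKI stores preference vectors as long[] bitmasks.
BitSet parentDims = BitSet.valueOf(new long[] { 0b0110L }); // dims {1, 2}, hypothetical
BitSet childDims = BitSet.valueOf(new long[] { 0b0111L }); // dims {0, 1, 2}, hypothetical
BitSet commonPreferenceVector = (BitSet) parentDims.clone();
commonPreferenceVector.and(childDims); // intersection: dims {1, 2}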
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class AddSingleScale, method run.
/**
* Add scales to a single vector relation.
*
* @param rel Relation
* @return Scales
*/
private ScalesResult run(Relation<? extends NumberVector> rel) {
final int dim = RelationUtil.dimensionality(rel);
LinearScale[] scales = new LinearScale[dim];
if (minmax == null) {
DoubleMinMax mm = new DoubleMinMax();
for (DBIDIter iditer = rel.iterDBIDs(); iditer.valid(); iditer.advance()) {
NumberVector vec = rel.get(iditer);
for (int d = 0; d < dim; d++) {
final double val = vec.doubleValue(d);
if (val != val) {
// NaN
continue;
}
mm.put(val);
}
}
LinearScale scale = new LinearScale(mm.getMin(), mm.getMax());
for (int i = 0; i < dim; i++) {
scales[i] = scale;
}
} else {
// Use predefined.
LinearScale scale = new LinearScale(minmax[0], minmax[1]);
for (int i = 0; i < dim; i++) {
scales[i] = scale;
}
}
ScalesResult res = new ScalesResult(scales);
return res;
}
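The same min/max scan can be written independently of ELKI's relation API. A minimal sketch (plain Java, names are illustrative) that computes one shared value range over a double[][] while skipping NaN values via the self-comparison idiom used above:
// Illustrative helper, not part of ELKI: shared [min, max] over all values.
static double[] sharedRange(double[][] data) {
  double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY;
  for (double[] row : data) {
    for (double val : row) {
      if (val != val) { // NaN is the only value not equal to itself
        continue;
      }
      min = Math.min(min, val);
      max = Math.max(max, val);
    }
  }
  return new double[] { min, max };
}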
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class PassingDataToELKI, method main.
/**
* Main method
*
* @param args Command line parameters (not supported)
*/
public static void main(String[] args) {
// Set the logging level to statistics:
LoggingConfiguration.setStatistics();
// Generate a random data set.
// Note: ELKI has a nice data generator class, use that instead.
double[][] data = new double[1000][2];
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < data[i].length; j++) {
data[i][j] = Math.random();
}
}
// Adapter to load data from an existing array.
DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
// Create a database (which may contain multiple relations!)
Database db = new StaticArrayDatabase(dbc, null);
// Load the data into the database (do NOT forget to initialize...)
db.initialize();
// Relation containing the number vectors:
Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
// We know that the ids must be a continuous range:
DBIDRange ids = (DBIDRange) rel.getDBIDs();
// K-means should be used with squared Euclidean (least squares):
SquaredEuclideanDistanceFunction dist = SquaredEuclideanDistanceFunction.STATIC;
// Default initialization, using global random:
// To fix the random seed, use: new RandomFactory(seed);
RandomlyGeneratedInitialMeans init = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);
// Textbook k-means clustering:
KMeansLloyd<NumberVector> km = new KMeansLloyd<>(dist, //
    3, /* k - number of partitions */
    0, /* maximum number of iterations: no limit */
    init);
// K-means will automatically choose a numerical relation from the data set:
// But we could make it explicit (if there were more than one numeric
// relation!): km.run(db, rel);
Clustering<KMeansModel> c = km.run(db);
// Output all clusters:
int i = 0;
for (Cluster<KMeansModel> clu : c.getAllClusters()) {
// K-means will name all clusters "Cluster" in lack of noise support:
System.out.println("#" + i + ": " + clu.getNameAutomatic());
System.out.println("Size: " + clu.size());
System.out.println("Center: " + clu.getModel().getPrototype().toString());
// Iterate over objects:
System.out.print("Objects: ");
for (DBIDIter it = clu.getIDs().iter(); it.valid(); it.advance()) {
// To get the vector use:
// NumberVector v = rel.get(it);
// Offset within our DBID range: "line number"
final int offset = ids.getOffset(it);
System.out.print(" " + offset);
// Do NOT rely on using "internalGetIndex()" directly!
}
System.out.println();
++i;
}
}
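A small variation of the example above (a sketch reusing the same db, dist and rel; the seed value 0L is arbitrary): fix the random seed as hinted in the comment on RandomFactory, and fetch the actual vectors of the cluster members instead of only their offsets.
// Sketch under assumptions: db, dist and rel as set up in main() above.
RandomlyGeneratedInitialMeans seededInit = new RandomlyGeneratedInitialMeans(new RandomFactory(0L));
KMeansLloyd<NumberVector> km2 = new KMeansLloyd<>(dist, 3, 0, seededInit);
Clustering<KMeansModel> c2 = km2.run(db);
for (Cluster<KMeansModel> clu : c2.getAllClusters()) {
  for (DBIDIter it = clu.getIDs().iter(); it.valid(); it.advance()) {
    NumberVector v = rel.get(it); // the actual data vector, as noted above
    System.out.println(clu.getNameAutomatic() + ": " + v.toString());
  }
}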