use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class SimpleCOP method run.
/**
 * Run the SimpleCOP outlier detection on a relation.
 * <p>
 * For every object, a (k+1)-nearest-neighbor query is executed and the object
 * itself is removed from the result. A correlation model is derived from the
 * remaining neighbors, and the distance of the object to that model,
 * normalized by the model's standard deviation, is passed through the error
 * function to obtain the outlier score.
 *
 * @param database Database (not referenced in this method body)
 * @param data Relation to process
 * @return Outlier result; the local dimensionality, error vectors, data
 *         projections and correlation solutions are attached as child results
 */
public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
  DBIDs ids = data.getDBIDs();

  // All per-object stores share the same storage hints.
  final int hints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC;
  WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, hints);
  WritableDataStore<double[]> cop_err_v = DataStoreUtil.makeStorage(ids, hints, double[].class);
  WritableDataStore<double[][]> cop_datav = DataStoreUtil.makeStorage(ids, hints, double[][].class);
  WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, hints, -1);
  WritableDataStore<CorrelationAnalysisSolution<?>> cop_sol = DataStoreUtil.makeStorage(ids, hints, CorrelationAnalysisSolution.class);

  FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null;
  for (DBIDIter iter = data.iterDBIDs(); iter.valid(); iter.advance()) {
    // Neighborhood of the current object, without the object itself.
    ModifiableDBIDs neighborIds = DBIDUtil.newArray(knnQuery.getKNNForDBID(iter, k + 1));
    neighborIds.remove(iter);
    // TODO: do we want to use the query point as centroid?
    CorrelationAnalysisSolution<V> model = dependencyDerivator.generateModel(data, neighborIds);

    // Look up the vector once; it is needed for distance, error and projections.
    final V vec = data.get(iter);
    final double stddev = model.getStandardDeviation();
    final double distance = model.distance(vec);
    cop_score.putDouble(iter, NormalDistribution.erf(distance / (stddev * MathUtil.SQRT2)));
    // The error vector is stored with inverted sign.
    cop_err_v.put(iter, times(model.errorVector(vec), -1));
    cop_datav.put(iter, model.dataProjections(vec));
    cop_dim.putInt(iter, model.getCorrelationDimensionality());
    cop_sol.put(iter, model);
    LOG.incrementProcessed(progressLocalPCA);
  }
  LOG.ensureCompleted(progressLocalPCA);

  // Primary result: the outlier scores.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Original Correlation Outlier Probabilities", "origcop-outlier", cop_score, ids);
  OutlierResult result = new OutlierResult(new ProbabilisticOutlierScore(), scoreResult);
  // Secondary results, attached as children of the outlier result.
  result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
  result.addChildResult(new MaterializedRelation<>("Error vectors", COP.COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
  result.addChildResult(new MaterializedRelation<>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids));
  result.addChildResult(new MaterializedRelation<>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids));
  return result;
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class ComputeSimilarityMatrixImage method computeSimilarityMatrixImage.
/**
 * Render the pairwise distances of a relation as a gray-scale image.
 * <p>
 * The objects are arranged in the order produced by the iterator. A first
 * pass over the upper triangle collects the minimum and maximum of the finite
 * distances (optionally ignoring zeros, depending on {@code skipzero}); a
 * second pass maps each distance linearly onto that range, applies the
 * optional extra {@code scaling}, and paints the pixel and its mirror.
 *
 * @param relation Relation
 * @param iter DBID iterator defining the object order; it is consumed here
 * @return result object
 * @throws IllegalStateException if the iterator does not yield exactly as
 *         many objects as the relation contains
 */
private SimilarityMatrix computeSimilarityMatrixImage(Relation<O> relation, DBIDIter iter) {
  // Materialize the iteration order.
  ArrayModifiableDBIDs order = DBIDUtil.newArray(relation.size());
  while (iter.valid()) {
    order.add(iter);
    iter.advance();
  }
  final int size = order.size();
  if (size != relation.size()) {
    throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
  }
  DistanceQuery<O> dq = distanceFunction.instantiate(relation);

  // Progress is counted once per outer-loop row in each of the two passes,
  // hence 2 * size steps rather than size * (size + 1).
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Similarity Matrix Image", 2 * size, LOG) : null;

  // Pass 1: value range of the distances.
  // Note: we assume that we have an efficient distance cache available,
  // since we are using 2*O(n*n) distance computations.
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDArrayIter a = order.iter(), b = order.iter(); a.valid(); a.advance()) {
    for (b.seek(a.getOffset()); b.valid(); b.advance()) {
      final double d = dq.distance(a, b);
      if (Double.isNaN(d) || Double.isInfinite(d)) {
        continue; // Not usable for determining the range.
      }
      if (skipzero && d <= 0.0) {
        continue; // Zero distances are excluded on request.
      }
      minmax.put(d);
    }
    LOG.incrementProcessed(prog);
  }

  // Linear map of the observed range; the factor is only inverted when the
  // range has a positive extent.
  final double min = minmax.getMin();
  final double extent = minmax.getMax() - min;
  final double zoom = extent > 0.0 ? 1. / extent : extent;
  LinearScaling scale = new LinearScaling(zoom, -min * zoom);

  // Pass 2: paint the symmetric image.
  BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
  DBIDArrayIter rowIter = order.iter();
  DBIDArrayIter colIter = order.iter();
  for (int x = 0; x < size && rowIter.valid(); x++, rowIter.advance()) {
    colIter.seek(rowIter.getOffset());
    for (int y = x; y < size && colIter.valid(); y++, colIter.advance()) {
      double value = dq.distance(rowIter, colIter);
      // Only positive distances go through the range scaling.
      if (value > 0.0) {
        value = scale.getScaled(value);
      }
      // Apply extra scaling
      if (scaling != null) {
        value = scaling.getScaled(value);
      }
      final int gray = 0xFF & (int) (255 * value);
      final int argb = 0xff000000 | (gray << 16) | (gray << 8) | gray;
      img.setRGB(x, y, argb);
      img.setRGB(y, x, argb);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return new SimilarityMatrix(img, relation, order);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class DiSHPreferenceVectorIndex method initialize.
/**
 * Precompute and store the preference vector of every object in the relation.
 * <p>
 * If only a single epsilon value is configured and the data has more than
 * one dimension, the value is replicated for every dimension. For each
 * object, one range query per dimension is executed with that dimension's
 * epsilon; the resulting neighbor sets are handed to
 * {@code determinePreferenceVector} and the returned vector is stored.
 *
 * @throws EmptyDataException if no relation is set or the relation is empty
 */
@Override
public void initialize() {
  if (relation == null || relation.size() == 0) {
    throw new EmptyDataException();
  }
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, long[].class);
  if (LOG.isDebugging()) {
    // Arrays.toString prints the values; Arrays.asList on a double[] would
    // wrap the array itself and print only its identity hash.
    LOG.debugFine(new StringBuilder() //
        .append("eps ").append(Arrays.toString(epsilon)) //
        .append("\n minpts ").append(minpts) //
        .append("\n strategy ").append(strategy).toString());
  }
  long start = System.currentTimeMillis();
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null;

  int dim = RelationUtil.dimensionality(relation);
  // Only one epsilon value specified: use it for all dimensions.
  if (epsilon.length == 1 && dim != 1) {
    double eps = epsilon[0];
    epsilon = new double[dim];
    Arrays.fill(epsilon, eps);
  }
  RangeQuery<V>[] rangeQueries = initRangeQueries(relation, dim);

  // Reused debug message buffer; null when debugging is disabled.
  StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null;
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    if (msg != null) {
      msg.setLength(0);
      msg.append("\nid = ").append(DBIDUtil.toString(it));
    }
    // Determine neighbors in each dimension.
    ModifiableDBIDs[] allNeighbors = new ModifiableDBIDs[dim];
    for (int d = 0; d < dim; d++) {
      allNeighbors[d] = DBIDUtil.newHashSet(rangeQueries[d].getRangeForDBID(it, epsilon[d]));
    }
    if (msg != null) {
      for (int d = 0; d < dim; d++) {
        msg.append("\n neighbors [").append(d).append(']').append(" (").append(allNeighbors[d].size()).append(") = ").append(allNeighbors[d]);
      }
    }
    storage.put(it, determinePreferenceVector(relation, allNeighbors, msg));
    if (msg != null) {
      LOG.debugFine(msg.toString());
    }
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  // Report the total preprocessing time.
  if (LOG.isVerbose()) {
    long end = System.currentTimeMillis();
    long elapsedTime = end - start;
    LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds.");
  }
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class GaussianAffinityMatrixBuilder method buildDistanceMatrix.
/**
 * Build a symmetric matrix of squared pairwise distances.
 * <p>
 * Only the pairs above the diagonal are computed; each value is mirrored to
 * the lower triangle, and the diagonal keeps its initial value of zero. If
 * the distance function reports that it is not already squared, each
 * distance is squared before it is stored.
 *
 * @param ids DBIDs
 * @param dq Distance query
 * @return Distance matrix
 */
protected double[][] buildDistanceMatrix(ArrayDBIDs ids, DistanceQuery<?> dq) {
  final int size = ids.size();
  double[][] dmat = new double[size][size];
  final boolean needsSquaring = !dq.getDistanceFunction().isSquared();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing distance matrix", (size * (size - 1)) >>> 1, LOG) : null;
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.distancematrix").begin() : null;
  DBIDArrayIter ix = ids.iter(), iy = ids.iter();
  for (ix.seek(0); ix.valid(); ix.advance()) {
    final int x = ix.getOffset();
    final double[] rowX = dmat[x];
    for (iy.seek(x + 1); iy.valid(); iy.advance()) {
      final int y = iy.getOffset();
      double value = dq.distance(ix, iy);
      if (needsSquaring) {
        value = value * value;
      }
      // Store both (x,y) and the mirrored (y,x) entry.
      rowX[y] = value;
      dmat[y][x] = value;
    }
    if (prog != null) {
      // Number of pairs completed once the first "done" rows are finished.
      final int done = x + 1;
      prog.setProcessed(done * size - ((done * (done + 1)) >>> 1), LOG);
    }
  }
  LOG.ensureCompleted(prog);
  if (timer != null) {
    LOG.statistics(timer.end());
  }
  return dmat;
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class IntrinsicNearestNeighborAffinityMatrixBuilder method computePij.
/**
 * Compute the sparse pij using the nearest neighbors only.
 * <p>
 * For each object, {@code numberOfNeighbours + 1} neighbors are queried and
 * converted into distance and index lists, from which {@code computeSigma}
 * fills that object's row of {@code pij}. Afterwards the sparse matrix is
 * symmetrized: where i and j list each other, both entries become the
 * scaled square root of the product of the two values (bounded from below
 * by {@code MIN_PIJ}); where j does not list i, the entry of i is set to
 * zero. The scaling factor is {@code initialScale} divided by twice the sum
 * of these square roots over all mutual pairs.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output of distances
 * @param indices Output of indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
  final double logPerp = FastMath.log(perplexity);
  // Scratch arrays, resizable
  DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
  IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
  // Compute nearest-neighbor sparse affinity matrix
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
  MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
  Mean mid = LOG.isStatistics() ? new Mean() : null;
  for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
    dists.clear();
    inds.clear();
    KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
    convertNeighbors(ids, ix, square, neighbours, dists, inds, mid);
    final int off = ix.getOffset();
    // The output row is allocated here, sized to the converted neighbors.
    final double[] row = pij[off] = new double[dists.size()];
    double beta = computeSigma(off, dists, perplexity, logPerp, row);
    if (mv != null) {
      // Sigma
      mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
    }
    indices[off] = inds.toArray();
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (mid != null) {
    // Use getName(): concatenating the Class object would prefix "class ".
    LOG.statistics(new DoubleStatistic(getClass().getName() + ".average-original-id", mid.getMean()));
  }
  // Sum of the sparse affinity matrix:
  double sum = 0.;
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      if (j > i) {
        // Exploit symmetry.
        continue;
      }
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        sum += FastMath.sqrt(pij_i[offi] * pij[j][offj]);
      }
    }
  }
  // Each mutual pair was counted once above, hence the factor 2.
  final double scale = initialScale / (2 * sum);
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        assert (indices[j][offj] == i);
        // Exploit symmetry: both entries are written when the smaller index
        // is processed, so nothing is left to do for i > j.
        if (i < j) {
          // Symmetrize
          final double val = FastMath.sqrt(pij_i[offi] * pij[j][offj]);
          pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
        }
      } else {
        // Not found, so zero.
        pij_i[offi] = 0;
      }
    }
  }
  // Guard on the objects themselves rather than re-querying the log level,
  // so a level change during the computation cannot cause a null dereference.
  if (timer != null) {
    LOG.statistics(timer.end());
  }
  if (mv != null) {
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
  }
}
Aggregations