Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project:
class RankingQualityHistogram, method run.
/**
 * Process a database: compute a ROC AUC ranking quality value for every
 * object and aggregate these values into a histogram.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Histogram of ranking qualities
 */
public HistogramResult run(Database database, Relation<O> relation) {
  final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, relation.size());
  if (LOG.isVerbose()) {
    LOG.verbose("Preprocessing clusters...");
  }
  // Ground truth: cluster objects by their class labels (or one big cluster).
  Collection<Cluster<Model>> clusters = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
  DoubleStaticHistogram histogram = new DoubleStaticHistogram(numbins, 0.0, 1.0);
  if (LOG.isVerbose()) {
    LOG.verbose("Processing points...");
  }
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), LOG) : null;
  ROCEvaluation roc = new ROCEvaluation();
  MeanVariance stats = new MeanVariance();
  // Each object contributes the same histogram weight.
  final double weight = 1. / relation.size();
  for (Cluster<?> cluster : clusters) {
    for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
      // Rank the full relation by distance to the current object.
      KNNList neighbors = knnQuery.getKNNForDBID(it, relation.size());
      double auc = EvaluateClustering.evaluateRanking(roc, cluster, neighbors);
      stats.put(auc);
      histogram.increment(auc, weight);
      LOG.incrementProcessed(prog);
    }
  }
  LOG.ensureCompleted(prog);
  // Convert the histogram bins into (bin center, bin value) pairs.
  Collection<double[]> bins = new ArrayList<>(relation.size());
  for (DoubleStaticHistogram.Iter it = histogram.iter(); it.valid(); it.advance()) {
    bins.add(new double[] { it.getCenter(), it.getValue() });
  }
  HistogramResult histResult = new HistogramResult("Ranking Quality Histogram", "ranking-histogram", bins);
  histResult.addHeader("Mean: " + stats.getMean() + " Variance: " + stats.getSampleVariance());
  return histResult;
}
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project:
class LogNormalLevenbergMarquardtKDEEstimator, method estimate.
/**
 * Estimate a log-normal distribution by a Levenberg-Marquardt fit of a
 * Gaussian to a kernel density estimate of the log-transformed data.
 *
 * @param data Input data
 * @param adapter Array adapter for the input data
 * @param <A> Array type
 * @return Fitted log-normal distribution (zero shift)
 * @throws ArithmeticException if the data contains non-positive values
 */
@Override
public <A> LogNormalDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) {
  // We first need the basic parameters:
  final int len = adapter.size(data);
  MeanVariance mv = new MeanVariance();
  // Work in log space: a log-normal in data space is normal in log space.
  double[] x = new double[len];
  for (int i = 0; i < len; i++) {
    final double val = adapter.getDouble(data, i);
    if (!(val > 0)) {
      throw new ArithmeticException("Cannot fit logNormal to a data set which includes non-positive values: " + val);
    }
    x[i] = FastMath.log(val);
    mv.put(x[i]);
  }
  // Sort our copy.
  Arrays.sort(x);
  // BUGFIX: the previous indices (len >> 1 and (len + 1) >> 1) selected the
  // upper middle element twice for even len, and averaged the middle with the
  // element ABOVE it for odd len. The sample median averages the lower and
  // upper middle: indices (len - 1) >> 1 and len >> 1 (equal when len is odd).
  double median = (x[(len - 1) >> 1] + x[len >> 1]) * .5;
  // Height = density, via KDE.
  KernelDensityEstimator de = new KernelDensityEstimator(x, GaussianKernelDensityFunction.KERNEL, 1e-6);
  double[] y = de.getDensity();
  // Weights: uniform.
  double[] s = new double[len];
  Arrays.fill(s, 1.0);
  // Initial parameter estimate: location = median, scale = sample stddev,
  // amplitude fixed at 1 (not fitted, see dofit).
  double[] params = { median, mv.getSampleStddev(), 1 };
  boolean[] dofit = { true, true, false };
  LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(GaussianFittingFunction.STATIC, params, dofit, x, y, s);
  fit.run();
  double[] ps = fit.getParams();
  // Fitted location and scale in log space; shift fixed at 0.
  return new LogNormalDistribution(ps[0], ps[1], 0.);
}
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project:
class WelchTTest, method deviation.
/**
 * Compute the deviation of two samples as one minus the p-value of
 * Welch's t-test on their means.
 *
 * @param sample1 First sample
 * @param sample2 Second sample
 * @return 1 - p-value of the test
 */
@Override
public double deviation(double[] sample1, double[] sample2) {
  // Accumulate mean and variance of each sample independently.
  MeanVariance first = new MeanVariance();
  MeanVariance second = new MeanVariance();
  for (int i = 0; i < sample1.length; i++) {
    first.put(sample1[i]);
  }
  for (int i = 0; i < sample2.length; i++) {
    second.put(sample2[i]);
  }
  // Welch's t statistic, its degrees of freedom, then the p-value.
  final double t = calculateTestStatistic(first, second);
  final int dof = calculateDOF(first, second);
  return 1 - calculatePValue(t, dof);
}
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project:
class OUTRES, method outresScore.
/**
 * Main loop of OUTRES. Run for each object.
 *
 * Recursively explores supersets of the current subspace: for each candidate
 * dimension, the object's density is compared against the mean density of its
 * neighbors; subspaces where the object deviates contribute a factor to the
 * final score. NOTE: {@code subspace} is mutated in place (bits set and
 * cleared again) during the traversal.
 *
 * @param s start dimension
 * @param subspace Current subspace
 * @param id Current object ID
 * @param kernel Kernel
 * @return Score
 */
public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
  // Initial score is 1.0; each relevant subspace multiplies in a factor <= 1.
  double score = 1.0;
  final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
  MeanVariance meanv = new MeanVariance();
  // Try extending the current subspace by every dimension >= s.
  for (int i = s; i < kernel.dim; i++) {
    if (BitsUtil.get(subspace, i)) {
      // Dimension already in the subspace; skip it. (Original note: "with i=0?")
      continue;
    }
    // Temporarily add dimension i; cleared again at the end of the loop body.
    BitsUtil.setI(subspace, i);
    df.setSelectedDimensions(subspace);
    // NOTE(review): eps is adjusted using the full dimensionality kernel.dim,
    // not the current subspace cardinality -- confirm this is intended.
    final double adjustedEps = kernel.adjustedEps(kernel.dim);
    // Query with a larger window, to also get neighbors of neighbors
    // Subspace euclidean is metric!
    final double range = adjustedEps * 2.;
    RangeQuery<V> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
    DoubleDBIDList neighc = rq.getRangeForDBID(id, range);
    // Restrict the double-range result to the actual eps neighborhood.
    DoubleDBIDList neigh = refineRange(neighc, adjustedEps);
    if (neigh.size() > 2) {
      // Relevance test
      if (relevantSubspace(subspace, neigh, kernel)) {
        final double density = kernel.subspaceDensity(subspace, neigh);
        // Compute mean and standard deviation for densities of neighbors.
        meanv.reset();
        for (DoubleDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
          // Neighborhood of the neighbor, computed from the larger window neighc.
          DoubleDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
          meanv.put(kernel.subspaceDensity(subspace, n2));
        }
        final double deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
        // High deviation: the object is less dense than its neighbors here;
        // multiply a penalty factor into the score.
        if (deviation >= 1) {
          score *= (density / deviation);
        }
        // Recursion: extend this subspace further, starting at dimension i + 1.
        score *= outresScore(i + 1, subspace, id, kernel);
      }
    }
    // Undo the temporary addition of dimension i before trying the next one.
    BitsUtil.clearI(subspace, i);
  }
  return score;
}
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project:
class SigmoidOutlierScalingFunction, method prepare.
/**
 * Fit the sigmoid parameters A and B to the given outlier scores using an
 * EM-style iteration: the E-step assigns each score to the positive or
 * negative side of the current sigmoid, the M-step refits the parameters
 * via Levenberg-Marquardt. Results are stored in {@code Afinal}/{@code Bfinal}.
 *
 * @param array Outlier score array
 * @param adapter Array adapter for the score array
 * @param <A> Array type
 */
@Override
public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) {
  final int size = adapter.size(array);
  // Mean of the finite scores is used for the initial intercept.
  MeanVariance mv = new MeanVariance();
  for (int i = 0; i < size; i++) {
    final double v = adapter.getDouble(array, i);
    if (!Double.isInfinite(v)) {
      mv.put(v);
    }
  }
  // Initial parameters - are these defaults sound?
  double a = 1.0;
  double b = -mv.getMean();
  // One bit per object: current outlier assignment from the E-step.
  long[] assignment = BitsUtil.zero(size);
  int iteration = 0;
  while (true) {
    // E-Step: reassign each object based on the sign of a * val + b.
    boolean changed = false;
    for (int i = 0; i < size; i++) {
      final boolean positive = a * adapter.getDouble(array, i) + b > 0;
      if (positive != BitsUtil.get(assignment, i)) {
        if (positive) {
          BitsUtil.setI(assignment, i);
        } else {
          BitsUtil.clearI(assignment, i);
        }
        changed = true;
      }
    }
    // Converged: no assignment changed.
    if (!changed) {
      break;
    }
    // M-Step
    // Implementation based on:
    // H.-T. Lin, C.-J. Lin, R. C. Weng:
    // A Note on Platt's Probabilistic Outputs for Support Vector Machines
    final double[] newab = MStepLevenbergMarquardt(a, b, assignment, array, adapter);
    a = newab[0];
    b = newab[1];
    if (++iteration > 100) {
      LOG.warning("Max iterations met in sigmoid fitting.");
      break;
    }
  }
  Afinal = a;
  Bfinal = b;
  LOG.debugFine("A = " + Afinal + " B = " + Bfinal);
}
Aggregations