use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
the class FilterUtil method guessFactory.
/**
* Try to guess the appropriate factory.
*
* @param in Input type
* @param <V> Vector type
* @return Factory
*/
@SuppressWarnings("unchecked")
public static <V extends NumberVector> NumberVector.Factory<V> guessFactory(SimpleTypeInformation<V> in) {
  NumberVector.Factory<V> factory = null;
  if (in instanceof VectorTypeInformation) {
    factory = (NumberVector.Factory<V>) ((VectorTypeInformation<V>) in).getFactory();
  }
  if (factory == null) {
    // FIXME: hack. Add factories to simple type information, too?
    try {
      Field f = in.getRestrictionClass().getField("FACTORY");
      factory = (NumberVector.Factory<V>) f.get(null);
    } catch (Exception e) {
      LoggingUtil.warning("Cannot determine factory for type " + in.getRestrictionClass(), e);
    }
  }
  return factory;
}
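A short usage sketch (hedged: the DoubleVector column type, the dimensionality, and the raw values are assumptions for illustration, not part of FilterUtil itself). A filter would typically guess the factory once from the input type information and then rebuild vectors of the matching concrete type from modified raw values:
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.filter.FilterUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;

// Type information for an (assumed) 3-dimensional DoubleVector column:
VectorFieldTypeInformation<DoubleVector> in = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, 3);
NumberVector.Factory<DoubleVector> factory = FilterUtil.guessFactory(in);
// Rebuild a vector of the matching concrete type from filtered raw values:
double[] scaled = { 0.5, 1.5, 2.5 };
DoubleVector rebuilt = factory.newNumberVector(scaled, ArrayLikeUtil.DOUBLEARRAYADAPTER);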
use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
the class EvaluatePrecomputedOutlierScores method run.
@Override
public void run() {
  try (FileInputStream fis = new FileInputStream(infile); //
      InputStream is = new BufferedInputStream(FileUtil.tryGzipInput(fis)); //
      FileOutputStream fosResult = new FileOutputStream(outfile, true);
      PrintStream fout = new PrintStream(fosResult);
      FileChannel chan = fosResult.getChannel()) {
    // Setup the input stream.
    parser.initStream(is);
    // Lock the output file:
    chan.lock();
    if (chan.position() == 0L) {
      writeHeader(fout);
    } else {
      LOG.info("Appending to existing output " + outfile);
    }
    int lcol = -1, dcol = -1;
    loop: while (true) {
      BundleStreamSource.Event ev = parser.nextEvent();
      switch (ev) {
      case END_OF_STREAM:
        break loop;
      case META_CHANGED:
        BundleMeta meta = parser.getMeta();
        lcol = -1;
        dcol = -1;
        for (int i = 0; i < meta.size(); i++) {
          SimpleTypeInformation<?> m = meta.get(i);
          if (TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH.isAssignableFromType(m)) {
            if (dcol >= 0) {
              throw new AbortException("More than one vector column.");
            }
            dcol = i;
          } else if (TypeUtil.GUESSED_LABEL.isAssignableFromType(m)) {
            if (lcol >= 0) {
              throw new AbortException("More than one label column.");
            }
            lcol = i;
          } else {
            throw new AbortException("Unexpected data column type: " + m);
          }
        }
        break;
      case NEXT_OBJECT:
        if (lcol < 0) {
          throw new AbortException("No label column available.");
        }
        if (dcol < 0) {
          throw new AbortException("No vector column available.");
        }
        processRow(fout, (NumberVector) parser.data(dcol), parser.data(lcol).toString());
        break;
      }
    }
  } catch (IOException e) {
    throw new AbortException("IO error.", e);
  }
}
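The reusable part of this method is the append-to-shared-file pattern: open the file in append mode, take a FileChannel lock, and write the header only when the file is still empty. A minimal standalone sketch of just that pattern, with a made-up file name, header, and result row (plain JDK, no ELKI classes):
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.channels.FileChannel;

public class AppendWithHeader {
  public static void main(String[] args) throws IOException {
    try (FileOutputStream fos = new FileOutputStream("scores-eval.csv", true); // append mode
        PrintStream out = new PrintStream(fos);
        FileChannel chan = fos.getChannel()) {
      chan.lock(); // exclusive lock, released when the channel is closed
      if (chan.position() == 0L) {
        out.println("Name,ROC-AUC"); // header only for a fresh file
      }
      out.println("example-run,0.93"); // made-up result row
    }
  }
}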
use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
the class GreedyEnsembleExperiment method applyPrescaling.
/**
* Prescale each vector (except when in {@code skip}) with the given scaling
* function.
*
* @param scaling Scaling function
* @param relation Relation to read
* @param skip DBIDs to pass unmodified
* @return New relation
*/
public static Relation<NumberVector> applyPrescaling(ScalingFunction scaling, Relation<NumberVector> relation, DBIDs skip) {
  if (scaling == null) {
    return relation;
  }
  NumberVector.Factory<NumberVector> factory = RelationUtil.getNumberVectorFactory(relation);
  DBIDs ids = relation.getDBIDs();
  WritableDataStore<NumberVector> contents = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT, NumberVector.class);
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    NumberVector v = relation.get(iter);
    double[] raw = v.toArray();
    if (!skip.contains(iter)) {
      applyScaling(raw, scaling);
    }
    contents.put(iter, factory.newNumberVector(raw, ArrayLikeUtil.DOUBLEARRAYADAPTER));
  }
  return new MaterializedRelation<>(relation.getDataTypeInformation(), ids, "rescaled", contents);
}
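A hedged usage sketch, assuming `relation` already holds the score vectors and `scaling` is a configured ScalingFunction (both are assumptions; GreedyEnsembleExperiment wires them up internally). Vectors whose IDs are in the skip set pass through unchanged:
// 'relation' and 'scaling' are assumed to exist in the surrounding code.
DBIDs skip = DBIDUtil.newHashSet(); // e.g. the IDs of ground-truth / control rows
Relation<NumberVector> rescaled = GreedyEnsembleExperiment.applyPrescaling(scaling, relation, skip);
for (DBIDIter it = rescaled.getDBIDs().iter(); it.valid(); it.advance()) {
  NumberVector v = rescaled.get(it); // rescaled copy for every ID outside 'skip'
}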
use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
the class HopkinsStatisticClusteringTendency method run.
/**
* Runs the algorithm in the timed evaluation part.
*
* @param database Database context
* @param relation Relation to analyze
* @return Always null; the computed statistics are written to the log
*/
public Result run(Database database, Relation<NumberVector> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final DistanceQuery<NumberVector> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<NumberVector> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
  final double[] min = new double[dim], extend = new double[dim];
  initializeDataExtends(relation, dim, min, extend);
  if (!LOG.isStatistics()) {
    LOG.warning("This algorithm must be used with at least logging level " + Level.STATISTICS);
  }
  MeanVariance hmean = new MeanVariance(), umean = new MeanVariance(), wmean = new MeanVariance();
  // Repeat the experiment to get a more stable result:
  for (int j = 0; j < this.rep; j++) {
    // Compute NN distances for random objects from within the database
    double w = computeNNForRealData(knnQuery, relation, dim);
    // Compute NN distances for randomly created new uniform objects
    double u = computeNNForUniformData(knnQuery, min, extend);
    // Compute the Hopkins statistic h = u / (u + w), i.e. a / (1 + a)
    double h = u / (u + w);
    hmean.put(h);
    umean.put(u);
    wmean.put(w);
  }
  final String prefix = this.getClass().getName();
  LOG.statistics(new LongStatistic(prefix + ".samplesize", sampleSize));
  LOG.statistics(new LongStatistic(prefix + ".dim", dim));
  LOG.statistics(new LongStatistic(prefix + ".hopkins.nearest-neighbor", k));
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.mean", hmean.getMean()));
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.mean", umean.getMean()));
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.mean", wmean.getMean()));
  if (rep > 1) {
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.std", hmean.getSampleStddev()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.std", umean.getSampleStddev()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.std", wmean.getSampleStddev()));
  }
  // Evaluate:
  double x = hmean.getMean();
  // See Hopkins for a proof that x is supposedly Beta distributed.
  double ix = BetaDistribution.regularizedIncBeta(x, sampleSize, sampleSize);
  double p = (x > .5) ? (1. - ix) : ix;
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.p", p));
  return null;
}
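The evaluation boils down to the statistic h = u / (u + w) and a tail probability from the regularized incomplete Beta function; for uniform (unclustered) data, h should be near 0.5. A tiny numeric sketch with made-up mean distances and sample size (not output of the algorithm):
// BetaDistribution is de.lmu.ifi.dbs.elki.math.statistics.distribution.BetaDistribution
double u = 0.8, w = 0.2; // made-up mean NN distances: uniform sample vs. real data
double h = u / (u + w);  // Hopkins statistic: 0.8 suggests clustered data
int sampleSize = 50;     // assumed sample size per repetition
double ix = BetaDistribution.regularizedIncBeta(h, sampleSize, sampleSize);
double p = (h > 0.5) ? (1. - ix) : ix; // tail probability under the uniform hypothesis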
use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
the class KMeansMinusMinus method meansWithTreshhold.
/**
* Returns the mean vectors of the given clusters in the given database.
*
* @param clusters the clusters to compute the means of
* @param means the current means, used as fallback for clusters that keep no points
* @param database the database containing the vectors
* @param tresh distance threshold; points at or above it are excluded from the mean
* @return the mean vectors of the given clusters in the given database
*/
protected double[][] meansWithTreshhold(List<? extends ModifiableDoubleDBIDList> clusters, double[][] means, Relation<V> database, Double tresh) {
  // TODO: use Kahan summation for better numerical precision?
  double[][] newMeans = new double[k][];
  for (int i = 0; i < k; i++) {
    DoubleDBIDList list = clusters.get(i);
    double[] raw = null;
    int count = 0;
    // Update with remaining instances
    for (DoubleDBIDListIter iter = list.iter(); iter.valid(); iter.advance()) {
      if (iter.doubleValue() >= tresh) {
        continue;
      }
      NumberVector vec = database.get(iter);
      if (raw == null) {
        // Initialize with the first vector:
        raw = vec.toArray();
      } else {
        // Add further vectors; the else avoids counting the first vector twice.
        for (int j = 0; j < raw.length; j++) {
          raw[j] += vec.doubleValue(j);
        }
      }
      count++;
    }
    newMeans[i] = (raw != null) ? VMath.timesEquals(raw, 1.0 / count) : means[i];
  }
  return newMeans;
}
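The effect of the threshold is a truncated mean: points whose stored distance is at least `tresh` are ignored, and a cluster that keeps no points falls back to its previous mean. A plain-array illustration of that rule with invented numbers (not ELKI API):
double[][] points = { { 1, 1 }, { 2, 2 }, { 100, 100 } }; // last point is a far-away outlier
double[] dists = { 0.5, 0.7, 140.0 };                     // stored distances to the old mean
double tresh = 10.0;                                      // cutoff (illustrative value)
double[] sum = new double[2];
int count = 0;
for (int i = 0; i < points.length; i++) {
  if (dists[i] >= tresh) {
    continue; // the outlier is excluded, as in meansWithTreshhold
  }
  for (int j = 0; j < sum.length; j++) {
    sum[j] += points[i][j];
  }
  count++;
}
// count == 2 here; the new mean is (1.5, 1.5) instead of being dragged toward (100, 100).
double[] newMean = { sum[0] / count, sum[1] / count };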