Use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project: class GeneratorMain, method generate().
/**
 * Main loop to generate the data set.
 *
 * @return Generated data set
 */
public MultipleObjectsBundle generate() {
  // We need at least one cluster.
  if (generators.isEmpty()) {
    throw new AbortException("No clusters specified.");
  }
  // Assert that the cluster dimensionalities agree.
  final int dim = generators.get(0).getDim();
  for (GeneratorInterface c : generators) {
    if (c.getDim() != dim) {
      throw new AbortException("Cluster dimensions do not agree.");
    }
  }
  // Prepare the result bundle: vectors, class labels, and generator models.
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  bundle.appendColumn(type, new ArrayList<>());
  bundle.appendColumn(TypeUtil.CLASSLABEL, new ArrayList<>());
  bundle.appendColumn(Model.TYPE, new ArrayList<Model>());
  // Generate the clusters.
  ClassLabel[] labels = new ClassLabel[generators.size()];
  Model[] models = new Model[generators.size()];
  initLabelsAndModels(generators, labels, models, relabelClusters);
  // Choose the point-assignment strategy.
  final AssignPoint assignment;
  if (!testAgainstModel) {
    assignment = new AssignPoint();
  } else if (relabelClusters == null) {
    assignment = new TestModel();
  } else if (!relabelDistance) {
    assignment = new AssignLabelsByDensity(labels);
  } else {
    assignment = new AssignLabelsByDistance(labels);
  }
  for (int i = 0; i < labels.length; i++) {
    final GeneratorInterface curclus = generators.get(i);
    assignment.newCluster(i, curclus);
    // Only dynamic generators allow rejection / model testing:
    GeneratorInterfaceDynamic cursclus = (curclus instanceof GeneratorInterfaceDynamic) ? (GeneratorInterfaceDynamic) curclus : null;
    int kept = 0;
    while (kept < curclus.getSize()) {
      // Generate the "missing" number of points.
      List<double[]> newp = curclus.generate(curclus.getSize() - kept);
      for (double[] p : newp) {
        int bestc = assignment.getAssignment(i, p);
        if (bestc < 0) {
          // Rejection only happens for dynamic generators, so cursclus is non-null here.
          cursclus.incrementDiscarded();
          continue;
        }
        bundle.appendSimple(DoubleVector.wrap(p), labels[bestc], models[bestc]);
        ++kept;
      }
    }
  }
  return bundle;
}
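For orientation, here is a minimal sketch of how the resulting bundle could be consumed, assuming MultipleObjectsBundle exposes dataLength() and data(row, column) accessors as in ELKI 0.7.x; the GeneratorMain configuration itself is elided:

MultipleObjectsBundle bundle = generatorMain.generate(); // generatorMain: a configured GeneratorMain (setup elided)
for (int row = 0; row < bundle.dataLength(); row++) {
  DoubleVector vec = (DoubleVector) bundle.data(row, 0); // column 0: the vector, as appended above
  ClassLabel label = (ClassLabel) bundle.data(row, 1);   // column 1: the class label
  Model model = (Model) bundle.data(row, 2);             // column 2: the generator model
  // ... use vec, label, model ...
}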
Use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project: class RandomGeneratedReferencePoints, method getReferencePoints().
@Override
public Collection<? extends NumberVector> getReferencePoints(Relation<? extends NumberVector> db) {
  double[][] minmax = RelationUtil.computeMinMax(db);
  int dim = RelationUtil.dimensionality(db);
  // Compute mean (midpoint) and extent from minmax, reusing the two arrays:
  double[] mean = minmax[0], delta = minmax[1];
  for (int d = 0; d < dim; d++) {
    delta[d] -= mean[d]; // extent = max - min
    mean[d] += delta[d] * .5; // midpoint = min + extent / 2
  }
  Random rand = rnd.getSingleThreadedRandom();
  ArrayList<DoubleVector> result = new ArrayList<>(samplesize);
  for (int i = 0; i < samplesize; i++) {
    double[] vec = new double[dim];
    for (int d = 0; d < dim; d++) {
      // Uniform in [midpoint - scale * extent / 2, midpoint + scale * extent / 2):
      vec[d] = mean[d] + (rand.nextDouble() - 0.5) * scale * delta[d];
    }
    result.add(DoubleVector.wrap(vec));
  }
  return result;
}
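As a concrete check: with data spanning [0, 10] in some dimension, delta becomes 10 and mean becomes 5, so with scale = 1 the sampled coordinates fall uniformly in [0, 10). A hypothetical usage sketch, assuming a (samplesize, scale, rnd) constructor matching the fields used above (the exact signature may differ):

// Draw 20 reference points, uniformly over the full data extent.
RandomGeneratedReferencePoints refp = new RandomGeneratedReferencePoints(20, 1.0, RandomFactory.DEFAULT);
Collection<? extends NumberVector> refs = refp.getReferencePoints(relation); // relation: some Relation<DoubleVector>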
Use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project: class ArrayAdapterDatabaseConnection, method loadData().
@Override
public MultipleObjectsBundle loadData() {
  MultipleObjectsBundle b = new MultipleObjectsBundle();
  if (startid != null) {
    b.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(startid, data.length));
  }
  // Track the minimum and maximum row length, to support ragged arrays:
  int mind = Integer.MAX_VALUE, maxd = 0;
  List<DoubleVector> vecs = new ArrayList<>(data.length);
  for (int i = 0; i < data.length; i++) {
    final int d = data[i].length;
    mind = d < mind ? d : mind;
    maxd = d > maxd ? d : maxd;
    vecs.add(DoubleVector.wrap(data[i]));
  }
  SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, mind, maxd, DoubleVector.FACTORY.getDefaultSerializer());
  b.appendColumn(type, vecs);
  if (labels != null) {
    if (labels.length != data.length) {
      throw new AbortException("Label and data columns must have the same size.");
    }
    b.appendColumn(TypeUtil.STRING, Arrays.asList(labels));
  }
  return invokeBundleFilters(b);
}
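A hypothetical usage sketch, assuming the convenience constructor that takes only the data array (the class also has constructors accepting labels and a starting DBID; exact signatures may differ):

double[][] data = { { 1.0, 2.0 }, { 3.0, 4.0 }, { 5.0, 6.0 } };
ArrayAdapterDatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
MultipleObjectsBundle bundle = dbc.loadData();
// All rows have length 2, so mind == maxd == 2 and the column is a proper vector field.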
Use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project: class PartialVAFile, method calculateSelectivityCoeffs().
/**
 * Calculate selectivity coefficients: for each dimension, the number of
 * quantization cells spanned by the range [query - epsilon, query + epsilon].
 * Fewer cells means the dimension is more selective.
 *
 * @param daFiles List of files to use
 * @param query Query vector
 * @param epsilon Epsilon radius
 */
protected static void calculateSelectivityCoeffs(List<DoubleObjPair<DAFile>> daFiles, NumberVector query, double epsilon) {
  final int dimensions = query.getDimensionality();
  double[] lowerVals = new double[dimensions];
  double[] upperVals = new double[dimensions];
  VectorApproximation queryApprox = calculatePartialApproximation(null, query, daFiles);
  for (int i = 0; i < dimensions; i++) {
    final double val = query.doubleValue(i);
    lowerVals[i] = val - epsilon;
    upperVals[i] = val + epsilon;
  }
  DoubleVector lowerEpsilon = DoubleVector.wrap(lowerVals);
  VectorApproximation lowerEpsilonPartitions = calculatePartialApproximation(null, lowerEpsilon, daFiles);
  DoubleVector upperEpsilon = DoubleVector.wrap(upperVals);
  VectorApproximation upperEpsilonPartitions = calculatePartialApproximation(null, upperEpsilon, daFiles);
  for (int i = 0; i < daFiles.size(); i++) {
    // Cells between lower bound and query, plus cells between query and upper bound, plus the query's own cell:
    int coeff = (queryApprox.getApproximation(i) - lowerEpsilonPartitions.getApproximation(i)) + (upperEpsilonPartitions.getApproximation(i) - queryApprox.getApproximation(i)) + 1;
    daFiles.get(i).first = coeff;
  }
}
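A hypothetical worked example for one dimension (the cell indices are made up): if the query falls in cell 5, query - epsilon in cell 3, and query + epsilon in cell 6, the epsilon range spans four cells:

int q = 5, lo = 3, hi = 6;           // approximation cells of query, query - eps, query + eps
int coeff = (q - lo) + (hi - q) + 1; // = 4 cells spanned: 3, 4, 5, 6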
Use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project: class ComputeKNNOutlierScores, method run().
@Override
public void run() {
  final Database database = inputstep.getDatabase();
  final Relation<O> relation = database.getRelation(distf.getInputTypeRestriction());
  // Ensure we don't go beyond the relation size:
  final int maxk = Math.min(this.maxk, relation.size() - 1);
  // Get a kNN query.
  final int lim = Math.min(maxk + 2, relation.size());
  KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, distf, lim);
  // Precompute kNN:
  if (!(knnq instanceof PreprocessorKNNQuery)) {
    MaterializeKNNPreprocessor<O> preproc = new MaterializeKNNPreprocessor<>(relation, distf, lim);
    preproc.initialize();
    relation.getHierarchy().add(relation, preproc);
  }
  // Test that we now get a proper index query:
  knnq = QueryUtil.getKNNQuery(relation, distf, lim);
  if (!(knnq instanceof PreprocessorKNNQuery)) {
    throw new AbortException("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
  }
  // Warn for some known slow methods and large k:
  if (!isDisabled("LDOF") && maxk > 100) {
    LOG.verbose("Note: LDOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " LDOF to disable.");
  }
  if (!isDisabled("FastABOD") && maxk > 100) {
    LOG.warning("Note: FastABOD needs quadratic memory. Use -" + Parameterizer.DISABLE_ID.getName() + " FastABOD to disable.");
  }
  if (!isDisabled("DWOF") && maxk > 100) {
    LOG.warning("Note: DWOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " DWOF to disable.");
  }
  final DBIDs ids = relation.getDBIDs();
  try (PrintStream fout = new PrintStream(outfile)) {
    // Write a header line with the data set size and type.
    fout.append("# Data set size: " + relation.size()).append(" data type: " + relation.getDataTypeInformation()).append(FormatUtil.NEWLINE);
    // Label outlier result (reference)
    writeResult(fout, ids, bylabel.run(database), new IdentityScaling(), "bylabel");
    // Some methods need k >= 2 or k >= 3; round the start value up accordingly:
    final int startk = (this.startk > 0) ? this.startk : this.stepk;
    final int startkmin2 = (startk >= 2) ? startk : (startk + stepk);
    final int startkmin3 = (startk >= 3) ? startk : (startkmin2 >= 3) ? startkmin2 : (startkmin2 + stepk);
    // Output function:
    BiConsumer<String, OutlierResult> out = (kstr, result) -> writeResult(fout, ids, result, scaling, kstr);
    // KNN
    runForEachK("KNN", startk, stepk, maxk, //
        k -> new KNNOutlier<O>(distf, k).run(database, relation), out);
    // KNN Weight
    runForEachK("KNNW", startk, stepk, maxk, //
        k -> new KNNWeightOutlier<O>(distf, k).run(database, relation), out);
    // Run LOF
    runForEachK("LOF", startk, stepk, maxk, //
        k -> new LOF<O>(k, distf).run(database, relation), out);
    // Run Simplified-LOF
    runForEachK("SimplifiedLOF", startk, stepk, maxk, //
        k -> new SimplifiedLOF<O>(k, distf).run(database, relation), out);
    // LoOP
    runForEachK("LoOP", startk, stepk, maxk, //
        k -> new LoOP<O>(k, k, distf, distf, 1.0).run(database, relation), out);
    // LDOF
    runForEachK("LDOF", startkmin2, stepk, maxk, //
        k -> new LDOF<O>(distf, k).run(database, relation), out);
    // Run ODIN
    runForEachK("ODIN", startk, stepk, maxk, //
        k -> new ODIN<O>(distf, k).run(database, relation), out);
    // Run FastABOD
    runForEachK("FastABOD", startkmin3, stepk, maxk, //
        k -> new FastABOD<O>(new PolynomialKernelFunction(2), k).run(database, relation), out);
    // Run KDEOS with intrinsic dimensionality 2.
    runForEachK("KDEOS", startkmin2, stepk, maxk, //
        k -> new KDEOS<O>(distf, k, k, GaussianKernelDensityFunction.KERNEL, //
            0., 0.5 * GaussianKernelDensityFunction.KERNEL.canonicalBandwidth(), 2).run(database, relation), out);
    // Run LDF
    runForEachK("LDF", startk, stepk, maxk, //
        k -> new LDF<O>(k, distf, GaussianKernelDensityFunction.KERNEL, 1., .1).run(database, relation), out);
    // Run INFLO
    runForEachK("INFLO", startk, stepk, maxk, //
        k -> new INFLO<O>(distf, 1.0, k).run(database, relation), out);
    // Run COF
    runForEachK("COF", startk, stepk, maxk, //
        k -> new COF<O>(k, distf).run(database, relation), out);
    // Run simple intrinsic dimensionality
    runForEachK("Intrinsic", startkmin2, stepk, maxk, //
        k -> new IntrinsicDimensionalityOutlier<O>(distf, k, AggregatedHillEstimator.STATIC).run(database, relation), out);
    // Run IDOS
    runForEachK("IDOS", startkmin2, stepk, maxk, //
        k -> new IDOS<O>(distf, AggregatedHillEstimator.STATIC, k, k).run(database, relation), out);
    // Run simple kernel-density LOF variant
    runForEachK("KDLOF", startkmin2, stepk, maxk, //
        k -> new SimpleKernelDensityLOF<O>(k, distf, GaussianKernelDensityFunction.KERNEL).run(database, relation), out);
    // Run DWOF (needs pairwise distances, too)
    runForEachK("DWOF", startkmin2, stepk, maxk, //
        k -> new DWOF<O>(distf, k, 1.1).run(database, relation), out);
    // Run LIC
    runForEachK("LIC", startk, stepk, maxk, //
        k -> new LocalIsolationCoefficient<O>(distf, k).run(database, relation), out);
    // Run VOV (requires a vector field).
    if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(relation.getDataTypeInformation())) {
      @SuppressWarnings("unchecked")
      final DistanceFunction<? super DoubleVector> df = (DistanceFunction<? super DoubleVector>) distf;
      @SuppressWarnings("unchecked")
      final Relation<DoubleVector> rel = (Relation<DoubleVector>) (Relation<?>) relation;
      runForEachK("VOV", startk, stepk, maxk, //
          k -> new VarianceOfVolume<DoubleVector>(k, df).run(database, rel), out);
    }
    // Run KNN DD
    runForEachK("KNNDD", startk, stepk, maxk, //
        k -> new KNNDD<O>(distf, k).run(database, relation), out);
    // Run KNN SOS
    runForEachK("KNNSOS", startk, stepk, maxk, //
        k -> new KNNSOS<O>(distf, k).run(relation), out);
    // Run ISOS
    runForEachK("ISOS", startkmin2, stepk, maxk, //
        k -> new ISOS<O>(distf, k, AggregatedHillEstimator.STATIC).run(relation), out);
  } catch (FileNotFoundException e) {
    throw new AbortException("Cannot create output file.", e);
  }
}
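The runForEachK helper is used throughout but not shown in this excerpt. A minimal sketch of what it presumably does, under assumed parameter types (the real helper in ComputeKNNOutlierScores may differ in signature and also logs progress):

private void runForEachK(String prefix, int startk, int stepk, int maxk,
    IntFunction<OutlierResult> runner, BiConsumer<String, OutlierResult> out) {
  if (isDisabled(prefix)) {
    return; // method switched off via the -disable parameter
  }
  // Run the algorithm for k = startk, startk + stepk, ..., up to maxk,
  // and hand each result to the output consumer under a "name-k" key.
  for (int k = startk; k <= maxk; k += stepk) {
    out.accept(prefix + "-" + k, runner.apply(k)); // e.g. "LOF-10"
  }
}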