use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class RepresentativeUncertainClustering method runClusteringAlgorithm.
/**
 * Cluster one sampled instance of the data and hook the outcome into the
 * given result hierarchy.
 *
 * The sample is wrapped into a materialized relation and a proxy database,
 * then the configured sample clustering algorithm is run on it. Afterwards
 * both the relation and the clustering are removed from the proxy database's
 * own hierarchy and attached to {@code hierarchy} instead: the relation below
 * {@code parent}, the clustering below the relation.
 *
 * @param hierarchy Result hierarchy to attach the sample and clustering to
 * @param parent Parent result to attach to
 * @param ids Object IDs to process
 * @param store Input data
 * @param dim Dimensionality
 * @param title Title of relation
 * @return Clustering result
 */
protected Clustering<?> runClusteringAlgorithm(ResultHierarchy hierarchy, Result parent, DBIDs ids, DataStore<DoubleVector> store, int dim, String title) {
  // Expose the stored sample as a relation of fixed-dimensional vectors.
  final SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  final Relation<DoubleVector> relation = new MaterializedRelation<>(vectorType, ids, title, store);
  final ProxyDatabase proxy = new ProxyDatabase(ids, relation);

  final Clustering<?> clustering = samplesAlgorithm.run(proxy);

  // Detach both results from the temporary database ...
  proxy.getHierarchy().remove(relation);
  proxy.getHierarchy().remove(clustering);
  // ... and re-attach them as parent -> relation -> clustering.
  hierarchy.add(parent, relation);
  hierarchy.add(relation, clustering);
  return clustering;
}
use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class GeneratorMain method generate.
/**
 * Generate the full data set by drawing points from every configured cluster
 * generator.
 *
 * The resulting bundle has three columns: the vector, its class label and its
 * model. Depending on the configuration, each drawn point is either kept for
 * its own cluster, tested against the model, or relabeled by density or by
 * distance; points the assignment strategy rejects (negative assignment) are
 * counted as discarded and replacements are drawn until the cluster reached
 * its size.
 *
 * @return Generated data set
 */
public MultipleObjectsBundle generate() {
  // Without any generator there is nothing to produce.
  if (generators.isEmpty()) {
    throw new AbortException("No clusters specified.");
  }
  // Every generator has to produce the same dimensionality as the first one.
  final int dim = generators.get(0).getDim();
  for (GeneratorInterface gen : generators) {
    if (gen.getDim() != dim) {
      throw new AbortException("Cluster dimensions do not agree.");
    }
  }

  // Set up the output columns: vector, class label, model.
  final MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  final VectorFieldTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  bundle.appendColumn(vectorType, new ArrayList<>());
  bundle.appendColumn(TypeUtil.CLASSLABEL, new ArrayList<>());
  bundle.appendColumn(Model.TYPE, new ArrayList<Model>());

  // One label and one model per generator.
  final ClassLabel[] labels = new ClassLabel[generators.size()];
  final Model[] models = new Model[generators.size()];
  initLabelsAndModels(generators, labels, models, relabelClusters);

  // Choose the point assignment strategy from the configuration flags.
  final AssignPoint assignment;
  if (testAgainstModel) {
    if (relabelClusters != null) {
      if (relabelDistance) {
        assignment = new AssignLabelsByDistance(labels);
      } else {
        assignment = new AssignLabelsByDensity(labels);
      }
    } else {
      assignment = new TestModel();
    }
  } else {
    assignment = new AssignPoint();
  }

  for (int i = 0; i < labels.length; i++) {
    final GeneratorInterface current = generators.get(i);
    assignment.newCluster(i, current);
    // Only dynamic generators allow rejection / model testing:
    final GeneratorInterfaceDynamic dynamic = (current instanceof GeneratorInterfaceDynamic) ? (GeneratorInterfaceDynamic) current : null;
    for (int kept = 0; kept < current.getSize();) {
      // Request exactly as many points as are still missing.
      final List<double[]> batch = current.generate(current.getSize() - kept);
      for (double[] point : batch) {
        final int target = assignment.getAssignment(i, point);
        if (target >= 0) {
          bundle.appendSimple(DoubleVector.wrap(point), labels[target], models[target]);
          ++kept;
        } else {
          // NOTE(review): dynamic is null for non-dynamic generators; this
          // presumably relies on the assignment strategy never rejecting
          // points of such generators - confirm against AssignPoint.
          dynamic.incrementDiscarded();
        }
      }
    }
  }
  return bundle;
}
use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class ArrayAdapterDatabaseConnection method loadData.
/**
 * Wrap the in-memory array data (and optional labels) into an object bundle
 * and pass it through the configured bundle filters.
 *
 * The vector column's type information records the smallest and the largest
 * row length found in the data. For an empty data array the range is 0..0.
 *
 * @return the bundle as returned by the bundle filters
 * @throws AbortException if labels are given but their number differs from
 *         the number of data rows
 */
@Override
public MultipleObjectsBundle loadData() {
  MultipleObjectsBundle b = new MultipleObjectsBundle();
  if (startid != null) {
    b.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(startid, data.length));
  }
  // Track the minimum and maximum row length while wrapping the rows.
  int mind = Integer.MAX_VALUE, maxd = 0;
  List<DoubleVector> vecs = new ArrayList<>(data.length);
  for (int i = 0; i < data.length; i++) {
    final int d = data[i].length;
    mind = Math.min(mind, d);
    maxd = Math.max(maxd, d);
    vecs.add(DoubleVector.wrap(data[i]));
  }
  // With no rows the loop never lowers mind from its sentinel, which would
  // describe an inverted range (min > max); use an empty 0..0 range instead.
  if (data.length == 0) {
    mind = 0;
  }
  SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, mind, maxd, DoubleVector.FACTORY.getDefaultSerializer());
  b.appendColumn(type, vecs);
  if (labels != null) {
    if (labels.length != data.length) {
      throw new AbortException("Label and DBID columns must have the same size.");
    }
    b.appendColumn(TypeUtil.STRING, Arrays.asList(labels));
  }
  return invokeBundleFilters(b);
}
use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class AttributeWiseBetaNormalization method filter.
/**
 * Normalize every number vector field column of the bundle in place.
 *
 * For each such column a distribution is fitted per dimension via
 * {@code findBestFit}; every value is then mapped through the CDF of its
 * dimension's distribution and through the quantile function of a symmetric
 * beta distribution whose parameter is derived from {@code alpha} and the
 * dimensionality. Columns of any other type are left untouched, and an empty
 * bundle is returned as is.
 *
 * @param objects Bundle to normalize; its vector columns are overwritten
 * @return the same bundle instance
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  if (objects.dataLength() == 0) {
    return objects;
  }
  for (int col = 0; col < objects.metaLength(); col++) {
    SimpleTypeInformation<?> meta = (SimpleTypeInformation<?>) objects.meta(col);
    final List<?> rawColumn = (List<?>) objects.getColumn(col);
    // Only fixed-dimensional number vector columns are normalized.
    if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
      continue;
    }
    @SuppressWarnings("unchecked")
    final List<V> vectors = (List<V>) rawColumn;
    @SuppressWarnings("unchecked")
    final VectorFieldTypeInformation<V> vectorMeta = (VectorFieldTypeInformation<V>) meta;
    // Factory used below to build the replacement vectors.
    factory = FilterUtil.guessFactory(vectorMeta);

    // Fit one distribution per dimension.
    final int dim = vectorMeta.getDimensionality();
    dists = new ArrayList<>(dim);
    // Scratch buffer handed to findBestFit, one slot per object.
    double[] scratch = new double[vectors.size()];
    // The adapter is pointed at the dimension currently being fitted.
    Adapter adapter = new Adapter();
    for (int d = 0; d < dim; d++) {
      adapter.dim = d;
      final Distribution fitted = findBestFit(vectors, adapter, d, scratch);
      if (LOG.isVerbose()) {
        LOG.verbose("Best fit for dimension " + d + ": " + fitted.toString());
      }
      dists.add(fitted);
    }

    // Symmetric beta distribution used as projection target.
    final double shape = FastMath.pow(alpha, -1 / FastMath.sqrt(dim));
    final BetaDistribution beta = new BetaDistribution(shape, shape);

    // Replace every vector by its normalized counterpart.
    final double[] buf = new double[dim];
    for (int i = 0; i < objects.dataLength(); i++) {
      final V vec = vectors.get(i);
      for (int d = 0; d < dim; d++) {
        // TODO: when available, use logspace for better numerical precision!
        final double prob = dists.get(d).cdf(vec.doubleValue(d));
        buf[d] = beta.quantile(prob);
      }
      vectors.set(i, factory.newNumberVector(buf));
    }
  }
  return objects;
}
use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class IntegerRankTieNormalization method filter.
/**
 * Replace every number vector field column by integer rank vectors.
 *
 * Each dimension is ranked independently. Values that are tied receive the
 * same rank, computed as the sum of the first and the last sorted position of
 * the tie group (i.e. twice the mean position, which keeps the rank an
 * integer). Columns of any other type are copied to the output unchanged.
 *
 * @param objects Input bundle
 * @return a new bundle with the vector columns replaced by rank vectors
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  final MultipleObjectsBundle result = new MultipleObjectsBundle();
  final Sorter sorter = new Sorter();
  // Permutation of the object indexes; re-sorted for every dimension.
  final int[] perm = new int[size];
  for (int i = 0; i < size; i++) {
    perm[i] = i;
  }

  for (int col = 0; col < objects.metaLength(); col++) {
    final SimpleTypeInformation<?> meta = objects.meta(col);
    final List<?> rawColumn = objects.getColumn(col);
    if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
      // Not a vector field: pass through as is.
      result.appendColumn(meta, rawColumn);
      continue;
    }
    @SuppressWarnings("unchecked")
    final List<? extends NumberVector> vectors = (List<? extends NumberVector>) rawColumn;
    // The output column keeps the dimensionality but holds integer vectors.
    final int dim = ((VectorFieldTypeInformation<?>) meta).getDimensionality();
    final VectorFieldTypeInformation<IntegerVector> rankType = new VectorFieldTypeInformation<>(IntegerVector.STATIC, dim);

    // ranks[object][dimension]
    final int[][] ranks = new int[size][dim];
    for (int d = 0; d < dim; d++) {
      // Order the objects by their value in dimension d.
      sorter.setup(vectors, d);
      IntegerArrayQuickSort.sort(perm, sorter);
      // Walk the sorted order one tie group at a time.
      int start = 0;
      while (start < perm.length) {
        final double value = vectors.get(perm[start]).doubleValue(d);
        // Extend the group while the next value is not strictly larger.
        int stop = start + 1;
        while (stop < perm.length && !(value < vectors.get(perm[stop]).doubleValue(d))) {
          stop++;
        }
        // First position plus last position of the group.
        final int rank = start + stop - 1;
        for (int i = start; i < stop; i++) {
          ranks[perm[i]][d] = rank;
        }
        start = stop;
      }
    }

    // Wrap the rank arrays into the output column.
    final List<IntegerVector> rankColumn = new ArrayList<>(size);
    for (int i = 0; i < size; i++) {
      rankColumn.add(new IntegerVector(ranks[i]));
    }
    result.appendColumn(rankType, rankColumn);
  }
  return result;
}
Aggregations