use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class UncertainifyFilter method filter.
/**
 * Builds a new bundle in which every column assignable to
 * {@code TypeUtil.NUMBER_VECTOR_FIELD} is replaced by a column of uncertain
 * objects produced by {@code generator}. All other columns are copied over
 * unchanged; when {@code keep} is set, the original vector column is retained
 * in front of its uncertain replacement.
 *
 * @param objects bundle to process
 * @return the input bundle itself if it holds no data, otherwise a freshly
 *         assembled bundle
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  if (size == 0) {
    return objects;
  }
  final MultipleObjectsBundle result = new MultipleObjectsBundle();
  final int numColumns = objects.metaLength();
  for (int col = 0; col < numColumns; col++) {
    final SimpleTypeInformation<?> meta = objects.meta(col);
    @SuppressWarnings("unchecked")
    final List<Object> values = (List<Object>) objects.getColumn(col);
    final boolean isVectorField = TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta);
    // Non-vector columns are always passed through; vector columns only
    // when the original data was requested to be kept.
    if (!isVectorField || keep) {
      result.appendColumn(meta, values);
    }
    if (!isVectorField) {
      continue;
    }
    // The dimensionality carries over to the replacement type information.
    final int dim = ((VectorFieldTypeInformation<?>) meta).getDimensionality();
    // Derive one uncertain object per input vector.
    final List<UO> uncertain = new ArrayList<>(values.size());
    final FiniteProgress progress;
    if (LOG.isVerbose()) {
      progress = new FiniteProgress("Derive uncertain objects", size, LOG);
    }
    else {
      progress = null;
    }
    for (int row = 0; row < size; row++) {
      final NumberVector vec = (NumberVector) values.get(row);
      uncertain.add(generator.newFeatureVector(rand, vec, ArrayLikeUtil.NUMBERVECTORADAPTER));
      LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    // Register the uncertain objects as a new column.
    result.appendColumn(new VectorFieldTypeInformation<UO>(generator.getFactory(), dim), uncertain);
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class AbstractSupervisedProjectionVectorFilter method filter.
/**
 * Projects every number-vector column of the bundle with a matrix obtained
 * from {@code computeProjectionMatrix}, using the first class label column
 * found in the bundle as supervision.
 *
 * <p>
 * The vectors are overwritten in place inside the input column, and that
 * column is then registered in the result under the converted type. If
 * {@code tdim} exceeds a column's dimensionality, {@code tdim} is lowered to
 * that dimensionality (this updates the field, so it persists). When no class
 * labels exist, or no column could be projected, the input bundle is
 * returned.
 *
 * @param objects bundle to process
 * @return the resulting bundle, or {@code objects} itself in the fallback
 *         cases described above
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  if (size == 0) {
    return objects;
  }
  final int numColumns = objects.metaLength();

  // Step 1: pick the first column that holds class labels.
  List<? extends ClassLabel> labels = null;
  for (int c = 0; c < numColumns; c++) {
    if (!TypeUtil.CLASSLABEL.isAssignableFromType(objects.meta(c))) {
      continue;
    }
    @SuppressWarnings("unchecked")
    final List<? extends ClassLabel> cast = (List<? extends ClassLabel>) objects.getColumn(c);
    labels = cast;
    break;
  }
  if (labels == null) {
    getLogger().warning("No class label column found (try " + ClassLabelFilter.class.getSimpleName() + ") -- cannot run " + this.getClass().getSimpleName());
    return objects;
  }

  // Step 2: train and apply a projection on each vector field column.
  final MultipleObjectsBundle result = new MultipleObjectsBundle();
  boolean projectedAny = false;
  for (int c = 0; c < numColumns; c++) {
    final SimpleTypeInformation<?> meta = objects.meta(c);
    final List<?> raw = objects.getColumn(c);
    if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
      // Not a vector field: copy through untouched.
      result.appendColumn(meta, raw);
      continue;
    }
    @SuppressWarnings("unchecked")
    final List<V> vectors = (List<V>) raw;
    final VectorFieldTypeInformation<?> vmeta = (VectorFieldTypeInformation<?>) meta;
    @SuppressWarnings("unchecked")
    final NumberVector.Factory<V> factory = (NumberVector.Factory<V>) vmeta.getFactory();
    final int dim = vmeta.getDimensionality();
    // The target dimensionality is capped at the input dimensionality.
    if (tdim > dim) {
      if (getLogger().isVerbose()) {
        getLogger().verbose("Setting projection dimension to original dimension: projection dimension: " + tdim + " larger than original dimension: " + dim);
      }
      tdim = dim;
    }
    try {
      final double[][] matrix = computeProjectionMatrix(vectors, labels, dim);
      // Replace each vector by its projection, in place.
      for (int row = 0; row < size; row++) {
        final double[] projected = times(matrix, vectors.get(row).toArray());
        vectors.set(row, factory.newNumberVector(projected));
      }
      result.appendColumn(convertedType(meta, factory), raw);
      projectedAny = true;
    }
    catch (Exception e) {
      // NOTE(review): a failure part-way through the loop above leaves the
      // rows before it already replaced -- confirm this is acceptable.
      getLogger().error("Projection failed -- continuing with unprojected data!", e);
      result.appendColumn(meta, raw);
    }
  }
  if (projectedAny) {
    return result;
  }
  getLogger().warning("No vector field of fixed dimensionality found.");
  return objects;
}
use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class RandomDoubleVectorDatabaseConnection method loadData.
/**
 * Generates {@code size} random {@code DoubleVector}s of dimensionality
 * {@code dim} and wraps them in a single-column bundle.
 *
 * @return bundle holding the generated vectors
 */
@Override
public MultipleObjectsBundle loadData() {
  final VectorFieldTypeInformation<DoubleVector> meta = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  final List<DoubleVector> data = new ArrayList<>(size);
  // A single random source drives the whole generation.
  final Random random = rnd.getSingleThreadedRandom();
  while (data.size() < size) {
    data.add(VectorUtil.randomVector(DoubleVector.FACTORY, dim, random));
  }
  return MultipleObjectsBundle.makeSimple(meta, data);
}
use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class SplitNumberVectorFilter method filter.
/**
 * Splits every column accepted by {@code getInputTypeRestriction()} into two
 * vector columns: the first holds the dimensions listed in {@code dims} (in
 * that order), the second holds all remaining dimensions in ascending order.
 * Columns that are not accepted are copied through unchanged.
 *
 * @param objects bundle to process
 * @return the input bundle itself if it holds no data, otherwise a freshly
 *         assembled bundle
 * @throws AbortException if more unselected dimensions are found than
 *         {@code dimensionality - dims.length}
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  if (size == 0) {
    return objects;
  }
  final MultipleObjectsBundle result = new MultipleObjectsBundle();
  final int numSelected = dims.length;
  for (int c = 0, numColumns = objects.metaLength(); c < numColumns; c++) {
    @SuppressWarnings("unchecked")
    final SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(c);
    @SuppressWarnings("unchecked")
    final List<Object> values = (List<Object>) objects.getColumn(c);
    if (!getInputTypeRestriction().isAssignableFromType(meta)) {
      result.appendColumn(meta, values);
      continue;
    }
    // Having passed the restriction, this is expected to be a vector type.
    @SuppressWarnings("unchecked")
    final VectorFieldTypeInformation<V> vmeta = VectorFieldTypeInformation.class.cast(meta);
    final NumberVector.Factory<V> factory = FilterUtil.guessFactory(vmeta);
    final int fullDim = vmeta.getDimensionality();

    // Type information and storage for the two output columns.
    final VectorFieldTypeInformation<V> selectedType = new VectorFieldTypeInformation<>(factory, numSelected);
    final VectorFieldTypeInformation<V> remainingType = new VectorFieldTypeInformation<>(factory, fullDim - numSelected);
    final List<V> selectedCol = new ArrayList<>(values.size());
    final List<V> remainingCol = new ArrayList<>(values.size());
    result.appendColumn(selectedType, selectedCol);
    result.appendColumn(remainingType, remainingCol);

    // Collect the dimensions that are not listed in dims.
    final int[] remainingDims = new int[fullDim - numSelected];
    int filled = 0;
    scan: for (int d = 0; d < fullDim; d++) {
      for (final int sel : dims) {
        if (sel == d) {
          continue scan;
        }
      }
      if (filled >= remainingDims.length) {
        throw new AbortException("Dimensionalities not proper!");
      }
      remainingDims[filled++] = d;
    }

    // Split each vector into its two parts.
    for (int row = 0; row < size; row++) {
      @SuppressWarnings("unchecked")
      final V vec = (V) values.get(row);
      final double[] head = new double[numSelected];
      final double[] tail = new double[vec.getDimensionality() - numSelected];
      for (int k = 0; k < numSelected; k++) {
        head[k] = vec.doubleValue(dims[k]);
      }
      for (int k = 0; k < remainingDims.length; k++) {
        tail[k] = vec.doubleValue(remainingDims[k]);
      }
      selectedCol.add(factory.newNumberVector(head));
      remainingCol.add(factory.newNumberVector(tail));
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
the class AttributeWiseCDFNormalization method filter.
/**
 * Rewrites every number-vector column of the bundle in place: for each
 * dimension a distribution is obtained from {@code findBestFit}, and each
 * attribute value is then replaced by that distribution's {@code cdf} value.
 * Columns of other types are left untouched.
 *
 * <p>
 * The {@code factory} and {@code dists} fields are reassigned for each
 * processed column.
 *
 * @param objects bundle to process; its vector columns are modified
 * @return the same bundle instance that was passed in
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  if (size == 0) {
    return objects;
  }
  for (int c = 0, numColumns = objects.metaLength(); c < numColumns; c++) {
    final SimpleTypeInformation<?> meta = objects.meta(c);
    final List<?> raw = objects.getColumn(c);
    if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
      continue;
    }
    @SuppressWarnings("unchecked")
    final List<V> vectors = (List<V>) raw;
    @SuppressWarnings("unchecked")
    final VectorFieldTypeInformation<V> vmeta = (VectorFieldTypeInformation<V>) meta;
    factory = FilterUtil.guessFactory(vmeta);

    final int dim = vmeta.getDimensionality();
    dists = new ArrayList<>(dim);
    // Scratch space is only allocated when there is more than one estimator.
    double[] scratch = null;
    if (estimators.size() > 1) {
      scratch = new double[vectors.size()];
    }
    // Processing goes dimension by dimension, which needs fast random access
    // to the column.
    final Adapter access = new Adapter();
    for (int d = 0; d < dim; d++) {
      access.dim = d;
      Distribution fit = findBestFit(vectors, access, d, scratch);
      // Constant-zero attributes should stay 0 rather than (usually)
      // turning into a constant .5.
      if (fit instanceof UniformDistribution && constantZero(vectors, access)) {
        fit = new UniformDistribution(0., 1.);
      }
      dists.add(fit);
    }

    // Replace each vector by its per-dimension CDF values.
    // NOTE(review): the buffer is shared across rows, which relies on
    // factory.newNumberVector copying its argument -- confirm.
    final double[] buffer = new double[dim];
    for (int row = 0; row < size; row++) {
      final V vec = vectors.get(row);
      for (int d = 0; d < dim; d++) {
        buffer[d] = dists.get(d).cdf(vec.doubleValue(d));
      }
      vectors.set(row, factory.newNumberVector(buffer));
    }
  }
  return objects;
}
Aggregations