Use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
From the class AttributeWiseMADNormalization, method filter:
/**
 * Normalize all number vector field columns of the bundle, one attribute at a
 * time.
 * <p>
 * For each column whose type is assignable to
 * {@code TypeUtil.NUMBER_VECTOR_FIELD}, a first pass stores the per-attribute
 * median in {@code median} and a scaling factor derived from the median
 * absolute deviation (MAD) in {@code imadsigma}; a second pass replaces every
 * vector of the column, in place, by a new vector built from
 * {@code normalize(d, value)}. Because {@code factory}, {@code median} and
 * {@code imadsigma} are reassigned for every matching column, they describe
 * the last such column once this method returns.
 *
 * @param objects Bundle to process
 * @return the same bundle instance that was passed in
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  // An empty bundle has nothing to analyze or rescale.
  if (objects.dataLength() == 0) {
    return objects;
  }
  for (int col = 0; col < objects.metaLength(); col++) {
    SimpleTypeInformation<?> colType = (SimpleTypeInformation<?>) objects.meta(col);
    final List<?> rawColumn = (List<?>) objects.getColumn(col);
    // Leave every column that is not a number vector field untouched.
    if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(colType)) {
      continue;
    }
    @SuppressWarnings("unchecked") final List<V> vectors = (List<V>) rawColumn;
    @SuppressWarnings("unchecked") final VectorFieldTypeInformation<V> fieldType = (VectorFieldTypeInformation<V>) colType;
    factory = FilterUtil.guessFactory(fieldType);

    final int dim = fieldType.getDimensionality();
    median = new double[dim];
    imadsigma = new double[dim];
    // One slot per vector; reused for every attribute.
    double[] scratch = new double[vectors.size()];

    // ---- Pass 1: estimate center and scale of each attribute ----
    FiniteProgress analysisProgress = null;
    if (LOG.isVerbose()) {
      analysisProgress = new FiniteProgress("Analyzing data", dim, LOG);
    }
    for (int d = 0; d < dim; d++) {
      for (int i = 0; i < scratch.length; i++) {
        scratch[i] = vectors.get(i).doubleValue(d);
      }
      final double center = QuickSelect.median(scratch);
      median[d] = center;

      // Turn the values into absolute deviations, counting exact zeros.
      int zeros = 0;
      for (int i = 0; i < scratch.length; i++) {
        final double deviation = Math.abs(scratch[i] - center);
        scratch[i] = deviation;
        if (deviation == 0.) {
          zeros++;
        }
      }

      if (zeros == scratch.length) {
        // All deviations are zero: the scale is irrelevant for a constant
        // attribute, so fall back to 1.
        LOG.warning("Constant attribute detected. Using MAD=1.");
        imadsigma[d] = 1.;
      } else if (zeros < (scratch.length >>> 1)) {
        // Regular case: rescale the true MAD to estimate the standard
        // deviation.
        imadsigma[d] = NormalDistribution.PHIINV075 / QuickSelect.median(scratch);
      } else {
        // At least half of the deviations are zero, so the plain MAD would
        // be zero. Use the deviation in the middle of the non-zero values
        // instead, together with the matching normal quantile.
        final int rank = zeros + ((scratch.length - zeros) >> 1);
        final double rel = .5 + rank * .5 / scratch.length;
        final double quantile = NormalDistribution.quantile(0., 1., rel);
        final double deviationAtRank = QuickSelect.quickSelect(scratch, rank);
        imadsigma[d] = quantile / deviationAtRank;
        LOG.warning("Near-constant attribute detected. Using modified MAD.");
      }
      LOG.incrementProcessed(analysisProgress);
    }
    LOG.ensureCompleted(analysisProgress);

    // ---- Pass 2: replace each vector by its normalized version ----
    FiniteProgress normalizeProgress = null;
    if (LOG.isVerbose()) {
      normalizeProgress = new FiniteProgress("Data normalization", objects.dataLength(), LOG);
    }
    // Reused buffer holding the normalized attributes of the current vector.
    double[] normalized = new double[dim];
    for (int i = 0; i < objects.dataLength(); i++) {
      final V vector = vectors.get(i);
      for (int d = 0; d < dim; d++) {
        normalized[d] = normalize(d, vector.doubleValue(d));
      }
      vectors.set(i, factory.newNumberVector(normalized));
      LOG.incrementProcessed(normalizeProgress);
    }
    LOG.ensureCompleted(normalizeProgress);
  }
  return objects;
}
Use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.
From the class VectorDimensionalityFilter, method updateMeta:
/**
 * Rebuild {@code meta} from the metadata of {@code source}.
 * <p>
 * As long as no column has been selected ({@code column < 0}), the first
 * column whose type is assignable to
 * {@code TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH} becomes the selected column.
 * Its type is kept if it already is a vector field with identical minimum and
 * maximum dimensionality, replaced by a fixed-dimensionality vector field type
 * if {@code dim} is known, and otherwise kept as is with a warning. If
 * {@code dim} is still {@code -1} and the column is a vector field, {@code dim}
 * is set to the minimum dimensionality of that field. All other column types
 * are copied unchanged.
 *
 * @throws AbortException if the column is a vector field and the desired
 *         dimensionality lies outside its minimum/maximum dimensionality
 */
private void updateMeta() {
  meta = new BundleMeta();
  BundleMeta sourceMeta = source.getMeta();
  for (int i = 0; i < sourceMeta.size(); i++) {
    SimpleTypeInformation<?> colType = sourceMeta.get(i);
    // Pass through unchanged: a column was already chosen, or this one is
    // not a (variable length) number vector column.
    if (column >= 0 || !TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH.isAssignableFromType(colType)) {
      meta.add(colType);
      continue;
    }
    if (colType instanceof VectorFieldTypeInformation) {
      @SuppressWarnings("unchecked") final VectorFieldTypeInformation<V> fieldType = (VectorFieldTypeInformation<V>) colType;
      if (dim != -1) {
        // The requested dimensionality must lie within the column's range.
        if (fieldType.mindim() > dim) {
          throw new AbortException("Would filter all vectors: minimum dimensionality " + fieldType.mindim() + " > desired dimensionality " + dim);
        }
        if (fieldType.maxdim() < dim) {
          throw new AbortException("Would filter all vectors: maximum dimensionality " + fieldType.maxdim() + " < desired dimensionality " + dim);
        }
      } else {
        // No dimensionality requested: adopt the column's minimum.
        dim = fieldType.mindim();
      }
      // Already a fixed dimensionality: the type can be reused directly.
      if (fieldType.mindim() == fieldType.maxdim()) {
        meta.add(fieldType);
        column = i;
        continue;
      }
    }
    @SuppressWarnings("unchecked") final VectorTypeInformation<V> vectorType = (VectorTypeInformation<V>) colType;
    if (dim == -1) {
      LOG.warning("No dimensionality yet for column " + i);
      meta.add(vectorType);
    } else {
      // Advertise a field type of exactly the desired dimensionality.
      meta.add(new VectorFieldTypeInformation<>(FilterUtil.guessFactory(vectorType), dim, dim, vectorType.getSerializer()));
    }
    column = i;
  }
}
Aggregations