use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class AbstractConversionFilter method filter.
/**
 * Default implementation of the filtering procedure, applied column by column.
 * Columns whose type does not satisfy the input type restriction are copied to
 * the result unchanged. For a matching column, a read-only preparation pass
 * over the data is run first, but only if {@link #prepareStart} returns true.
 *
 * In the main pass, every object of the column is then replaced by the result
 * of {@link #filterSingleObject}. The replacement happens in place, in the
 * list that was obtained from the input bundle.
 *
 * @param objects Objects to filter
 * @return Filtered bundle (the input bundle itself if it holds no data)
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  // An empty bundle needs no processing and is handed back as-is.
  if (objects.dataLength() == 0) {
    return objects;
  }
  MultipleObjectsBundle result = new MultipleObjectsBundle();
  final Logging log = getLogger();
  for (int col = 0; col < objects.metaLength(); col++) {
    @SuppressWarnings("unchecked")
    SimpleTypeInformation<Object> rawType = (SimpleTypeInformation<Object>) objects.meta(col);
    @SuppressWarnings("unchecked")
    final List<Object> values = (List<Object>) objects.getColumn(col);
    if (getInputTypeRestriction().isAssignableFromType(rawType)) {
      // The type test above succeeded, so treat this as an input column.
      @SuppressWarnings("unchecked")
      final SimpleTypeInformation<I> inType = (SimpleTypeInformation<I>) rawType;
      // Optional first pass: let the filter inspect every object read-only.
      if (prepareStart(inType)) {
        FiniteProgress prepProgress = null;
        if (log.isVerbose()) {
          prepProgress = new FiniteProgress("Preparing normalization", objects.dataLength(), log);
        }
        for (Object element : values) {
          @SuppressWarnings("unchecked")
          final I input = (I) element;
          prepareProcessInstance(input);
          log.incrementProcessed(prepProgress);
        }
        log.ensureCompleted(prepProgress);
        prepareComplete();
      }
      // Same list, viewed as the output type; it is overwritten slot by slot.
      @SuppressWarnings("unchecked")
      final List<O> outValues = (List<O>) values;
      result.appendColumn(convertedType(inType), outValues);
      // Main pass: convert each object and store it back at its position.
      FiniteProgress mainProgress = null;
      if (log.isVerbose()) {
        mainProgress = new FiniteProgress("Data normalization", objects.dataLength(), log);
      }
      for (int row = 0; row < objects.dataLength(); row++) {
        @SuppressWarnings("unchecked")
        final I input = (I) values.get(row);
        final O converted = filterSingleObject(input);
        outValues.set(row, converted);
        log.incrementProcessed(mainProgress);
      }
      log.ensureCompleted(mainProgress);
    } else {
      // Not a column this filter handles: pass it through untouched.
      result.appendColumn(rawType, values);
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class AbstractStreamConversionFilter method nextEvent.
/**
 * Forwards the next event of the wrapped source. When the source reports a
 * meta data change, the local meta data is extended by all source columns not
 * mirrored yet. As long as no column has been selected ({@code column < 0}),
 * the first column matching the input type restriction is registered with its
 * converted type and its index is remembered in {@code column}; every other
 * column is copied as-is.
 *
 * @return the event obtained from the source
 */
@Override
public Event nextEvent() {
  final Event event = source.nextEvent();
  if (event != Event.META_CHANGED) {
    return event;
  }
  if (meta == null) {
    meta = new BundleMeta();
  }
  final BundleMeta sourceMeta = source.getMeta();
  // Continue where the local meta data ends; it grows by one entry per step.
  for (int pos = meta.size(); pos < sourceMeta.size(); pos++) {
    boolean converted = false;
    if (column < 0) {
      @SuppressWarnings("unchecked")
      SimpleTypeInformation<Object> candidate = (SimpleTypeInformation<Object>) sourceMeta.get(pos);
      // Does this column have a type the filter accepts?
      if (getInputTypeRestriction().isAssignableFromType(candidate)) {
        @SuppressWarnings("unchecked")
        final SimpleTypeInformation<I> inType = (SimpleTypeInformation<I>) candidate;
        meta.add(convertedType(inType));
        column = pos;
        converted = true;
      }
    }
    if (!converted) {
      meta.add(sourceMeta.get(pos));
    }
  }
  return event;
}
use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class SplitNumberVectorFilter method filter.
/**
 * Splits every column matching the input type restriction into two vector
 * columns: the first holds the dimensions listed in {@code dims}, the second
 * holds all remaining dimensions in ascending order. Columns of other types
 * are copied to the result unchanged.
 *
 * @param objects Objects to filter
 * @return Bundle with the split columns (the input bundle itself if it holds
 *         no data)
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  if (objects.dataLength() == 0) {
    return objects;
  }
  MultipleObjectsBundle result = new MultipleObjectsBundle();
  for (int col = 0; col < objects.metaLength(); col++) {
    @SuppressWarnings("unchecked")
    SimpleTypeInformation<Object> rawType = (SimpleTypeInformation<Object>) objects.meta(col);
    @SuppressWarnings("unchecked")
    final List<Object> values = (List<Object>) objects.getColumn(col);
    if (getInputTypeRestriction().isAssignableFromType(rawType)) {
      // Having passed the restriction test, this should be a vector field.
      @SuppressWarnings("unchecked")
      final VectorFieldTypeInformation<V> vtype = VectorFieldTypeInformation.class.cast(rawType);
      NumberVector.Factory<V> factory = FilterUtil.guessFactory(vtype);
      // Type information for the two output columns.
      VectorFieldTypeInformation<V> selectedType = new VectorFieldTypeInformation<>(factory, dims.length);
      VectorFieldTypeInformation<V> remainingType = new VectorFieldTypeInformation<>(factory, vtype.getDimensionality() - dims.length);
      final List<V> selectedColumn = new ArrayList<>(values.size());
      final List<V> remainingColumn = new ArrayList<>(values.size());
      result.appendColumn(selectedType, selectedColumn);
      result.appendColumn(remainingType, remainingColumn);
      // Collect the dimensions that are NOT listed in dims.
      int[] odims = new int[vtype.getDimensionality() - dims.length];
      int filled = 0;
      for (int d = 0; d < vtype.getDimensionality(); d++) {
        boolean selected = false;
        for (int chosen : dims) {
          if (chosen == d) {
            selected = true;
            break;
          }
        }
        if (selected) {
          continue;
        }
        // More unselected dimensions than there is room for.
        if (filled >= odims.length) {
          throw new AbortException("Dimensionalities not proper!");
        }
        odims[filled] = d;
        filled++;
      }
      // Build both partial vectors for every object of the column.
      for (int row = 0; row < objects.dataLength(); row++) {
        @SuppressWarnings("unchecked")
        final V vec = (V) values.get(row);
        double[] selectedValues = new double[dims.length];
        double[] remainingValues = new double[vec.getDimensionality() - dims.length];
        for (int k = 0; k < dims.length; k++) {
          selectedValues[k] = vec.doubleValue(dims[k]);
        }
        for (int k = 0; k < odims.length; k++) {
          remainingValues[k] = vec.doubleValue(odims[k]);
        }
        selectedColumn.add(factory.newNumberVector(selectedValues));
        remainingColumn.add(factory.newNumberVector(remainingValues));
      }
    } else {
      // Not a column this filter handles: pass it through untouched.
      result.appendColumn(rawType, values);
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class AttributeWiseCDFNormalization method filter.
/**
 * Normalizes every number vector field column in place: for each dimension a
 * distribution is chosen via {@code findBestFit}, and each attribute value is
 * then replaced by the value of that distribution's {@code cdf}. Columns of
 * other types are left untouched.
 *
 * @param objects Objects to filter
 * @return the input bundle, with matching columns overwritten
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  if (objects.dataLength() == 0) {
    return objects;
  }
  for (int col = 0; col < objects.metaLength(); col++) {
    SimpleTypeInformation<?> rawType = (SimpleTypeInformation<?>) objects.meta(col);
    final List<?> rawColumn = (List<?>) objects.getColumn(col);
    if (TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(rawType)) {
      @SuppressWarnings("unchecked")
      final List<V> vectors = (List<V>) rawColumn;
      // The type test above succeeded, so this is a vector field.
      @SuppressWarnings("unchecked")
      final VectorFieldTypeInformation<V> vectorType = (VectorFieldTypeInformation<V>) rawType;
      factory = FilterUtil.guessFactory(vectorType);
      final int dim = vectorType.getDimensionality();
      dists = new ArrayList<>(dim);
      // Scratch array, only allocated when more than one estimator is set.
      double[] scratch = null;
      if (estimators.size() > 1) {
        scratch = new double[vectors.size()];
      }
      // Dimensions are processed one at a time through a reusable adapter,
      // so this kind of filter needs fast random access to the column.
      Adapter adapter = new Adapter();
      for (int d = 0; d < dim; d++) {
        adapter.dim = d;
        Distribution fitted = findBestFit(vectors, adapter, d, scratch);
        // Constant-zero attributes should remain 0 rather than - usually -
        // turning into a constant .5, hence the fixed uniform on [0, 1].
        if (fitted instanceof UniformDistribution && constantZero(vectors, adapter)) {
          fitted = new UniformDistribution(0., 1.);
        }
        dists.add(fitted);
      }
      // Normalization pass: overwrite each vector with its CDF values.
      double[] cdfValues = new double[dim];
      for (int row = 0; row < objects.dataLength(); row++) {
        final V vec = vectors.get(row);
        for (int d = 0; d < dim; d++) {
          cdfValues[d] = dists.get(d).cdf(vec.doubleValue(d));
        }
        vectors.set(row, factory.newNumberVector(cdfValues));
      }
    }
  }
  return objects;
}
use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.
the class AttributeWiseMADNormalization method filter.
/**
 * Normalizes every number vector field column in place. In an analysis pass,
 * the median and an inverse scale derived from the median absolute deviation
 * (MAD) are computed per dimension and stored in {@code median} and
 * {@code imadsigma}. In the normalization pass, each attribute value is
 * replaced by the result of {@code normalize}. Columns of other types are
 * left untouched.
 *
 * @param objects Objects to filter
 * @return the input bundle, with matching columns overwritten
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  if (objects.dataLength() == 0) {
    return objects;
  }
  for (int col = 0; col < objects.metaLength(); col++) {
    SimpleTypeInformation<?> rawType = (SimpleTypeInformation<?>) objects.meta(col);
    final List<?> rawColumn = (List<?>) objects.getColumn(col);
    if (TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(rawType)) {
      @SuppressWarnings("unchecked")
      final List<V> vectors = (List<V>) rawColumn;
      // The type test above succeeded, so this is a vector field.
      @SuppressWarnings("unchecked")
      final VectorFieldTypeInformation<V> vectorType = (VectorFieldTypeInformation<V>) rawType;
      factory = FilterUtil.guessFactory(vectorType);
      final int dim = vectorType.getDimensionality();
      median = new double[dim];
      imadsigma = new double[dim];
      // Scratch array holding one attribute of all objects at a time.
      double[] scratch = new double[vectors.size()];
      FiniteProgress analysisProgress = null;
      if (LOG.isVerbose()) {
        analysisProgress = new FiniteProgress("Analyzing data", dim, LOG);
      }
      // Dimensions are processed one at a time over the whole column.
      for (int d = 0; d < dim; d++) {
        for (int i = 0; i < scratch.length; i++) {
          scratch[i] = vectors.get(i).doubleValue(d);
        }
        final double center = QuickSelect.median(scratch);
        median[d] = center;
        // Replace values by absolute deviations and count exact zeros.
        int zeroCount = 0;
        for (int i = 0; i < scratch.length; i++) {
          final double deviation = Math.abs(scratch[i] - center);
          scratch[i] = deviation;
          if (deviation == 0.) {
            zeroCount++;
          }
        }
        if (zeroCount < (scratch.length >>> 1)) {
          // Rescale the true MAD for the best standard deviation estimate.
          imadsigma[d] = NormalDistribution.PHIINV075 / QuickSelect.median(scratch);
        } else if (zeroCount == scratch.length) {
          LOG.warning("Constant attribute detected. Using MAD=1.");
          // The value does not matter for a constant attribute.
          imadsigma[d] = 1.;
        } else {
          // With more than 50% zeros the regular MAD estimate does not work.
          // Generalize the approach: use the value in the middle of the
          // non-zero deviations, together with the matching normal quantile.
          final int rank = zeroCount + ((scratch.length - zeroCount) >> 1);
          final double rel = .5 + rank * .5 / scratch.length;
          imadsigma[d] = NormalDistribution.quantile(0., 1., rel) / QuickSelect.quickSelect(scratch, rank);
          LOG.warning("Near-constant attribute detected. Using modified MAD.");
        }
        LOG.incrementProcessed(analysisProgress);
      }
      LOG.ensureCompleted(analysisProgress);
      FiniteProgress normProgress = null;
      if (LOG.isVerbose()) {
        normProgress = new FiniteProgress("Data normalization", objects.dataLength(), LOG);
      }
      // Normalization pass: overwrite each vector with its normalized values.
      double[] normalized = new double[dim];
      for (int row = 0; row < objects.dataLength(); row++) {
        final V vec = vectors.get(row);
        for (int d = 0; d < dim; d++) {
          normalized[d] = normalize(d, vec.doubleValue(d));
        }
        vectors.set(row, factory.newNumberVector(normalized));
        LOG.incrementProcessed(normProgress);
      }
      LOG.ensureCompleted(normProgress);
    }
  }
  return objects;
}
Aggregations