use of de.lmu.ifi.dbs.elki.data.IntegerVector in project elki by elki-project.
the class ReplaceNaNWithRandomFilterTest method parameters.
/**
 * Test with standard normal distribution as parameter.
 *
 * Loads the same test file once through a {@link ReplaceNaNWithRandomFilter}
 * (configured with a seeded standard normal distribution) and once without
 * any filter, records every (row, column) position holding NaN in the
 * unfiltered data, and verifies that none of these positions is NaN in the
 * filtered data.
 */
@Test
public void parameters() {
  String filename = UNITTEST + "nan-test-1.csv";
  ReplaceNaNWithRandomFilter filter = new ELKIBuilder<>(ReplaceNaNWithRandomFilter.class) //
      .with(ReplaceNaNWithRandomFilter.Parameterizer.REPLACEMENT_DISTRIBUTION, new NormalDistribution(0, 1, new Random(0L))) //
      .build();
  MultipleObjectsBundle filteredBundle = readBundle(filename, filter);
  // Load the test data again without a filter.
  MultipleObjectsBundle unfilteredBundle = readBundle(filename);
  // Ensure the first column are the vectors.
  assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(filteredBundle.meta(0)));
  assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(unfilteredBundle.meta(0)));
  // This cast is now safe (vector field).
  // The filtered dimensionality must be read from the filtered bundle;
  // otherwise the comparison below checks a value against itself.
  int dimFiltered = ((FieldTypeInformation) filteredBundle.meta(0)).getDimensionality();
  int dimUnfiltered = ((FieldTypeInformation) unfilteredBundle.meta(0)).getDimensionality();
  assertEquals("Dimensionality expected equal", dimFiltered, dimUnfiltered);
  // Note the indices of the NaN(s) in the data, stored as (row, col) pairs.
  List<IntegerVector> nanPositions = new ArrayList<>();
  for (int row = 0; row < unfilteredBundle.dataLength(); row++) {
    Object obj = unfilteredBundle.data(row, 0);
    assertEquals("Unexpected data type", DoubleVector.class, obj.getClass());
    DoubleVector d = (DoubleVector) obj;
    for (int col = 0; col < dimUnfiltered; col++) {
      final double v = d.doubleValue(col);
      if (Double.isNaN(v)) {
        nanPositions.add(new IntegerVector(new int[] { row, col }));
      }
    }
  }
  // Verify that at least a single NaN exists in the unfiltered bundle.
  assertTrue("NaN expected in unfiltered data", nanPositions.size() > 0);
  // Every recorded position must hold a non-NaN value after filtering.
  for (IntegerVector iv : nanPositions) {
    Object obj = filteredBundle.data(iv.intValue(0), 0);
    assertEquals("Unexpected data type", DoubleVector.class, obj.getClass());
    DoubleVector d = (DoubleVector) obj;
    final double v = d.doubleValue(iv.intValue(1));
    assertFalse("NaN not expected", Double.isNaN(v));
  }
}
use of de.lmu.ifi.dbs.elki.data.IntegerVector in project elki by elki-project.
the class IntegerRankTieNormalizationTest method defaultParameters.
/**
 * Test with default parameters.
 *
 * Runs {@link IntegerRankTieNormalization} on the test file and, for every
 * column, sorts the resulting integer values and checks each run of equal
 * values: the value must equal the sum of the first and last sorted index of
 * the run, and the parity of the value must differ from the parity of the run
 * length.
 */
@Test
public void defaultParameters() {
  String filename = UNITTEST + "normalization-test-1.csv";
  IntegerRankTieNormalization filter = new ELKIBuilder<>(IntegerRankTieNormalization.class).build();
  MultipleObjectsBundle bundle = readBundle(filename, filter);
  int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  IntegerArray coldata = new IntegerArray(bundle.dataLength());
  for (int col = 0; col < dim; col++) {
    coldata.clear();
    // Extract the column:
    for (int row = 0; row < bundle.dataLength(); row++) {
      IntegerVector obj = get(bundle, row, 0, IntegerVector.class);
      coldata.add(obj.intValue(col));
    }
    // Sort values:
    coldata.sort();
    // Verify that the gap matches the frequency of each value.
    final int size = coldata.size;
    // The smallest value is also the index of the last member of its run.
    assertEquals("First value", coldata.get(0), coldata.get(coldata.get(0)));
    for (int i = 0; i < size;) {
      // s: Start, i: end, v: value, f: frequency
      int s = i, v = coldata.get(i), f = 1;
      while (++i < size && v == coldata.get(i)) {
        f++;
      }
      // The value is odd if and only if the frequency is even, i.e. the
      // lowest bits of f and v must differ. Compare by value rather than by
      // the reference identity of autoboxed Integers.
      assertEquals("Even/odd rule", 1, (f ^ v) & 1);
      assertEquals("Bad value at position " + s, s + i - 1, v);
      assertEquals("Bad frequency at position " + s, i - s, f);
    }
  }
}
use of de.lmu.ifi.dbs.elki.data.IntegerVector in project elki by elki-project.
the class IntegerRankTieNormalization method filter.
/**
 * Build a new bundle in which every column whose type is assignable to
 * {@link TypeUtil#NUMBER_VECTOR_FIELD} is replaced by a column of
 * {@link IntegerVector}s; all other columns are appended unchanged.
 *
 * For each dimension of such a column, the object indices are sorted with the
 * {@code Sorter} set up for that column and dimension. Each run of entries
 * that is not strictly smaller than its successors (a tie group) receives the
 * same output value: the sum of the first and the last sorted position of the
 * group.
 *
 * @param objects Input bundle
 * @return Newly created bundle with the replaced columns
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  final MultipleObjectsBundle result = new MultipleObjectsBundle();
  // Index permutation, initialized to identity and re-sorted in place for
  // every dimension of every vector column.
  final int[] perm = new int[size];
  for (int idx = 0; idx < size; idx++) {
    perm[idx] = idx;
  }
  final Sorter sorter = new Sorter();
  for (int c = 0; c < objects.metaLength(); c++) {
    final SimpleTypeInformation<?> meta = objects.meta(c);
    final List<?> rawColumn = objects.getColumn(c);
    if (TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
      @SuppressWarnings("unchecked")
      final List<? extends NumberVector> vectors = (List<? extends NumberVector>) rawColumn;
      // Type information describing the replacement column
      final int dimensionality = ((VectorFieldTypeInformation<?>) meta).getDimensionality();
      final VectorFieldTypeInformation<IntegerVector> replacementType = new VectorFieldTypeInformation<>(IntegerVector.STATIC, dimensionality);
      // One output row per object, one entry per dimension
      final int[][] ranks = new int[size][dimensionality];
      for (int dim = 0; dim < dimensionality; dim++) {
        // Order the indices for the current dimension
        sorter.setup(vectors, dim);
        IntegerArrayQuickSort.sort(perm, sorter);
        // Walk over the sorted indices, one tie group at a time
        int lo = 0;
        while (lo < perm.length) {
          final double value = vectors.get(perm[lo]).doubleValue(dim);
          // Advance hi to the first entry strictly larger than value
          int hi = lo + 1;
          for (; hi < perm.length; hi++) {
            if (value < vectors.get(perm[hi]).doubleValue(dim)) {
              break;
            }
          }
          // Shared value of the group: first plus last sorted position
          final int shared = lo + hi - 1;
          for (int k = lo; k < hi; k++) {
            ranks[perm[k]][dim] = shared;
          }
          lo = hi;
        }
      }
      // Wrap the rows into vectors and append the replacement column
      final List<IntegerVector> replacement = new ArrayList<>(size);
      for (final int[] row : ranks) {
        replacement.add(new IntegerVector(row));
      }
      result.appendColumn(replacementType, replacement);
    }
    else {
      // Not a number vector column: pass through as-is
      result.appendColumn(meta, rawColumn);
    }
  }
  return result;
}
Aggregations