use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class HellingerHistogramNormalizationTest method defaultParameters.
/**
 * Runs the filter with its default parameterization on the shared
 * normalization test file and checks the squared sum of every output vector.
 */
@Test
public void defaultParameters() {
  final String path = UNITTEST + "normalization-test-1.csv";
  HellingerHistogramNormalization<DoubleVector> normalizer = new ELKIBuilder<>(HellingerHistogramNormalization.class).build();
  final MultipleObjectsBundle data = readBundle(path, normalizer);
  final int dimensionality = getFieldDimensionality(data, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  // For every vector, the squared components must add up to 0.5 (the value
  // asserted below), within a tolerance of 1e-15.
  int r = 0;
  while (r < data.dataLength()) {
    final DoubleVector vec = get(data, r, 0, DoubleVector.class);
    double squaredSum = 0.;
    int c = 0;
    while (c < dimensionality) {
      final double component = vec.doubleValue(c);
      squaredSum += component * component;
      c++;
    }
    assertEquals("Sum of squares not as expected", .5, squaredSum, 1e-15);
    r++;
  }
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class InstanceLogRankNormalizationTest method defaultParameters.
/**
 * Runs the filter with its default parameterization on the shared
 * normalization test file and compares per-row statistics of the output
 * against reference statistics computed from evenly spaced ranks.
 */
@Test
public void defaultParameters() {
  final String path = UNITTEST + "normalization-test-1.csv";
  InstanceLogRankNormalization<DoubleVector> normalizer = new ELKIBuilder<>(InstanceLogRankNormalization.class).build();
  final MultipleObjectsBundle data = readBundle(path, normalizer);
  final int dimensionality = getFieldDimensionality(data, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  // Reference mean and variance: the values log1p(i / (dim - 1)) scaled by
  // MathUtil.ONE_BY_LOG2, for i = 0 .. dim - 1.
  final MeanVariance reference = new MeanVariance();
  final double lastRank = dimensionality - 1;
  int rank = 0;
  while (rank < dimensionality) {
    reference.put(Math.log1p(rank / lastRank) * MathUtil.ONE_BY_LOG2);
    rank++;
  }
  // Within each row (ties aside), the smallest value has to map to 0 and the
  // largest to 1, and mean and variance have to agree with the reference
  // statistics computed above.
  final MeanVarianceMinMax rowStats = new MeanVarianceMinMax();
  int r = 0;
  while (r < data.dataLength()) {
    final DoubleVector vec = get(data, r, 0, DoubleVector.class);
    int c = 0;
    while (c < dimensionality) {
      rowStats.put(vec.doubleValue(c));
      c++;
    }
    assertEquals("Min value is not 0", 0., rowStats.getMin(), 0);
    assertEquals("Max value is not 1", 1., rowStats.getMax(), 0);
    assertEquals("Mean value is not as expected", reference.getMean(), rowStats.getMean(), 1e-14);
    assertEquals("Variance is not as expected", reference.getNaiveVariance(), rowStats.getNaiveVariance(), 1e-14);
    // Start from a clean accumulator for the next row.
    rowStats.reset();
    r++;
  }
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class DropNaNFilterTest method defaultParameters.
/**
 * Loads the NaN test file once through the filter (default parameters) and
 * once without any filter, then checks that the raw data contains a NaN
 * while the filtered data contains none.
 */
@Test
public void defaultParameters() {
  final String path = UNITTEST + "nan-test-1.csv";
  DropNaNFilter dropNaN = new ELKIBuilder<>(DropNaNFilter.class).build();
  final MultipleObjectsBundle cleaned = readBundle(path, dropNaN);
  // Second pass over the same file, this time with no filter applied.
  final MultipleObjectsBundle raw = readBundle(path);
  // Dimensionality of the first column in each bundle.
  final int cleanedDim = getFieldDimensionality(cleaned, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  final int rawDim = getFieldDimensionality(raw, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  // Sanity check on the input: the raw data must contain at least one NaN.
  boolean sawNaN = false;
  for (int r = 0; r < raw.dataLength(); r++) {
    final DoubleVector vec = get(raw, r, 0, DoubleVector.class);
    // Advance to the first NaN in this row, if there is one.
    int c = 0;
    while (c < rawDim && !Double.isNaN(vec.doubleValue(c))) {
      c++;
    }
    if (c < rawDim) {
      sawNaN = true;
    }
  }
  assertTrue("NaN expected in unfiltered data", sawNaN);
  // The filtered data must not contain any NaN component.
  for (int r = 0; r < cleaned.dataLength(); r++) {
    final DoubleVector vec = get(cleaned, r, 0, DoubleVector.class);
    for (int c = 0; c < cleanedDim; c++) {
      final boolean isNaN = Double.isNaN(vec.doubleValue(c));
      assertFalse("NaN not expected", isNaN);
    }
  }
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class DBIDRangeDatabaseConnection method loadData.
/**
 * Creates a fresh bundle and assigns it a static DBID range generated from
 * {@code start} and {@code count}. No columns are added here.
 *
 * @return the bundle with only its DBIDs set
 */
@Override
public MultipleObjectsBundle loadData() {
  final MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  bundle.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(start, count));
  return bundle;
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class InputStreamDatabaseConnection method loadData.
/**
 * Parses the input stream {@code in} with the configured parser and applies
 * the filters to the result.
 *
 * A {@link StreamingParser} is initialized on the stream and handed to the
 * stream filters directly; any other parser first parses the whole input
 * into a bundle, which is then passed to the bundle filters. When statistics
 * logging is enabled, the durations of these steps are logged.
 *
 * @return the filtered objects as a bundle
 */
@Override
public MultipleObjectsBundle loadData() {
  if (LOG.isDebugging()) {
    LOG.debugFine("Invoking parsers.");
  }
  if (!(parser instanceof StreamingParser)) {
    // Non-streaming parser: parse everything first, filter afterwards.
    Duration parseTime = null;
    if (LOG.isStatistics()) {
      parseTime = LOG.newDuration(this.getClass().getName() + ".parse").begin();
    }
    final MultipleObjectsBundle parsed = parser.parse(in);
    parser.cleanup();
    if (parseTime != null) {
      LOG.statistics(parseTime.end());
    }
    if (LOG.isDebugging()) {
      LOG.debugFine("Invoking filters.");
    }
    // The filter step is timed separately from parsing.
    Duration filterTime = null;
    if (LOG.isStatistics()) {
      filterTime = LOG.newDuration(this.getClass().getName() + ".filter").begin();
    }
    final MultipleObjectsBundle filtered = invokeBundleFilters(parsed);
    if (filterTime != null) {
      LOG.statistics(filterTime.end());
    }
    return filtered;
  }
  // Streaming parser: the stream filters can consume its output right away.
  final StreamingParser streaming = (StreamingParser) parser;
  streaming.initStream(in);
  if (LOG.isDebugging()) {
    LOG.debugFine("Parsing as stream.");
  }
  // A single timer covers parsing and filtering together in this mode.
  Duration loadTime = null;
  if (LOG.isStatistics()) {
    loadTime = LOG.newDuration(this.getClass().getName() + ".load").begin();
  }
  final MultipleObjectsBundle streamed = invokeStreamFilters(streaming).asMultipleObjectsBundle();
  parser.cleanup();
  if (loadTime != null) {
    LOG.statistics(loadTime.end());
  }
  return streamed;
}
Aggregations