use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class PresortedBlindJoinDatabaseConnection method loadData.
/**
 * Loads every configured source and joins the results column-wise.
 *
 * All sources are loaded first. The bundle of the first source then serves
 * as the join target: the columns of each further bundle are appended to it
 * in source order. Rows are matched by position only; the sole consistency
 * check is that each further bundle has the same data length as the target.
 *
 * @return the first source's bundle, extended by the columns of all other
 *         sources, after being passed through {@code invokeBundleFilters}
 * @throws AbortException if a further bundle's data length differs from the
 *         data length of the join target
 */
@Override
public MultipleObjectsBundle loadData() {
  // Load everything up front, before any joining is attempted.
  final List<MultipleObjectsBundle> loaded = new ArrayList<>(sources.size());
  for (DatabaseConnection source : sources) {
    loaded.add(source.loadData());
  }

  // The first bundle is extended in place and becomes the result.
  final MultipleObjectsBundle joined = loaded.get(0);
  for (MultipleObjectsBundle extra : loaded.subList(1, loaded.size())) {
    if (extra.dataLength() != joined.dataLength()) {
      throw new AbortException("Data set sizes do not agree - cannot join!");
    }
    // Copy over every column of the additional bundle, with its metadata.
    for (int col = 0; col < extra.metaLength(); col++) {
      joined.appendColumn(extra.meta(col), extra.getColumn(col));
    }
  }
  return invokeBundleFilters(joined);
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class AbstractConversionFilter method filter.
/**
 * Default filtering procedure, applied to the bundle column by column.
 *
 * Columns whose type is not accepted by {@link #getInputTypeRestriction} are
 * copied to the result unchanged. For every accepted column, an optional
 * read-only preparation pass is run first (only if {@link #prepareStart}
 * returns true), and then each object is replaced by the result of
 * {@link #filterSingleObject}.
 *
 * Note that the converted values are written back into the column list of
 * the input bundle; that same list is what gets attached to the result.
 *
 * @param objects Objects to filter
 * @return Filtered bundle; the input bundle itself if it contains no data
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  // Nothing to convert in an empty bundle.
  if (objects.dataLength() == 0) {
    return objects;
  }
  final MultipleObjectsBundle result = new MultipleObjectsBundle();
  final Logging log = getLogger();

  for (int col = 0; col < objects.metaLength(); col++) {
    @SuppressWarnings("unchecked")
    final SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(col);
    @SuppressWarnings("unchecked")
    final List<Object> data = (List<Object>) objects.getColumn(col);

    if (getInputTypeRestriction().isAssignableFromType(meta)) {
      // The type check above passed, so treat the column as input type I.
      @SuppressWarnings("unchecked")
      final SimpleTypeInformation<I> inputType = (SimpleTypeInformation<I>) meta;

      // Optional first pass: lets the filter inspect all values read-only.
      if (prepareStart(inputType)) {
        FiniteProgress prepProgress = null;
        if (log.isVerbose()) {
          prepProgress = new FiniteProgress("Preparing normalization", objects.dataLength(), log);
        }
        for (Object raw : data) {
          @SuppressWarnings("unchecked")
          final I input = (I) raw;
          prepareProcessInstance(input);
          log.incrementProcessed(prepProgress);
        }
        log.ensureCompleted(prepProgress);
        prepareComplete();
      }

      // Same list viewed as the output type; it is registered with the
      // result before its contents are overwritten below.
      @SuppressWarnings("unchecked")
      final List<O> converted = (List<O>) data;
      result.appendColumn(convertedType(inputType), converted);

      // Main pass: replace each object by its filtered counterpart.
      FiniteProgress mainProgress = null;
      if (log.isVerbose()) {
        mainProgress = new FiniteProgress("Data normalization", objects.dataLength(), log);
      }
      for (int row = 0; row < objects.dataLength(); row++) {
        @SuppressWarnings("unchecked")
        final I input = (I) data.get(row);
        converted.set(row, filterSingleObject(input));
        log.incrementProcessed(mainProgress);
      }
      log.ensureCompleted(mainProgress);
    } else {
      // Not a column this filter handles: pass it through untouched.
      result.appendColumn(meta, data);
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class SplitNumberVectorFilter method filter.
/**
 * Splits every accepted vector column into two vector columns.
 *
 * Columns whose type is not accepted by {@code getInputTypeRestriction()}
 * are copied to the result unchanged. Each accepted column is replaced by
 * two new columns: the first holds the values at the positions listed in
 * {@code dims} (in that order), the second holds the values of all remaining
 * positions in ascending order.
 *
 * @param objects Objects to filter
 * @return Filtered bundle; the input bundle itself if it contains no data
 * @throws AbortException if more unselected dimensions are found than the
 *         column dimensionality minus {@code dims.length} allows
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  if (objects.dataLength() == 0) {
    return objects;
  }
  final MultipleObjectsBundle result = new MultipleObjectsBundle();

  for (int col = 0; col < objects.metaLength(); col++) {
    @SuppressWarnings("unchecked")
    final SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(col);
    @SuppressWarnings("unchecked")
    final List<Object> data = (List<Object>) objects.getColumn(col);

    // Columns of other types are passed through untouched.
    if (!getInputTypeRestriction().isAssignableFromType(meta)) {
      result.appendColumn(meta, data);
      continue;
    }

    // After the restriction check this is expected to be a vector field.
    @SuppressWarnings("unchecked")
    final VectorFieldTypeInformation<V> vectorType = VectorFieldTypeInformation.class.cast(meta);
    final NumberVector.Factory<V> factory = FilterUtil.guessFactory(vectorType);

    // Type information and (initially empty) storage for the two halves.
    final VectorFieldTypeInformation<V> selectedType = new VectorFieldTypeInformation<>(factory, dims.length);
    final VectorFieldTypeInformation<V> remainingType = new VectorFieldTypeInformation<>(factory, vectorType.getDimensionality() - dims.length);
    final List<V> selectedColumn = new ArrayList<>(data.size());
    final List<V> remainingColumn = new ArrayList<>(data.size());
    result.appendColumn(selectedType, selectedColumn);
    result.appendColumn(remainingType, remainingColumn);

    // Collect, in ascending order, every dimension not listed in dims.
    final int[] remainingDims = new int[vectorType.getDimensionality() - dims.length];
    int filled = 0;
    scan: for (int d = 0; d < vectorType.getDimensionality(); d++) {
      for (int selected : dims) {
        if (selected == d) {
          continue scan;
        }
      }
      if (filled >= remainingDims.length) {
        throw new AbortException("Dimensionalities not proper!");
      }
      remainingDims[filled++] = d;
    }

    // Split each vector into its selected and remaining parts.
    for (int row = 0; row < objects.dataLength(); row++) {
      @SuppressWarnings("unchecked")
      final V vector = (V) data.get(row);
      final double[] selectedValues = new double[dims.length];
      final double[] remainingValues = new double[vector.getDimensionality() - dims.length];
      for (int k = 0; k < dims.length; k++) {
        selectedValues[k] = vector.doubleValue(dims[k]);
      }
      for (int k = 0; k < remainingDims.length; k++) {
        remainingValues[k] = vector.doubleValue(remainingDims[k]);
      }
      selectedColumn.add(factory.newNumberVector(selectedValues));
      remainingColumn.add(factory.newNumberVector(remainingValues));
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class ClassLabelFilter method filter.
/**
 * Extracts one label of the first label-list column as class label.
 *
 * The first column whose restriction class is {@link LabelList} is split in
 * two: a class label column, built by {@code classLabelFactory} from the
 * label at position {@code classLabelIndex}, and a label-list column with
 * the remaining labels. The remaining-labels column is only added when at
 * least one object still has labels left. All other columns, including any
 * further label-list columns, are copied to the result unchanged.
 *
 * A non-negative {@code classLabelIndex} counts from the start of the label
 * list; a negative one counts from its end, so -1 selects the last label.
 *
 * @param objects Objects to filter
 * @return New bundle with the class label column split off
 * @throws AbortException if the class label of an object cannot be created,
 *         including the case that the index is outside its label list
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  // Find a labellist column
  boolean done = false;
  boolean keeplabelcol = false;
  for (int i = 0; i < objects.metaLength(); i++) {
    SimpleTypeInformation<?> meta = objects.meta(i);
    // Skip non-labellist columns - or if we already had a labellist
    if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
      bundle.appendColumn(meta, objects.getColumn(i));
      continue;
    }
    done = true;
    // We split the label column into two parts
    List<ClassLabel> clscol = new ArrayList<>(objects.dataLength());
    List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
    ArrayList<String> lbuf = new ArrayList<>();
    // Split the column
    for (Object obj : objects.getColumn(i)) {
      if (obj != null) {
        LabelList ll = (LabelList) obj;
        // Negative indexes are relative to the end of the list, so they must
        // be added to the size. Subtracting them (as was done before) always
        // produced an offset past the end of the list.
        int off = (classLabelIndex >= 0) ? classLabelIndex : (ll.size() + classLabelIndex);
        try {
          ClassLabel lbl = classLabelFactory.makeFromString(ll.get(off));
          clscol.add(lbl);
        } catch (Exception e) {
          throw new AbortException("Cannot initialize class labels: " + e.getMessage(), e);
        }
        // Keep every label except the one used as class label.
        lbuf.clear();
        for (int j = 0; j < ll.size(); j++) {
          if (j == off) {
            continue;
          }
          lbuf.add(ll.get(j));
        }
        lblcol.add(LabelList.make(lbuf));
        if (!lbuf.isEmpty()) {
          keeplabelcol = true;
        }
      } else {
        // Objects without labels stay unlabeled in both new columns.
        clscol.add(null);
        lblcol.add(null);
      }
    }
    bundle.appendColumn(classLabelFactory.getTypeInformation(), clscol);
    // Only add the label column when it's not empty.
    if (keeplabelcol) {
      bundle.appendColumn(meta, lblcol);
    }
  }
  return bundle;
}
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class NoOpFilterTest method passthrough.
/**
 * Checks that reading the test file through a {@link NoOpFilter} yields a
 * bundle with the same vector dimensionality and the same number of objects
 * as reading it without any filter.
 */
@Test
public void passthrough() {
  final String path = UNITTEST + "normalization-test-1.csv";
  final NoOpFilter noop = new ELKIBuilder<>(NoOpFilter.class).build();

  // Read the same file twice: once through the filter, once without it.
  final MultipleObjectsBundle filtered = readBundle(path, noop);
  final MultipleObjectsBundle reference = readBundle(path);

  // The vector column must keep its dimensionality.
  assertEquals("Dimensionality",
      getFieldDimensionality(reference, 0, TypeUtil.NUMBER_VECTOR_FIELD),
      getFieldDimensionality(filtered, 0, TypeUtil.NUMBER_VECTOR_FIELD));

  // The filter must neither drop nor add objects.
  assertEquals("Unexpected bundle length", reference.dataLength(), filtered.dataLength());
}
Aggregations