Search in sources :

Example 56 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class PresortedBlindJoinDatabaseConnection method loadData.

/**
 * Loads all configured sources and joins them column-wise into one bundle.
 *
 * Every source is loaded first. The columns of the second and later bundles
 * are then appended to the bundle of the first source. Rows are not matched
 * or reordered here; only the number of objects is compared.
 *
 * @return the joined bundle, after passing it through the bundle filters
 * @throws AbortException if a later bundle has a different number of objects
 *         than the first one
 */
@Override
public MultipleObjectsBundle loadData() {
    // Phase 1: load every source before any joining takes place.
    final List<MultipleObjectsBundle> loaded = new ArrayList<>(sources.size());
    for (DatabaseConnection source : sources) {
        loaded.add(source.loadData());
    }
    // Phase 2: the first bundle is the join target, the others contribute columns.
    final MultipleObjectsBundle joined = loaded.get(0);
    for (MultipleObjectsBundle extra : loaded.subList(1, loaded.size())) {
        if (extra.dataLength() != joined.dataLength()) {
            throw new AbortException("Data set sizes do not agree - cannot join!");
        }
        for (int col = 0; col < extra.metaLength(); col++) {
            joined.appendColumn(extra.meta(col), extra.getColumn(col));
        }
    }
    return invokeBundleFilters(joined);
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 57 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class AbstractConversionFilter method filter.

/**
 * Default filtering procedure for conversion filters.
 *
 * Columns whose type does not satisfy the input type restriction are copied
 * to the result unchanged. For every other column, an optional read-only
 * preparation pass is run (only if {@link #prepareStart} returns true), and
 * then each object is replaced by the result of {@link #filterSingleObject}.
 *
 * Note that the conversion writes the new objects into the list obtained from
 * the input bundle, i.e. the column is converted in place and the same list
 * instance is registered in the returned bundle.
 *
 * @param objects Objects to filter
 * @return Filtered bundle; the input bundle itself if it contains no objects
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    // An empty bundle needs no conversion.
    if (objects.dataLength() == 0) {
        return objects;
    }
    MultipleObjectsBundle result = new MultipleObjectsBundle();
    final Logging log = getLogger();
    for (int col = 0; col < objects.metaLength(); col++) {
        @SuppressWarnings("unchecked")
        SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(col);
        @SuppressWarnings("unchecked")
        final List<Object> values = (List<Object>) objects.getColumn(col);
        // Columns this filter is not responsible for are passed through as-is.
        if (!getInputTypeRestriction().isAssignableFromType(meta)) {
            result.appendColumn(meta, values);
            continue;
        }
        // The restriction check above justifies treating the type as the input type.
        @SuppressWarnings("unchecked")
        final SimpleTypeInformation<I> inputType = (SimpleTypeInformation<I>) meta;
        // Optional first pass over the data, read-only.
        if (prepareStart(inputType)) {
            FiniteProgress prepProgress = null;
            if (log.isVerbose()) {
                prepProgress = new FiniteProgress("Preparing normalization", objects.dataLength(), log);
            }
            for (Object raw : values) {
                @SuppressWarnings("unchecked")
                final I input = (I) raw;
                prepareProcessInstance(input);
                log.incrementProcessed(prepProgress);
            }
            log.ensureCompleted(prepProgress);
            prepareComplete();
        }
        // Same list instance, viewed with the output element type.
        @SuppressWarnings("unchecked")
        final List<O> converted = (List<O>) values;
        result.appendColumn(convertedType(inputType), converted);
        // Main pass: overwrite each entry with its converted counterpart.
        FiniteProgress mainProgress = null;
        if (log.isVerbose()) {
            mainProgress = new FiniteProgress("Data normalization", objects.dataLength(), log);
        }
        for (int row = 0; row < objects.dataLength(); row++) {
            @SuppressWarnings("unchecked")
            final I input = (I) values.get(row);
            converted.set(row, filterSingleObject(input));
            log.incrementProcessed(mainProgress);
        }
        log.ensureCompleted(mainProgress);
    }
    return result;
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) List(java.util.List)

Example 58 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class SplitNumberVectorFilter method filter.

/**
 * Splits every matching vector column into two vector columns.
 *
 * The first new column holds the values at the dimensions listed in
 * {@code dims} (in that order), the second one holds the values of all
 * remaining dimensions in ascending order. Columns that do not satisfy the
 * input type restriction are copied to the result unchanged.
 *
 * @param objects bundle to process
 * @return a new bundle with the split columns; the input bundle itself if it
 *         contains no objects
 * @throws AbortException if more unselected dimensions are found than
 *         expected from the column dimensionality and {@code dims}
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    if (objects.dataLength() == 0) {
        return objects;
    }
    MultipleObjectsBundle result = new MultipleObjectsBundle();
    for (int col = 0; col < objects.metaLength(); col++) {
        @SuppressWarnings("unchecked")
        SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(col);
        @SuppressWarnings("unchecked")
        final List<Object> values = (List<Object>) objects.getColumn(col);
        if (!getInputTypeRestriction().isAssignableFromType(meta)) {
            // Not a column for this filter: pass through.
            result.appendColumn(meta, values);
            continue;
        }
        // Should be a vector type after above test.
        @SuppressWarnings("unchecked")
        final VectorFieldTypeInformation<V> vectorType = VectorFieldTypeInformation.class.cast(meta);
        NumberVector.Factory<V> factory = FilterUtil.guessFactory(vectorType);
        final int totalDim = vectorType.getDimensionality();
        // Type information for the two output columns.
        VectorFieldTypeInformation<V> selectedType = new VectorFieldTypeInformation<>(factory, dims.length);
        VectorFieldTypeInformation<V> remainingType = new VectorFieldTypeInformation<>(factory, totalDim - dims.length);
        // The output lists are registered first and filled in the scan below.
        final List<V> selectedCol = new ArrayList<>(values.size());
        final List<V> remainingCol = new ArrayList<>(values.size());
        result.appendColumn(selectedType, selectedCol);
        result.appendColumn(remainingType, remainingCol);
        // Collect the dimensions that are not listed in dims.
        final int[] remainingDims = new int[totalDim - dims.length];
        int filled = 0;
        candidates: for (int d = 0; d < totalDim; d++) {
            for (int selected : dims) {
                if (selected == d) {
                    continue candidates;
                }
            }
            if (filled >= remainingDims.length) {
                throw new AbortException("Dimensionalities not proper!");
            }
            remainingDims[filled++] = d;
        }
        // Splitting scan.
        for (int row = 0; row < objects.dataLength(); row++) {
            @SuppressWarnings("unchecked")
            final V vector = (V) values.get(row);
            final double[] selectedPart = new double[dims.length];
            final double[] remainingPart = new double[vector.getDimensionality() - dims.length];
            for (int k = 0; k < dims.length; k++) {
                selectedPart[k] = vector.doubleValue(dims[k]);
            }
            for (int k = 0; k < remainingDims.length; k++) {
                remainingPart[k] = vector.doubleValue(remainingDims[k]);
            }
            selectedCol.add(factory.newNumberVector(selectedPart));
            remainingCol.add(factory.newNumberVector(remainingPart));
        }
    }
    return result;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) ArrayList(java.util.ArrayList) List(java.util.List) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 59 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class ClassLabelFilter method filter.

/**
 * Extracts a class label from the first label-list column of the bundle.
 *
 * The first column whose restriction class is {@link LabelList} is split into
 * a class label column (built from the label at {@code classLabelIndex}) and
 * a column with the remaining labels. The remaining-labels column is only
 * added if at least one object has labels left. All other columns, including
 * any further label-list columns, are copied unchanged.
 *
 * A non-negative {@code classLabelIndex} is an offset from the start of the
 * label list. A negative one counts from the end, so -1 selects the last
 * label.
 *
 * @param objects bundle to process
 * @return a new bundle with the class label column added
 * @throws AbortException if the class label cannot be created for an object,
 *         including when the index is outside the object's label list
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    // Find a labellist column
    boolean done = false;
    boolean keeplabelcol = false;
    for (int i = 0; i < objects.metaLength(); i++) {
        SimpleTypeInformation<?> meta = objects.meta(i);
        // Skip non-labellist columns - or if we already had a labellist
        if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
            bundle.appendColumn(meta, objects.getColumn(i));
            continue;
        }
        done = true;
        // We split the label column into two parts
        List<ClassLabel> clscol = new ArrayList<>(objects.dataLength());
        List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
        // Scratch buffer for the labels remaining after the class label is removed.
        ArrayList<String> lbuf = new ArrayList<>();
        // Split the column
        for (Object obj : objects.getColumn(i)) {
            if (obj != null) {
                LabelList ll = (LabelList) obj;
                // Negative indexes are relative to the end of the list, so the
                // (negative) index has to be added to the size, not subtracted.
                int off = (classLabelIndex >= 0) ? classLabelIndex : (ll.size() + classLabelIndex);
                try {
                    ClassLabel lbl = classLabelFactory.makeFromString(ll.get(off));
                    clscol.add(lbl);
                } catch (Exception e) {
                    throw new AbortException("Cannot initialize class labels: " + e.getMessage(), e);
                }
                lbuf.clear();
                for (int j = 0; j < ll.size(); j++) {
                    if (j == off) {
                        continue;
                    }
                    lbuf.add(ll.get(j));
                }
                lblcol.add(LabelList.make(lbuf));
                if (!lbuf.isEmpty()) {
                    keeplabelcol = true;
                }
            } else {
                // Keep both columns aligned with the data for unlabeled objects.
                clscol.add(null);
                lblcol.add(null);
            }
        }
        bundle.appendColumn(classLabelFactory.getTypeInformation(), clscol);
        // Only add the label column when it's not empty.
        if (keeplabelcol) {
            bundle.appendColumn(meta, lblcol);
        }
    }
    return bundle;
}
Also used : LabelList(de.lmu.ifi.dbs.elki.data.LabelList) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 60 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class NoOpFilterTest method passthrough.

/**
 * Loads the same file once with a default-built {@code NoOpFilter} and once
 * without any filter, and checks that the vector field dimensionality and the
 * number of objects are the same in both bundles.
 */
@Test
public void passthrough() {
    final String path = UNITTEST + "normalization-test-1.csv";
    final NoOpFilter noop = new ELKIBuilder<>(NoOpFilter.class).build();
    final MultipleObjectsBundle withFilter = readBundle(path, noop);
    // Reference: the same data loaded without any filter.
    final MultipleObjectsBundle reference = readBundle(path);
    // The filter must not alter the dimensionality of the vector column.
    assertEquals("Dimensionality",
        getFieldDimensionality(reference, 0, TypeUtil.NUMBER_VECTOR_FIELD),
        getFieldDimensionality(withFilter, 0, TypeUtil.NUMBER_VECTOR_FIELD));
    // The filter must neither drop nor add objects.
    assertEquals("Unexpected bundle length", reference.dataLength(), withFilter.dataLength());
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4