Search in sources :

Example 71 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class DropNaNFilter method filter.

/**
 * Builds a new bundle that has the same columns as the input but contains
 * only those rows whose dense-column cells are all non-null and free of NaN
 * components.
 *
 * @param objects input bundle to filter
 * @return a new bundle holding the retained rows
 */
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
    if (LOG.isDebuggingFinest()) {
        LOG.debugFinest("Removing records with NaN values.");
    }
    updateMeta(objects.meta());

    // Mirror the input's column layout in an initially empty output bundle.
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    for (int col = 0; col < objects.metaLength(); col++) {
        result.appendColumn(objects.meta(col), new ArrayList<>());
    }

    for (int rowIdx = 0; rowIdx < objects.dataLength(); rowIdx++) {
        final Object[] cells = objects.getRow(rowIdx);
        boolean keep = true;
        // Visit only the columns whose bit is set in densecols.
        int col = BitsUtil.nextSetBit(densecols, 0);
        while (col >= 0) {
            final NumberVector vec = (NumberVector) cells[col];
            if (vec == null) {
                // A missing vector rejects the row; no further columns are inspected.
                keep = false;
                break;
            }
            for (int dim = 0; dim < vec.getDimensionality(); dim++) {
                if (!Double.isNaN(vec.doubleValue(dim))) {
                    continue;
                }
                // First NaN in this vector rejects the row; only the dimension scan stops here.
                keep = false;
                break;
            }
            col = BitsUtil.nextSetBit(densecols, col + 1);
        }
        if (!keep) {
            continue;
        }
        result.appendSimple(cells);
    }
    return result;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)

Example 72 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class ReplaceNaNWithRandomFilter method filter.

/**
 * Builds a new bundle with the same columns as the input in which every NaN
 * component of a vector in a dense column is replaced by a value drawn from
 * {@code dist}. All rows are kept.
 *
 * Cells that are null, or whose vector contains no NaN, are passed through
 * unchanged; a new vector is only created when at least one component was
 * replaced.
 *
 * @param objects input bundle to process
 * @return a new bundle holding all rows, with NaN components replaced
 */
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
    if (LOG.isDebuggingFinest()) {
        LOG.debugFinest("Replacing NaN values with random values.");
    }
    updateMeta(objects.meta());
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    for (int j = 0; j < objects.metaLength(); j++) {
        bundle.appendColumn(objects.meta(j), new ArrayList<>());
    }
    for (int i = 0; i < objects.dataLength(); i++) {
        final Object[] row = objects.getRow(i);
        for (int j = 0; j < densecols.length; j++) {
            // Only columns with a registered factory are processed.
            if (densecols[j] == null) {
                continue;
            }
            final NumberVector v = (NumberVector) row[j];
            if (v == null) {
                // Nothing to replace in a missing cell; leave it as is.
                continue;
            }
            // Replacement values; allocated lazily on the first NaN found.
            double[] ro = null;
            for (int d = 0; d < v.getDimensionality(); d++) {
                if (Double.isNaN(v.doubleValue(d))) {
                    // Was "ro != null", which never allocated and caused a
                    // NullPointerException on the assignment below.
                    if (ro == null) {
                        ro = v.toArray();
                    }
                    ro[d] = dist.nextRandom();
                }
            }
            // Rebuild the vector only if something changed; passing a null
            // array to the factory is avoided.
            if (ro != null) {
                row[j] = densecols[j].newNumberVector(ro);
            }
        }
        bundle.appendSimple(row);
    }
    return bundle;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4