Search in sources :

Example 81 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class RandomSampleReferencePoints method getReferencePoints.

/**
 * Picks reference points by drawing a random sample from the relation.
 * <p>
 * When the configured sample size is not smaller than the relation, a warning
 * is logged and the whole relation is returned, wrapped in a
 * {@code RelationUtil.CollectionFromRelation}.
 *
 * @param db relation to draw the reference points from
 * @return the sampled vectors, or a collection over the entire relation
 */
@Override
public Collection<? extends NumberVector> getReferencePoints(Relation<? extends NumberVector> db) {
    if (samplesize < db.size()) {
        final DBIDs chosen = DBIDUtil.randomSample(db.getDBIDs(), samplesize, rnd);
        final ArrayList<NumberVector> points = new ArrayList<>(chosen.size());
        DBIDIter cursor = chosen.iter();
        while (cursor.valid()) {
            points.add(db.get(cursor));
            cursor.advance();
        }
        return points;
    }
    // Nothing to sample: the request covers (at least) the whole relation.
    LoggingUtil.warning("Requested sample size is larger than database size!");
    return new RelationUtil.CollectionFromRelation<>(db);
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 82 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class StarBasedReferencePoints method getReferencePoints.

/**
 * Builds a "star" of reference points around the centroid of the relation.
 * <p>
 * For every dimension the per-axis minimum and maximum are pushed away from
 * (or pulled towards) the centroid by the factor {@code scale}. The result
 * holds, for each axis, the centroid with that one coordinate replaced by the
 * scaled minimum and by the scaled maximum. Unless {@code nocenter} is set,
 * the centroid itself comes first.
 *
 * @param db relation to derive the reference points from
 * @return up to {@code 2 * dim + 1} reference vectors
 */
@Override
public Collection<? extends NumberVector> getReferencePoints(Relation<? extends NumberVector> db) {
    final int dim = RelationUtil.dimensionality(db);
    // Coordinate sums (turned into the centroid below); Java zero-fills new arrays.
    final double[] centroid = new double[dim];
    final double[] min = new double[dim];
    final double[] max = new double[dim];
    for (int k = 0; k < dim; k++) {
        min[k] = Double.MAX_VALUE;
        max[k] = -Double.MAX_VALUE;
    }
    // Single pass over the data: accumulate sums and track the extrema per axis.
    DBIDIter cursor = db.iterDBIDs();
    while (cursor.valid()) {
        final NumberVector point = db.get(cursor);
        for (int k = 0; k < dim; k++) {
            final double coord = point.doubleValue(k);
            centroid[k] += coord;
            min[k] = Math.min(min[k], coord);
            max[k] = Math.max(max[k], coord);
        }
        cursor.advance();
    }
    // Turn sums into means, then scale the extrema relative to the centroid.
    final int count = db.size();
    for (int k = 0; k < dim; k++) {
        centroid[k] = centroid[k] / count;
        min[k] = (min[k] - centroid[k]) * scale + centroid[k];
        max[k] = (max[k] - centroid[k]) * scale + centroid[k];
    }
    final ArrayList<DoubleVector> star = new ArrayList<>(2 * dim + 1);
    if (!nocenter) {
        star.add(DoubleVector.wrap(centroid));
    }
    // Two end points per axis, each differing from the centroid in one coordinate.
    for (int axis = 0; axis < dim; axis++) {
        final double[] low = centroid.clone();
        low[axis] = min[axis];
        star.add(DoubleVector.wrap(low));
        final double[] high = centroid.clone();
        high[axis] = max[axis];
        star.add(DoubleVector.wrap(high));
    }
    return star;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ArrayList(java.util.ArrayList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 83 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class DropNaNFilter method filter.

/**
 * Copies the bundle, dropping every row that has a missing or NaN-containing
 * vector in one of the columns flagged in {@code densecols}.
 * <p>
 * A row is rejected as soon as the first offending value is found; the
 * remaining columns of that row are not inspected.
 *
 * @param objects input bundle
 * @return a new bundle with the same columns, holding only the accepted rows
 */
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
    if (LOG.isDebuggingFinest()) {
        LOG.debugFinest("Removing records with NaN values.");
    }
    updateMeta(objects.meta());
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    for (int j = 0; j < objects.metaLength(); j++) {
        bundle.appendColumn(objects.meta(j), new ArrayList<>());
    }
    rows: for (int i = 0; i < objects.dataLength(); i++) {
        final Object[] row = objects.getRow(i);
        // Visit only the columns whose bit is set in densecols.
        for (int j = BitsUtil.nextSetBit(densecols, 0); j >= 0; j = BitsUtil.nextSetBit(densecols, j + 1)) {
            final NumberVector v = (NumberVector) row[j];
            if (v == null) {
                // A missing vector disqualifies the row.
                continue rows;
            }
            for (int d = 0; d < v.getDimensionality(); d++) {
                if (Double.isNaN(v.doubleValue(d))) {
                    // One NaN is enough; skip the rest of this row entirely.
                    continue rows;
                }
            }
        }
        bundle.appendSimple(row);
    }
    return bundle;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)

Example 84 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class ReplaceNaNWithRandomFilter method filter.

/**
 * Copies the bundle, replacing every NaN coordinate of the vectors in the
 * columns selected by {@code densecols} with a value drawn from {@code dist}.
 * <p>
 * Vectors without any NaN, and {@code null} entries, are passed through
 * unchanged. A replacement vector is only built (via the column's
 * {@code densecols[j].newNumberVector}) when at least one coordinate was
 * actually replaced.
 *
 * @param objects input bundle; its row arrays are updated in place before
 *        being appended to the result
 * @return a new bundle with the same columns and the repaired rows
 */
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
    if (LOG.isDebuggingFinest()) {
        LOG.debugFinest("Replacing NaN values with random values.");
    }
    updateMeta(objects.meta());
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    for (int j = 0; j < objects.metaLength(); j++) {
        bundle.appendColumn(objects.meta(j), new ArrayList<>());
    }
    for (int i = 0; i < objects.dataLength(); i++) {
        final Object[] row = objects.getRow(i);
        for (int j = 0; j < densecols.length; j++) {
            if (densecols[j] == null) {
                // Column is not selected for processing.
                continue;
            }
            final NumberVector v = (NumberVector) row[j];
            if (v == null) {
                // Nothing to repair; keep the missing value as it is.
                continue;
            }
            // Replacement coordinates, allocated lazily on the first NaN.
            // NOTE(review): relies on v.toArray() returning an array that is
            // safe to modify - confirm against NumberVector's contract.
            double[] ro = null;
            for (int d = 0; d < v.getDimensionality(); d++) {
                if (Double.isNaN(v.doubleValue(d))) {
                    if (ro == null) {
                        ro = v.toArray();
                    }
                    ro[d] = dist.nextRandom();
                }
            }
            // If there was no NaN, ro is still null and the vector is kept.
            if (ro != null) {
                row[j] = densecols[j].newNumberVector(ro);
            }
        }
        bundle.appendSimple(row);
    }
    return bundle;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)

Example 85 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class ReplaceNaNWithRandomFilter method nextEvent.

/**
 * Pulls the next event from {@code source}, replacing NaN coordinates of the
 * current object on the way.
 * <p>
 * On {@code NEXT_OBJECT} the buffer {@code rows} is refilled with exactly one
 * entry per column of {@code densecols}, in column order. For selected
 * columns (non-null {@code densecols[j]}), a vector containing NaN is
 * replaced by a copy whose NaN coordinates are drawn from {@code dist}.
 * Vectors without NaN and {@code null} values are stored unchanged, so the
 * position in {@code rows} always matches the column index.
 *
 * @return the event reported by the source
 */
@Override
public Event nextEvent() {
    while (true) {
        Event ev = source.nextEvent();
        // Any event not handled below is ignored and the next one is polled.
        switch(ev) {
            case END_OF_STREAM:
                return ev;
            case META_CHANGED:
                updateMeta(source.getMeta());
                return ev;
            case NEXT_OBJECT:
                if (densecols == null) {
                    // No meta event seen yet: initialize from the current meta.
                    updateMeta(source.getMeta());
                }
                rows.clear();
                for (int j = 0; j < densecols.length; j++) {
                    Object o = source.data(j);
                    // A null value is kept in its slot rather than skipped;
                    // skipping would shift every later column by one.
                    if (densecols[j] != null && o != null) {
                        final NumberVector v = (NumberVector) o;
                        // Replacement coordinates, allocated on the first NaN.
                        double[] ro = null;
                        for (int i = 0; i < v.getDimensionality(); i++) {
                            if (Double.isNaN(v.doubleValue(i))) {
                                if (ro == null) {
                                    ro = v.toArray();
                                }
                                ro[i] = dist.nextRandom();
                            }
                        }
                        // If there was no NaN, ro will still be null.
                        if (ro != null) {
                            o = densecols[j].newNumberVector(ro);
                        }
                    }
                    rows.add(o);
                }
                return ev;
        }
    }
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector)

Aggregations

NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)85 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)40 ArrayList (java.util.ArrayList)16 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)9 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)8 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)8 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)8 Database (de.lmu.ifi.dbs.elki.database.Database)7 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)7 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)7 Random (java.util.Random)7 Test (org.junit.Test)7 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)5 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)5 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)5 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)5 List (java.util.List)5 SparseNumberVector (de.lmu.ifi.dbs.elki.data.SparseNumberVector)4 RandomProjectionFamily (de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily)4