Search in sources:

Example 16 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

The filter method of the class UncertainifyFilter.

/**
 * Derives uncertain objects from every number-vector-field column of a bundle.
 * <p>
 * Columns that are not number vector fields are copied to the result
 * unchanged. For a number-vector-field column, the source column is copied
 * only when {@code keep} is set; in either case a new column holding the
 * objects produced by {@code generator} is appended.
 *
 * @param objects bundle to process
 * @return {@code objects} itself when it contains no rows, otherwise a newly
 *         assembled bundle
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int rows = objects.dataLength();
    if (rows == 0) {
        return objects;
    }
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    final int columns = objects.metaLength();
    for (int col = 0; col < columns; col++) {
        final SimpleTypeInformation<?> meta = objects.meta(col);
        @SuppressWarnings("unchecked")
        final List<Object> values = (List<Object>) objects.getColumn(col);
        if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
            // Not a vector field: pass the column through untouched.
            result.appendColumn(meta, values);
            continue;
        }
        // The type check above guarantees a vector field; read its dimensionality.
        @SuppressWarnings("unchecked")
        final VectorFieldTypeInformation<NumberVector> vectorMeta = (VectorFieldTypeInformation<NumberVector>) meta;
        final int dim = vectorMeta.getDimensionality();
        if (keep) {
            // Retain the source vectors next to the derived uncertain objects.
            result.appendColumn(meta, values);
        }
        final List<UO> derived = new ArrayList<>(values.size());
        // Progress reporting is only set up when verbose logging is enabled.
        FiniteProgress progress = null;
        if (LOG.isVerbose()) {
            progress = new FiniteProgress("Derive uncertain objects", rows, LOG);
        }
        // Derivation scan: one uncertain object per input vector.
        for (int row = 0; row < rows; row++) {
            final NumberVector vector = (NumberVector) values.get(row);
            derived.add(generator.newFeatureVector(rand, vector, ArrayLikeUtil.NUMBERVECTORADAPTER));
            LOG.incrementProcessed(progress);
        }
        LOG.ensureCompleted(progress);
        // Publish the derived objects as a new column of matching dimensionality.
        result.appendColumn(new VectorFieldTypeInformation<UO>(generator.getFactory(), dim), derived);
    }
    return result;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) UncertainObject(de.lmu.ifi.dbs.elki.data.uncertain.UncertainObject) ArrayList(java.util.ArrayList) List(java.util.List)

Example 17 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

The filter method of the class AbstractSupervisedProjectionVectorFilter.

/**
 * Projects every number-vector-field column of the bundle with a projection
 * matrix computed from that column and the first class label column.
 * <p>
 * Columns that are not number vector fields are passed through unchanged.
 * Successfully projected columns are overwritten in place (the input lists
 * are modified) and appended with the type returned by
 * {@code convertedType}. If computing or applying the projection throws, the
 * error is logged and the column is appended with its original type and its
 * original, unmodified vectors.
 *
 * @param objects bundle to process
 * @return {@code objects} itself when it is empty, has no class label column,
 *         or no column could be projected; otherwise a newly assembled bundle
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int dataLength = objects.dataLength();
    if (dataLength == 0) {
        return objects;
    }
    List<? extends ClassLabel> classcolumn = null;
    // First of all, identify a class label column (the first match wins).
    for (int r = 0; r < objects.metaLength(); r++) {
        SimpleTypeInformation<?> type = objects.meta(r);
        if (TypeUtil.CLASSLABEL.isAssignableFromType(type)) {
            @SuppressWarnings("unchecked") final List<? extends ClassLabel> castcolumn = (List<? extends ClassLabel>) objects.getColumn(r);
            classcolumn = castcolumn;
            break;
        }
    }
    if (classcolumn == null) {
        getLogger().warning("No class label column found (try " + ClassLabelFilter.class.getSimpleName() + ") -- cannot run " + this.getClass().getSimpleName());
        return objects;
    }
    boolean somesuccess = false;
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    // Secondly, look for columns to train the projection on.
    for (int r = 0; r < objects.metaLength(); r++) {
        SimpleTypeInformation<?> type = objects.meta(r);
        List<?> column = objects.getColumn(r);
        if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
            bundle.appendColumn(type, column);
            continue;
        }
        @SuppressWarnings("unchecked") List<V> vectorcolumn = (List<V>) column;
        final VectorFieldTypeInformation<?> vtype = (VectorFieldTypeInformation<?>) type;
        @SuppressWarnings("unchecked") NumberVector.Factory<V> factory = (NumberVector.Factory<V>) vtype.getFactory();
        int dim = vtype.getDimensionality();
        if (tdim > dim) {
            if (getLogger().isVerbose()) {
                getLogger().verbose("Setting projection dimension to original dimension: projection dimension: " + tdim + " larger than original dimension: " + dim);
            }
            // NOTE(review): this lowers the tdim field itself, so the reduced
            // value also applies to later columns and later calls -- confirm
            // that this is intended.
            tdim = dim;
        }
        try {
            double[][] proj = computeProjectionMatrix(vectorcolumn, classcolumn, dim);
            // Project into a scratch list first, so that a failure part-way
            // through leaves the input column untouched. The catch block
            // promises to continue with unprojected data.
            // (Fully qualified because ArrayList may not be imported here.)
            final List<V> projected = new java.util.ArrayList<>(dataLength);
            for (int i = 0; i < dataLength; i++) {
                double[] pv = times(proj, vectorcolumn.get(i).toArray());
                projected.add(factory.newNumberVector(pv));
            }
            // Every row succeeded: overwrite the column in place, as before.
            for (int i = 0; i < dataLength; i++) {
                vectorcolumn.set(i, projected.get(i));
            }
            bundle.appendColumn(convertedType(type, factory), column);
            somesuccess = true;
        } catch (Exception e) {
            getLogger().error("Projection failed -- continuing with unprojected data!", e);
            bundle.appendColumn(type, column);
        }
    }
    if (!somesuccess) {
        getLogger().warning("No vector field of fixed dimensionality found.");
        return objects;
    }
    return bundle;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) List(java.util.List) IntList(it.unimi.dsi.fastutil.ints.IntList) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList)

Example 18 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

The loadData method of the class RandomDoubleVectorDatabaseConnection.

/**
 * Generates {@code size} random vectors of dimensionality {@code dim} and
 * returns them as a bundle built by {@code MultipleObjectsBundle.makeSimple}.
 *
 * @return bundle containing the generated {@code DoubleVector} column
 */
@Override
public MultipleObjectsBundle loadData() {
    final VectorFieldTypeInformation<DoubleVector> meta = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    final List<DoubleVector> generated = new ArrayList<>(size);
    // A single generator instance is shared by all vectors drawn below.
    final Random random = rnd.getSingleThreadedRandom();
    for (int remaining = size; remaining > 0; remaining--) {
        generated.add(VectorUtil.randomVector(DoubleVector.FACTORY, dim, random));
    }
    return MultipleObjectsBundle.makeSimple(meta, generated);
}
Also used : Random(java.util.Random) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) ArrayList(java.util.ArrayList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Example 19 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

The filter method of the class SplitNumberVectorFilter.

/**
 * Splits every column accepted by {@code getInputTypeRestriction()} into two
 * vector columns: the first holds the dimensions listed in {@code dims} (in
 * that order), the second holds all remaining dimensions in ascending order.
 * Other columns are copied to the result unchanged.
 *
 * @param objects bundle to process
 * @return {@code objects} itself when it contains no rows, otherwise a newly
 *         assembled bundle
 * @throws AbortException if more dimensions remain unselected than
 *         {@code dimensionality - dims.length} allows
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int rows = objects.dataLength();
    if (rows == 0) {
        return objects;
    }
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    final int columns = objects.metaLength();
    for (int c = 0; c < columns; c++) {
        @SuppressWarnings("unchecked")
        final SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(c);
        @SuppressWarnings("unchecked")
        final List<Object> values = (List<Object>) objects.getColumn(c);
        if (!getInputTypeRestriction().isAssignableFromType(meta)) {
            // Not a column this filter handles: pass it through.
            result.appendColumn(meta, values);
            continue;
        }
        // The restriction check above is expected to guarantee a vector field type.
        @SuppressWarnings("unchecked")
        final VectorFieldTypeInformation<V> vtype = VectorFieldTypeInformation.class.cast(meta);
        final NumberVector.Factory<V> factory = FilterUtil.guessFactory(vtype);
        final int selectedCount = dims.length;
        // Type information for the two replacement columns.
        final VectorFieldTypeInformation<V> selectedType = new VectorFieldTypeInformation<>(factory, selectedCount);
        final VectorFieldTypeInformation<V> remainingType = new VectorFieldTypeInformation<>(factory, vtype.getDimensionality() - selectedCount);
        final List<V> selectedColumn = new ArrayList<>(values.size());
        final List<V> remainingColumn = new ArrayList<>(values.size());
        // The (still empty) lists are registered now and filled by the scan below.
        result.appendColumn(selectedType, selectedColumn);
        result.appendColumn(remainingType, remainingColumn);
        // Collect the dimensions that are NOT listed in dims, in ascending order.
        final int fullDim = vtype.getDimensionality();
        final int[] rest = new int[fullDim - selectedCount];
        int filled = 0;
        scan: for (int d = 0; d < fullDim; d++) {
            for (int chosen : dims) {
                if (chosen == d) {
                    continue scan;
                }
            }
            if (filled >= rest.length) {
                throw new AbortException("Dimensionalities not proper!");
            }
            rest[filled++] = d;
        }
        // Splitting scan: cut every vector into its two parts.
        for (int row = 0; row < rows; row++) {
            @SuppressWarnings("unchecked")
            final V vector = (V) values.get(row);
            final double[] head = new double[selectedCount];
            final double[] tail = new double[vector.getDimensionality() - selectedCount];
            for (int k = 0; k < selectedCount; k++) {
                head[k] = vector.doubleValue(dims[k]);
            }
            for (int k = 0; k < rest.length; k++) {
                tail[k] = vector.doubleValue(rest[k]);
            }
            selectedColumn.add(factory.newNumberVector(head));
            remainingColumn.add(factory.newNumberVector(tail));
        }
    }
    return result;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) ArrayList(java.util.ArrayList) List(java.util.List) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 20 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

The filter method of the class AttributeWiseCDFNormalization.

/**
 * Replaces, in place, every vector of each number-vector-field column by its
 * attribute-wise CDF values: per dimension, a distribution is chosen via
 * {@code findBestFit} and each value is mapped through that distribution's
 * {@code cdf}.
 * <p>
 * Side effects: the fields {@code factory} and {@code dists} are overwritten
 * for each processed column, and the column lists of {@code objects} are
 * modified. Columns of other types are left untouched.
 *
 * @param objects bundle to normalize
 * @return the same {@code objects} instance
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int rows = objects.dataLength();
    if (rows == 0) {
        return objects;
    }
    final int columns = objects.metaLength();
    for (int c = 0; c < columns; c++) {
        final SimpleTypeInformation<?> meta = objects.meta(c);
        final List<?> rawColumn = objects.getColumn(c);
        if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
            continue;
        }
        @SuppressWarnings("unchecked")
        final List<V> vectors = (List<V>) rawColumn;
        @SuppressWarnings("unchecked")
        final VectorFieldTypeInformation<V> vectorMeta = (VectorFieldTypeInformation<V>) meta;
        factory = FilterUtil.guessFactory(vectorMeta);
        final int dim = vectorMeta.getDimensionality();
        dists = new ArrayList<>(dim);
        // Scratch space for testing; only allocated with more than one estimator.
        double[] scratch = null;
        if (estimators.size() > 1) {
            scratch = new double[vectors.size()];
        }
        // Fitting works one dimension at a time, so the adapter is re-pointed
        // at each dimension in turn (this needs fast random access).
        final Adapter view = new Adapter();
        for (int d = 0; d < dim; d++) {
            view.dim = d;
            Distribution fitted = findBestFit(vectors, view, d, scratch);
            // Constant-zero attributes should remain 0, instead of - usually -
            // becoming constant .5
            if (fitted instanceof UniformDistribution && constantZero(vectors, view)) {
                fitted = new UniformDistribution(0., 1.);
            }
            dists.add(fitted);
        }
        // Normalization scan: map every value through its dimension's CDF.
        // NOTE(review): the buffer is reused for every row; this assumes
        // factory.newNumberVector copies the array -- confirm.
        final double[] buffer = new double[dim];
        for (int row = 0; row < rows; row++) {
            final V vector = vectors.get(row);
            for (int d = 0; d < dim; d++) {
                buffer[d] = dists.get(d).cdf(vector.doubleValue(d));
            }
            vectors.set(row, factory.newNumberVector(buffer));
        }
    }
    return objects;
}
Also used : UniformDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) NumberArrayAdapter(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) UniformDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)22 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)9 ArrayList (java.util.ArrayList)9 List (java.util.List)8 MaterializedRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)7 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)7 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)6 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)6 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)3 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)3 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)2 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 Distribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution)2 Random (java.util.Random)2 ExternalID (de.lmu.ifi.dbs.elki.data.ExternalID)1 IntegerVector (de.lmu.ifi.dbs.elki.data.IntegerVector)1 SimpleClassLabel (de.lmu.ifi.dbs.elki.data.SimpleClassLabel)1