Search in sources :

Example 6 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

the class RepresentativeUncertainClustering method runClusteringAlgorithm.

/**
 * Cluster one sampled instance of the data and register the outcome.
 * <p>
 * The sample is wrapped into a relation, clustered inside a temporary proxy
 * database, detached from that database's hierarchy and finally attached
 * below {@code parent} in the supplied hierarchy.
 *
 * @param hierarchy Result hierarchy that receives the sample and its clustering
 * @param parent Parent result to attach to
 * @param ids Object IDs to process
 * @param store Input data
 * @param dim Dimensionality
 * @param title Title of relation
 * @return Clustering result
 */
protected Clustering<?> runClusteringAlgorithm(ResultHierarchy hierarchy, Result parent, DBIDs ids, DataStore<DoubleVector> store, int dim, String title) {
    SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    Relation<DoubleVector> instance = new MaterializedRelation<>(vectorType, ids, title, store);
    ProxyDatabase proxy = new ProxyDatabase(ids, instance);
    Clustering<?> clustering = samplesAlgorithm.run(proxy);
    // Detach both results from the temporary database first ...
    proxy.getHierarchy().remove(instance);
    proxy.getHierarchy().remove(clustering);
    // ... then hang them into the caller's hierarchy: parent -> sample -> clustering.
    hierarchy.add(parent, instance);
    hierarchy.add(instance, clustering);
    return clustering;
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 7 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

the class GeneratorMain method generate.

/**
 * Main loop to generate data set.
 * <p>
 * Verifies that at least one generator exists and that all generators agree
 * on the dimensionality, then draws points cluster by cluster until each
 * generator has contributed {@code getSize()} accepted points. Every
 * accepted point is stored together with the class label and model of the
 * cluster it was assigned to.
 *
 * @return Generated data set
 * @throws AbortException if no generators are configured or their
 *         dimensionalities differ
 */
public MultipleObjectsBundle generate() {
    // Without any cluster generator there is nothing to produce.
    if (generators.isEmpty()) {
        throw new AbortException("No clusters specified.");
    }
    // The first generator defines the dimensionality all others must match.
    final int dim = generators.get(0).getDim();
    for (GeneratorInterface gen : generators) {
        if (gen.getDim() != dim) {
            throw new AbortException("Cluster dimensions do not agree.");
        }
    }
    // Output bundle with three columns: vector, class label, model.
    MultipleObjectsBundle result = new MultipleObjectsBundle();
    VectorFieldTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    result.appendColumn(vectorType, new ArrayList<>());
    result.appendColumn(TypeUtil.CLASSLABEL, new ArrayList<>());
    result.appendColumn(Model.TYPE, new ArrayList<Model>());
    // One label and one model per generator.
    final int numClusters = generators.size();
    ClassLabel[] labels = new ClassLabel[numClusters];
    Model[] models = new Model[numClusters];
    initLabelsAndModels(generators, labels, models, relabelClusters);
    // Choose how generated points are mapped to (or rejected from) clusters.
    final AssignPoint assignment;
    if (testAgainstModel) {
        if (relabelClusters != null) {
            if (relabelDistance) {
                assignment = new AssignLabelsByDistance(labels);
            } else {
                assignment = new AssignLabelsByDensity(labels);
            }
        } else {
            assignment = new TestModel();
        }
    } else {
        assignment = new AssignPoint();
    }
    for (int i = 0; i < labels.length; i++) {
        final GeneratorInterface source = generators.get(i);
        assignment.newCluster(i, source);
        // Only dynamic generators allow rejection / model testing:
        GeneratorInterfaceDynamic dynamic = null;
        if (source instanceof GeneratorInterfaceDynamic) {
            dynamic = (GeneratorInterfaceDynamic) source;
        }
        int accepted = 0;
        while (accepted < source.getSize()) {
            // Request exactly as many points as are still missing.
            List<double[]> batch = source.generate(source.getSize() - accepted);
            for (double[] point : batch) {
                final int target = assignment.getAssignment(i, point);
                if (target >= 0) {
                    result.appendSimple(DoubleVector.wrap(point), labels[target], models[target]);
                    ++accepted;
                } else {
                    // NOTE(review): dynamic is null for non-dynamic generators, so a
                    // rejection here would throw a NullPointerException; presumably
                    // the assignment never rejects points of such generators - confirm.
                    dynamic.incrementDiscarded();
                }
            }
        }
    }
    return result;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) Model(de.lmu.ifi.dbs.elki.data.model.Model) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 8 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

the class ArrayAdapterDatabaseConnection method loadData.

/**
 * Wrap the in-memory {@code data} array (and the optional {@code labels})
 * into an object bundle and pass it through the configured bundle filters.
 * <p>
 * The vector column's type information records the smallest and largest
 * row length found in {@code data}. For an empty array both bounds are 0.
 *
 * @return the bundle returned by {@code invokeBundleFilters}
 * @throws AbortException if labels are given but their number differs from
 *         the number of data rows
 */
@Override
public MultipleObjectsBundle loadData() {
    MultipleObjectsBundle b = new MultipleObjectsBundle();
    if (startid != null) {
        // Use a static DBID range beginning at the requested start id.
        b.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(startid, data.length));
    }
    int mind = Integer.MAX_VALUE, maxd = 0;
    List<DoubleVector> vecs = new ArrayList<>(data.length);
    for (int i = 0; i < data.length; i++) {
        final int d = data[i].length;
        mind = Math.min(mind, d);
        maxd = Math.max(maxd, d);
        vecs.add(DoubleVector.wrap(data[i]));
    }
    // With no rows, mind is still Integer.MAX_VALUE; clamp it so the type
    // never advertises a minimum dimensionality above the maximum.
    if (mind > maxd) {
        mind = maxd;
    }
    SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, mind, maxd, DoubleVector.FACTORY.getDefaultSerializer());
    b.appendColumn(type, vecs);
    if (labels != null) {
        if (labels.length != data.length) {
            throw new AbortException("Label and DBID columns must have the same size.");
        }
        b.appendColumn(TypeUtil.STRING, Arrays.asList(labels));
    }
    return invokeBundleFilters(b);
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 9 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

the class AttributeWiseBetaNormalization method filter.

/**
 * Normalize every number-vector-field column of the bundle in place.
 * <p>
 * For each dimension the best fitting distribution is determined with
 * {@code findBestFit}. Each value is then passed through the CDF of its
 * dimension's distribution and through the quantile function of a
 * Beta(p, p) distribution, where p = alpha^(-1 / sqrt(dim)). Columns of
 * other types are left untouched. The {@code factory} and {@code dists}
 * members are overwritten for every processed column.
 *
 * @param objects bundle to normalize
 * @return the same bundle instance, with vector columns replaced
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    // Nothing to fit on an empty bundle.
    if (objects.dataLength() == 0) {
        return objects;
    }
    for (int col = 0; col < objects.metaLength(); col++) {
        SimpleTypeInformation<?> meta = (SimpleTypeInformation<?>) objects.meta(col);
        final List<?> rawColumn = (List<?>) objects.getColumn(col);
        // Only vector field columns are normalized.
        if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
            continue;
        }
        @SuppressWarnings("unchecked") final List<V> vectors = (List<V>) rawColumn;
        // The type check above justifies the cast of the type information.
        @SuppressWarnings("unchecked") final VectorFieldTypeInformation<V> vectorType = (VectorFieldTypeInformation<V>) meta;
        factory = FilterUtil.guessFactory(vectorType);
        final int dim = vectorType.getDimensionality();
        dists = new ArrayList<>(dim);
        // Scratch buffer, one slot per object, handed to findBestFit.
        double[] scratch = new double[vectors.size()];
        // We iterate over dimensions, this kind of filter needs fast random
        // access.
        Adapter accessor = new Adapter();
        for (int k = 0; k < dim; k++) {
            accessor.dim = k;
            Distribution fitted = findBestFit(vectors, accessor, k, scratch);
            if (LOG.isVerbose()) {
                LOG.verbose("Best fit for dimension " + k + ": " + fitted.toString());
            }
            dists.add(fitted);
        }
        // Symmetric beta distribution used for the projection.
        double shape = FastMath.pow(alpha, -1 / FastMath.sqrt(dim));
        BetaDistribution projection = new BetaDistribution(shape, shape);
        // Second pass: replace each vector by its normalized counterpart.
        double[] normalized = new double[dim];
        for (int row = 0; row < objects.dataLength(); row++) {
            final V current = vectors.get(row);
            for (int k = 0; k < dim; k++) {
                // TODO: when available, use logspace for better numerical precision!
                final double quantileInput = dists.get(k).cdf(current.doubleValue(k));
                normalized[k] = projection.quantile(quantileInput);
            }
            vectors.set(row, factory.newNumberVector(normalized));
        }
    }
    return objects;
}
Also used : SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) BetaDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.BetaDistribution) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) BetaDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.BetaDistribution) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) ArrayList(java.util.ArrayList) List(java.util.List)

Example 10 with VectorFieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation in project elki by elki-project.

the class IntegerRankTieNormalization method filter.

/**
 * Replace every number-vector-field column by an integer vector column of
 * rank positions; all other columns are copied to the output unchanged.
 * <p>
 * For each dimension the objects are sorted by their value. A group of
 * tied values spanning sorted positions {@code first..last} is assigned
 * the position {@code first + last} (i.e. twice the average position),
 * which keeps the result integral.
 *
 * @param objects input bundle
 * @return a new bundle with rank-transformed vector columns
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int size = objects.dataLength();
    MultipleObjectsBundle result = new MultipleObjectsBundle();
    // Permutation of object indexes, re-sorted for every dimension.
    int[] perm = new int[size];
    for (int idx = 0; idx < size; idx++) {
        perm[idx] = idx;
    }
    Sorter sorter = new Sorter();
    for (int col = 0; col < objects.metaLength(); col++) {
        final SimpleTypeInformation<?> meta = objects.meta(col);
        final List<?> rawColumn = objects.getColumn(col);
        if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
            // Pass non-vector columns through untouched.
            result.appendColumn(meta, rawColumn);
            continue;
        }
        @SuppressWarnings("unchecked") final List<? extends NumberVector> vectors = (List<? extends NumberVector>) rawColumn;
        // Output type: integer vectors of the same dimensionality.
        final int dim = ((VectorFieldTypeInformation<?>) meta).getDimensionality();
        final VectorFieldTypeInformation<IntegerVector> rankType = new VectorFieldTypeInformation<>(IntegerVector.STATIC, dim);
        // ranks[object][dimension]
        int[][] ranks = new int[size][dim];
        for (int k = 0; k < dim; k++) {
            // Order the objects by their value in dimension k.
            sorter.setup(vectors, k);
            IntegerArrayQuickSort.sort(perm, sorter);
            // Walk over the sorted order one tie group at a time.
            int first = 0;
            while (first < perm.length) {
                final double value = vectors.get(perm[first]).doubleValue(k);
                // Extend the group until a strictly larger value is found.
                int stop = first + 1;
                while (stop < perm.length) {
                    if (value < vectors.get(perm[stop]).doubleValue(k)) {
                        break;
                    }
                    stop++;
                }
                // Sum of first and last sorted position of the group.
                final int rank = first + stop - 1;
                for (int idx = first; idx < stop; idx++) {
                    ranks[perm[idx]][k] = rank;
                }
                first = stop;
            }
        }
        // Wrap the rank arrays into output vectors.
        final List<IntegerVector> rankColumn = new ArrayList<>(size);
        for (int idx = 0; idx < size; idx++) {
            rankColumn.add(new IntegerVector(ranks[idx]));
        }
        result.appendColumn(rankType, rankColumn);
    }
    return result;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) IntegerVector(de.lmu.ifi.dbs.elki.data.IntegerVector) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) List(java.util.List) ArrayList(java.util.ArrayList)

Aggregations

VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation): 22, DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 9, ArrayList (java.util.ArrayList): 9, List (java.util.List): 8, MaterializedRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation): 7, MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 7, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 6, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 6, SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation): 5, ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase): 4, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 4, DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 3, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 3, ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel): 2, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 2, Distribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution): 2, Random (java.util.Random): 2, ExternalID (de.lmu.ifi.dbs.elki.data.ExternalID): 1, IntegerVector (de.lmu.ifi.dbs.elki.data.IntegerVector): 1, SimpleClassLabel (de.lmu.ifi.dbs.elki.data.SimpleClassLabel): 1