Search in sources :

Example 26 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The class ConvertToBundleApplication, method run.

/**
 * Loads the input data as a bundle and serializes it to the output file.
 *
 * The output stream and its channel are opened in a try-with-resources
 * statement, so both are closed even when writing the bundle fails. I/O
 * errors are reported through the logger and are not propagated.
 */
@Override
public void run() {
    if (LOG.isVerbose()) {
        LOG.verbose("Loading data.");
    }
    MultipleObjectsBundle bundle = input.loadData();
    if (LOG.isVerbose()) {
        LOG.verbose("Serializing to output file: " + outfile.toString());
    }
    // TODO: make configurable?
    BundleWriter writer = new BundleWriter();
    // Resources are closed in reverse declaration order: channel first, then the stream.
    try (FileOutputStream fos = new FileOutputStream(outfile);
        FileChannel channel = fos.getChannel()) {
        writer.writeBundleStream(bundle.asStream(), channel);
    } catch (IOException e) {
        LOG.exception("IO Error", e);
    }
}
Also used : FileChannel(java.nio.channels.FileChannel) FileOutputStream(java.io.FileOutputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) BundleWriter(de.lmu.ifi.dbs.elki.datasource.bundle.BundleWriter) IOException(java.io.IOException)

Example 27 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The class ArrayAdapterDatabaseConnection, method loadData.

/**
 * Builds a bundle from the in-memory array data.
 *
 * Every row of {@code data} is wrapped as a {@link DoubleVector}; the vector
 * column's type information records the smallest and largest row length
 * seen. If a start id is set, a static DBID range of {@code data.length}
 * ids is assigned. If labels are set, they are appended as a string column.
 * The bundle is finally passed through {@code invokeBundleFilters}.
 *
 * @return the bundle returned by {@code invokeBundleFilters}
 * @throws AbortException if labels are present but their count differs from
 *         the number of data rows
 */
@Override
public MultipleObjectsBundle loadData() {
    final MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    if (startid != null) {
        bundle.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(startid, data.length));
    }
    // Wrap each row while tracking the minimum and maximum row length.
    int minDim = Integer.MAX_VALUE;
    int maxDim = 0;
    final List<DoubleVector> vectors = new ArrayList<>(data.length);
    for (final double[] row : data) {
        minDim = Math.min(minDim, row.length);
        maxDim = Math.max(maxDim, row.length);
        vectors.add(DoubleVector.wrap(row));
    }
    final SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, minDim, maxDim, DoubleVector.FACTORY.getDefaultSerializer());
    bundle.appendColumn(vectorType, vectors);
    if (labels != null) {
        if (labels.length != data.length) {
            throw new AbortException("Label and DBID columns must have the same size.");
        }
        bundle.appendColumn(TypeUtil.STRING, Arrays.asList(labels));
    }
    return invokeBundleFilters(bundle);
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 28 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The class IntegerRankTieNormalization, method filter.

/**
 * Replaces every number-vector column by a column of integer rank vectors.
 *
 * Columns whose type is not assignable to {@code TypeUtil.NUMBER_VECTOR_FIELD}
 * are copied to the output bundle unchanged. For each remaining column, every
 * dimension is sorted independently and each object receives, for that
 * dimension, the value {@code first + last} where {@code first} and
 * {@code last} are the 0-based sorted positions of the first and last member
 * of its run of tied values. This is twice the mean position of the run,
 * so the result is always an integer; an untied object at position
 * {@code p} gets {@code 2 * p}.
 *
 * @param objects input bundle
 * @return a new bundle with the same number of columns as the input
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int len = objects.dataLength();
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    // Index permutation that gets sorted; starts as the identity and is reused
    // (not reset) for every dimension and column, each sort simply re-permutes it.
    int[] order = new int[len];
    for (int i = 0; i < len; i++) {
        order[i] = i;
    }
    Sorter comparator = new Sorter();
    for (int r = 0; r < objects.metaLength(); r++) {
        final SimpleTypeInformation<?> type = objects.meta(r);
        final List<?> column = objects.getColumn(r);
        // Pass through columns that are not number vector fields.
        if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
            bundle.appendColumn(type, column);
            continue;
        }
        // The type check above is what justifies this unchecked cast.
        @SuppressWarnings("unchecked") final List<? extends NumberVector> castColumn = (List<? extends NumberVector>) column;
        // Get the replacement type information: integer vectors of the same dimensionality
        final int dim = ((VectorFieldTypeInformation<?>) type).getDimensionality();
        final VectorFieldTypeInformation<IntegerVector> outType = new VectorFieldTypeInformation<>(IntegerVector.STATIC, dim);
        // Output vectors, indexed [object][dimension]
        int[][] posvecs = new int[len][dim];
        // Sort for each dimension
        for (int d = 0; d < dim; d++) {
            // Sort the index array for dimension d.
            // NOTE(review): presumably Sorter orders indexes ascending by
            // castColumn value in dimension d - confirm against Sorter.
            comparator.setup(castColumn, d);
            IntegerArrayQuickSort.sort(order, comparator);
            // Transfer positions to output vectors, one run of ties at a time;
            // sta is advanced at the end of the body, hence no increment here.
            for (int sta = 0; sta < order.length; ) {
                double v = castColumn.get(order[sta]).doubleValue(d);
                // Compute ties: extend the run [sta, end) while the next value
                // is not strictly greater than v.
                int end = sta + 1;
                while (end < order.length && !(v < castColumn.get(order[end]).doubleValue(d))) {
                    end++;
                }
                // First plus last position of the run, i.e. twice the mean position.
                final int pos = (sta + end - 1);
                for (int i = sta; i < end; i++) {
                    posvecs[order[i]][d] = pos;
                }
                sta = end;
            }
        }
        // Prepare output data
        final List<IntegerVector> outColumn = new ArrayList<>(len);
        for (int i = 0; i < len; i++) {
            outColumn.add(new IntegerVector(posvecs[i]));
        }
        bundle.appendColumn(outType, outColumn);
    }
    return bundle;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) IntegerVector(de.lmu.ifi.dbs.elki.data.IntegerVector) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) List(java.util.List)

Example 29 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The class ClassicMultidimensionalScalingTransform, method filter.

/**
 * Applies the transform to every column accepted by the distance function.
 *
 * An empty input bundle is returned as is. Columns whose type does not
 * satisfy the distance function's input type restriction are copied to the
 * output unchanged. For each accepted column, a squared distance matrix is
 * computed and double-centered, a singular value decomposition is taken, and
 * each object is replaced by a {@code tdim}-dimensional vector built from
 * its row of U scaled by the singular values (square-rooted unless the
 * distance function reports itself as squared).
 *
 * Note that the replacement vectors are written into the input column list
 * itself, which is the same list that was appended to the output bundle.
 *
 * @param objects input bundle
 * @return the input bundle if it is empty, otherwise a new bundle
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int numObjects = objects.dataLength();
    if (numObjects == 0) {
        return objects;
    }
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    for (int col = 0; col < objects.metaLength(); col++) {
        @SuppressWarnings("unchecked") SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(col);
        @SuppressWarnings("unchecked") final List<Object> values = (List<Object>) objects.getColumn(col);
        final boolean accepted = dist.getInputTypeRestriction().isAssignableFromType(meta);
        if (!accepted) {
            // Not usable with the distance function: pass through untouched.
            result.appendColumn(meta, values);
            continue;
        }
        // Register the column under its replacement type; contents are overwritten below.
        @SuppressWarnings("unchecked") final List<I> inputs = (List<I>) values;
        result.appendColumn(new VectorFieldTypeInformation<>(factory, tdim), inputs);
        final StepProgress progress = LOG.isVerbose() ? new StepProgress("Classic MDS", 2) : null;

        // Step 1: double-centered squared distance matrix.
        LOG.beginStep(progress, 1, "Computing distance matrix");
        final double[][] distances = computeSquaredDistanceMatrix(inputs, dist);
        doubleCenterSymmetric(distances);

        // Step 2: decomposition and projection.
        LOG.beginStep(progress, 2, "Computing singular value decomposition");
        final SingularValueDecomposition svd = new SingularValueDecomposition(distances);
        final double[][] u = svd.getU();
        final double[] scale = svd.getSingularValues();
        // Undo squared, unless we were given a squared distance function:
        if (!dist.isSquared()) {
            for (int k = 0; k < tdim; k++) {
                scale[k] = FastMath.sqrt(Math.abs(scale[k]));
            }
        }
        // Scratch buffer shared by all rows.
        final double[] coords = new double[tdim];
        for (int obj = 0; obj < numObjects; obj++) {
            final double[] uRow = u[obj];
            for (int k = 0; k < coords.length; k++) {
                coords[k] = scale[k] * uRow[k];
            }
            values.set(obj, factory.newNumberVector(coords));
        }
        LOG.setCompleted(progress);
    }
    return result;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) List(java.util.List) SingularValueDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.SingularValueDecomposition)

Example 30 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

The class SortByLabelFilter, method filter.

/**
 * Returns a copy of the bundle with all rows reordered by label.
 *
 * The label column is located via {@code FilterUtil.findLabelColumn}; rows
 * are ordered by comparing the string form of their labels, ignoring case.
 * Every column of the input is copied into the output in the new row order.
 *
 * @param objects input bundle
 * @return a new bundle holding the same columns in sorted row order
 * @throws AbortException if the bundle has no label column
 */
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
    if (LOG.isDebugging()) {
        LOG.debug("Sorting the data set");
    }
    // Identity permutation over the row indexes; sorting it avoids moving row data around.
    final int numRows = objects.dataLength();
    final int[] permutation = new int[numRows];
    for (int row = 0; row < numRows; row++) {
        permutation[row] = row;
    }
    // A label column is required to define the order.
    final int labelColumn = FilterUtil.findLabelColumn(objects);
    if (labelColumn == -1) {
        throw new AbortException("No label column found - cannot sort by label.");
    }
    final IntegerComparator byLabel = new IntegerComparator() {

        @Override
        public int compare(int first, int second) {
            final String left = objects.data(first, labelColumn).toString();
            final String right = objects.data(second, labelColumn).toString();
            return left.compareToIgnoreCase(right);
        }
    };
    IntegerArrayQuickSort.sort(permutation, byLabel);
    // Rebuild each column following the sorted permutation.
    final MultipleObjectsBundle sorted = new MultipleObjectsBundle();
    for (int col = 0; col < objects.metaLength(); col++) {
        final List<?> source = objects.getColumn(col);
        final List<Object> reordered = new ArrayList<>(numRows);
        for (final int from : permutation) {
            reordered.add(source.get(from));
        }
        sorted.appendColumn(objects.meta(col), reordered);
    }
    return sorted;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) IntegerComparator(de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4