Search in sources :

Example 21 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class StratifiedCrossValidation method nextPartitioning.

/**
 * Build the training and test bundles for the current fold and advance to
 * the next fold.
 *
 * Rows whose entry in {@code assignment} equals the current fold index go to
 * the test bundle; all other rows go to the training bundle. Both bundles
 * receive the same columns (meta information) as the source bundle.
 *
 * @return the partitioning of the current fold, or {@code null} once
 *         {@code fold} has reached {@code nfold}
 */
@Override
public TrainingAndTestSet nextPartitioning() {
    if (fold >= nfold) {
        return null;
    }
    final int numRows = bundle.dataLength();
    final int testSize = sizes[fold];
    final int trainSize = numRows - testSize;
    final int numCols = bundle.metaLength();

    MultipleObjectsBundle training = new MultipleObjectsBundle();
    MultipleObjectsBundle test = new MultipleObjectsBundle();
    // Split one column at a time, so that each column list is filled in row order.
    for (int col = 0; col < numCols; ++col) {
        ArrayList<Object> trainColumn = new ArrayList<>(trainSize);
        ArrayList<Object> testColumn = new ArrayList<>(testSize);
        for (int row = 0; row < numRows; ++row) {
            if (assignment[row] == fold) {
                testColumn.add(bundle.data(row, col));
            } else {
                trainColumn.add(bundle.data(row, col));
            }
        }
        training.appendColumn(bundle.meta(col), trainColumn);
        test.appendColumn(bundle.meta(col), testColumn);
    }
    ++fold;
    return new TrainingAndTestSet(training, test, labels);
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) ArrayList(java.util.ArrayList)

Example 22 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class GeneratorMain method generate.

/**
 * Generate the data set by sampling from every configured cluster generator.
 *
 * The result bundle has three columns: the generated vector, the class label
 * and the cluster model. For each generator, points are drawn until as many
 * points as the generator's size have been kept; points for which the
 * assignment strategy returns a negative cluster index are discarded and
 * replaced by newly generated ones.
 *
 * @return Generated data set
 * @throws AbortException if no generators are configured, or if their
 *         dimensionalities differ
 */
public MultipleObjectsBundle generate() {
    // Without at least one cluster there is nothing to generate.
    if (generators.isEmpty()) {
        throw new AbortException("No clusters specified.");
    }
    // Every generator must have the dimensionality of the first one.
    final int dim = generators.get(0).getDim();
    for (GeneratorInterface gen : generators) {
        if (gen.getDim() != dim) {
            throw new AbortException("Cluster dimensions do not agree.");
        }
    }

    // Set up the output columns: vector, class label, model.
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    bundle.appendColumn(type, new ArrayList<>());
    bundle.appendColumn(TypeUtil.CLASSLABEL, new ArrayList<>());
    bundle.appendColumn(Model.TYPE, new ArrayList<Model>());

    // One label and one model per generator.
    final int numClusters = generators.size();
    ClassLabel[] labels = new ClassLabel[numClusters];
    Model[] models = new Model[numClusters];
    initLabelsAndModels(generators, labels, models, relabelClusters);

    // Choose how generated points are mapped to (or rejected from) clusters.
    final AssignPoint assignment;
    if (testAgainstModel) {
        if (relabelClusters == null) {
            assignment = new TestModel();
        } else if (relabelDistance) {
            assignment = new AssignLabelsByDistance(labels);
        } else {
            assignment = new AssignLabelsByDensity(labels);
        }
    } else {
        assignment = new AssignPoint();
    }

    for (int i = 0; i < labels.length; i++) {
        final GeneratorInterface curclus = generators.get(i);
        assignment.newCluster(i, curclus);
        // Only dynamic generators allow rejection / model testing:
        final GeneratorInterfaceDynamic dynamic;
        if (curclus instanceof GeneratorInterfaceDynamic) {
            dynamic = (GeneratorInterfaceDynamic) curclus;
        } else {
            dynamic = null;
        }
        int kept = 0;
        while (kept < curclus.getSize()) {
            // Request only as many points as are still missing.
            List<double[]> candidates = curclus.generate(curclus.getSize() - kept);
            for (double[] point : candidates) {
                final int target = assignment.getAssignment(i, point);
                if (target >= 0) {
                    bundle.appendSimple(DoubleVector.wrap(point), labels[target], models[target]);
                    ++kept;
                } else {
                    // NOTE(review): dynamic is null for non-dynamic generators;
                    // presumably the assignment never rejects their points - confirm.
                    dynamic.incrementDiscarded();
                }
            }
        }
    }
    return bundle;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) Model(de.lmu.ifi.dbs.elki.data.model.Model) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 23 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class AbstractDatabaseConnection method invokeStreamFilters.

/**
 * Apply all configured filters, in order, to the given stream.
 *
 * Filters that are {@link StreamFilter}s are initialized with a stream as
 * input; all other filters are applied to a {@link MultipleObjectsBundle}.
 * The data is converted between the two representations whenever
 * consecutive filters need a different one.
 *
 * @param stream the objects to process
 * @return processed objects; the input itself if {@code filters} is
 *         {@code null}
 */
protected BundleStreamSource invokeStreamFilters(BundleStreamSource stream) {
    if (filters == null) {
        return stream;
    }
    // Invariant inside the loop: exactly one of the two representations is
    // non-null and holds the current data.
    BundleStreamSource current = stream;
    MultipleObjectsBundle materialized = null;
    for (ObjectFilter filter : filters) {
        if (filter instanceof StreamFilter) {
            final BundleStreamSource input = (current == null) ? materialized.asStream() : current;
            current = ((StreamFilter) filter).init(input);
            materialized = null;
            continue;
        }
        final MultipleObjectsBundle input = (materialized == null) ? current.asMultipleObjectsBundle() : materialized;
        materialized = filter.filter(input);
        current = null;
    }
    if (current == null) {
        return materialized.asStream();
    }
    return current;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ObjectFilter(de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter) StreamFilter(de.lmu.ifi.dbs.elki.datasource.filter.StreamFilter)

Example 24 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class BundleDatabaseConnection method loadData.

/**
 * Read a bundle from {@code infile}, pass it through the stream filters and
 * collect the result into a {@link MultipleObjectsBundle}.
 *
 * The file stream and its channel are opened with try-with-resources, so
 * both are closed even when reading or filtering fails (channel first, then
 * the stream).
 *
 * @return the loaded and filtered bundle
 * @throws AbortException if an {@link IOException} occurs while opening,
 *         reading or closing the file
 */
@Override
public MultipleObjectsBundle loadData() {
    try (FileInputStream fis = new FileInputStream(infile);
        FileChannel channel = fis.getChannel()) {
        // The bundle must be fully collected before the channel is closed.
        return invokeStreamFilters(new BundleReader(channel)).asMultipleObjectsBundle();
    } catch (IOException e) {
        throw new AbortException("IO error loading bundle", e);
    }
}
Also used : FileChannel(java.nio.channels.FileChannel) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BundleReader(de.lmu.ifi.dbs.elki.datasource.bundle.BundleReader) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 25 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class ExternalIDJoinDatabaseConnection method loadData.

/**
 * Load all sources and join them onto the first one by external ID.
 *
 * The first source is the primary: its rows define the result rows. For each
 * further source, every column except its external ID column is appended to
 * the primary bundle, pre-filled with {@code null}, and then filled for the
 * rows whose external ID is also present in the primary source. Rows of
 * further sources without an ID, or with an ID unknown to the primary
 * source, are skipped. Cells that remain {@code null} after the join are
 * only reported as warnings.
 *
 * @return the bundle of the first source, extended in place by the joined
 *         columns
 * @throws AbortException if there are no sources, or if a source has no
 *         external ID column
 */
@Override
public MultipleObjectsBundle loadData() {
    List<MultipleObjectsBundle> bundles = new ArrayList<>(sources.size());
    for (DatabaseConnection dbc : sources) {
        bundles.add(dbc.loadData());
    }
    if (bundles.isEmpty()) {
        // Fail with a clear message instead of an IndexOutOfBoundsException.
        throw new AbortException("No data sources specified to join.");
    }
    MultipleObjectsBundle first = bundles.get(0);
    // Maps external ID -> row index in the first bundle; -1 means "unknown".
    Object2IntOpenHashMap<ExternalID> labelmap = new Object2IntOpenHashMap<>(first.dataLength());
    labelmap.defaultReturnValue(-1);
    // Process first bundle
    {
        final int lblcol = findExternalIDColumn(first);
        if (lblcol == -1) {
            throw new AbortException("No external ID column found in primary source.");
        }
        for (int i = 0; i < first.dataLength(); i++) {
            ExternalID data = (ExternalID) first.data(i, lblcol);
            if (data == null) {
                LOG.debug("Object without ID encountered.");
                continue;
            }
            // On duplicates, the later row replaces the earlier one in the map.
            int old = labelmap.put(data, i);
            if (old != -1) {
                LOG.debug("Duplicate id encountered: " + data + " in rows " + old + " and " + i);
            }
        }
    }
    // Process additional columns
    for (int c = 1; c < bundles.size(); c++) {
        MultipleObjectsBundle cur = bundles.get(c);
        final int lblcol = findExternalIDColumn(cur);
        if (lblcol == -1) {
            // List the column types we did get, to help diagnose the input.
            StringBuilder buf = new StringBuilder();
            for (int i = 0; i < cur.metaLength(); i++) {
                if (buf.length() > 0) {
                    buf.append(',');
                }
                buf.append(cur.meta(i));
            }
            throw new AbortException("No external ID column found in source " + (c + 1) + " to join with. Got: " + buf.toString());
        }
        // Destination columns, indexed like the columns of cur; the entry
        // for the ID column stays null because it is not copied.
        List<ArrayList<Object>> dcol = new ArrayList<>(cur.metaLength());
        for (int i = 0; i < cur.metaLength(); i++) {
            // Skip the label columns
            if (i == lblcol) {
                dcol.add(null);
                continue;
            }
            ArrayList<Object> newcol = new ArrayList<>(first.dataLength());
            // Pre-fill with nulls.
            for (int j = 0; j < first.dataLength(); j++) {
                newcol.add(null);
            }
            first.appendColumn(cur.meta(i), newcol);
            dcol.add(newcol);
        }
        for (int i = 0; i < cur.dataLength(); i++) {
            ExternalID data = (ExternalID) cur.data(i, lblcol);
            if (data == null) {
                LOG.warning("Object without label encountered.");
                continue;
            }
            int row = labelmap.getInt(data);
            if (row == -1) {
                LOG.debug("ID not found for join: " + data + " in row " + i);
                continue;
            }
            for (int d = 0; d < cur.metaLength(); d++) {
                if (d == lblcol) {
                    continue;
                }
                List<Object> col = dcol.get(d);
                assert (col != null);
                col.set(row, cur.data(i, d));
            }
        }
    }
    // Report every row that still contains a null cell (once per row).
    for (int i = 0; i < first.dataLength(); i++) {
        for (int d = 0; d < first.metaLength(); d++) {
            if (first.data(i, d) == null) {
                StringBuilder buf = new StringBuilder();
                for (int d2 = 0; d2 < first.metaLength(); d2++) {
                    if (buf.length() > 0) {
                        buf.append(", ");
                    }
                    // StringBuilder.append(Object) renders null as "null".
                    buf.append(first.data(i, d2));
                }
                LOG.warning("null value in joined data, row " + i + " column " + d + FormatUtil.NEWLINE + "[" + buf.toString() + "]");
                break;
            }
        }
    }
    return first;
}

/**
 * Find the first column of a bundle whose type is accepted by
 * {@code TypeUtil.EXTERNALID}.
 *
 * @param bundle the bundle to inspect
 * @return index of the first external ID column, or {@code -1} if none
 */
private static int findExternalIDColumn(MultipleObjectsBundle bundle) {
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (TypeUtil.EXTERNALID.isAssignableFromType(bundle.meta(i))) {
            return i;
        }
    }
    return -1;
}
Also used : ExternalID(de.lmu.ifi.dbs.elki.data.ExternalID) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4