Search in sources :

Example 96 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

In class PresortedBlindJoinDatabaseConnection, method loadData.

/**
 * Loads the data of every configured source and appends the columns of all
 * later sources to the bundle of the first one. No keys are matched; the only
 * consistency check is that each bundle has the same data length as the first.
 *
 * @return the result of passing the joined bundle to invokeBundleFilters
 * @throws AbortException if a later bundle's data length differs from the first
 */
@Override
public MultipleObjectsBundle loadData() {
    final List<MultipleObjectsBundle> loaded = new ArrayList<>(sources.size());
    for (DatabaseConnection source : sources) {
        loaded.add(source.loadData());
    }
    // The first bundle doubles as the join target and is extended in place.
    final MultipleObjectsBundle joined = loaded.get(0);
    for (MultipleObjectsBundle extra : loaded.subList(1, loaded.size())) {
        if (extra.dataLength() != joined.dataLength()) {
            throw new AbortException("Data set sizes do not agree - cannot join!");
        }
        int col = 0;
        while (col < extra.metaLength()) {
            joined.appendColumn(extra.meta(col), extra.getColumn(col));
            col++;
        }
    }
    return invokeBundleFilters(joined);
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 97 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

In class SplitNumberVectorFilter, method filter.

/**
 * Splits each column accepted by the input type restriction into two vector
 * columns: the first holds the dimensions listed in {@code dims}, the second
 * holds all other dimensions in ascending order. Columns that are not
 * accepted are copied to the result unchanged.
 *
 * @param objects bundle to process
 * @return {@code objects} itself when it has no data, otherwise a new bundle
 * @throws AbortException if more unselected dimensions are found than the
 *         dimensionality difference allows
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    if (objects.dataLength() == 0) {
        return objects;
    }
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    for (int col = 0; col < objects.metaLength(); col++) {
        @SuppressWarnings("unchecked") SimpleTypeInformation<Object> colType = (SimpleTypeInformation<Object>) objects.meta(col);
        @SuppressWarnings("unchecked") final List<Object> values = (List<Object>) objects.getColumn(col);
        if (getInputTypeRestriction().isAssignableFromType(colType)) {
            // The restriction check passed, so this is expected to be a vector field type.
            @SuppressWarnings("unchecked") final VectorFieldTypeInformation<V> vecType = VectorFieldTypeInformation.class.cast(colType);
            NumberVector.Factory<V> factory = FilterUtil.guessFactory(vecType);
            // Type information for the two replacement columns.
            VectorFieldTypeInformation<V> selectedType = new VectorFieldTypeInformation<>(factory, dims.length);
            VectorFieldTypeInformation<V> remainingType = new VectorFieldTypeInformation<>(factory, vecType.getDimensionality() - dims.length);
            final List<V> selectedCol = new ArrayList<>(values.size());
            final List<V> remainingCol = new ArrayList<>(values.size());
            result.appendColumn(selectedType, selectedCol);
            result.appendColumn(remainingType, remainingCol);
            // Collect every dimension index that is not listed in dims.
            final int[] remaining = new int[vecType.getDimensionality() - dims.length];
            int filled = 0;
            scan: for (int d = 0; d < vecType.getDimensionality(); d++) {
                for (int sel : dims) {
                    if (sel == d) {
                        continue scan;
                    }
                }
                if (filled >= remaining.length) {
                    throw new AbortException("Dimensionalities not proper!");
                }
                remaining[filled++] = d;
            }
            // Split every row into its two parts.
            for (int row = 0; row < objects.dataLength(); row++) {
                @SuppressWarnings("unchecked") final V vec = (V) values.get(row);
                final double[] selectedVals = new double[dims.length];
                final double[] remainingVals = new double[vec.getDimensionality() - dims.length];
                for (int k = 0; k < dims.length; k++) {
                    selectedVals[k] = vec.doubleValue(dims[k]);
                }
                for (int k = 0; k < remaining.length; k++) {
                    remainingVals[k] = vec.doubleValue(remaining[k]);
                }
                selectedCol.add(factory.newNumberVector(selectedVals));
                remainingCol.add(factory.newNumberVector(remainingVals));
            }
        } else {
            // Not a column this filter handles: pass it through as-is.
            result.appendColumn(colType, values);
        }
    }
    return result;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) ArrayList(java.util.ArrayList) List(java.util.List) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 98 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

In class ClassLabelFilter, method filter.

/**
 * Extracts a class label from the first label list column of the bundle.
 * <p>
 * The label at position {@code classLabelIndex} of each label list is turned
 * into a class label via {@code classLabelFactory}; the remaining labels form
 * a reduced label list. A non-negative index counts from the start of the
 * list; a negative index counts from the end ({@code -1} is the last label).
 * <p>
 * Only the first label list column is split. All other columns, including
 * any further label list columns, are copied unchanged. The reduced label
 * column is appended only if at least one row still has labels left. Rows
 * holding {@code null} produce {@code null} in both output columns.
 *
 * @param objects bundle to process
 * @return a new bundle with the class label column added
 * @throws AbortException if the class label cannot be created for a row, for
 *         example because the index is out of range for that row's label list
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    // Find a labellist column
    boolean done = false;
    boolean keeplabelcol = false;
    for (int i = 0; i < objects.metaLength(); i++) {
        SimpleTypeInformation<?> meta = objects.meta(i);
        // Skip non-labellist columns - or if we already had a labellist
        if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
            bundle.appendColumn(meta, objects.getColumn(i));
            continue;
        }
        done = true;
        // We split the label column into two parts
        List<ClassLabel> clscol = new ArrayList<>(objects.dataLength());
        List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
        ArrayList<String> lbuf = new ArrayList<>();
        // Split the column
        for (Object obj : objects.getColumn(i)) {
            if (obj != null) {
                LabelList ll = (LabelList) obj;
                // A negative index is relative to the end of the list, so it must be
                // added to the size. Subtracting it always yielded an offset past
                // the end of the list.
                int off = (classLabelIndex >= 0) ? classLabelIndex : (ll.size() + classLabelIndex);
                try {
                    ClassLabel lbl = classLabelFactory.makeFromString(ll.get(off));
                    clscol.add(lbl);
                } catch (Exception e) {
                    // Also covers an out-of-range offset reported by ll.get(off).
                    throw new AbortException("Cannot initialize class labels: " + e.getMessage(), e);
                }
                // Rebuild the label list without the entry used as class label.
                lbuf.clear();
                for (int j = 0; j < ll.size(); j++) {
                    if (j == off) {
                        continue;
                    }
                    lbuf.add(ll.get(j));
                }
                lblcol.add(LabelList.make(lbuf));
                if (!lbuf.isEmpty()) {
                    keeplabelcol = true;
                }
            } else {
                clscol.add(null);
                lblcol.add(null);
            }
        }
        bundle.appendColumn(classLabelFactory.getTypeInformation(), clscol);
        // Only add the label column when it's not empty.
        if (keeplabelcol) {
            bundle.appendColumn(meta, lblcol);
        }
    }
    return bundle;
}
Also used : LabelList(de.lmu.ifi.dbs.elki.data.LabelList) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 99 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

In class VectorDimensionalityFilter, method updateMeta.

/**
 * Rebuilds {@code meta} from the source metadata.
 * <p>
 * While no column has been selected yet ({@code column < 0}), the first type
 * assignable to {@code TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH} becomes the
 * filtered column. Its type is kept if it is already a fixed-dimensionality
 * field type, and otherwise replaced by a field type of dimensionality
 * {@code dim} when {@code dim} is known. All other column types are copied
 * unchanged.
 *
 * @throws AbortException if {@code dim} is set and lies outside the
 *         dimensionality range of the matched vector field type
 */
private void updateMeta() {
    meta = new BundleMeta();
    final BundleMeta sourceMeta = source.getMeta();
    for (int idx = 0; idx < sourceMeta.size(); idx++) {
        final SimpleTypeInformation<?> colType = sourceMeta.get(idx);
        // Pass through when a column was already chosen, or the type does not match.
        if (column >= 0 || !TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH.isAssignableFromType(colType)) {
            meta.add(colType);
            continue;
        }
        if (colType instanceof VectorFieldTypeInformation) {
            @SuppressWarnings("unchecked") final VectorFieldTypeInformation<V> fieldType = (VectorFieldTypeInformation<V>) colType;
            if (dim != -1) {
                // A requested dimensionality outside [mindim, maxdim] would remove every vector.
                if (fieldType.mindim() > dim) {
                    throw new AbortException("Would filter all vectors: minimum dimensionality " + fieldType.mindim() + " > desired dimensionality " + dim);
                }
                if (fieldType.maxdim() < dim) {
                    throw new AbortException("Would filter all vectors: maximum dimensionality " + fieldType.maxdim() + " < desired dimensionality " + dim);
                }
            } else {
                // No dimensionality requested: adopt the minimum of this column.
                dim = fieldType.mindim();
            }
            if (fieldType.mindim() == fieldType.maxdim()) {
                // Already fixed-dimensional, so the type can be kept as-is.
                meta.add(fieldType);
                column = idx;
                continue;
            }
        }
        @SuppressWarnings("unchecked") final VectorTypeInformation<V> vecType = (VectorTypeInformation<V>) colType;
        if (dim == -1) {
            LOG.warning("No dimensionality yet for column " + idx);
            meta.add(vecType);
        } else {
            meta.add(new VectorFieldTypeInformation<>(FilterUtil.guessFactory(vecType), dim, dim, vecType.getSerializer()));
        }
        column = idx;
    }
}
Also used : BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) VectorTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorTypeInformation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)99 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)25 IOException (java.io.IOException)24 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 ArrayList (java.util.ArrayList)16 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)9 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)9 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)8 Model (de.lmu.ifi.dbs.elki.data.model.Model)8 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)8 Database (de.lmu.ifi.dbs.elki.database.Database)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)8 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)8 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)8 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)6 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)5 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)5