Search in sources :

Example 36 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class BundleDatabaseConnection method loadData.

/**
 * Reads the bundle file {@code infile} through a file channel, runs the stream
 * filters on it and materializes the result.
 *
 * @return the bundle produced from the filtered stream
 * @throws AbortException if an {@link IOException} occurs while opening, reading or closing the file
 */
@Override
public MultipleObjectsBundle loadData() {
    // try-with-resources releases the channel and the stream (in that order) on every
    // exit path, including when reading or filtering fails part-way through.
    try (FileInputStream fis = new FileInputStream(infile);
        FileChannel channel = fis.getChannel()) {
        return invokeStreamFilters(new BundleReader(channel)).asMultipleObjectsBundle();
    } catch (IOException e) {
        throw new AbortException("IO error loading bundle", e);
    }
}
Also used : FileChannel(java.nio.channels.FileChannel) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BundleReader(de.lmu.ifi.dbs.elki.datasource.bundle.BundleReader) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 37 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class ExternalIDJoinDatabaseConnection method loadData.

/**
 * Loads every configured source and joins the additional sources onto the first
 * one, matching rows by their external ID column.
 *
 * The first bundle defines the output rows. For every further bundle, each
 * non-ID column is appended to the first bundle (pre-filled with nulls) and then
 * populated for the rows whose external ID is found in the first bundle. Rows
 * that still contain a null value after joining are reported with a warning.
 *
 * NOTE(review): {@code bundles.get(0)} assumes at least one source is configured;
 * confirm this is enforced where {@code sources} is set up.
 *
 * @return the first bundle, extended by the joined columns
 * @throws AbortException if any source has no external ID column
 */
@Override
public MultipleObjectsBundle loadData() {
    List<MultipleObjectsBundle> bundles = new ArrayList<>(sources.size());
    for (DatabaseConnection dbc : sources) {
        bundles.add(dbc.loadData());
    }
    MultipleObjectsBundle first = bundles.get(0);
    // Maps external ID -> row index in the first bundle; -1 signals "not present".
    Object2IntOpenHashMap<ExternalID> labelmap = new Object2IntOpenHashMap<>(first.dataLength());
    labelmap.defaultReturnValue(-1);
    // Process first bundle
    {
        final int lblcol = findExternalIDColumn(first);
        if (lblcol == -1) {
            throw new AbortException("No external ID column found in primary source.");
        }
        for (int i = 0; i < first.dataLength(); i++) {
            ExternalID data = (ExternalID) first.data(i, lblcol);
            if (data == null) {
                LOG.debug("Object without ID encountered.");
                continue;
            }
            // put() returns the previously mapped row, or -1 if the ID was new.
            int old = labelmap.put(data, i);
            if (old != -1) {
                LOG.debug("Duplicate id encountered: " + data + " in rows " + old + " and " + i);
            }
        }
    }
    // Process additional columns
    for (int c = 1; c < sources.size(); c++) {
        MultipleObjectsBundle cur = bundles.get(c);
        final int lblcol = findExternalIDColumn(cur);
        if (lblcol == -1) {
            // List the column types that were found, to help diagnose the input.
            StringBuilder buf = new StringBuilder();
            for (int i = 0; i < cur.metaLength(); i++) {
                if (buf.length() > 0) {
                    buf.append(',');
                }
                buf.append(cur.meta(i));
            }
            throw new AbortException("No external ID column found in source " + (c + 1) + " to join with. Got: " + buf.toString());
        }
        // Destination columns, indexed like the columns of cur (null at the ID column).
        List<ArrayList<Object>> dcol = new ArrayList<>(cur.metaLength());
        for (int i = 0; i < cur.metaLength(); i++) {
            // Skip the label columns
            if (i == lblcol) {
                dcol.add(null);
                continue;
            }
            ArrayList<Object> newcol = new ArrayList<>(first.dataLength());
            // Pre-fill with nulls.
            for (int j = 0; j < first.dataLength(); j++) {
                newcol.add(null);
            }
            first.appendColumn(cur.meta(i), newcol);
            dcol.add(newcol);
        }
        for (int i = 0; i < cur.dataLength(); i++) {
            ExternalID data = (ExternalID) cur.data(i, lblcol);
            if (data == null) {
                LOG.warning("Object without label encountered.");
                continue;
            }
            int row = labelmap.getInt(data);
            if (row == -1) {
                LOG.debug("ID not found for join: " + data + " in row " + i);
                continue;
            }
            // Copy every non-ID value into the matching row of the first bundle.
            for (int d = 0; d < cur.metaLength(); d++) {
                if (d == lblcol) {
                    continue;
                }
                List<Object> col = dcol.get(d);
                assert (col != null);
                col.set(row, cur.data(i, d));
            }
        }
    }
    // Report each row that still has a null value (once per row, at its first null column).
    for (int i = 0; i < first.dataLength(); i++) {
        for (int d = 0; d < first.metaLength(); d++) {
            if (first.data(i, d) == null) {
                StringBuilder buf = new StringBuilder();
                for (int d2 = 0; d2 < first.metaLength(); d2++) {
                    if (buf.length() > 0) {
                        buf.append(", ");
                    }
                    if (first.data(i, d2) == null) {
                        buf.append("null");
                    } else {
                        buf.append(first.data(i, d2));
                    }
                }
                LOG.warning("null value in joined data, row " + i + " column " + d + FormatUtil.NEWLINE + "[" + buf.toString() + "]");
                break;
            }
        }
    }
    return first;
}

/**
 * Finds the first column of a bundle whose type is assignable to the external ID type.
 *
 * @param bundle bundle whose column metadata is scanned
 * @return index of the first external ID column, or -1 if there is none
 */
private static int findExternalIDColumn(MultipleObjectsBundle bundle) {
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (TypeUtil.EXTERNALID.isAssignableFromType(bundle.meta(i))) {
            return i;
        }
    }
    return -1;
}
Also used : ExternalID(de.lmu.ifi.dbs.elki.data.ExternalID) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 38 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class ArrayAdapterDatabaseConnection method loadData.

/**
 * Wraps the rows of the {@code data} array as {@link DoubleVector}s in a new
 * bundle, optionally assigning a static DBID range and a string label column,
 * and passes the bundle through the bundle filters.
 *
 * @return the filtered bundle
 * @throws AbortException if labels are given but their count differs from the number of data rows
 */
@Override
public MultipleObjectsBundle loadData() {
    final MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    if (startid != null) {
        bundle.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(startid, data.length));
    }

    // Track the smallest and largest row length while wrapping the rows.
    int minDim = Integer.MAX_VALUE;
    int maxDim = 0;
    final List<DoubleVector> vectors = new ArrayList<>(data.length);
    for (int row = 0; row < data.length; row++) {
        final int rowDim = data[row].length;
        minDim = Math.min(minDim, rowDim);
        maxDim = Math.max(maxDim, rowDim);
        vectors.add(DoubleVector.wrap(data[row]));
    }
    SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, minDim, maxDim, DoubleVector.FACTORY.getDefaultSerializer());
    bundle.appendColumn(vectorType, vectors);

    // Labels are optional, but when present there must be exactly one per row.
    if (labels != null) {
        if (labels.length != data.length) {
            throw new AbortException("Label and DBID columns must have the same size.");
        }
        bundle.appendColumn(TypeUtil.STRING, Arrays.asList(labels));
    }
    return invokeBundleFilters(bundle);
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 39 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class SortByLabelFilter method filter.

/**
 * Produces a new bundle whose rows are ordered by the string form of the label
 * column, compared case-insensitively.
 *
 * @param objects bundle to sort; it is only read, never modified
 * @return a new bundle with every column reordered consistently
 * @throws AbortException if no label column can be found
 */
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
    if (LOG.isDebugging()) {
        LOG.debug("Sorting the data set");
    }
    // Sort a permutation of row indexes rather than the rows themselves.
    final int size = objects.dataLength();
    final int[] order = new int[size];
    for (int pos = 0; pos < size; pos++) {
        order[pos] = pos;
    }
    // The label column supplies the sort keys.
    final int lblcol = FilterUtil.findLabelColumn(objects);
    if (lblcol == -1) {
        throw new AbortException("No label column found - cannot sort by label.");
    }
    IntegerArrayQuickSort.sort(order, new IntegerComparator() {

        @Override
        public int compare(int left, int right) {
            final String leftLabel = objects.data(left, lblcol).toString();
            final String rightLabel = objects.data(right, lblcol).toString();
            return leftLabel.compareToIgnoreCase(rightLabel);
        }
    });
    // Rebuild each column following the sorted permutation.
    final MultipleObjectsBundle sorted = new MultipleObjectsBundle();
    final int columns = objects.metaLength();
    for (int col = 0; col < columns; col++) {
        final List<?> source = objects.getColumn(col);
        final List<Object> reordered = new ArrayList<>(size);
        for (int sourceRow : order) {
            reordered.add(source.get(sourceRow));
        }
        sorted.appendColumn(objects.meta(col), reordered);
    }
    return sorted;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) IntegerComparator(de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 40 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class GlobalPrincipalComponentAnalysisTransform method prepareStart.

/**
 * Initializes the dimensionality and a fresh covariance matrix from the input
 * type information.
 *
 * @param in type information of the input; must be a {@link VectorFieldTypeInformation}
 * @return always {@code true}
 * @throws AbortException if {@code in} is not a {@link VectorFieldTypeInformation}
 */
@Override
protected boolean prepareStart(SimpleTypeInformation<O> in) {
    if (in instanceof VectorFieldTypeInformation) {
        final VectorFieldTypeInformation<?> vectorType = (VectorFieldTypeInformation<?>) in;
        dim = vectorType.getDimensionality();
        covmat = new CovarianceMatrix(dim);
        return true;
    }
    throw new AbortException("PCA can only applied to fixed dimensionality vectors");
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)

Aggregations

AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 99; FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 25; IOException (java.io.IOException): 24; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 22; ArrayList (java.util.ArrayList): 16; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 13; MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 13; NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 10; DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 9; DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 9; Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 8; Model (de.lmu.ifi.dbs.elki.data.model.Model): 8; VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation): 8; Database (de.lmu.ifi.dbs.elki.database.Database): 8; WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 8; DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange): 8; OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 8; MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 6; ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel): 5; DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 5