Search in sources :

Example 6 with LabelList

use of de.lmu.ifi.dbs.elki.data.LabelList in project elki by elki-project.

the class ByLabelFilter method nextEvent.

/**
 * Advance the wrapped source to the next event that passes the label filter.
 *
 * Meta changes and end-of-stream are always forwarded. Objects are forwarded
 * only when "some label matches the pattern" differs from {@code inverted},
 * i.e. matching objects are kept normally and dropped when inverted.
 *
 * A negative {@code lblcol} means that no label column has been located yet;
 * column index 0 is a valid label column.
 *
 * @return the next event that passed the filter
 */
@Override
public Event nextEvent() {
    while (true) {
        Event ev = source.nextEvent();
        switch(ev) {
            case END_OF_STREAM:
                if (lblcol < 0) {
                    LOG.warning("By label filter was used, but never saw a label relation!");
                }
                return Event.END_OF_STREAM;
            case META_CHANGED:
                // Search for the first label column, unless already known.
                if (lblcol < 0) {
                    BundleMeta meta = source.getMeta();
                    lblcol = FilterUtil.findLabelColumn(meta);
                }
                return Event.META_CHANGED;
            case NEXT_OBJECT:
                if (lblcol < 0) {
                    // No labels known yet: nothing can match, so the object
                    // only survives an inverted filter.
                    if (!inverted) {
                        continue;
                    }
                    return Event.NEXT_OBJECT;
                }
                final Object l = source.data(lblcol);
                boolean good = false;
                if (l instanceof LabelList) {
                    // One matching label is enough.
                    final LabelList ll = (LabelList) l;
                    for (int i = 0; i < ll.size() && !good; i++) {
                        matcher.reset(ll.get(i));
                        good = matcher.matches();
                    }
                } else if (l != null) {
                    matcher.reset(l.toString());
                    good = matcher.matches();
                }
                // An object without a label (null) counts as "no match".
                if (good == inverted) {
                    // Filtered out; continue with the next event of the source.
                    continue;
                }
                return Event.NEXT_OBJECT;
            default:
                LOG.warning("Unknown event: " + ev);
        }
    }
}
Also used : BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta) LabelList(de.lmu.ifi.dbs.elki.data.LabelList)

Example 7 with LabelList

use of de.lmu.ifi.dbs.elki.data.LabelList in project elki by elki-project.

the class LabelJoinDatabaseConnection method loadData.

/**
 * Load every source and join the additional sources onto the first one,
 * using the label column of each bundle as join key.
 *
 * The rows of the first bundle define the result; for each further source,
 * all non-label columns are appended to the first bundle and filled in for
 * the rows whose label is found. Rows without a join partner keep
 * {@code null} in the appended columns, which is reported as a warning.
 *
 * @return the first bundle, extended by the joined columns
 * @throws AbortException if there are no sources, or a source has no label
 *         column
 */
@Override
public MultipleObjectsBundle loadData() {
    List<MultipleObjectsBundle> bundles = new ArrayList<>(sources.size());
    for (DatabaseConnection dbc : sources) {
        bundles.add(dbc.loadData());
    }
    if (bundles.isEmpty()) {
        // Fail with a readable message rather than an IndexOutOfBoundsException.
        throw new AbortException("No data sources given, cannot join.");
    }
    MultipleObjectsBundle first = bundles.get(0);
    Object2IntOpenHashMap<String> labelmap = new Object2IntOpenHashMap<>(first.dataLength());
    // -1 marks "label unknown", both for put() and getInt().
    labelmap.defaultReturnValue(-1);
    // Process first bundle: build the label -> row index.
    {
        // Identify a label column
        final int lblcol = FilterUtil.findLabelColumn(first);
        if (lblcol == -1) {
            throw new AbortException("No label column found in first source, cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
        }
        for (int i = 0; i < first.dataLength(); i++) {
            Object data = first.data(i, lblcol);
            if (data == null) {
                LOG.warning("Object without label encountered.");
                continue;
            }
            if (data instanceof LabelList) {
                // Every label of the list identifies this row.
                final LabelList ll = (LabelList) data;
                for (int j = 0; j < ll.size(); j++) {
                    registerLabel(labelmap, ll.get(j), i);
                }
            } else {
                // Strings and any other label type are keyed by their string form.
                registerLabel(labelmap, data.toString(), i);
            }
        }
    }
    // Process additional columns
    for (int c = 1; c < bundles.size(); c++) {
        MultipleObjectsBundle cur = bundles.get(c);
        final int lblcol = FilterUtil.findLabelColumn(cur);
        if (lblcol == -1) {
            throw new AbortException("No label column found in source " + (c + 1) + ", cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
        }
        // Destination columns, indexed like the columns of cur; the label
        // column has a null placeholder as it is not copied.
        List<ArrayList<Object>> dcol = new ArrayList<>(cur.metaLength());
        for (int i = 0; i < cur.metaLength(); i++) {
            // Skip the label columns
            if (i == lblcol) {
                dcol.add(null);
                continue;
            }
            ArrayList<Object> newcol = new ArrayList<>(first.dataLength());
            // Pre-fill with nulls, so rows can be set in arbitrary order.
            for (int j = 0; j < first.dataLength(); j++) {
                newcol.add(null);
            }
            first.appendColumn(cur.meta(i), newcol);
            dcol.add(newcol);
        }
        for (int i = 0; i < cur.dataLength(); i++) {
            Object data = cur.data(i, lblcol);
            if (data == null) {
                LOG.warning("Object without label encountered.");
                continue;
            }
            final int row = lookupRow(labelmap, data);
            if (row < 0) {
                LOG.warning("Label not found for join: " + data + " in row " + i);
                continue;
            }
            for (int d = 0; d < cur.metaLength(); d++) {
                if (d == lblcol) {
                    continue;
                }
                List<Object> col = dcol.get(d);
                assert (col != null);
                col.set(row, cur.data(i, d));
            }
        }
    }
    // Report rows that still contain null values (at most once per row).
    for (int i = 0; i < first.dataLength(); i++) {
        for (int d = 0; d < first.metaLength(); d++) {
            if (first.data(i, d) == null) {
                StringBuilder buf = new StringBuilder();
                for (int d2 = 0; d2 < first.metaLength(); d2++) {
                    if (buf.length() > 0) {
                        buf.append(", ");
                    }
                    // append(Object) already renders null as "null".
                    buf.append(first.data(i, d2));
                }
                LOG.warning("null value in joined data, row " + i + " column " + d + FormatUtil.NEWLINE + "[" + buf.toString() + "]");
                break;
            }
        }
    }
    return first;
}

/**
 * Map a label to a row of the first bundle, warning if the label was already
 * assigned to another row (the newer row replaces the older one).
 *
 * @param labelmap label to row index map
 * @param lbl label to register
 * @param row row index in the first bundle
 */
private void registerLabel(Object2IntOpenHashMap<String> labelmap, String lbl, int row) {
    int old = labelmap.put(lbl, row);
    if (old != -1) {
        LOG.warning("Duplicate label encountered: " + lbl + " in rows " + old + " and " + row);
    }
}

/**
 * Find the row of the first bundle that a label object joins to.
 *
 * @param labelmap label to row index map
 * @param data non-null label object; for a label list, the first label that
 *        is known to the map decides
 * @return row index, or -1 if no label is known
 */
private int lookupRow(Object2IntOpenHashMap<String> labelmap, Object data) {
    if (data instanceof LabelList) {
        final LabelList ll = (LabelList) data;
        for (int j = 0; j < ll.size(); j++) {
            final int row = labelmap.getInt(ll.get(j));
            if (row >= 0) {
                return row;
            }
        }
        return -1;
    }
    return labelmap.getInt(data.toString());
}
Also used : LabelList(de.lmu.ifi.dbs.elki.data.LabelList) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 8 with LabelList

use of de.lmu.ifi.dbs.elki.data.LabelList in project elki by elki-project.

the class ClassLabelFilter method filter.

/**
 * Split the first label-list column of the bundle into a class label column
 * and a column holding the remaining labels.
 *
 * All other columns (including any further label-list columns) are copied
 * unchanged. The class label is taken at position {@code classLabelIndex} of
 * each label list; a negative index counts from the end of the list
 * ({@code -1} is the last label). The remaining-labels column is only
 * appended if at least one object has labels left.
 *
 * @param objects input bundle
 * @return new bundle with the class label column split off
 * @throws AbortException if a class label cannot be created, e.g. because
 *         the index is out of range for a label list
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    // Find a labellist column
    boolean done = false;
    boolean keeplabelcol = false;
    for (int i = 0; i < objects.metaLength(); i++) {
        SimpleTypeInformation<?> meta = objects.meta(i);
        // Skip non-labellist columns - or if we already had a labellist
        if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
            bundle.appendColumn(meta, objects.getColumn(i));
            continue;
        }
        done = true;
        // We split the label column into two parts
        List<ClassLabel> clscol = new ArrayList<>(objects.dataLength());
        List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
        // Scratch buffer for the labels that remain, reused across objects.
        ArrayList<String> lbuf = new ArrayList<>();
        // Split the column
        for (Object obj : objects.getColumn(i)) {
            if (obj == null) {
                // Keep both columns aligned with the input rows.
                clscol.add(null);
                lblcol.add(null);
                continue;
            }
            LabelList ll = (LabelList) obj;
            // Negative indexes count from the end: size + (-k).
            final int off = (classLabelIndex >= 0) ? classLabelIndex : (ll.size() + classLabelIndex);
            try {
                ClassLabel lbl = classLabelFactory.makeFromString(ll.get(off));
                clscol.add(lbl);
            } catch (Exception e) {
                // Also covers an index outside of this label list.
                throw new AbortException("Cannot initialize class labels: " + e.getMessage(), e);
            }
            lbuf.clear();
            for (int j = 0; j < ll.size(); j++) {
                if (j == off) {
                    continue;
                }
                lbuf.add(ll.get(j));
            }
            lblcol.add(LabelList.make(lbuf));
            if (!lbuf.isEmpty()) {
                keeplabelcol = true;
            }
        }
        bundle.appendColumn(classLabelFactory.getTypeInformation(), clscol);
        // Only add the label column when it's not empty.
        if (keeplabelcol) {
            bundle.appendColumn(meta, lblcol);
        }
    }
    return bundle;
}
Also used : LabelList(de.lmu.ifi.dbs.elki.data.LabelList) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel)

Example 9 with LabelList

use of de.lmu.ifi.dbs.elki.data.LabelList in project elki by elki-project.

the class StringParser method parse.

/**
 * Read the stream line by line, producing one string object per line and
 * using the same string as the only label of that object.
 *
 * Empty lines and lines matching the {@code comment} pattern (if set) are
 * skipped. If {@code trimWhitespace} is set, the remaining lines are trimmed
 * before being stored. The stream is decoded with the platform default
 * charset.
 *
 * NOTE(review): the reader is not closed here; presumably the caller owns
 * the stream - confirm.
 *
 * @param in stream to read
 * @return bundle with a string column and a label list column
 * @throws IllegalArgumentException if reading from the stream fails; the
 *         underlying {@link IOException} is attached as cause
 */
@Override
public MultipleObjectsBundle parse(InputStream in) {
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    // 1-based number of the line currently being read, for error reporting.
    int lineNumber = 1;
    List<String> data = new ArrayList<>();
    List<LabelList> labels = new ArrayList<>();
    // Single-element scratch list, reused to build each label list.
    ArrayList<String> ll = new ArrayList<>(1);
    try {
        for (String line; (line = reader.readLine()) != null; lineNumber++) {
            // Skip empty lines and comments
            if (line.isEmpty() || (comment != null && comment.reset(line).matches())) {
                continue;
            }
            final String val = trimWhitespace ? line.trim() : line;
            data.add(val);
            ll.clear();
            ll.add(val);
            labels.add(LabelList.make(ll));
        }
    } catch (IOException e) {
        // Keep the original exception as cause, so the I/O failure stays diagnosable.
        throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".", e);
    }
    return MultipleObjectsBundle.makeSimple(TypeUtil.STRING, data, TypeUtil.LABELLIST, labels);
}
Also used : InputStreamReader(java.io.InputStreamReader) LabelList(de.lmu.ifi.dbs.elki.data.LabelList) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Aggregations

LabelList (de.lmu.ifi.dbs.elki.data.LabelList)9 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)5 ArrayList (java.util.ArrayList)4 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)2 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)2 Test (org.junit.Test)2 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)1 ExternalID (de.lmu.ifi.dbs.elki.data.ExternalID)1 SimpleClassLabel (de.lmu.ifi.dbs.elki.data.SimpleClassLabel)1 BundleMeta (de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta)1 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)1 Object2IntOpenHashMap (it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap)1 BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1 InputStreamReader (java.io.InputStreamReader)1