use of de.lmu.ifi.dbs.elki.data.LabelList in project elki by elki-project.
the class ByLabelFilter method nextEvent.
/**
 * Advance the wrapped stream to the next event that should be visible to
 * the consumer, dropping objects that do not pass the label filter.
 *
 * An object passes when at least one of its labels matches the pattern
 * (or, when {@code inverted} is set, when none of them does). Objects seen
 * before a label column is known, and objects whose label value is
 * {@code null}, are treated as having no matching label.
 *
 * @return the next event: {@code END_OF_STREAM}, {@code META_CHANGED}, or
 *         {@code NEXT_OBJECT} for an object that passed the filter
 */
@Override
public Event nextEvent() {
  while (true) {
    Event ev = source.nextEvent();
    switch(ev) {
    case END_OF_STREAM:
      if (lblcol < 0) {
        LOG.warning("By label filter was used, but never saw a label relation!");
      }
      return Event.END_OF_STREAM;
    case META_CHANGED:
      // Search for the first label column
      if (lblcol < 0) {
        BundleMeta meta = source.getMeta();
        lblcol = FilterUtil.findLabelColumn(meta);
      }
      return Event.META_CHANGED;
    case NEXT_OBJECT: {
      // "good" = at least one label of this object matches the pattern.
      boolean good = false;
      // Column 0 is a valid label column; only negative values mean "unknown".
      if (lblcol >= 0) {
        Object l = source.data(lblcol);
        if (l instanceof LabelList) {
          final LabelList ll = (LabelList) l;
          for (int i = 0; i < ll.size(); i++) {
            matcher.reset(ll.get(i));
            if (matcher.matches()) {
              good = true;
              break;
            }
          }
        } else if (l != null) {
          // Single label of some other type: match on its string form.
          matcher.reset(l.toString());
          good = matcher.matches();
        }
      }
      // Skip objects that match when inverted, or do not match otherwise.
      if (good == inverted) {
        continue;
      }
      return Event.NEXT_OBJECT;
    }
    default:
      // Unknown events are logged and swallowed; keep polling the source.
      LOG.warning("Unknown event: " + ev);
    }
  }
}
use of de.lmu.ifi.dbs.elki.data.LabelList in project elki by elki-project.
the class LabelJoinDatabaseConnection method loadData.
/**
 * Load all sources and join them onto the first one by label.
 *
 * The first bundle is indexed by label (every entry of a label list maps
 * to its row). For each further bundle, all non-label columns are appended
 * to the first bundle, pre-filled with nulls, and then populated for every
 * row whose label is found in the index. Rows that still contain a null
 * value after joining are reported with a warning.
 *
 * @return the first bundle, extended by the columns of the other sources
 * @throws AbortException if one of the sources has no label column
 */
@Override
public MultipleObjectsBundle loadData() {
  // Load everything up front; the first bundle is the join target.
  final List<MultipleObjectsBundle> loaded = new ArrayList<>(sources.size());
  for (DatabaseConnection connection : sources) {
    loaded.add(connection.loadData());
  }
  final MultipleObjectsBundle first = loaded.get(0);

  // Label -> row index in the first bundle; -1 signals "unknown label".
  final Object2IntOpenHashMap<String> rowByLabel = new Object2IntOpenHashMap<>(first.dataLength());
  rowByLabel.defaultReturnValue(-1);

  // Build the label index from the first bundle.
  final int firstLabelCol = FilterUtil.findLabelColumn(first);
  if (firstLabelCol == -1) {
    throw new AbortException("No label column found in first source, cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
  }
  for (int row = 0; row < first.dataLength(); row++) {
    final Object label = first.data(row, firstLabelCol);
    if (label == null) {
      LOG.warning("Object without label encountered.");
      continue;
    }
    if (label instanceof LabelList) {
      // Every entry of a label list is registered for this row.
      final LabelList list = (LabelList) label;
      for (int k = 0; k < list.size(); k++) {
        final String entry = list.get(k);
        final int previous = rowByLabel.put(entry, row);
        if (previous != -1) {
          LOG.warning("Duplicate label encountered: " + entry + " in rows " + previous + " and " + row);
        }
      }
      continue;
    }
    // Plain strings and any other label type are keyed by their string form.
    final String key = label.toString();
    final int previous = rowByLabel.put(key, row);
    if (previous != -1) {
      LOG.warning("Duplicate label encountered: " + key + " in rows " + previous + " and " + row);
    }
  }

  // Join the remaining sources onto the first bundle.
  for (int s = 1; s < sources.size(); s++) {
    final MultipleObjectsBundle other = loaded.get(s);
    final int labelCol = FilterUtil.findLabelColumn(other);
    if (labelCol == -1) {
      throw new AbortException("No label column found in source " + (s + 1) + ", cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
    }

    // One destination column per source column; the label column gets a null placeholder.
    final List<ArrayList<Object>> targets = new ArrayList<>(other.metaLength());
    for (int col = 0; col < other.metaLength(); col++) {
      if (col == labelCol) {
        targets.add(null);
        continue;
      }
      final ArrayList<Object> filler = new ArrayList<>(first.dataLength());
      for (int n = 0; n < first.dataLength(); n++) {
        filler.add(null);
      }
      first.appendColumn(other.meta(col), filler);
      targets.add(filler);
    }

    // Copy each row of this source into the row of the first bundle with the same label.
    for (int i = 0; i < other.dataLength(); i++) {
      final Object label = other.data(i, labelCol);
      if (label == null) {
        LOG.warning("Object without label encountered.");
        continue;
      }
      int target = -1;
      if (label instanceof LabelList) {
        // The first entry of the list that is known decides the row.
        final LabelList list = (LabelList) label;
        for (int k = 0; k < list.size() && target < 0; k++) {
          target = rowByLabel.getInt(list.get(k));
        }
      } else {
        target = rowByLabel.getInt(label.toString());
      }
      if (target < 0) {
        LOG.warning("Label not found for join: " + label + " in row " + i);
        continue;
      }
      for (int col = 0; col < other.metaLength(); col++) {
        if (col != labelCol) {
          final List<Object> dest = targets.get(col);
          assert (dest != null);
          dest.set(target, other.data(i, col));
        }
      }
    }
  }

  // Report every row that still contains a null value (once per row).
  for (int row = 0; row < first.dataLength(); row++) {
    for (int col = 0; col < first.metaLength(); col++) {
      if (first.data(row, col) != null) {
        continue;
      }
      final StringBuilder dump = new StringBuilder();
      for (int c = 0; c < first.metaLength(); c++) {
        if (dump.length() > 0) {
          dump.append(", ");
        }
        final Object cell = first.data(row, c);
        if (cell == null) {
          dump.append("null");
        } else {
          dump.append(cell);
        }
      }
      LOG.warning("null value in joined data, row " + row + " column " + col + FormatUtil.NEWLINE + "[" + dump.toString() + "]");
      break;
    }
  }
  return first;
}
use of de.lmu.ifi.dbs.elki.data.LabelList in project elki by elki-project.
the class ClassLabelFilter method filter.
/**
 * Split the first label-list column of the bundle into a class label column
 * and a column holding the remaining labels.
 *
 * All other columns (including any further label-list columns) are copied
 * unchanged. The remaining-labels column is only appended when at least one
 * object still has labels left after the class label was removed.
 *
 * @param objects bundle to process
 * @return new bundle with the class label column added
 * @throws AbortException if a class label cannot be created, e.g. because
 *         the configured index is outside an object's label list
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  // Find a labellist column
  boolean done = false;
  boolean keeplabelcol = false;
  for (int i = 0; i < objects.metaLength(); i++) {
    SimpleTypeInformation<?> meta = objects.meta(i);
    // Skip non-labellist columns - or if we already had a labellist
    if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
      bundle.appendColumn(meta, objects.getColumn(i));
      continue;
    }
    done = true;
    // We split the label column into two parts
    List<ClassLabel> clscol = new ArrayList<>(objects.dataLength());
    List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
    ArrayList<String> lbuf = new ArrayList<>();
    // Split the column
    for (Object obj : objects.getColumn(i)) {
      if (obj != null) {
        LabelList ll = (LabelList) obj;
        // A negative index is taken relative to the end of the label list
        // (-1 selects the last label). Subtracting it instead would always
        // point past the end of the list.
        int off = (classLabelIndex >= 0) ? classLabelIndex : (ll.size() + classLabelIndex);
        try {
          ClassLabel lbl = classLabelFactory.makeFromString(ll.get(off));
          clscol.add(lbl);
        } catch (Exception e) {
          throw new AbortException("Cannot initialize class labels: " + e.getMessage(), e);
        }
        // Collect all labels except the one used as class label.
        lbuf.clear();
        for (int j = 0; j < ll.size(); j++) {
          if (j == off) {
            continue;
          }
          lbuf.add(ll.get(j));
        }
        lblcol.add(LabelList.make(lbuf));
        if (!lbuf.isEmpty()) {
          keeplabelcol = true;
        }
      } else {
        // Keep both columns aligned with the input rows.
        clscol.add(null);
        lblcol.add(null);
      }
    }
    bundle.appendColumn(classLabelFactory.getTypeInformation(), clscol);
    // Only add the label column when it's not empty.
    if (keeplabelcol) {
      bundle.appendColumn(meta, lblcol);
    }
  }
  return bundle;
}
use of de.lmu.ifi.dbs.elki.data.LabelList in project elki by elki-project.
the class StringParser method parse.
/**
 * Parse the input line by line into a bundle with two columns: the line
 * itself as string object, and a label list containing the same string.
 *
 * Empty lines and lines matching the comment pattern (if one is set) are
 * skipped. When {@code trimWhitespace} is set, each kept line is trimmed
 * before it is stored.
 *
 * @param in stream to read from; it is not closed by this method
 * @return bundle with a string column and a label list column
 * @throws IllegalArgumentException if reading from the stream fails; the
 *         underlying {@link IOException} is attached as cause
 */
@Override
public MultipleObjectsBundle parse(InputStream in) {
  BufferedReader reader = new BufferedReader(new InputStreamReader(in));
  int lineNumber = 0;
  List<String> data = new ArrayList<>();
  List<LabelList> labels = new ArrayList<>();
  // Reusable single-element buffer for building each label list.
  ArrayList<String> ll = new ArrayList<>(1);
  try {
    for (String line; (line = reader.readLine()) != null; lineNumber++) {
      // Skip empty lines and comments
      if (line.length() <= 0 || (comment != null && comment.reset(line).matches())) {
        continue;
      }
      final String val = trimWhitespace ? line.trim() : line;
      data.add(val);
      ll.clear();
      ll.add(val);
      labels.add(LabelList.make(ll));
    }
  } catch (IOException e) {
    // Keep the I/O failure as cause so the root problem is not lost.
    throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".", e);
  }
  return MultipleObjectsBundle.makeSimple(TypeUtil.STRING, data, TypeUtil.LABELLIST, labels);
}
Aggregations