Use of de.lmu.ifi.dbs.elki.data.SimpleClassLabel in project elki by elki-project.
From the class ArffParser, method loadDenseInstance.
/**
 * Read one dense ARFF instance from the tokenizer.
 *
 * The tokenizer must already be positioned on the first value of the instance:
 * every value is taken from the current token, and the tokenizer is advanced
 * only after the value was consumed.
 *
 * @param tokenizer Tokenizer, positioned on the first value token
 * @param dimsize Number of successive input columns merged into each output
 *        column (used for number vectors and label lists)
 * @param etyp Type of each output column
 * @param outdim Number of output columns
 * @return One object per output column
 * @throws IOException on read errors of the underlying tokenizer
 * @throws AbortException if a token has an unexpected type, a numeric value
 *         cannot be parsed, or a column type is not supported
 */
private Object[] loadDenseInstance(StreamTokenizer tokenizer, int[] dimsize, TypeInformation[] etyp, int outdim) throws IOException {
  Object[] data = new Object[outdim];
  for (int out = 0; out < outdim; out++) {
    if (TypeUtil.NUMBER_VECTOR_FIELD.equals(etyp[out])) {
      // For multi-column vectors, read successive columns
      double[] cur = new double[dimsize[out]];
      for (int k = 0; k < dimsize[out]; k++) {
        if (tokenizer.ttype == '?') {
          // '?' token type: store as NaN.
          cur[k] = Double.NaN;
        } else if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
          try {
            cur[k] = ParseUtil.parseDouble(tokenizer.sval);
          } catch (NumberFormatException e) {
            // Keep the parser exception as cause, so the failure stays traceable.
            throw new AbortException("Expected number value, got: " + tokenizer.sval, e);
          }
        } else {
          throw new AbortException("Expected word token, got: " + tokenizer.toString());
        }
        nextToken(tokenizer);
      }
      data[out] = denseFactory.newNumberVector(cur);
    } else if (TypeUtil.LABELLIST.equals(etyp[out])) {
      // Build a label list out of successive labels
      labels.clear();
      for (int k = 0; k < dimsize[out]; k++) {
        if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
          throw new AbortException("Expected word token, got: " + tokenizer.toString());
        }
        labels.add(tokenizer.sval);
        nextToken(tokenizer);
      }
      data[out] = LabelList.make(labels);
    } else if (TypeUtil.EXTERNALID.equals(etyp[out])) {
      // Single-token column: the external ID.
      if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
        throw new AbortException("Expected word token, got: " + tokenizer.toString());
      }
      data[out] = new ExternalID(tokenizer.sval);
      nextToken(tokenizer);
    } else if (TypeUtil.CLASSLABEL.equals(etyp[out])) {
      // Single-token column: the class label.
      if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
        throw new AbortException("Expected word token, got: " + tokenizer.toString());
      }
      // TODO: support other class label types.
      ClassLabel lbl = new SimpleClassLabel(tokenizer.sval);
      data[out] = lbl;
      nextToken(tokenizer);
    } else {
      throw new AbortException("Unsupported type for column " + "->" + out + ": " + ((etyp[out] != null) ? etyp[out].toString() : "null"));
    }
  }
  return data;
}
Use of de.lmu.ifi.dbs.elki.data.SimpleClassLabel in project elki by elki-project.
From the class ArffParser, method loadSparseInstance.
/**
 * Read one sparse ARFF instance, given as "index value" pairs terminated by a
 * closing brace.
 *
 * The current token at entry is skipped (presumably the opening brace - TODO
 * confirm against the caller); parsing starts with the token after it.
 *
 * @param tokenizer Tokenizer to read from
 * @param targ Maps each input column index to its output column index
 * @param dimsize Number of successive input columns merged into each output
 *        column
 * @param elkitypes Type of each output column
 * @param metaLength Number of output columns
 * @return One object per output column
 * @throws IOException on read errors of the underlying tokenizer
 * @throws AbortException on unexpected tokens, duplicate or out-of-range
 *         column indexes, missing external ID / class label values, or
 *         unsupported column types
 */
private Object[] loadSparseInstance(StreamTokenizer tokenizer, int[] targ, int[] dimsize, TypeInformation[] elkitypes, int metaLength) throws IOException {
  // Phase 1: collect all "index value" pairs of this instance.
  Int2ObjectOpenHashMap<Object> map = new Int2ObjectOpenHashMap<>();
  while (true) {
    nextToken(tokenizer);
    assert (tokenizer.ttype != StreamTokenizer.TT_EOF && tokenizer.ttype != StreamTokenizer.TT_EOL);
    if (tokenizer.ttype == '}') {
      nextToken(tokenizer);
      assert (tokenizer.ttype == StreamTokenizer.TT_EOF || tokenizer.ttype == StreamTokenizer.TT_EOL);
      break;
    }
    // sparse token
    if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
      throw new AbortException("Unexpected token type encountered: " + tokenizer.toString() + " type: " + tokenizer.ttype);
    }
    int dim = ParseUtil.parseIntBase10(tokenizer.sval);
    // Reject indexes outside of the declared columns; they would otherwise
    // fail with a bare ArrayIndexOutOfBoundsException on targ[dim] below.
    if (dim < 0 || dim >= targ.length) {
      throw new AbortException("Sparse column index out of range: " + tokenizer.toString());
    }
    if (map.containsKey(dim)) {
      throw new AbortException("Duplicate key in sparse vector: " + tokenizer.toString());
    }
    nextToken(tokenizer);
    if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
      throw new AbortException("Unexpected token type encountered: " + tokenizer.toString());
    }
    // Numeric columns are stored as Double, everything else as raw String.
    map.put(dim, //
        TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[targ[dim]]) ? (Double) ParseUtil.parseDouble(tokenizer.sval) : tokenizer.sval);
  }
  // Phase 2: assemble the output columns.
  Object[] data = new Object[metaLength];
  for (int out = 0; out < metaLength; out++) {
    // Find the first input column that belongs to this output column.
    int s = -1;
    for (int i = 0; i < targ.length; i++) {
      if (targ[i] == out) {
        s = i;
        break;
      }
    }
    assert (s >= 0);
    if (TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[out])) {
      Int2DoubleOpenHashMap f = new Int2DoubleOpenHashMap(dimsize[out]);
      for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
        Int2ObjectMap.Entry<Object> entry = iter.next();
        int i = entry.getIntKey();
        if (i < s || i >= s + dimsize[out]) {
          continue;
        }
        double v = ((Double) entry.getValue()).doubleValue();
        // Re-base the index to the start of this vector.
        f.put(i - s, v);
      }
      data[out] = new SparseDoubleVector(f, dimsize[out]);
    } else if (TypeUtil.LABELLIST.equals(elkitypes[out])) {
      // Build a label list out of successive labels.
      // Walk the input columns in ascending order instead of iterating the
      // hash map: the map has no defined iteration order, so stopping at the
      // first key beyond the range could skip labels, and labels would be
      // collected in arbitrary order.
      labels.clear();
      for (int i = s, end = s + dimsize[out]; i < end; i++) {
        Object val = map.get(i);
        if (val == null) {
          continue; // Column not set in this sparse instance.
        }
        if (labels.size() < i - s) {
          LOG.warning("Sparse consecutive labels are currently not correctly supported.");
        }
        labels.add((String) val);
      }
      data[out] = LabelList.make(labels);
    } else if (TypeUtil.EXTERNALID.equals(elkitypes[out])) {
      String val = (String) map.get(s);
      if (val == null) {
        throw new AbortException("External ID column not set in sparse instance." + tokenizer.toString());
      }
      data[out] = new ExternalID(val);
    } else if (TypeUtil.CLASSLABEL.equals(elkitypes[out])) {
      Object val = map.get(s);
      if (val == null) {
        throw new AbortException("Class label column not set in sparse instance." + tokenizer.toString());
      }
      // TODO: support other class label types.
      ClassLabel lbl = new SimpleClassLabel(String.valueOf(val));
      data[out] = lbl;
    } else {
      throw new AbortException("Unsupported type for column " + "->" + out + ": " + ((elkitypes[out] != null) ? elkitypes[out].toString() : "null"));
    }
  }
  return data;
}
Use of de.lmu.ifi.dbs.elki.data.SimpleClassLabel in project elki by elki-project.
From the class GeneratorMain, method initLabelsAndModels.
/**
 * Set up the label and model of every cluster generator.
 *
 * With a reassign pattern, generators whose name matches the pattern get no
 * label or model of their own (their array entries are left untouched). If
 * exactly one generator remains, its label and model are copied into all
 * entries, as it is the only possible reassignment target. If no generator
 * remains, a warning is logged and every generator is labeled as if no
 * pattern had been given.
 *
 * @param generators Cluster generators
 * @param labels Labels (output)
 * @param models Models (output)
 * @param reassign Pattern for clusters to reassign, may be null.
 */
private void initLabelsAndModels(ArrayList<GeneratorInterface> generators, ClassLabel[] labels, Model[] models, Pattern reassign) {
  final int total = labels.length;
  int kept = 0;
  if (reassign != null) {
    // Label only the generators that are not subject to reassignment.
    for (int idx = 0; idx < total; idx++) {
      final GeneratorInterface gen = generators.get(idx);
      if (reassign.matcher(gen.getName()).find()) {
        continue;
      }
      labels[idx] = new SimpleClassLabel(gen.getName());
      models[idx] = gen.makeModel();
      kept++;
    }
    if (kept == 0) {
      LOG.warning("All clusters matched the 'reassign' pattern. Ignoring.");
    }
    if (kept == 1) {
      // Only one possible target: locate it and use it for every entry.
      int only = 0;
      while (only < total && labels[only] == null) {
        only++;
      }
      if (only < total) {
        Arrays.fill(labels, labels[only]);
        Arrays.fill(models, models[only]);
      }
    }
    if (kept == total) {
      LOG.warning("No clusters matched the 'reassign' pattern.");
    }
  }
  if (kept != 0) {
    return;
  }
  // Default case: each generator provides its own label and model.
  for (int idx = 0; idx < total; idx++) {
    final GeneratorInterface gen = generators.get(idx);
    labels[idx] = new SimpleClassLabel(gen.getName());
    models[idx] = gen.makeModel();
  }
}
Aggregations