Use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.
The class ArffParser, method loadDenseInstance:
private Object[] loadDenseInstance(StreamTokenizer tokenizer, int[] dimsize, TypeInformation[] etyp, int outdim) throws IOException {
  Object[] data = new Object[outdim];
  for (int out = 0; out < outdim; out++) {
    if (TypeUtil.NUMBER_VECTOR_FIELD.equals(etyp[out])) {
      // For multi-column vectors, read successive columns
      double[] cur = new double[dimsize[out]];
      for (int k = 0; k < dimsize[out]; k++) {
        if (tokenizer.ttype == '?') {
          cur[k] = Double.NaN;
        } else if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
          try {
            cur[k] = ParseUtil.parseDouble(tokenizer.sval);
          } catch (NumberFormatException e) {
            throw new AbortException("Expected number value, got: " + tokenizer.sval);
          }
        } else {
          throw new AbortException("Expected word token, got: " + tokenizer.toString());
        }
        nextToken(tokenizer);
      }
      data[out] = denseFactory.newNumberVector(cur);
    } else if (TypeUtil.LABELLIST.equals(etyp[out])) {
      // Build a label list out of successive labels
      labels.clear();
      for (int k = 0; k < dimsize[out]; k++) {
        if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
          throw new AbortException("Expected word token, got: " + tokenizer.toString());
        }
        labels.add(tokenizer.sval);
        nextToken(tokenizer);
      }
      data[out] = LabelList.make(labels);
    } else if (TypeUtil.EXTERNALID.equals(etyp[out])) {
      if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
        throw new AbortException("Expected word token, got: " + tokenizer.toString());
      }
      data[out] = new ExternalID(tokenizer.sval);
      nextToken(tokenizer);
    } else if (TypeUtil.CLASSLABEL.equals(etyp[out])) {
      if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
        throw new AbortException("Expected word token, got: " + tokenizer.toString());
      }
      // TODO: support other class label types.
      ClassLabel lbl = new SimpleClassLabel(tokenizer.sval);
      data[out] = lbl;
      nextToken(tokenizer);
    } else {
      throw new AbortException("Unsupported type for column " + "->" + out + ": " + ((etyp[out] != null) ? etyp[out].toString() : "null"));
    }
  }
  return data;
}
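The TODO above points out that only SimpleClassLabel is produced for class label columns. A minimal sketch of how that construction could be isolated behind a helper so another ClassLabel implementation can be substituted later; the method makeClassLabel is hypothetical and not part of ELKI, only the SimpleClassLabel(String) constructor is taken from the method above.

// Hypothetical helper, not ELKI API: one place to swap in a different ClassLabel type.
private ClassLabel makeClassLabel(String token) {
  return new SimpleClassLabel(token); // same constructor as used in loadDenseInstance
}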
Use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.
The class ArffParser, method loadSparseInstance:
private Object[] loadSparseInstance(StreamTokenizer tokenizer, int[] targ, int[] dimsize, TypeInformation[] elkitypes, int metaLength) throws IOException {
  Int2ObjectOpenHashMap<Object> map = new Int2ObjectOpenHashMap<>();
  while (true) {
    nextToken(tokenizer);
    assert (tokenizer.ttype != StreamTokenizer.TT_EOF && tokenizer.ttype != StreamTokenizer.TT_EOL);
    if (tokenizer.ttype == '}') {
      nextToken(tokenizer);
      assert (tokenizer.ttype == StreamTokenizer.TT_EOF || tokenizer.ttype == StreamTokenizer.TT_EOL);
      break;
    } else {
      // sparse token
      if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
        throw new AbortException("Unexpected token type encountered: " + tokenizer.toString() + " type: " + tokenizer.ttype);
      }
      int dim = ParseUtil.parseIntBase10(tokenizer.sval);
      if (map.containsKey(dim)) {
        throw new AbortException("Duplicate key in sparse vector: " + tokenizer.toString());
      }
      nextToken(tokenizer);
      if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
        map.put(dim, //
            TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[targ[dim]]) ? (Double) ParseUtil.parseDouble(tokenizer.sval) : tokenizer.sval);
      } else {
        throw new AbortException("Unexpected token type encountered: " + tokenizer.toString());
      }
    }
  }
  Object[] data = new Object[metaLength];
  for (int out = 0; out < metaLength; out++) {
    // Find the first index
    int s = -1;
    for (int i = 0; i < targ.length; i++) {
      if (targ[i] == out && s < 0) {
        s = i;
        break;
      }
    }
    assert (s >= 0);
    if (TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[out])) {
      Int2DoubleOpenHashMap f = new Int2DoubleOpenHashMap(dimsize[out]);
      for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext();) {
        Int2ObjectMap.Entry<Object> entry = iter.next();
        int i = entry.getIntKey();
        if (i < s || i >= s + dimsize[out]) {
          continue;
        }
        double v = ((Double) entry.getValue()).doubleValue();
        f.put(i - s, v);
      }
      data[out] = new SparseDoubleVector(f, dimsize[out]);
    } else if (TypeUtil.LABELLIST.equals(elkitypes[out])) {
      // Build a label list out of successive labels
      labels.clear();
      for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext();) {
        Int2ObjectMap.Entry<Object> entry = iter.next();
        int i = entry.getIntKey();
        if (i < s) {
          continue;
        }
        if (i >= s + dimsize[out]) {
          break;
        }
        if (labels.size() < i - s) {
          LOG.warning("Sparse consecutive labels are currently not correctly supported.");
        }
        labels.add((String) entry.getValue());
      }
      data[out] = LabelList.make(labels);
    } else if (TypeUtil.EXTERNALID.equals(elkitypes[out])) {
      String val = (String) map.get(s);
      if (val == null) {
        throw new AbortException("External ID column not set in sparse instance." + tokenizer.toString());
      }
      data[out] = new ExternalID(val);
    } else if (TypeUtil.CLASSLABEL.equals(elkitypes[out])) {
      Object val = map.get(s);
      if (val == null) {
        throw new AbortException("Class label column not set in sparse instance." + tokenizer.toString());
      }
      // TODO: support other class label types.
      ClassLabel lbl = new SimpleClassLabel(String.valueOf(val));
      data[out] = lbl;
    } else {
      throw new AbortException("Unsupported type for column " + "->" + out + ": " + ((elkitypes[out] != null) ? elkitypes[out].toString() : "null"));
    }
  }
  return data;
}
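The non-obvious step here is the index remapping: a sparse entry is stored under its global ARFF column index i, and the loader shifts it by the column group's start offset s before building the vector. A small illustrative sketch with made-up values and s = 0, using the same Int2DoubleOpenHashMap and SparseDoubleVector types as above (fastutil and de.lmu.ifi.dbs.elki.data imports assumed):

// Toy sparse instance {1 2.0, 3 4.5} for a 5-dimensional numeric column starting at s = 0.
Int2DoubleOpenHashMap f = new Int2DoubleOpenHashMap(5);
f.put(1 - 0, 2.0); // global index 1 -> local index 1
f.put(3 - 0, 4.5); // global index 3 -> local index 3
SparseDoubleVector vec = new SparseDoubleVector(f, 5); // unset dimensions remain 0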
Use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.
The class ClassifierHoldoutEvaluationTask, method run:
@Override
public void run() {
  Duration ptime = LOG.newDuration("evaluation.time.load").begin();
  MultipleObjectsBundle allData = databaseConnection.loadData();
  holdout.initialize(allData);
  LOG.statistics(ptime.end());
  Duration time = LOG.newDuration("evaluation.time.total").begin();
  ArrayList<ClassLabel> labels = holdout.getLabels();
  int[][] confusion = new int[labels.size()][labels.size()];
  for (int p = 0; p < holdout.numberOfPartitions(); p++) {
    TrainingAndTestSet partition = holdout.nextPartitioning();
    // Load the data set into a database structure (for indexing)
    Duration dur = LOG.newDuration(this.getClass().getName() + ".fold-" + (p + 1) + ".init.time").begin();
    Database db = new StaticArrayDatabase(new MultipleObjectsBundleDatabaseConnection(partition.getTraining()), indexFactories);
    db.initialize();
    LOG.statistics(dur.end());
    // Train the classifier
    dur = LOG.newDuration(this.getClass().getName() + ".fold-" + (p + 1) + ".train.time").begin();
    Relation<ClassLabel> lrel = db.getRelation(TypeUtil.CLASSLABEL);
    algorithm.buildClassifier(db, lrel);
    LOG.statistics(dur.end());
    // Evaluate the test set
    dur = LOG.newDuration(this.getClass().getName() + ".fold-" + (p + 1) + ".evaluation.time").begin();
    // FIXME: this part is still a big hack, unfortunately!
    MultipleObjectsBundle test = partition.getTest();
    int lcol = AbstractHoldout.findClassLabelColumn(test);
    int tcol = (lcol == 0) ? 1 : 0;
    for (int i = 0, l = test.dataLength(); i < l; ++i) {
      @SuppressWarnings("unchecked")
      O obj = (O) test.data(i, tcol);
      ClassLabel truelbl = (ClassLabel) test.data(i, lcol);
      ClassLabel predlbl = algorithm.classify(obj);
      int pred = Collections.binarySearch(labels, predlbl);
      int real = Collections.binarySearch(labels, truelbl);
      confusion[pred][real]++;
    }
    LOG.statistics(dur.end());
  }
  LOG.statistics(time.end());
  ConfusionMatrix m = new ConfusionMatrix(labels, confusion);
  LOG.statistics(m.toString());
}
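The confusion matrix is indexed via Collections.binarySearch against the label list returned by holdout.getLabels(), so that list must be sorted; rows are the predicted class and columns the true class, per confusion[pred][real]++. A minimal self-contained sketch of that bookkeeping using plain Strings instead of ClassLabel (label names are made up):

// Sketch only, not ELKI code; requires java.util.Arrays, java.util.Collections, java.util.List.
List<String> labels = Arrays.asList("Iris-setosa", "Iris-versicolor", "Iris-virginica"); // sorted
int[][] confusion = new int[labels.size()][labels.size()];
int pred = Collections.binarySearch(labels, "Iris-versicolor"); // predicted class
int real = Collections.binarySearch(labels, "Iris-virginica");  // ground truth
confusion[pred][real]++; // row = predicted, column = true class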
Use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.
The class KNNClassifier, method classify:
@Override
public ClassLabel classify(O instance) {
  Object2IntOpenHashMap<ClassLabel> count = new Object2IntOpenHashMap<>();
  KNNList query = knnq.getKNNForObject(instance, k);
  for (DoubleDBIDListIter neighbor = query.iter(); neighbor.valid(); neighbor.advance()) {
    count.addTo(labelrep.get(neighbor), 1);
  }
  int bestoccur = Integer.MIN_VALUE;
  ClassLabel bestl = null;
  for (ObjectIterator<Entry<ClassLabel>> iter = count.object2IntEntrySet().fastIterator(); iter.hasNext();) {
    Entry<ClassLabel> entry = iter.next();
    if (entry.getIntValue() > bestoccur) {
      bestoccur = entry.getIntValue();
      bestl = entry.getKey();
    }
  }
  return bestl;
}
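classify() is a plain k-nearest-neighbor majority vote: count each neighbor's class label and return the most frequent one, breaking ties in favor of whichever label is encountered first during iteration. A minimal sketch of the same vote using only java.util and invented labels, so the counting logic is visible without the fastutil types:

// Sketch, not the ELKI implementation; requires java.util.*.
Map<String, Integer> count = new HashMap<>();
for (String lbl : Arrays.asList("A", "B", "A", "C", "A")) {
  count.merge(lbl, 1, Integer::sum); // tally votes per label
}
String best = null;
int bestoccur = Integer.MIN_VALUE;
for (Map.Entry<String, Integer> e : count.entrySet()) {
  if (e.getValue() > bestoccur) { // first maximum wins, as in classify() above
    bestoccur = e.getValue();
    best = e.getKey();
  }
}
// best is "A" with 3 votes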
Use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.
The class GeneratorMain, method generate:
/**
 * Main loop to generate data set.
 *
 * @return Generated data set
 */
public MultipleObjectsBundle generate() {
  // we actually need some clusters.
  if (generators.isEmpty()) {
    throw new AbortException("No clusters specified.");
  }
  // Assert that cluster dimensions agree.
  final int dim = generators.get(0).getDim();
  for (GeneratorInterface c : generators) {
    if (c.getDim() != dim) {
      throw new AbortException("Cluster dimensions do not agree.");
    }
  }
  // Prepare result bundle
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  bundle.appendColumn(type, new ArrayList<>());
  bundle.appendColumn(TypeUtil.CLASSLABEL, new ArrayList<>());
  bundle.appendColumn(Model.TYPE, new ArrayList<Model>());
  // generate clusters
  ClassLabel[] labels = new ClassLabel[generators.size()];
  Model[] models = new Model[generators.size()];
  initLabelsAndModels(generators, labels, models, relabelClusters);
  final AssignPoint assignment;
  if (!testAgainstModel) {
    assignment = new AssignPoint();
  } else if (relabelClusters == null) {
    assignment = new TestModel();
  } else if (!relabelDistance) {
    assignment = new AssignLabelsByDensity(labels);
  } else {
    assignment = new AssignLabelsByDistance(labels);
  }
  for (int i = 0; i < labels.length; i++) {
    final GeneratorInterface curclus = generators.get(i);
    assignment.newCluster(i, curclus);
    // Only dynamic generators allow rejection / model testing:
    GeneratorInterfaceDynamic cursclus = (curclus instanceof GeneratorInterfaceDynamic) ? (GeneratorInterfaceDynamic) curclus : null;
    int kept = 0;
    while (kept < curclus.getSize()) {
      // generate the "missing" number of points
      List<double[]> newp = curclus.generate(curclus.getSize() - kept);
      for (double[] p : newp) {
        int bestc = assignment.getAssignment(i, p);
        if (bestc < 0) {
          cursclus.incrementDiscarded();
          continue;
        }
        bundle.appendSimple(DoubleVector.wrap(p), labels[bestc], models[bestc]);
        ++kept;
      }
    }
  }
  return bundle;
}
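The bundle is built with three parallel columns in the order vector, class label, model, and rows are appended with appendSimple. A minimal sketch of how a caller might read the result back, reusing the dataLength()/data(row, column) accessors seen in ClassifierHoldoutEvaluationTask.run() above; the column indices are an assumption derived from the appendColumn order:

MultipleObjectsBundle bundle = generate();
for (int i = 0; i < bundle.dataLength(); i++) {
  DoubleVector vec = (DoubleVector) bundle.data(i, 0); // column 0: the generated vector
  ClassLabel lbl = (ClassLabel) bundle.data(i, 1);     // column 1: the cluster's ClassLabel
  // column 2 holds the generating Model
}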