Search in sources:

Example 1 with ClassLabel

use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.

Method loadDenseInstance of class ArffParser.

/**
 * Read one dense instance from the tokenizer and convert it into one object
 * per output column.
 *
 * The current token of the tokenizer is interpreted as the first value. After
 * every consumed value, {@code nextToken(tokenizer)} is called to advance.
 *
 * @param tokenizer Tokenizer whose current token is the first value to read
 * @param dimsize Number of consecutive input values consumed by each output
 *        column (read for vector and label list columns only)
 * @param etyp Type of each output column
 * @param outdim Number of output columns
 * @return Array of length {@code outdim} with the converted column values
 * @throws IOException declared for the token reading helper
 * @throws AbortException if a token has an unexpected type, a numeric value
 *         cannot be parsed, or a column type is not supported
 */
private Object[] loadDenseInstance(StreamTokenizer tokenizer, int[] dimsize, TypeInformation[] etyp, int outdim) throws IOException {
    Object[] data = new Object[outdim];
    for (int out = 0; out < outdim; out++) {
        if (TypeUtil.NUMBER_VECTOR_FIELD.equals(etyp[out])) {
            // For multi-column vectors, read successive columns
            double[] cur = new double[dimsize[out]];
            for (int k = 0; k < dimsize[out]; k++) {
                if (tokenizer.ttype == '?') {
                    // A '?' token is stored as NaN.
                    cur[k] = Double.NaN;
                } else if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
                    try {
                        cur[k] = ParseUtil.parseDouble(tokenizer.sval);
                    } catch (NumberFormatException e) {
                        // Keep the original exception as cause, so the parser failure
                        // remains visible in the stack trace.
                        throw new AbortException("Expected number value, got: " + tokenizer.sval, e);
                    }
                } else {
                    throw new AbortException("Expected word token, got: " + tokenizer.toString());
                }
                nextToken(tokenizer);
            }
            data[out] = denseFactory.newNumberVector(cur);
        } else if (TypeUtil.LABELLIST.equals(etyp[out])) {
            // Build a label list out of successive labels.
            // "labels" is a buffer declared outside this method and reused here.
            labels.clear();
            for (int k = 0; k < dimsize[out]; k++) {
                if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                    throw new AbortException("Expected word token, got: " + tokenizer.toString());
                }
                labels.add(tokenizer.sval);
                nextToken(tokenizer);
            }
            data[out] = LabelList.make(labels);
        } else if (TypeUtil.EXTERNALID.equals(etyp[out])) {
            if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                throw new AbortException("Expected word token, got: " + tokenizer.toString());
            }
            data[out] = new ExternalID(tokenizer.sval);
            nextToken(tokenizer);
        } else if (TypeUtil.CLASSLABEL.equals(etyp[out])) {
            if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                throw new AbortException("Expected word token, got: " + tokenizer.toString());
            }
            // TODO: support other class label types.
            ClassLabel lbl = new SimpleClassLabel(tokenizer.sval);
            data[out] = lbl;
            nextToken(tokenizer);
        } else {
            throw new AbortException("Unsupported type for column " + "->" + out + ": " + ((etyp[out] != null) ? etyp[out].toString() : "null"));
        }
    }
    return data;
}
Also used : SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) ExternalID(de.lmu.ifi.dbs.elki.data.ExternalID) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 2 with ClassLabel

use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.

Method loadSparseInstance of class ArffParser.

/**
 * Read one sparse instance (a sequence of "index value" pairs terminated by
 * a closing brace) and convert it into one object per output column.
 *
 * @param tokenizer Tokenizer; the next token read is the first attribute
 *        index or the closing brace
 * @param targ For each input attribute index, the output column it belongs
 *        to
 * @param dimsize Number of input attributes covered by each output column
 * @param elkitypes Type of each output column
 * @param metaLength Number of output columns
 * @return Array of length {@code metaLength} with the converted column values
 * @throws IOException declared for the token reading helper
 * @throws AbortException on unexpected tokens, duplicate or out-of-range
 *         attribute indexes, missing external id / class label values, or
 *         unsupported column types
 */
private Object[] loadSparseInstance(StreamTokenizer tokenizer, int[] targ, int[] dimsize, TypeInformation[] elkitypes, int metaLength) throws IOException {
    // Phase 1: collect all "index value" pairs of this instance.
    Int2ObjectOpenHashMap<Object> map = new Int2ObjectOpenHashMap<>();
    while (true) {
        nextToken(tokenizer);
        assert (tokenizer.ttype != StreamTokenizer.TT_EOF && tokenizer.ttype != StreamTokenizer.TT_EOL);
        if (tokenizer.ttype == '}') {
            nextToken(tokenizer);
            assert (tokenizer.ttype == StreamTokenizer.TT_EOF || tokenizer.ttype == StreamTokenizer.TT_EOL);
            break;
        }
        // sparse token
        if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
            throw new AbortException("Unexpected token type encountered: " + tokenizer.toString() + " type: " + tokenizer.ttype);
        }
        int dim = ParseUtil.parseIntBase10(tokenizer.sval);
        // The index comes from the input file and is used to index targ below,
        // so reject values outside of the known attributes explicitly.
        if (dim < 0 || dim >= targ.length) {
            throw new AbortException("Attribute index out of range in sparse vector: " + tokenizer.toString());
        }
        if (map.containsKey(dim)) {
            throw new AbortException("Duplicate key in sparse vector: " + tokenizer.toString());
        }
        nextToken(tokenizer);
        if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
            throw new AbortException("Unexpected token type encountered: " + tokenizer.toString());
        }
        // Values of numeric columns are stored as Double, everything else as String.
        map.put(dim, //
        TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[targ[dim]]) ? (Double) ParseUtil.parseDouble(tokenizer.sval) : tokenizer.sval);
    }
    // Phase 2: assemble the output columns.
    Object[] data = new Object[metaLength];
    for (int out = 0; out < metaLength; out++) {
        // Find the first input attribute mapped to this output column
        int s = -1;
        for (int i = 0; i < targ.length; i++) {
            if (targ[i] == out) {
                s = i;
                break;
            }
        }
        assert (s >= 0);
        if (TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[out])) {
            Int2DoubleOpenHashMap f = new Int2DoubleOpenHashMap(dimsize[out]);
            for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
                Int2ObjectMap.Entry<Object> entry = iter.next();
                int i = entry.getIntKey();
                // Skip entries belonging to other output columns.
                if (i < s || i >= s + dimsize[out]) {
                    continue;
                }
                double v = ((Double) entry.getValue()).doubleValue();
                f.put(i - s, v);
            }
            data[out] = new SparseDoubleVector(f, dimsize[out]);
        } else if (TypeUtil.LABELLIST.equals(elkitypes[out])) {
            // Build a label list out of successive labels.
            // The hash map has no defined iteration order, so look up the
            // attribute indexes of this column in ascending order instead of
            // iterating (and prematurely leaving) the entry set.
            labels.clear();
            for (int i = s, end = s + dimsize[out]; i < end; i++) {
                Object val = map.get(i);
                if (val == null) {
                    continue;
                }
                if (labels.size() < i - s) {
                    LOG.warning("Sparse consecutive labels are currently not correctly supported.");
                }
                labels.add((String) val);
            }
            data[out] = LabelList.make(labels);
        } else if (TypeUtil.EXTERNALID.equals(elkitypes[out])) {
            String val = (String) map.get(s);
            if (val == null) {
                throw new AbortException("External ID column not set in sparse instance." + tokenizer.toString());
            }
            data[out] = new ExternalID(val);
        } else if (TypeUtil.CLASSLABEL.equals(elkitypes[out])) {
            Object val = map.get(s);
            if (val == null) {
                throw new AbortException("Class label column not set in sparse instance." + tokenizer.toString());
            }
            // TODO: support other class label types.
            ClassLabel lbl = new SimpleClassLabel(String.valueOf(val));
            data[out] = lbl;
        } else {
            throw new AbortException("Unsupported type for column " + "->" + out + ": " + ((elkitypes[out] != null) ? elkitypes[out].toString() : "null"));
        }
    }
    return data;
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) ExternalID(de.lmu.ifi.dbs.elki.data.ExternalID) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) SparseDoubleVector(de.lmu.ifi.dbs.elki.data.SparseDoubleVector) ObjectIterator(it.unimi.dsi.fastutil.objects.ObjectIterator) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 3 with ClassLabel

use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.

Method run of class ClassifierHoldoutEvaluationTask.

/**
 * Run the holdout evaluation.
 *
 * Loads the data, initializes the holdout procedure, and for every partition
 * builds a database from the training set, trains the classifier on it and
 * classifies each test object. Predicted versus true labels are counted in a
 * confusion matrix, which is logged as statistics at the end, together with
 * the timings of the individual steps.
 *
 * @throws IllegalStateException if a predicted or true label is null or is
 *         not contained in the label list of the holdout procedure
 */
@Override
public void run() {
    Duration ptime = LOG.newDuration("evaluation.time.load").begin();
    MultipleObjectsBundle allData = databaseConnection.loadData();
    holdout.initialize(allData);
    LOG.statistics(ptime.end());
    Duration time = LOG.newDuration("evaluation.time.total").begin();
    // NOTE(review): binarySearch below presumes this list is sorted - confirm
    // against the holdout implementation.
    ArrayList<ClassLabel> labels = holdout.getLabels();
    int[][] confusion = new int[labels.size()][labels.size()];
    for (int p = 0; p < holdout.numberOfPartitions(); p++) {
        TrainingAndTestSet partition = holdout.nextPartitioning();
        // Common prefix of the statistics keys of this fold (1-based fold number)
        final String prefix = this.getClass().getName() + ".fold-" + (p + 1);
        // Load the data set into a database structure (for indexing)
        Duration dur = LOG.newDuration(prefix + ".init.time").begin();
        Database db = new StaticArrayDatabase(new MultipleObjectsBundleDatabaseConnection(partition.getTraining()), indexFactories);
        db.initialize();
        LOG.statistics(dur.end());
        // Train the classifier
        dur = LOG.newDuration(prefix + ".train.time").begin();
        Relation<ClassLabel> lrel = db.getRelation(TypeUtil.CLASSLABEL);
        algorithm.buildClassifier(db, lrel);
        LOG.statistics(dur.end());
        // Evaluate the test set
        dur = LOG.newDuration(prefix + ".evaluation.time").begin();
        // FIXME: this part is still a big hack, unfortunately!
        MultipleObjectsBundle test = partition.getTest();
        int lcol = AbstractHoldout.findClassLabelColumn(test);
        // Uses column 0 as object column, unless that is the label column.
        int tcol = (lcol == 0) ? 1 : 0;
        for (int i = 0, size = test.dataLength(); i < size; ++i) {
            @SuppressWarnings("unchecked") O obj = (O) test.data(i, tcol);
            ClassLabel truelbl = (ClassLabel) test.data(i, lcol);
            ClassLabel predlbl = algorithm.classify(obj);
            // binarySearch yields a negative value for unknown labels; map null
            // labels to "not found" as well instead of searching for them.
            int pred = (predlbl != null) ? Collections.binarySearch(labels, predlbl) : -1;
            int real = (truelbl != null) ? Collections.binarySearch(labels, truelbl) : -1;
            if (pred < 0 || real < 0) {
                // Fail with a descriptive message rather than an array index error.
                throw new IllegalStateException("Label not found in the label list: predicted=" + predlbl + " actual=" + truelbl);
            }
            confusion[pred][real]++;
        }
        LOG.statistics(dur.end());
    }
    LOG.statistics(time.end());
    ConfusionMatrix m = new ConfusionMatrix(labels, confusion);
    LOG.statistics(m.toString());
}
Also used : ConfusionMatrix(de.lmu.ifi.dbs.elki.evaluation.classification.ConfusionMatrix) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) TrainingAndTestSet(de.lmu.ifi.dbs.elki.evaluation.classification.holdout.TrainingAndTestSet) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) AbstractDatabase(de.lmu.ifi.dbs.elki.database.AbstractDatabase) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) Database(de.lmu.ifi.dbs.elki.database.Database) MultipleObjectsBundleDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.MultipleObjectsBundleDatabaseConnection) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Example 4 with ClassLabel

use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.

Method classify of class KNNClassifier.

/**
 * Classify an object by a vote among the neighbors returned by the kNN
 * query: each neighbor contributes one vote for its label in
 * {@code labelrep}, and the label with the strictly highest count wins.
 *
 * @param instance Object to classify
 * @return Label with the most votes; {@code null} if no votes were counted
 */
@Override
public ClassLabel classify(O instance) {
    final Object2IntOpenHashMap<ClassLabel> votes = new Object2IntOpenHashMap<>();
    final KNNList neighbors = knnq.getKNNForObject(instance, k);
    // Count one vote per neighbor
    DoubleDBIDListIter it = neighbors.iter();
    while (it.valid()) {
        votes.addTo(labelrep.get(it), 1);
        it.advance();
    }
    // Pick the label with the largest count; on ties, the first one seen is kept.
    ClassLabel winner = null;
    int winnerVotes = Integer.MIN_VALUE;
    ObjectIterator<Entry<ClassLabel>> entries = votes.object2IntEntrySet().fastIterator();
    while (entries.hasNext()) {
        final Entry<ClassLabel> candidate = entries.next();
        final int occurrences = candidate.getIntValue();
        if (occurrences <= winnerVotes) {
            continue;
        }
        winnerVotes = occurrences;
        winner = candidate.getKey();
    }
    return winner;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) Entry(it.unimi.dsi.fastutil.objects.Object2IntMap.Entry) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap)

Example 5 with ClassLabel

use of de.lmu.ifi.dbs.elki.data.ClassLabel in project elki by elki-project.

Method generate of class GeneratorMain.

/**
 * Main loop to generate data set.
 *
 * Produces a bundle with three columns: the generated vectors, their class
 * labels and their models. For every generator, points are generated until
 * as many points as the generator's size have been kept; points for which
 * the assignment strategy returns a negative cluster index are discarded and
 * replaced by newly generated ones.
 *
 * @return Generated data set
 * @throws AbortException if no generators are configured, if the generator
 *         dimensionalities disagree, or if a point of a generator that is
 *         not a {@code GeneratorInterfaceDynamic} is rejected
 */
public MultipleObjectsBundle generate() {
    // we actually need some clusters.
    if (generators.isEmpty()) {
        throw new AbortException("No clusters specified.");
    }
    // Assert that cluster dimensions agree.
    final int dim = generators.get(0).getDim();
    for (GeneratorInterface c : generators) {
        if (c.getDim() != dim) {
            throw new AbortException("Cluster dimensions do not agree.");
        }
    }
    // Prepare result bundle
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    bundle.appendColumn(type, new ArrayList<>());
    bundle.appendColumn(TypeUtil.CLASSLABEL, new ArrayList<>());
    bundle.appendColumn(Model.TYPE, new ArrayList<Model>());
    // generate clusters
    ClassLabel[] labels = new ClassLabel[generators.size()];
    Model[] models = new Model[generators.size()];
    initLabelsAndModels(generators, labels, models, relabelClusters);
    // Choose the point assignment strategy from the configuration flags.
    final AssignPoint assignment;
    if (!testAgainstModel) {
        assignment = new AssignPoint();
    } else if (relabelClusters == null) {
        assignment = new TestModel();
    } else if (!relabelDistance) {
        assignment = new AssignLabelsByDensity(labels);
    } else {
        assignment = new AssignLabelsByDistance(labels);
    }
    for (int i = 0; i < labels.length; i++) {
        final GeneratorInterface curclus = generators.get(i);
        assignment.newCluster(i, curclus);
        // Only dynamic generators allow rejection / model testing:
        GeneratorInterfaceDynamic cursclus = (curclus instanceof GeneratorInterfaceDynamic) ? (GeneratorInterfaceDynamic) curclus : null;
        int kept = 0;
        while (kept < curclus.getSize()) {
            // generate the "missing" number of points
            List<double[]> newp = curclus.generate(curclus.getSize() - kept);
            for (double[] p : newp) {
                int bestc = assignment.getAssignment(i, p);
                if (bestc < 0) {
                    // cursclus is null for non-dynamic generators; report this
                    // explicitly instead of failing with a NullPointerException.
                    if (cursclus == null) {
                        throw new AbortException("Point was rejected, but the generator does not support rejection / model testing.");
                    }
                    cursclus.incrementDiscarded();
                    continue;
                }
                bundle.appendSimple(DoubleVector.wrap(p), labels[bestc], models[bestc]);
                ++kept;
            }
        }
    }
    return bundle;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) Model(de.lmu.ifi.dbs.elki.data.model.Model) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel): 12; AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 5; SimpleClassLabel (de.lmu.ifi.dbs.elki.data.SimpleClassLabel): 4; MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 4; ExternalID (de.lmu.ifi.dbs.elki.data.ExternalID): 2; Model (de.lmu.ifi.dbs.elki.data.model.Model): 2; VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation): 2; IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList): 2; IntList (it.unimi.dsi.fastutil.ints.IntList): 2; Entry (it.unimi.dsi.fastutil.objects.Object2IntMap.Entry): 2; Object2IntOpenHashMap (it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap): 2; ArrayList (java.util.ArrayList): 2; Cluster (de.lmu.ifi.dbs.elki.data.Cluster): 1; Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 1; DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 1; LabelList (de.lmu.ifi.dbs.elki.data.LabelList): 1; NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 1; SparseDoubleVector (de.lmu.ifi.dbs.elki.data.SparseDoubleVector): 1; ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel): 1; NoSupportedDataTypeException (de.lmu.ifi.dbs.elki.data.type.NoSupportedDataTypeException): 1