Search in sources :

Example 1 with SimpleClassLabel

use of de.lmu.ifi.dbs.elki.data.SimpleClassLabel in project elki by elki-project.

the class ArffParser method loadDenseInstance.

/**
 * Read one dense instance from the tokenizer and convert it into one object
 * per output column.
 *
 * The current token of the tokenizer is taken as the first value of the
 * instance; after each consumed value the tokenizer is advanced via
 * {@code nextToken}.
 *
 * @param tokenizer Token stream, positioned on the first value to read
 * @param dimsize Number of successive input values merged into each output column
 * @param etyp Type of each output column
 * @param outdim Number of output columns
 * @return Array with one entry per output column: a number vector, a label
 *         list, an external ID or a class label, depending on the column type
 * @throws IOException declared for errors while advancing the tokenizer
 * @throws AbortException if a token has an unexpected type, a numeric value
 *         cannot be parsed, or a column type is not supported
 */
private Object[] loadDenseInstance(StreamTokenizer tokenizer, int[] dimsize, TypeInformation[] etyp, int outdim) throws IOException {
    Object[] data = new Object[outdim];
    for (int out = 0; out < outdim; out++) {
        if (TypeUtil.NUMBER_VECTOR_FIELD.equals(etyp[out])) {
            // For multi-column vectors, read successive columns
            double[] cur = new double[dimsize[out]];
            for (int k = 0; k < dimsize[out]; k++) {
                if (tokenizer.ttype == '?') {
                    // A question mark denotes a missing value, stored as NaN.
                    cur[k] = Double.NaN;
                } else if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
                    try {
                        cur[k] = ParseUtil.parseDouble(tokenizer.sval);
                    } catch (NumberFormatException e) {
                        // Keep the original parse failure as cause for diagnostics.
                        throw new AbortException("Expected number value, got: " + tokenizer.sval, e);
                    }
                } else {
                    throw new AbortException("Expected word token, got: " + tokenizer.toString());
                }
                nextToken(tokenizer);
            }
            data[out] = denseFactory.newNumberVector(cur);
        } else if (TypeUtil.LABELLIST.equals(etyp[out])) {
            // Build a label list out of successive labels; the shared buffer
            // is cleared first and reused for every label column.
            labels.clear();
            for (int k = 0; k < dimsize[out]; k++) {
                if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                    throw new AbortException("Expected word token, got: " + tokenizer.toString());
                }
                labels.add(tokenizer.sval);
                nextToken(tokenizer);
            }
            data[out] = LabelList.make(labels);
        } else if (TypeUtil.EXTERNALID.equals(etyp[out])) {
            if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                throw new AbortException("Expected word token, got: " + tokenizer.toString());
            }
            data[out] = new ExternalID(tokenizer.sval);
            nextToken(tokenizer);
        } else if (TypeUtil.CLASSLABEL.equals(etyp[out])) {
            if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                throw new AbortException("Expected word token, got: " + tokenizer.toString());
            }
            // TODO: support other class label types.
            ClassLabel lbl = new SimpleClassLabel(tokenizer.sval);
            data[out] = lbl;
            nextToken(tokenizer);
        } else {
            throw new AbortException("Unsupported type for column " + "->" + out + ": " + ((etyp[out] != null) ? etyp[out].toString() : "null"));
        }
    }
    return data;
}
Also used : SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) ExternalID(de.lmu.ifi.dbs.elki.data.ExternalID) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 2 with SimpleClassLabel

use of de.lmu.ifi.dbs.elki.data.SimpleClassLabel in project elki by elki-project.

the class ArffParser method loadSparseInstance.

/**
 * Read one sparse instance, given as "index value" pairs terminated by
 * '}', and convert it into one object per output column.
 *
 * The tokenizer is advanced before the first token is inspected, so the
 * current token on entry is skipped (presumably the opening brace of the
 * sparse instance - confirm against the caller).
 *
 * @param tokenizer Token stream to read the pairs from
 * @param targ For each input column index, the output column it belongs to
 * @param dimsize Number of input columns merged into each output column
 * @param elkitypes Type of each output column
 * @param metaLength Number of output columns
 * @return Array with one entry per output column: a sparse number vector, a
 *         label list, an external ID or a class label, depending on the
 *         column type
 * @throws IOException declared for errors while advancing the tokenizer
 * @throws AbortException if a token has an unexpected type, a column index
 *         is duplicated or out of range, a required column is missing, or a
 *         column type is not supported
 */
private Object[] loadSparseInstance(StreamTokenizer tokenizer, int[] targ, int[] dimsize, TypeInformation[] elkitypes, int metaLength) throws IOException {
    Int2ObjectOpenHashMap<Object> map = new Int2ObjectOpenHashMap<>();
    while (true) {
        nextToken(tokenizer);
        assert (tokenizer.ttype != StreamTokenizer.TT_EOF && tokenizer.ttype != StreamTokenizer.TT_EOL);
        if (tokenizer.ttype == '}') {
            nextToken(tokenizer);
            assert (tokenizer.ttype == StreamTokenizer.TT_EOF || tokenizer.ttype == StreamTokenizer.TT_EOL);
            break;
        }
        // sparse token: column index first
        if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
            throw new AbortException("Unexpected token type encountered: " + tokenizer.toString() + " type: " + tokenizer.ttype);
        }
        int dim = ParseUtil.parseIntBase10(tokenizer.sval);
        // The index comes from the input file; reject it before it is used to
        // index targ, instead of failing with an array index error.
        if (dim < 0 || dim >= targ.length) {
            throw new AbortException("Column index out of range in sparse vector: " + tokenizer.toString());
        }
        if (map.containsKey(dim)) {
            throw new AbortException("Duplicate key in sparse vector: " + tokenizer.toString());
        }
        // then the value of that column
        nextToken(tokenizer);
        if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
            throw new AbortException("Unexpected token type encountered: " + tokenizer.toString());
        }
        // Numeric columns are stored as Double, everything else as String.
        map.put(dim, //
        TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[targ[dim]]) ? (Double) ParseUtil.parseDouble(tokenizer.sval) : tokenizer.sval);
    }
    Object[] data = new Object[metaLength];
    for (int out = 0; out < metaLength; out++) {
        // Find the first input column index mapped to this output column
        int s = -1;
        for (int i = 0; i < targ.length; i++) {
            if (targ[i] == out) {
                s = i;
                break;
            }
        }
        assert (s >= 0);
        if (TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[out])) {
            Int2DoubleOpenHashMap f = new Int2DoubleOpenHashMap(dimsize[out]);
            for (ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
                Int2ObjectMap.Entry<Object> entry = iter.next();
                int i = entry.getIntKey();
                // Skip entries that belong to other output columns.
                if (i < s || i >= s + dimsize[out]) {
                    continue;
                }
                double v = ((Double) entry.getValue()).doubleValue();
                // Re-base the index relative to the start of this column.
                f.put(i - s, v);
            }
            data[out] = new SparseDoubleVector(f, dimsize[out]);
        } else if (TypeUtil.LABELLIST.equals(elkitypes[out])) {
            // Build a label list out of successive labels. The hash map has no
            // defined iteration order, so the indices of this column are looked
            // up in ascending order rather than iterating the map entries.
            labels.clear();
            final int end = s + dimsize[out];
            for (int i = s; i < end; i++) {
                Object val = map.get(i);
                if (val == null) {
                    continue;
                }
                // Fewer labels collected than the offset: an earlier label was absent.
                if (labels.size() < i - s) {
                    LOG.warning("Sparse consecutive labels are currently not correctly supported.");
                }
                labels.add((String) val);
            }
            data[out] = LabelList.make(labels);
        } else if (TypeUtil.EXTERNALID.equals(elkitypes[out])) {
            String val = (String) map.get(s);
            if (val == null) {
                throw new AbortException("External ID column not set in sparse instance." + tokenizer.toString());
            }
            data[out] = new ExternalID(val);
        } else if (TypeUtil.CLASSLABEL.equals(elkitypes[out])) {
            Object val = map.get(s);
            if (val == null) {
                throw new AbortException("Class label column not set in sparse instance." + tokenizer.toString());
            }
            // TODO: support other class label types.
            ClassLabel lbl = new SimpleClassLabel(String.valueOf(val));
            data[out] = lbl;
        } else {
            throw new AbortException("Unsupported type for column " + "->" + out + ": " + ((elkitypes[out] != null) ? elkitypes[out].toString() : "null"));
        }
    }
    return data;
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) ExternalID(de.lmu.ifi.dbs.elki.data.ExternalID) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) SparseDoubleVector(de.lmu.ifi.dbs.elki.data.SparseDoubleVector) ObjectIterator(it.unimi.dsi.fastutil.objects.ObjectIterator) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 3 with SimpleClassLabel

use of de.lmu.ifi.dbs.elki.data.SimpleClassLabel in project elki by elki-project.

the class GeneratorMain method initLabelsAndModels.

/**
 * Set up the class label and model for each cluster generator.
 *
 * Without a "reassign" pattern, every cluster receives its own label and
 * model. With a pattern, only clusters whose name does not match receive
 * one; the entries of matching clusters are left untouched. If exactly one
 * cluster does not match, its label and model are copied to all positions.
 * If every cluster matches, a warning is logged and the pattern is ignored.
 *
 * @param generators Cluster generators
 * @param labels Labels (output)
 * @param models Models (output)
 * @param reassign Pattern for clusters to reassign, may be {@code null}
 */
private void initLabelsAndModels(ArrayList<GeneratorInterface> generators, ClassLabel[] labels, Model[] models, Pattern reassign) {
    final int total = labels.length;
    // Number of clusters that keep their own label and model.
    int kept = 0;
    if (reassign != null) {
        for (int idx = 0; idx < total; idx++) {
            final GeneratorInterface gen = generators.get(idx);
            if (reassign.matcher(gen.getName()).find()) {
                // Matching clusters get no label of their own here.
                continue;
            }
            labels[idx] = new SimpleClassLabel(gen.getName());
            models[idx] = gen.makeModel();
            kept++;
        }
        if (kept == 0) {
            LOG.warning("All clusters matched the 'reassign' pattern. Ignoring.");
        }
        if (kept == 1) {
            // Only one possible target: locate it and use it everywhere.
            int pos = 0;
            while (pos < total && labels[pos] == null) {
                pos++;
            }
            if (pos < total) {
                Arrays.fill(labels, labels[pos]);
                Arrays.fill(models, models[pos]);
            }
        }
        if (kept == total) {
            LOG.warning("No clusters matched the 'reassign' pattern.");
        }
    }
    if (kept != 0) {
        return;
    }
    // Default case: every cluster gets its own label and model.
    for (int idx = 0; idx < total; idx++) {
        final GeneratorInterface gen = generators.get(idx);
        labels[idx] = new SimpleClassLabel(gen.getName());
        models[idx] = gen.makeModel();
    }
}
Also used : SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel)

Aggregations

SimpleClassLabel (de.lmu.ifi.dbs.elki.data.SimpleClassLabel)3 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)2 ExternalID (de.lmu.ifi.dbs.elki.data.ExternalID)2 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)2 SparseDoubleVector (de.lmu.ifi.dbs.elki.data.SparseDoubleVector)1 Int2DoubleOpenHashMap (it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap)1 Int2ObjectMap (it.unimi.dsi.fastutil.ints.Int2ObjectMap)1 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)1 ObjectIterator (it.unimi.dsi.fastutil.objects.ObjectIterator)1