Search in sources:

Example 11 with Object2IntOpenHashMap

use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project elki by elki-project.

The class PriorProbabilityClassifier, method buildClassifier.

/**
 * Estimates the prior probability of every class label.
 * <p>
 * Each label occurring in {@code labelrep} is counted once per object. The
 * relative frequencies are stored in {@code distribution}, the matching
 * labels (same index order) in {@code labels}, and the most frequent label
 * in {@code prediction}. On equal counts the label visited first in the
 * map's iteration order is kept, because only a strictly larger count
 * replaces the current best.
 *
 * @param database database (not used by this method)
 * @param labelrep relation providing the class label of each object
 */
@Override
public void buildClassifier(Database database, Relation<? extends ClassLabel> labelrep) {
    // Tally how many objects carry each label.
    final Object2IntOpenHashMap<ClassLabel> occurrences = new Object2IntOpenHashMap<>();
    DBIDIter ids = labelrep.iterDBIDs();
    while (ids.valid()) {
        occurrences.addTo(labelrep.get(ids), 1);
        ids.advance();
    }

    final double total = labelrep.size();
    final int numClasses = occurrences.size();
    distribution = new double[numClasses];
    labels = new ArrayList<>(numClasses);

    // Convert counts to relative frequencies and track the majority label.
    int best = Integer.MIN_VALUE;
    int pos = 0;
    final ObjectIterator<Entry<ClassLabel>> entries = occurrences.object2IntEntrySet().fastIterator();
    while (entries.hasNext()) {
        final Entry<ClassLabel> current = entries.next();
        final ClassLabel label = current.getKey();
        final int freq = current.getIntValue();
        distribution[pos] = freq / total;
        labels.add(label);
        if (freq > best) {
            best = freq;
            prediction = label;
        }
        pos++;
    }
}
Also used : Entry(it.unimi.dsi.fastutil.objects.Object2IntMap.Entry) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 12 with Object2IntOpenHashMap

use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project elki by elki-project.

The class LabelJoinDatabaseConnection, method loadData.

/**
 * Loads every configured source and joins them on their label columns.
 * <p>
 * The bundle of the first source is the join target: each of its labels is
 * mapped to its row index. For every further source, all non-label columns
 * are appended to the target (pre-filled with {@code null}) and each row is
 * copied into the target row that carries a matching label. Rows without a
 * label, duplicate labels, unmatched labels, and rows that still contain a
 * {@code null} after joining are reported as warnings.
 *
 * @return the first source's bundle, extended by the joined columns
 * @throws AbortException if a source has no label column
 */
@Override
public MultipleObjectsBundle loadData() {
    final int numSources = sources.size();
    final List<MultipleObjectsBundle> loaded = new ArrayList<>(numSources);
    for (DatabaseConnection source : sources) {
        loaded.add(source.loadData());
    }
    final MultipleObjectsBundle target = loaded.get(0);

    // Label -> row index in the target bundle; -1 marks "unknown label".
    final Object2IntOpenHashMap<String> rowByLabel = new Object2IntOpenHashMap<>(target.dataLength());
    rowByLabel.defaultReturnValue(-1);

    // Index the labels of the first bundle.
    final int targetLabelCol = FilterUtil.findLabelColumn(target);
    if (targetLabelCol == -1) {
        throw new AbortException("No label column found in first source, cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
    }
    for (int r = 0; r < target.dataLength(); r++) {
        final Object key = target.data(r, targetLabelCol);
        if (key == null) {
            LOG.warning("Object without label encountered.");
        } else if (key instanceof LabelList) {
            // Every label of a label list points to this row.
            final LabelList list = (LabelList) key;
            for (int k = 0; k < list.size(); k++) {
                final String item = list.get(k);
                final int prev = rowByLabel.put(item, r);
                if (prev != -1) {
                    LOG.warning("Duplicate label encountered: " + item + " in rows " + prev + " and " + r);
                }
            }
        } else {
            // Covers plain strings as well: String.toString() yields the string itself.
            final String single = key.toString();
            final int prev = rowByLabel.put(single, r);
            if (prev != -1) {
                LOG.warning("Duplicate label encountered: " + single + " in rows " + prev + " and " + r);
            }
        }
    }

    // Join the remaining sources onto the target.
    for (int s = 1; s < numSources; s++) {
        final MultipleObjectsBundle src = loaded.get(s);
        final int srcLabelCol = FilterUtil.findLabelColumn(src);
        if (srcLabelCol == -1) {
            throw new AbortException("No label column found in source " + (s + 1) + ", cannot join (do you want to use " + ExternalIDJoinDatabaseConnection.class.getSimpleName() + " instead?)");
        }

        // One destination column per source column; the label column gets a null placeholder.
        final List<ArrayList<Object>> destinations = new ArrayList<>(src.metaLength());
        for (int m = 0; m < src.metaLength(); m++) {
            if (m == srcLabelCol) {
                destinations.add(null);
            } else {
                final ArrayList<Object> blank = new ArrayList<>(target.dataLength());
                // Pre-fill so that rows can be set by index later.
                for (int k = 0; k < target.dataLength(); k++) {
                    blank.add(null);
                }
                target.appendColumn(src.meta(m), blank);
                destinations.add(blank);
            }
        }

        // Copy each source row into the target row with a matching label.
        for (int r = 0; r < src.dataLength(); r++) {
            final Object key = src.data(r, srcLabelCol);
            if (key == null) {
                LOG.warning("Object without label encountered.");
                continue;
            }
            int dest = -1;
            if (key instanceof LabelList) {
                // Use the first label of the list that is known.
                final LabelList list = (LabelList) key;
                for (int k = 0; dest < 0 && k < list.size(); k++) {
                    dest = rowByLabel.getInt(list.get(k));
                }
            } else {
                dest = rowByLabel.getInt(key.toString());
            }
            if (dest < 0) {
                LOG.warning("Label not found for join: " + key + " in row " + r);
                continue;
            }
            for (int m = 0; m < src.metaLength(); m++) {
                if (m != srcLabelCol) {
                    final List<Object> column = destinations.get(m);
                    assert (column != null);
                    column.set(dest, src.data(r, m));
                }
            }
        }
    }

    // Report every row that still contains a null value (once per row).
    for (int r = 0; r < target.dataLength(); r++) {
        for (int c = 0; c < target.metaLength(); c++) {
            if (target.data(r, c) != null) {
                continue;
            }
            final StringBuilder dump = new StringBuilder();
            for (int c2 = 0; c2 < target.metaLength(); c2++) {
                if (dump.length() > 0) {
                    dump.append(", ");
                }
                // append(Object) renders a null reference as "null".
                dump.append(target.data(r, c2));
            }
            LOG.warning("null value in joined data, row " + r + " column " + c + FormatUtil.NEWLINE + "[" + dump.toString() + "]");
            break;
        }
    }
    return target;
}
Also used : LabelList(de.lmu.ifi.dbs.elki.data.LabelList) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 13 with Object2IntOpenHashMap

use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project druid by druid-io.

The class DruidCoordinator, method computeNumsUnavailableUsedSegmentsPerDataSource.

/**
 * Counts, per data source, the used segments that have no loaded replicant.
 * <p>
 * Every data source that owns at least one used segment gets an entry, even
 * if all of its segments are loaded (its count is then 0).
 *
 * @return map from data source name to number of unavailable used segments;
 *         an empty map if {@code segmentReplicantLookup} is {@code null}
 */
public Object2IntMap<String> computeNumsUnavailableUsedSegmentsPerDataSource() {
    if (segmentReplicantLookup == null) {
        return Object2IntMaps.emptyMap();
    }

    final Object2IntOpenHashMap<String> unavailablePerDataSource = new Object2IntOpenHashMap<>();
    for (DataSegment usedSegment : segmentsMetadataManager.iterateAllUsedSegments()) {
        final boolean unavailable = segmentReplicantLookup.getLoadedReplicants(usedSegment.getId()) == 0;
        // Adding 0 still registers the data source in the result.
        unavailablePerDataSource.addTo(usedSegment.getDataSource(), unavailable ? 1 : 0);
    }
    return unavailablePerDataSource;
}
Also used : Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) DataSegment(org.apache.druid.timeline.DataSegment)

Aggregations

Object2IntOpenHashMap (it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) 13; AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) 3; ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel) 2; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 2; MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) 2; Double2IntOpenHashMap (it.unimi.dsi.fastutil.doubles.Double2IntOpenHashMap) 2; Float2IntOpenHashMap (it.unimi.dsi.fastutil.floats.Float2IntOpenHashMap) 2; Int2IntOpenHashMap (it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) 2; Long2IntOpenHashMap (it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap) 2; Entry (it.unimi.dsi.fastutil.objects.Object2IntMap.Entry) 2; ArrayList (java.util.ArrayList) 2; Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix) 2; RealMatrix (org.apache.commons.math3.linear.RealMatrix) 2; JsonWriter (com.google.gson.stream.JsonWriter) 1; FixedByteSingleValueMultiColWriter (com.linkedin.pinot.core.io.writer.impl.FixedByteSingleValueMultiColWriter) 1; ExternalID (de.lmu.ifi.dbs.elki.data.ExternalID) 1; LabelList (de.lmu.ifi.dbs.elki.data.LabelList) 1; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs) 1; DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) 1; KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList) 1