Search in sources:

Example 1 with Object2IntOpenHashMap

use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project gatk by broadinstitute.

the class ReadCountCollection method arrangeTargets.

/**
 * Creates a new collection whose targets follow the order given by {@code targetsInOrder}.
 * <p>
 * For every requested target the corresponding count row of this collection is copied into a
 * freshly allocated matrix; the requested list itself is defensively copied.
 * </p>
 *
 * @param targetsInOrder the targets to include, in the desired order.
 * @return a new collection.
 * @throws IllegalArgumentException if any of the following is true:
 * <ul>
 *     <li>{@code targetsInOrder} is {@code null},</li>
 *     <li>is empty,</li>
 *     <li>it contains {@code null},</li>
 *     <li>contains any target not present in this collection.</li>
 * </ul>
 */
public ReadCountCollection arrangeTargets(final List<Target> targetsInOrder) {
    Utils.nonNull(targetsInOrder);
    Utils.nonEmpty(targetsInOrder, "the input targets list cannot be empty");
    final RealMatrix counts = new Array2DRowRealMatrix(targetsInOrder.size(), columnNames.size());
    final Object2IntMap<Target> targetToIndex = new Object2IntOpenHashMap<>(targets.size());
    // -1 can never be a valid row index, so it doubles as the "not present" marker and lets
    // us resolve each target with a single hash lookup.
    targetToIndex.defaultReturnValue(-1);
    for (int i = 0; i < targets.size(); i++) {
        targetToIndex.put(targets.get(i), i);
    }
    for (int i = 0; i < targetsInOrder.size(); i++) {
        final Target target = targetsInOrder.get(i);
        // Reject null elements explicitly: the "not present" message below dereferences the
        // target and would otherwise fail with a NullPointerException instead of the
        // documented IllegalArgumentException.
        Utils.validateArg(target != null, () -> "the input targets list cannot contain null");
        final int index = targetToIndex.getInt(target);
        Utils.validateArg(index >= 0, () -> String.format("target '%s' is not present in the collection", target.getName()));
        counts.setRow(i, this.counts.getRow(index));
    }
    return new ReadCountCollection(new ArrayList<>(targetsInOrder), columnNames, counts, false);
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap)

Example 2 with Object2IntOpenHashMap

use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project elki by elki-project.

the class KNNClassifier method classify.

/**
 * Classifies an instance by majority vote among the labels of its k nearest neighbors.
 *
 * @param instance the object to classify
 * @return the label with the highest vote count (the first one encountered wins ties),
 *         or {@code null} if no neighbor was returned
 */
@Override
public ClassLabel classify(O instance) {
    final Object2IntOpenHashMap<ClassLabel> votes = new Object2IntOpenHashMap<>();
    final KNNList neighbors = knnq.getKNNForObject(instance, k);
    // Tally one vote per neighbor for that neighbor's label.
    DoubleDBIDListIter cursor = neighbors.iter();
    while (cursor.valid()) {
        votes.addTo(labelrep.get(cursor), 1);
        cursor.advance();
    }
    // Pick the label with the strictly largest tally.
    ClassLabel winner = null;
    int winnerVotes = Integer.MIN_VALUE;
    final ObjectIterator<Entry<ClassLabel>> tally = votes.object2IntEntrySet().fastIterator();
    while (tally.hasNext()) {
        final Entry<ClassLabel> candidate = tally.next();
        final int candidateVotes = candidate.getIntValue();
        if (candidateVotes > winnerVotes) {
            winnerVotes = candidateVotes;
            winner = candidate.getKey();
        }
    }
    return winner;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) Entry(it.unimi.dsi.fastutil.objects.Object2IntMap.Entry) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap)

Example 3 with Object2IntOpenHashMap

use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project elki by elki-project.

the class ExternalIDJoinDatabaseConnection method loadData.

/**
 * Loads every source and joins the additional sources onto the first one, matching rows by
 * their external ID column.
 * <p>
 * The first bundle is used as the result: each non-ID column of every further source is
 * appended to it, pre-filled with {@code null} and then populated for the rows whose external
 * ID also occurs in the first bundle. Rows that still contain a {@code null} value afterwards
 * are reported with a warning.
 * </p>
 *
 * @return the first bundle, extended by the joined columns
 * @throws AbortException if any source has no external ID column
 */
@Override
public MultipleObjectsBundle loadData() {
    List<MultipleObjectsBundle> bundles = new ArrayList<>(sources.size());
    for (DatabaseConnection dbc : sources) {
        bundles.add(dbc.loadData());
    }
    MultipleObjectsBundle first = bundles.get(0);
    // Maps external ID -> row index in the first bundle; -1 marks "unknown ID".
    Object2IntOpenHashMap<ExternalID> labelmap = new Object2IntOpenHashMap<>(first.dataLength());
    labelmap.defaultReturnValue(-1);
    // Process first bundle
    {
        final int lblcol = findExternalIDColumn(first);
        if (lblcol == -1) {
            throw new AbortException("No external ID column found in primary source.");
        }
        for (int i = 0; i < first.dataLength(); i++) {
            ExternalID data = (ExternalID) first.data(i, lblcol);
            if (data == null) {
                LOG.debug("Object without ID encountered.");
                continue;
            }
            // put() returns the previous row for this ID, or -1 if it was new.
            int old = labelmap.put(data, i);
            if (old != -1) {
                LOG.debug("Duplicate id encountered: " + data + " in rows " + old + " and " + i);
            }
        }
    }
    // Process additional columns
    for (int c = 1; c < sources.size(); c++) {
        MultipleObjectsBundle cur = bundles.get(c);
        final int lblcol = findExternalIDColumn(cur);
        if (lblcol == -1) {
            StringBuilder buf = new StringBuilder();
            for (int i = 0; i < cur.metaLength(); i++) {
                if (buf.length() > 0) {
                    buf.append(',');
                }
                buf.append(cur.meta(i));
            }
            throw new AbortException("No external ID column found in source " + (c + 1) + " to join with. Got: " + buf.toString());
        }
        // Destination columns, indexed like the columns of cur (null at the ID column).
        List<ArrayList<Object>> dcol = new ArrayList<>(cur.metaLength());
        for (int i = 0; i < cur.metaLength(); i++) {
            // Skip the label columns
            if (i == lblcol) {
                dcol.add(null);
                continue;
            }
            ArrayList<Object> newcol = new ArrayList<>(first.dataLength());
            // Pre-fill with nulls.
            for (int j = 0; j < first.dataLength(); j++) {
                newcol.add(null);
            }
            first.appendColumn(cur.meta(i), newcol);
            dcol.add(newcol);
        }
        // Copy each row of cur into the row of the first bundle that has the same ID.
        for (int i = 0; i < cur.dataLength(); i++) {
            ExternalID data = (ExternalID) cur.data(i, lblcol);
            if (data == null) {
                LOG.warning("Object without label encountered.");
                continue;
            }
            int row = labelmap.getInt(data);
            if (row == -1) {
                LOG.debug("ID not found for join: " + data + " in row " + i);
                continue;
            }
            for (int d = 0; d < cur.metaLength(); d++) {
                if (d == lblcol) {
                    continue;
                }
                List<Object> col = dcol.get(d);
                assert (col != null);
                col.set(row, cur.data(i, d));
            }
        }
    }
    warnAboutNullValues(first);
    return first;
}

/**
 * Finds the first column of a bundle whose type is accepted by {@code TypeUtil.EXTERNALID}.
 *
 * @param bundle the bundle to scan
 * @return the index of the first matching column, or {@code -1} if there is none
 */
private int findExternalIDColumn(MultipleObjectsBundle bundle) {
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (TypeUtil.EXTERNALID.isAssignableFromType(bundle.meta(i))) {
            return i;
        }
    }
    return -1;
}

/**
 * Logs one warning per row that contains at least one {@code null} value, naming the first
 * such column and dumping the complete row.
 *
 * @param bundle the joined bundle to inspect
 */
private void warnAboutNullValues(MultipleObjectsBundle bundle) {
    for (int i = 0; i < bundle.dataLength(); i++) {
        for (int d = 0; d < bundle.metaLength(); d++) {
            if (bundle.data(i, d) == null) {
                StringBuilder buf = new StringBuilder();
                for (int d2 = 0; d2 < bundle.metaLength(); d2++) {
                    if (buf.length() > 0) {
                        buf.append(", ");
                    }
                    if (bundle.data(i, d2) == null) {
                        buf.append("null");
                    } else {
                        buf.append(bundle.data(i, d2));
                    }
                }
                LOG.warning("null value in joined data, row " + i + " column " + d + FormatUtil.NEWLINE + "[" + buf.toString() + "]");
                // Only report the first null column of each row.
                break;
            }
        }
    }
}
Also used : ExternalID(de.lmu.ifi.dbs.elki.data.ExternalID) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 4 with Object2IntOpenHashMap

use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project elki by elki-project.

the class EvaluateRetrievalPerformance method run.

/**
 * Evaluates retrieval performance over a random sample of query objects.
 * <p>
 * For each sampled object that has at least one match according to {@code findMatches}, the
 * average precision, the ROC value and the per-k kNN values are accumulated; all accumulated
 * values are divided by the number of such objects before being logged and returned.
 * </p>
 *
 * @param database Database to run on (used to obtain the distance query)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated samples, the averaged MAP, the averaged
 *         ROC value and the averaged per-k kNN values.
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    // Reusable buffers: positive neighbors, distances, and label counters.
    ModifiableDBIDs posn = DBIDUtil.newHashSet();
    ModifiableDoubleDBIDList nlist = DBIDUtil.newDistanceDBIDList(relation.size());
    Object2IntOpenHashMap<Object> counters = new Object2IntOpenHashMap<>();
    // Running sums, normalized by the sample count at the end.
    double map = 0.;
    double mroc = 0.;
    double[] knnperf = new double[maxk];
    int samples = 0;
    FiniteProgress objloop = null;
    if (LOG.isVerbose()) {
        objloop = new FiniteProgress("Processing query objects", ids.size(), LOG);
    }
    DBIDIter iter = ids.iter();
    while (iter.valid()) {
        Object label = lrelation.get(iter);
        findMatches(posn, lrelation, label);
        if (posn.size() > 0) {
            computeDistances(nlist, iter, distQuery, relation);
            final int expected = relation.size() - (includeSelf ? 0 : 1);
            if (nlist.size() != expected) {
                LOG.warning("Neighbor list does not have the desired size: " + nlist.size());
            }
            map += AveragePrecisionEvaluation.STATIC.evaluate(posn, nlist);
            mroc += ROCEvaluation.STATIC.evaluate(posn, nlist);
            KNNEvaluator.STATIC.evaluateKNN(knnperf, nlist, lrelation, counters, label);
            samples++;
        }
        LOG.incrementProcessed(objloop);
        iter.advance();
    }
    LOG.ensureCompleted(objloop);
    if (samples < 1) {
        throw new AbortException("No object matched - are labels parsed correctly?");
    }
    // Written as a negated ">=" so that NaN values are rejected as well.
    if (!(map >= 0 && mroc >= 0)) {
        throw new AbortException("NaN in MAP/ROC.");
    }
    map = map / samples;
    mroc = mroc / samples;
    LOG.statistics(new DoubleStatistic(PREFIX + ".map", map));
    LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", mroc));
    LOG.statistics(new DoubleStatistic(PREFIX + ".samples", samples));
    for (int k = 0; k < maxk; k++) {
        knnperf[k] /= samples;
        LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (k + 1), knnperf[k]));
    }
    return new RetrievalPerformanceResult(samples, map, mroc, knnperf);
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 5 with Object2IntOpenHashMap

use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project druid by druid-io.

the class NativeQueryMaker method mapResultSequence.

/**
 * Lazily re-maps each row of {@code sequence} from the {@code originalFields} layout to the
 * {@code newFields} layout, passing every value through {@code coerce} with the matching
 * entry of {@code newTypes}.
 *
 * @param sequence rows laid out according to {@code originalFields}
 * @param originalFields field names of the incoming rows
 * @param newFields field names of the outgoing rows; each must occur in {@code originalFields}
 * @param newTypes target types, indexed like {@code newFields}
 * @return a sequence of rows laid out according to {@code newFields}
 * @throws ISE if a new field is not contained in {@code originalFields}
 */
private Sequence<Object[]> mapResultSequence(final Sequence<Object[]> sequence, final List<String> originalFields, final List<String> newFields, final List<SqlTypeName> newTypes) {
    // Hash lookup of original positions, so resolving stays cheap with very many fields.
    final Object2IntMap<String> positionByField = new Object2IntOpenHashMap<>();
    positionByField.defaultReturnValue(-1);
    int position = 0;
    for (final String field : originalFields) {
        positionByField.put(field, position);
        position++;
    }
    // oldPositions[new index] = index of the same field in the original rows.
    final int fieldCount = newFields.size();
    final int[] oldPositions = new int[fieldCount];
    for (int target = 0; target < fieldCount; target++) {
        final String name = newFields.get(target);
        final int source = positionByField.getInt(name);
        if (source < 0) {
            throw new ISE("newField[%s] not contained in originalFields[%s]", name, String.join(", ", originalFields));
        }
        oldPositions[target] = source;
    }
    return Sequences.map(sequence, row -> {
        final Object[] remapped = new Object[oldPositions.length];
        for (int target = 0; target < oldPositions.length; target++) {
            remapped[target] = coerce(row[oldPositions[target]], newTypes.get(target));
        }
        return remapped;
    });
}
Also used : Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) ISE(org.apache.druid.java.util.common.ISE) NlsString(org.apache.calcite.util.NlsString)

Aggregations

Object2IntOpenHashMap (it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap)13 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)2 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)2 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)2 Double2IntOpenHashMap (it.unimi.dsi.fastutil.doubles.Double2IntOpenHashMap)2 Float2IntOpenHashMap (it.unimi.dsi.fastutil.floats.Float2IntOpenHashMap)2 Int2IntOpenHashMap (it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap)2 Long2IntOpenHashMap (it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap)2 Entry (it.unimi.dsi.fastutil.objects.Object2IntMap.Entry)2 ArrayList (java.util.ArrayList)2 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)2 RealMatrix (org.apache.commons.math3.linear.RealMatrix)2 JsonWriter (com.google.gson.stream.JsonWriter)1 FixedByteSingleValueMultiColWriter (com.linkedin.pinot.core.io.writer.impl.FixedByteSingleValueMultiColWriter)1 ExternalID (de.lmu.ifi.dbs.elki.data.ExternalID)1 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1