use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project gatk by broadinstitute.
the class ReadCountCollection method arrangeTargets.
/**
 * Rearranges the targets of this collection so that they follow a particular order.
 * <p>
 * The result has one row per element of {@code targetsInOrder}; each row is a copy of the
 * counts this collection holds for that target. The column names are passed on unchanged.
 * </p>
 *
 * @param targetsInOrder the targets, in the order in which they must appear in the result.
 * @return a new collection.
 * @throws IllegalArgumentException if any of the following is true:
 * <ul>
 *     <li>{@code targetsInOrder} is {@code null},</li>
 *     <li>is empty,</li>
 *     <li>it contains {@code null},</li>
 *     <li>contains any target not present in this collection.</li>
 * </ul>
 */
public ReadCountCollection arrangeTargets(final List<Target> targetsInOrder) {
    Utils.nonNull(targetsInOrder);
    Utils.nonEmpty(targetsInOrder, "the input targets list cannot be empty");
    final RealMatrix counts = new Array2DRowRealMatrix(targetsInOrder.size(), columnNames.size());

    // Index the current targets by their row so each requested target is located with one lookup.
    final Object2IntMap<Target> targetToIndex = new Object2IntOpenHashMap<>(targets.size());
    for (int i = 0; i < targets.size(); i++) {
        targetToIndex.put(targets.get(i), i);
    }

    for (int i = 0; i < targetsInOrder.size(); i++) {
        final Target target = targetsInOrder.get(i);
        // Reject null elements explicitly: the "not present" message below dereferences the
        // target, so a null element could otherwise surface as a NullPointerException rather
        // than the documented IllegalArgumentException.
        Utils.validateArg(target != null, () -> "the input targets list cannot contain null");
        Utils.validateArg(targetToIndex.containsKey(target), () -> String.format("target '%s' is not present in the collection", target.getName()));
        counts.setRow(i, this.counts.getRow(targetToIndex.getInt(target)));
    }
    // Hand the new collection its own copy of the list so later changes by the caller do not affect it.
    return new ReadCountCollection(new ArrayList<>(targetsInOrder), columnNames, counts, false);
}
use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project elki by elki-project.
the class KNNClassifier method classify.
/**
 * Classifies an instance by a vote among the neighbors returned by the kNN query.
 * <p>
 * Every neighbor contributes one vote to the label stored for it in {@code labelrep}.
 * The label with the strictly highest vote count is returned; on a tie, the label that
 * comes first in the vote map's iteration order is kept.
 * </p>
 *
 * @param instance object to classify
 * @return the label with the most votes, or {@code null} if no votes were collected
 */
@Override
public ClassLabel classify(O instance) {
    final KNNList neighbors = knnq.getKNNForObject(instance, k);

    // Tally one vote per neighbor for that neighbor's label.
    final Object2IntOpenHashMap<ClassLabel> votes = new Object2IntOpenHashMap<>();
    DoubleDBIDListIter cursor = neighbors.iter();
    while (cursor.valid()) {
        votes.addTo(labelrep.get(cursor), 1);
        cursor.advance();
    }

    // Pick the label with the highest tally; only a strictly larger count replaces the leader.
    ClassLabel winner = null;
    int winnerVotes = Integer.MIN_VALUE;
    final ObjectIterator<Entry<ClassLabel>> tallies = votes.object2IntEntrySet().fastIterator();
    while (tallies.hasNext()) {
        final Entry<ClassLabel> tally = tallies.next();
        final int occurrences = tally.getIntValue();
        if (occurrences <= winnerVotes) {
            continue;
        }
        winnerVotes = occurrences;
        winner = tally.getKey();
    }
    return winner;
}
use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project elki by elki-project.
the class ExternalIDJoinDatabaseConnection method loadData.
/**
 * Loads every source and joins them, by external ID, into the bundle of the first source.
 * <p>
 * The first source is the primary one: its rows define the rows of the result, and each of
 * its external IDs is mapped to its row number. For every additional source, all columns
 * except its external ID column are appended to the primary bundle (pre-filled with
 * {@code null}) and then filled in for each row whose ID is known. Rows without an ID, or
 * with an ID the primary source does not have, are skipped. Cells that are still
 * {@code null} after the join are reported with a warning, once per row.
 * </p>
 *
 * @return the bundle of the first source, extended with the joined columns
 * @throws AbortException if any source has no external ID column
 */
@Override
public MultipleObjectsBundle loadData() {
    List<MultipleObjectsBundle> bundles = new ArrayList<>(sources.size());
    for (DatabaseConnection dbc : sources) {
        bundles.add(dbc.loadData());
    }
    MultipleObjectsBundle first = bundles.get(0);
    // External ID -> row in the primary bundle; -1 is the "not present" marker.
    Object2IntOpenHashMap<ExternalID> labelmap = new Object2IntOpenHashMap<>(first.dataLength());
    labelmap.defaultReturnValue(-1);
    // Process first bundle
    {
        final int lblcol = findExternalIdColumn(first);
        if (lblcol == -1) {
            throw new AbortException("No external ID column found in primary source.");
        }
        for (int i = 0; i < first.dataLength(); i++) {
            ExternalID data = (ExternalID) first.data(i, lblcol);
            if (data == null) {
                LOG.debug("Object without ID encountered.");
                continue;
            }
            // On a duplicate ID the later row replaces the earlier one in the map.
            int old = labelmap.put(data, i);
            if (old != -1) {
                LOG.debug("Duplicate id encountered: " + data + " in rows " + old + " and " + i);
            }
        }
    }
    // Process additional columns
    for (int c = 1; c < sources.size(); c++) {
        MultipleObjectsBundle cur = bundles.get(c);
        final int lblcol = findExternalIdColumn(cur);
        if (lblcol == -1) {
            // List the column types that were found, to help diagnose the missing ID column.
            StringBuilder buf = new StringBuilder();
            for (int i = 0; i < cur.metaLength(); i++) {
                if (buf.length() > 0) {
                    buf.append(',');
                }
                buf.append(cur.meta(i));
            }
            throw new AbortException("No external ID column found in source " + (c + 1) + " to join with. Got: " + buf.toString());
        }
        // Destination columns, indexed like the columns of cur (null at the label column).
        List<ArrayList<Object>> dcol = new ArrayList<>(cur.metaLength());
        for (int i = 0; i < cur.metaLength(); i++) {
            // Skip the label columns
            if (i == lblcol) {
                dcol.add(null);
                continue;
            }
            ArrayList<Object> newcol = new ArrayList<>(first.dataLength());
            // Pre-fill with nulls.
            for (int j = 0; j < first.dataLength(); j++) {
                newcol.add(null);
            }
            first.appendColumn(cur.meta(i), newcol);
            dcol.add(newcol);
        }
        // Copy each row of cur into the primary row that carries the same external ID.
        for (int i = 0; i < cur.dataLength(); i++) {
            ExternalID data = (ExternalID) cur.data(i, lblcol);
            if (data == null) {
                LOG.warning("Object without label encountered.");
                continue;
            }
            int row = labelmap.getInt(data);
            if (row == -1) {
                LOG.debug("ID not found for join: " + data + " in row " + i);
                continue;
            }
            for (int d = 0; d < cur.metaLength(); d++) {
                if (d == lblcol) {
                    continue;
                }
                List<Object> col = dcol.get(d);
                assert (col != null);
                col.set(row, cur.data(i, d));
            }
        }
    }
    // Report rows that still contain a null cell; one warning per row (first null column found).
    for (int i = 0; i < first.dataLength(); i++) {
        for (int d = 0; d < first.metaLength(); d++) {
            if (first.data(i, d) == null) {
                StringBuilder buf = new StringBuilder();
                for (int d2 = 0; d2 < first.metaLength(); d2++) {
                    if (buf.length() > 0) {
                        buf.append(", ");
                    }
                    if (first.data(i, d2) == null) {
                        buf.append("null");
                    } else {
                        buf.append(first.data(i, d2));
                    }
                }
                LOG.warning("null value in joined data, row " + i + " column " + d + FormatUtil.NEWLINE + "[" + buf.toString() + "]");
                break;
            }
        }
    }
    return first;
}

/**
 * Finds the first column of a bundle whose type {@code TypeUtil.EXTERNALID} accepts.
 *
 * @param bundle bundle whose column types are inspected
 * @return index of the first matching column, or {@code -1} if there is none
 */
private int findExternalIdColumn(MultipleObjectsBundle bundle) {
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (TypeUtil.EXTERNALID.isAssignableFromType(bundle.meta(i))) {
            return i;
        }
    }
    return -1;
}
use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project elki by elki-project.
the class EvaluateRetrievalPerformance method run.
/**
 * Evaluates retrieval performance on a random sample of query objects.
 * <p>
 * For every sampled object that has at least one match (as collected by
 * {@code findMatches}), the distances to the other objects are computed and three scores are
 * accumulated: the average-precision score, the ROC score, and the per-k kNN performance.
 * The accumulated values are then divided by the number of evaluated objects, logged as
 * statistics, and returned.
 * </p>
 *
 * @param database Database to run on (used to obtain the distance query)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated samples, the averaged MAP and ROC values,
 *         and the averaged kNN performance for k = 1..maxk.
 * @throws AbortException if no sampled object had a match, or if the accumulated MAP or ROC
 *         value is not {@code >= 0} (which includes NaN)
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

    // Scratch structures that are handed to the helpers for every query object.
    ModifiableDBIDs positives = DBIDUtil.newHashSet();
    ModifiableDoubleDBIDList neighbors = DBIDUtil.newDistanceDBIDList(relation.size());
    Object2IntOpenHashMap<Object> counters = new Object2IntOpenHashMap<>();

    // Running sums; turned into means once all query objects are processed.
    double mapSum = 0.;
    double rocSum = 0.;
    double[] knnperf = new double[maxk];
    int samples = 0;

    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Processing query objects", ids.size(), LOG);
    }

    for (DBIDIter query = ids.iter(); query.valid(); query.advance()) {
        final Object label = lrelation.get(query);
        findMatches(positives, lrelation, label);
        if (positives.size() > 0) {
            computeDistances(neighbors, query, distQuery, relation);
            // The list should cover the whole relation, minus the query itself unless includeSelf is set.
            final int expected;
            if (includeSelf) {
                expected = relation.size();
            } else {
                expected = relation.size() - 1;
            }
            if (neighbors.size() != expected) {
                LOG.warning("Neighbor list does not have the desired size: " + neighbors.size());
            }
            mapSum += AveragePrecisionEvaluation.STATIC.evaluate(positives, neighbors);
            rocSum += ROCEvaluation.STATIC.evaluate(positives, neighbors);
            KNNEvaluator.STATIC.evaluateKNN(knnperf, neighbors, lrelation, counters, label);
            samples++;
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    if (samples < 1) {
        throw new AbortException("No object matched - are labels parsed correctly?");
    }
    // A NaN sum fails the ">= 0" comparison, so this also rejects NaN.
    if (!(mapSum >= 0 && rocSum >= 0)) {
        throw new AbortException("NaN in MAP/ROC.");
    }

    final double map = mapSum / samples;
    final double mroc = rocSum / samples;
    LOG.statistics(new DoubleStatistic(PREFIX + ".map", map));
    LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", mroc));
    LOG.statistics(new DoubleStatistic(PREFIX + ".samples", samples));
    for (int k = 0; k < maxk; k++) {
        final double mean = knnperf[k] / samples;
        knnperf[k] = mean;
        LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (k + 1), mean));
    }
    return new RetrievalPerformanceResult(samples, map, mroc, knnperf);
}
use of it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap in project druid by druid-io.
the class NativeQueryMaker method mapResultSequence.
/**
 * Maps each row of a sequence from the original field layout to a new field layout.
 * <p>
 * Every new field is looked up by name among the original fields. Each output row then takes,
 * for position {@code i}, the value at the matching original position, passed through
 * {@code coerce} with the {@code i}-th entry of {@code newTypes}. If a name occurs more than
 * once in {@code originalFields}, its last position is used.
 * </p>
 *
 * @param sequence rows laid out according to {@code originalFields}
 * @param originalFields field names of the incoming rows, by position
 * @param newFields field names of the outgoing rows, by position
 * @param newTypes types of the outgoing fields, by position
 * @return sequence of rows laid out according to {@code newFields}
 * @throws ISE if a new field is not among the original fields
 */
private Sequence<Object[]> mapResultSequence(final Sequence<Object[]> sequence, final List<String> originalFields, final List<String> newFields, final List<SqlTypeName> newTypes) {
    // Name -> position lookup, so that resolving each new field stays cheap even with very many fields.
    final Object2IntMap<String> positionByName = new Object2IntOpenHashMap<>();
    positionByName.defaultReturnValue(-1);
    int position = 0;
    for (final String originalField : originalFields) {
        positionByName.put(originalField, position);
        position++;
    }

    // mapping[newIndex] == index of the same field in the original rows.
    final int newFieldCount = newFields.size();
    final int[] mapping = new int[newFieldCount];
    for (int i = 0; i < newFieldCount; i++) {
        final String newField = newFields.get(i);
        final int originalIndex = positionByName.getInt(newField);
        if (originalIndex < 0) {
            throw new ISE("newField[%s] not contained in originalFields[%s]", newField, String.join(", ", originalFields));
        }
        mapping[i] = originalIndex;
    }

    return Sequences.map(sequence, row -> {
        final Object[] remapped = new Object[mapping.length];
        int target = 0;
        for (final int source : mapping) {
            remapped[target] = coerce(row[source], newTypes.get(target));
            target++;
        }
        return remapped;
    });
}
Aggregations