use of it.unimi.dsi.fastutil.longs.LongArrayList in project druid by druid-io.
the class InDimFilter method getLongPredicateSupplier.
// As the set of filtered values can be large, parsing them as longs should be done only if needed, and only once.
// Pass in a common long predicate supplier to all filters created by .toFilter(), so that
// we only compute the long hashset/array once per query.
// This supplier must be thread-safe, since this DimFilter will be accessed in the query runners.
private Supplier<DruidLongPredicate> getLongPredicateSupplier() {
  return new Supplier<DruidLongPredicate>() {
    private final Object initLock = new Object();

    // volatile is required for safe double-checked locking: without it, a thread
    // reading outside the synchronized block has no visibility guarantee.
    private volatile DruidLongPredicate predicate;

    private void initLongValues() {
      if (predicate != null) {
        return;
      }
      synchronized (initLock) {
        if (predicate != null) {
          return;
        }
        LongArrayList longs = new LongArrayList(values.size());
        for (String value : values) {
          Long longValue = GuavaUtils.tryParseLong(value);
          if (longValue != null) {
            longs.add(longValue);
          }
        }
        if (longs.size() > NUMERIC_HASHING_THRESHOLD) {
          // many values: a hash set gives O(1) membership checks
          final LongOpenHashSet longHashSet = new LongOpenHashSet(longs);
          predicate = new DruidLongPredicate() {
            @Override
            public boolean applyLong(long input) {
              return longHashSet.contains(input);
            }
          };
        } else {
          // few values: a sorted primitive array with binary search is more compact
          final long[] longArray = longs.toLongArray();
          Arrays.sort(longArray);
          predicate = new DruidLongPredicate() {
            @Override
            public boolean applyLong(long input) {
              return Arrays.binarySearch(longArray, input) >= 0;
            }
          };
        }
      }
    }

    @Override
    public DruidLongPredicate get() {
      initLongValues();
      return predicate;
    }
  };
}
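The size-based switch is the interesting part: above the threshold, a LongOpenHashSet buys O(1) lookups at the cost of extra memory; below it, a sorted primitive array with binary search is more compact and cache-friendly. A standalone sketch of the same choice (the threshold of 16 in main is an arbitrary placeholder, not Druid's NUMERIC_HASHING_THRESHOLD):

import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import java.util.Arrays;
import java.util.function.LongPredicate;

class LongMembership {
    // Build a membership predicate, switching data structure on set size.
    static LongPredicate build(LongArrayList longs, int hashingThreshold) {
        if (longs.size() > hashingThreshold) {
            final LongOpenHashSet set = new LongOpenHashSet(longs);
            return set::contains;  // O(1) per lookup, higher memory overhead
        }
        final long[] sorted = longs.toLongArray();
        Arrays.sort(sorted);
        return value -> Arrays.binarySearch(sorted, value) >= 0;  // O(log n), compact
    }

    public static void main(String[] args) {
        LongPredicate predicate = build(new LongArrayList(new long[] {5, 42, 7}), 16);
        System.out.println(predicate.test(42));  // true
        System.out.println(predicate.test(43));  // false
    }
}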
use of it.unimi.dsi.fastutil.longs.LongArrayList in project presto by prestodb.
the class TestJoinProbeCompiler method testSingleChannel.
@Test(dataProvider = "hashEnabledValues")
public void testSingleChannel(boolean hashEnabled) throws Exception {
    taskContext.addPipelineContext(0, true, true).addDriverContext();
    ImmutableList<Type> types = ImmutableList.of(VARCHAR, DOUBLE);
    ImmutableList<Type> outputTypes = ImmutableList.of(VARCHAR);
    List<Integer> outputChannels = ImmutableList.of(0);
    LookupSourceSupplierFactory lookupSourceSupplierFactory = joinCompiler.compileLookupSourceFactory(types, Ints.asList(0));
    // create hash strategy with a single channel of blocks -- make sure there is some overlap in values
    List<Block> varcharChannel = ImmutableList.of(
            BlockAssertions.createStringSequenceBlock(10, 20),
            BlockAssertions.createStringSequenceBlock(20, 30),
            BlockAssertions.createStringSequenceBlock(15, 25));
    List<Block> extraUnusedDoubleChannel = ImmutableList.of(
            BlockAssertions.createDoubleSequenceBlock(10, 20),
            BlockAssertions.createDoubleSequenceBlock(20, 30),
            BlockAssertions.createDoubleSequenceBlock(15, 25));
    // pack every (blockIndex, positionIndex) pair into a single long address
    LongArrayList addresses = new LongArrayList();
    for (int blockIndex = 0; blockIndex < varcharChannel.size(); blockIndex++) {
        Block block = varcharChannel.get(blockIndex);
        for (int positionIndex = 0; positionIndex < block.getPositionCount(); positionIndex++) {
            addresses.add(encodeSyntheticAddress(blockIndex, positionIndex));
        }
    }
    Optional<Integer> hashChannel = Optional.empty();
    List<List<Block>> channels = ImmutableList.of(varcharChannel, extraUnusedDoubleChannel);
    if (hashEnabled) {
        ImmutableList.Builder<Block> hashChannelBuilder = ImmutableList.builder();
        for (Block block : varcharChannel) {
            hashChannelBuilder.add(TypeUtils.getHashBlock(ImmutableList.<Type>of(VARCHAR), block));
        }
        types = ImmutableList.of(VARCHAR, DOUBLE, BigintType.BIGINT);
        hashChannel = Optional.of(2);
        channels = ImmutableList.of(varcharChannel, extraUnusedDoubleChannel, hashChannelBuilder.build());
        outputChannels = ImmutableList.of(0, 2);
        outputTypes = ImmutableList.of(VARCHAR, BigintType.BIGINT);
    }
    LookupSource lookupSource = lookupSourceSupplierFactory.createLookupSourceSupplier(taskContext.getSession().toConnectorSession(), addresses, channels, hashChannel, Optional.empty()).get();
    JoinProbeCompiler joinProbeCompiler = new JoinProbeCompiler();
    JoinProbeFactory probeFactory = joinProbeCompiler.internalCompileJoinProbe(types, outputChannels, Ints.asList(0), hashChannel);
    Page page = SequencePageBuilder.createSequencePage(types, 10, 10, 10);
    Page outputPage = new Page(page.getBlock(0));
    if (hashEnabled) {
        page = new Page(page.getBlock(0), page.getBlock(1), TypeUtils.getHashBlock(ImmutableList.of(VARCHAR), page.getBlock(0)));
        outputPage = new Page(page.getBlock(0), page.getBlock(2));
    }
    JoinProbe joinProbe = probeFactory.createJoinProbe(lookupSource, page);
    // verify channel count
    assertEquals(joinProbe.getOutputChannelCount(), outputChannels.size());
    PageBuilder pageBuilder = new PageBuilder(outputTypes);
    for (int position = 0; position < page.getPositionCount(); position++) {
        assertTrue(joinProbe.advanceNextPosition());
        pageBuilder.declarePosition();
        joinProbe.appendTo(pageBuilder);
        assertEquals(joinProbe.getCurrentJoinPosition(), lookupSource.getJoinPosition(position, page, page));
    }
    assertFalse(joinProbe.advanceNextPosition());
    assertPageEquals(outputTypes, pageBuilder.build(), outputPage);
}
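The LongArrayList of addresses works because encodeSyntheticAddress packs each (blockIndex, positionIndex) pair into a single long, keeping the whole row index in primitive storage. A sketch of that packing idea (Presto's actual layout lives in its SyntheticAddress class; treat this helper as an illustration):

import it.unimi.dsi.fastutil.longs.LongArrayList;

class AddressPacking {
    // Block index in the high 32 bits, position in the low 32 bits.
    static long encode(int blockIndex, int position) {
        return (((long) blockIndex) << 32) | (position & 0xFFFFFFFFL);
    }

    static int blockIndex(long address) {
        return (int) (address >>> 32);
    }

    static int position(long address) {
        return (int) address;  // truncation keeps the low 32 bits
    }

    public static void main(String[] args) {
        LongArrayList addresses = new LongArrayList();
        addresses.add(encode(2, 17));
        long a = addresses.getLong(0);
        System.out.println(blockIndex(a) + ":" + position(a));  // 2:17
    }
}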
use of it.unimi.dsi.fastutil.longs.LongArrayList in project gridss by PapenfussLab.
the class KmerEncodingHelperTest method partialSequenceBasesDifferent_should_allow_seq_out_of_ref_bounds_end.
@Test
public void partialSequenceBasesDifferent_should_allow_seq_out_of_ref_bounds_end() {
    int k = 4;
    LongArrayList ref = KPN(k, "AAATAACA", 1, 1, true).pathKmers();
    LongArrayList seq = KPN(k, "AATAACCATGC", 1, 1, true).pathKmers();
    assertEquals(1, KmerEncodingHelper.partialSequenceBasesDifferent(k, ref, seq, 1, true));
}
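pathKmers() returns the sequence's overlapping k-mers, each packed into a long. A hypothetical sketch of the standard 2-bit-per-base packing into a LongArrayList (gridss's real encoding lives in KmerEncodingHelper; this helper is illustrative only and assumes k <= 31):

import it.unimi.dsi.fastutil.longs.LongArrayList;

class KmerPacking {
    // Encode A/C/G/T as 2 bits each.
    static long encodeBase(char base) {
        switch (base) {
            case 'A': return 0;
            case 'C': return 1;
            case 'G': return 2;
            case 'T': return 3;
            default: throw new IllegalArgumentException("not a DNA base: " + base);
        }
    }

    // Slide a k-wide window over the sequence, packing each window into a long.
    static LongArrayList kmersOf(int k, String seq) {
        LongArrayList kmers = new LongArrayList(seq.length() - k + 1);
        long kmer = 0;
        long mask = (1L << (2 * k)) - 1;  // keep only the low 2k bits (requires k <= 31)
        for (int i = 0; i < seq.length(); i++) {
            kmer = ((kmer << 2) | encodeBase(seq.charAt(i))) & mask;
            if (i >= k - 1) {
                kmers.add(kmer);
            }
        }
        return kmers;
    }

    public static void main(String[] args) {
        System.out.println(kmersOf(4, "AAATAACA"));  // the five overlapping 4-mers
    }
}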
use of it.unimi.dsi.fastutil.longs.LongArrayList in project gridss by PapenfussLab.
the class KmerEncodingHelperTest method partialSequenceBasesDifferent_regression_test_1.
@Test
public void partialSequenceBasesDifferent_regression_test_1() {
    int k = 25;
    LongArrayList ref = KPN(k, "TAGTGGCAGGCACCTGTAATCCCAGTTACTTGGGAGGCTGAGGCAGGAGA", 1, 1, true).pathKmers();
    LongArrayList seq = KPN(k, "AGTGGCAGGCACCTGTAATGCCAGCTACTTGGGAGGCTGAGGCA", 1, 1, true).pathKmers();
    // the sequences differ at two bases; the base difference attributed to the
    // anchor is not counted, leaving 1
    assertEquals(1, KmerEncodingHelper.partialSequenceBasesDifferent(k, ref, seq, 1, false));
}
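partialSequenceBasesDifferent ultimately counts mismatching bases between aligned k-mers. With 2-bit packing, two k-mers can be compared by XOR: every non-zero 2-bit group marks one differing base. A hedged sketch of that comparison (illustrative, not gridss's exact implementation):

class KmerComparison {
    // Count differing bases between two k-mers packed 2 bits per base.
    static int basesDifferent(int k, long kmerA, long kmerB) {
        long x = kmerA ^ kmerB;  // a non-zero 2-bit group means the bases differ
        int differences = 0;
        for (int i = 0; i < k; i++) {
            if ((x & 0b11) != 0) {
                differences++;
            }
            x >>>= 2;
        }
        return differences;
    }
}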
use of it.unimi.dsi.fastutil.longs.LongArrayList in project gridss by PapenfussLab.
the class EvidenceTracker method traverse.
public Set<KmerEvidence> traverse(Collection<KmerPathSubnode> contig, boolean remove) {
    Set<KmerEvidence> evidence = Collections.newSetFromMap(new IdentityHashMap<KmerEvidence, Boolean>());
    for (KmerPathSubnode sn : contig) {
        int start = sn.firstStart();
        int end = sn.firstEnd();
        for (int i = 0; i < sn.length(); i++) {
            toCollection(evidence, sn.kmer(i), start + i, end + i, remove);
        }
        LongArrayList collapsed = sn.node().collapsedKmers();
        IntArrayList collapsedOffset = sn.node().collapsedKmerOffsets();
        for (int i = 0; i < collapsed.size(); i++) {
            int offset = collapsedOffset.getInt(i);
            toCollection(evidence, collapsed.getLong(i), start + offset, end + offset, remove);
        }
    }
    if (remove) {
        for (KmerEvidence e : evidence) {
            // remove any leftover evidence kmers not on the called path
            remove(e);
        }
    }
    return evidence;
}
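The loop over collapsed kmers pairs a LongArrayList with an IntArrayList and reads both through getLong/getInt, the primitive accessors that avoid boxing a Long or Integer per element. A minimal sketch of that parallel-list pattern:

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.longs.LongArrayList;

class ParallelPrimitiveLists {
    // Walk two parallel primitive lists without allocating any boxed values.
    static void forEachPair(LongArrayList kmers, IntArrayList offsets) {
        for (int i = 0; i < kmers.size(); i++) {
            long kmer = kmers.getLong(i);    // primitive accessor, no Long box
            int offset = offsets.getInt(i);  // primitive accessor, no Integer box
            System.out.println(kmer + " @ " + offset);
        }
    }

    public static void main(String[] args) {
        forEachPair(new LongArrayList(new long[] {11, 13}), new IntArrayList(new int[] {0, 1}));
    }
}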