use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.
the class LimitedBufferHashGrouperTest method testLimitAndBufferSwapping.
@Test
public void testLimitAndBufferSwapping() {
  final TestColumnSelectorFactory selectorFactory = GrouperTestUtil.newColumnSelectorFactory();
  final LimitedBufferHashGrouper<Integer> grouper = makeGrouper(selectorFactory, 20000);

  // Phase 1: aggregate NUM_ROWS distinct keys, offset by KEY_BASE, each seeing value = 10.
  selectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L)));
  for (int i = 0; i < NUM_ROWS; i++) {
    final int key = i + KEY_BASE;
    Assert.assertTrue(String.valueOf(key), grouper.aggregate(key).isOk());
  }

  // Buffer layout arithmetic (limit = 100, buffer = 20000 bytes):
  //   heap               : 101 * 4 bytes = 404 bytes
  //   table arena        : 20000 - 404 = 19596 bytes, split into two halves of 9798 bytes for push down
  //
  // Default-value mode:
  //   bucket             : hash(int) + key(int) + aggs(2 longs) + heap offset(int) = 28 bytes
  //   buckets per half   : 9798 / 28 = 349, max size at load factor 0.5 = 174
  //   swaps              : first at 174 buckets, then every 74 buckets (100 are retained by the limit)
  //   after 1000 keys    : 1 + 11 = 12 swaps, leaving 100 + 12 = 112 entries
  //
  // Nullable mode (1 extra byte for the long aggregator's null marker):
  //   bucket             : 29 bytes
  //   buckets per half   : 9798 / 29 = 337, max size at load factor 0.5 = 168
  //   swaps              : first at 168 buckets, then every 68 buckets
  //   after 1000 keys    : 1 + 12 = 13 swaps, leaving 100 + 16 = 116 entries
  final boolean defaultValueMode = NullHandling.replaceWithDefault();
  final int expectedBuckets = defaultValueMode ? 349 : 337;
  final int expectedMaxSize = defaultValueMode ? 174 : 168;

  Assert.assertEquals(defaultValueMode ? 12 : 13, grouper.getGrowthCount());
  Assert.assertEquals(defaultValueMode ? 112 : 116, grouper.getSize());
  Assert.assertEquals(expectedBuckets, grouper.getBuckets());
  Assert.assertEquals(expectedMaxSize, grouper.getMaxSize());
  Assert.assertEquals(100, grouper.getLimit());

  // Phase 2: aggregate a slightly different row under smaller keys (no KEY_BASE offset).
  // Being smaller, these keys evict the previous top 100; the first 100 of them are the expected result.
  selectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 11L)));
  for (int key = 0; key < NUM_ROWS; key++) {
    Assert.assertTrue(String.valueOf(key), grouper.aggregate(key).isOk());
  }

  // Another 1000 unique keys were added.
  //
  // Default-value mode: previous size 112, so the next swap comes after 62 rows; the remaining
  //   1000 - 62 = 938 rows give 938 / 74 = 12 further swaps, with 50 keys added after the last one.
  //
  // Nullable mode: previous size 116, so the next swap comes after 52 rows; the remaining
  //   1000 - 52 = 948 rows give 948 / 68 = 13 further swaps, with 64 keys added after the last one.
  Assert.assertEquals(defaultValueMode ? 25 : 27, grouper.getGrowthCount());
  Assert.assertEquals(defaultValueMode ? 150 : 164, grouper.getSize());
  Assert.assertEquals(expectedBuckets, grouper.getBuckets());
  Assert.assertEquals(expectedMaxSize, grouper.getMaxSize());
  Assert.assertEquals(100, grouper.getLimit());

  final List<Grouper.Entry<Integer>> expectedEntries = new ArrayList<>();
  for (int key = 0; key < LIMIT; key++) {
    expectedEntries.add(new Grouper.Entry<>(key, new Object[] { 11L, 1L }));
  }

  Assert.assertEquals(expectedEntries, Lists.newArrayList(grouper.iterator(true)));
  // A second iteration must yield the same result: the min-max offset heap was destroyed by the
  // first pass, but it has been replaced with a reverse sorted array.
  Assert.assertEquals(expectedEntries, Lists.newArrayList(grouper.iterator(true)));
}
use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.
the class ResultRowTest method testMapBasedRowWithNullValues.
@Test
public void testMapBasedRowWithNullValues() {
  final TableDataSource dataSource = new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE);
  final MultipleIntervalSegmentSpec intervalSpec =
      new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012")));

  // Group-by over three dimensions with a single count aggregator; every other argument is null.
  final GroupByQuery groupByQuery = new GroupByQuery(
      dataSource,
      intervalSpec,
      null,
      null,
      Granularities.ALL,
      ImmutableList.of(
          new DefaultDimensionSpec("dim1", "dim1"),
          new DefaultDimensionSpec("dim2", "dim2"),
          new DefaultDimensionSpec("dim3", "dim3")
      ),
      ImmutableList.of(new CountAggregatorFactory("count")),
      null,
      null,
      null,
      null,
      null
  );

  // The third positional value (matching dim3) is deliberately null.
  final ResultRow resultRow = ResultRow.of("1", "2", null);
  final MapBasedRow converted = resultRow.toMapBasedRow(groupByQuery);

  // Each dimension must map to the value at its position in the result row.
  Assert.assertEquals("1", converted.getRaw("dim1"));
  Assert.assertEquals("2", converted.getRaw("dim2"));
  Assert.assertNull(converted.getRaw("dim3"));

  // The null-valued dimension must still be present as a key in the event map,
  // not merely absent from it.
  Assert.assertTrue(converted.getEvent().containsKey("dim3"));
}
use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.
the class BufferHashGrouperTest method testGrowing.
@Test
public void testGrowing() {
  final TestColumnSelectorFactory selectorFactory = GrouperTestUtil.newColumnSelectorFactory();
  final Grouper<Integer> grouper = makeGrouper(selectorFactory, 10000, 2, 0.75f);

  // Number of distinct keys the grouper is expected to accept before rejecting a new one;
  // it differs between the two null-handling modes.
  final int capacity;
  if (NullHandling.replaceWithDefault()) {
    capacity = 219;
  } else {
    capacity = 210;
  }

  // Two passes over the same key range with slightly different rows (value = 10, then value = 11).
  // In each pass every key in [0, capacity) must be accepted, while the key equal to capacity
  // must be rejected.
  for (long metricValue : new long[] { 10L, 11L }) {
    selectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", metricValue)));
    for (int key = 0; key < capacity; key++) {
      Assert.assertTrue(String.valueOf(key), grouper.aggregate(key).isOk());
    }
    Assert.assertFalse(grouper.aggregate(capacity).isOk());
  }

  // Every accepted key saw both rows: sum = 10 + 11 = 21, count = 2.
  final List<Grouper.Entry<Integer>> expectedEntries = new ArrayList<>();
  for (int key = 0; key < capacity; key++) {
    expectedEntries.add(new Grouper.Entry<>(key, new Object[] { 21L, 2L }));
  }
  Assert.assertEquals(expectedEntries, Lists.newArrayList(grouper.iterator(true)));
}
use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.
the class BufferHashGrouperTest method testSimple.
@Test
public void testSimple() {
  final TestColumnSelectorFactory selectorFactory = GrouperTestUtil.newColumnSelectorFactory();
  final Grouper<Integer> grouper = new BufferHashGrouper<>(
      Suppliers.ofInstance(ByteBuffer.allocate(1000)),
      GrouperTestUtil.intKeySerde(),
      AggregatorAdapters.factorizeBuffered(
          selectorFactory,
          ImmutableList.of(new LongSumAggregatorFactory("valueSum", "value"), new CountAggregatorFactory("count"))
      ),
      Integer.MAX_VALUE,
      0,
      0,
      true
  );
  grouper.init();

  // Every aggregate call sees the same row (value = 10); only the grouping key varies.
  selectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L)));
  for (int key : new int[] { 12, 6, 10, 6, 12, 12 }) {
    grouper.aggregate(key);
  }

  // Key 6 was seen twice, key 10 once, key 12 three times; entries are listed in ascending key order.
  final List<Grouper.Entry<Integer>> expected = ImmutableList.of(
      new Grouper.Entry<>(6, new Object[] { 20L, 2L }),
      new Grouper.Entry<>(10, new Object[] { 10L, 1L }),
      new Grouper.Entry<>(12, new Object[] { 30L, 3L })
  );

  // Take the unsorted snapshot before requesting the sorted iterator.
  final List<Grouper.Entry<Integer>> unsorted = Lists.newArrayList(grouper.iterator(false));
  final List<Grouper.Entry<Integer>> sorted = Lists.newArrayList(grouper.iterator(true));

  // The sorted iterator must already produce the expected order...
  Assert.assertEquals(expected, sorted);

  // ...and the unsorted iterator must hold the same entries once ordered by key.
  final Comparator<Grouper.Entry<Integer>> byKey =
      (left, right) -> Ints.compare(left.getKey(), right.getKey());
  Assert.assertEquals(expected, Ordering.from(byKey).sortedCopy(unsorted));
}
use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.
the class ExpressionTestHelper method testFilter.
/**
 * Builds a call of {@code op} over {@code exprs}, converts it to a {@link DimFilter}, and verifies
 * three things in order: the produced filter equals {@code expectedFilter}; the virtual columns
 * registered for the filter's required columns equal {@code expectedVirtualColumns} (both sides
 * compared sorted by output name); and a matcher built from {@code expectedFilter} yields
 * {@code expectedResult} for a row backed by this helper's bindings.
 *
 * @param op                     operator to apply
 * @param exprs                  operands of the call
 * @param expectedVirtualColumns virtual columns the conversion is expected to register
 * @param expectedFilter         filter the conversion is expected to produce
 * @param expectedResult         expected outcome of matching the bound row
 */
void testFilter(
    final SqlOperator op,
    final List<? extends RexNode> exprs,
    final List<VirtualColumn> expectedVirtualColumns,
    final DimFilter expectedFilter,
    final boolean expectedResult
) {
  final RexNode rexNode = rexBuilder.makeCall(op, exprs);
  final VirtualColumnRegistry registry = VirtualColumnRegistry.create(rowSignature, TestExprMacroTable.INSTANCE);

  // Step 1: the planner must translate the expression into exactly the expected filter.
  final DimFilter actualFilter = Expressions.toFilter(PLANNER_CONTEXT, rowSignature, registry, rexNode);
  Assert.assertEquals("Filter for: " + rexNode, expectedFilter, actualFilter);

  // Step 2: resolve the filter's required columns through the registry; columns the registry
  // does not know yield null and are dropped. Both lists are compared in output-name order.
  final Comparator<VirtualColumn> byOutputName = Comparator.comparing(VirtualColumn::getOutputName);
  final List<VirtualColumn> actualVirtualColumns = actualFilter
      .getRequiredColumns()
      .stream()
      .map(registry::getVirtualColumn)
      .filter(Objects::nonNull)
      .sorted(byOutputName)
      .collect(Collectors.toList());
  final List<VirtualColumn> sortedExpectedVirtualColumns = expectedVirtualColumns
      .stream()
      .sorted(byOutputName)
      .collect(Collectors.toList());
  Assert.assertEquals("Virtual columns for: " + rexNode, sortedExpectedVirtualColumns, actualVirtualColumns);

  // Step 3: evaluate the filter against a single row built from the bindings, with the
  // resolved virtual columns layered on top of the row-based selector factory.
  final VirtualizedColumnSelectorFactory selectorFactory = new VirtualizedColumnSelectorFactory(
      RowBasedColumnSelectorFactory.create(
          RowAdapters.standardRow(),
          () -> new MapBasedRow(0L, bindings),
          rowSignature,
          false
      ),
      VirtualColumns.create(actualVirtualColumns)
  );
  final ValueMatcher matcher = expectedFilter.toFilter().makeMatcher(selectorFactory);
  Assert.assertEquals("Result for: " + rexNode, expectedResult, matcher.matches());
}
Aggregations