Example 61 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

From class LimitedBufferHashGrouperTest, method testLimitAndBufferSwapping:

@Test
public void testLimitAndBufferSwapping() {
    final TestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory();
    final LimitedBufferHashGrouper<Integer> grouper = makeGrouper(columnSelectorFactory, 20000);
    columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L)));
    for (int i = 0; i < NUM_ROWS; i++) {
        Assert.assertTrue(String.valueOf(i + KEY_BASE), grouper.aggregate(i + KEY_BASE).isOk());
    }
    if (NullHandling.replaceWithDefault()) {
        // bucket size is hash(int) + key(int) + aggs(2 longs) + heap offset(int) = 28 bytes
        // limit is 100 so heap occupies 101 * 4 bytes = 404 bytes
        // buffer is 20000 bytes, so table arena size is 20000 - 404 = 19596 bytes
        // table arena is split in halves when doing push down, so each half is 9798 bytes
        // each table arena half can hold 9798 / 28 = 349 buckets, with load factor of 0.5 max buckets per half is 174
        // First buffer swap occurs when we hit 174 buckets
        // Subsequent buffer swaps occur after every 74 buckets, since we keep 100 buckets due to the limit
        // With 1000 keys inserted, this results in one swap at the first 174 buckets, then 11 swaps afterwards.
        // After the last swap, we have 100 keys + 12 new keys inserted.
        Assert.assertEquals(12, grouper.getGrowthCount());
        Assert.assertEquals(112, grouper.getSize());
        Assert.assertEquals(349, grouper.getBuckets());
        Assert.assertEquals(174, grouper.getMaxSize());
    } else {
        // With Nullability enabled
        // bucket size is hash(int) + key(int) + aggs(2 longs + 1 byte for Long Agg nullability) + heap offset(int) = 29 bytes
        // limit is 100 so heap occupies 101 * 4 bytes = 404 bytes
        // buffer is 20000 bytes, so table arena size is 20000 - 404 = 19596 bytes
        // table arena is split in halves when doing push down, so each half is 9798 bytes
        // each table arena half can hold 9798 / 29 = 337 buckets, with load factor of 0.5 max buckets per half is 168
        // First buffer swap occurs when we hit 168 buckets
        // Subsequent buffer swaps occur after every 68 buckets, since we keep 100 buckets due to the limit
        // With 1000 keys inserted, this results in one swap at the first 168 buckets, then 12 swaps afterwards.
        // After the last swap, we have 100 keys + 16 new keys inserted.
        Assert.assertEquals(13, grouper.getGrowthCount());
        Assert.assertEquals(116, grouper.getSize());
        Assert.assertEquals(337, grouper.getBuckets());
        Assert.assertEquals(168, grouper.getMaxSize());
    }
    Assert.assertEquals(100, grouper.getLimit());
    // Aggregate slightly different row
    // Since these keys are smaller, they will evict the previous 100 top entries
    // First 100 of these new rows will be the expected results.
    columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 11L)));
    for (int i = 0; i < NUM_ROWS; i++) {
        Assert.assertTrue(String.valueOf(i), grouper.aggregate(i).isOk());
    }
    if (NullHandling.replaceWithDefault()) {
        // we added another 1000 unique keys
        // previous size is 112, so next swap occurs after 62 rows
        // after that, there are 1000 - 62 = 938 rows, 938 / 74 = 12 additional swaps after the first,
        // with 50 keys being added after the final swap.
        Assert.assertEquals(25, grouper.getGrowthCount());
        Assert.assertEquals(150, grouper.getSize());
        Assert.assertEquals(349, grouper.getBuckets());
        Assert.assertEquals(174, grouper.getMaxSize());
    } else {
        // With Nullable Aggregator
        // we added another 1000 unique keys
        // previous size is 116, so next swap occurs after 52 rows
        // after that, there are 1000 - 52 = 948 rows, 948 / 68 = 13 additional swaps after the first,
        // with 64 keys being added after the final swap.
        Assert.assertEquals(27, grouper.getGrowthCount());
        Assert.assertEquals(164, grouper.getSize());
        Assert.assertEquals(337, grouper.getBuckets());
        Assert.assertEquals(168, grouper.getMaxSize());
    }
    Assert.assertEquals(100, grouper.getLimit());
    final List<Grouper.Entry<Integer>> expected = new ArrayList<>();
    for (int i = 0; i < LIMIT; i++) {
        expected.add(new Grouper.Entry<>(i, new Object[] { 11L, 1L }));
    }
    Assert.assertEquals(expected, Lists.newArrayList(grouper.iterator(true)));
    // Iterate again; even though the min-max offset heap has been destroyed, it is replaced with a reverse-sorted array
    Assert.assertEquals(expected, Lists.newArrayList(grouper.iterator(true)));
}
Also used: MapBasedRow (org.apache.druid.data.input.MapBasedRow), ArrayList (java.util.ArrayList), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
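The swap arithmetic in the comments above can be checked directly. Here is a minimal sketch (plain Java, variable names hypothetical) that reproduces the default-null-handling numbers from the constants the comments cite; it mirrors the comment arithmetic, not the grouper's actual implementation.

// Reproduce the expected growthCount/size from the comment arithmetic
// (default null handling, 28-byte buckets). All names are illustrative.
final int bufferBytes = 20000;
final int bucketBytes = 28;                                  // hash(int) + key(int) + 2 long aggs + heap offset(int)
final int limit = 100;
final int heapBytes = (limit + 1) * Integer.BYTES;           // 404
final int halfArenaBytes = (bufferBytes - heapBytes) / 2;    // 9798
final int buckets = halfArenaBytes / bucketBytes;            // 349
final int maxSize = buckets / 2;                             // load factor 0.5 -> 174

final int numRows = 1000;
final int freedPerSwap = maxSize - limit;                    // 74 buckets per subsequent swap
final int swaps = 1 + (numRows - maxSize) / freedPerSwap;    // 1 + 826 / 74 = 12
final int size = limit + (numRows - maxSize) % freedPerSwap; // 100 + 12 = 112
// swaps matches grouper.getGrowthCount() and size matches grouper.getSize()
// in the replaceWithDefault() branch of the test above.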

Example 62 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

From class ResultRowTest, method testMapBasedRowWithNullValues:

@Test
public void testMapBasedRowWithNullValues() {
    GroupByQuery query = new GroupByQuery(
        new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
        new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012"))),
        null,
        null,
        Granularities.ALL,
        ImmutableList.of(
            new DefaultDimensionSpec("dim1", "dim1"),
            new DefaultDimensionSpec("dim2", "dim2"),
            new DefaultDimensionSpec("dim3", "dim3")
        ),
        ImmutableList.of(new CountAggregatorFactory("count")),
        null,
        null,
        null,
        null,
        null
    );
    final ResultRow row = ResultRow.of("1", "2", null);
    MapBasedRow mapBasedRow = row.toMapBasedRow(query);
    // Let's make sure values are there as expected
    Assert.assertEquals("1", mapBasedRow.getRaw("dim1"));
    Assert.assertEquals("2", mapBasedRow.getRaw("dim2"));
    Assert.assertNull(mapBasedRow.getRaw("dim3"));
    // Also, let's make sure that the dimension with null value is actually present in the map
    Assert.assertTrue(mapBasedRow.getEvent().containsKey("dim3"));
}
Also used: MapBasedRow (org.apache.druid.data.input.MapBasedRow), TableDataSource (org.apache.druid.query.TableDataSource), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec), DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec), Test (org.junit.Test)
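The same null-key behavior can be reproduced with a hand-built MapBasedRow. A minimal sketch (note that ImmutableMap rejects null values, so a plain HashMap must hold the event):

// An event map with an explicit null value: the key survives the round trip.
Map<String, Object> event = new HashMap<>();
event.put("dim1", "1");
event.put("dim2", "2");
event.put("dim3", null);
MapBasedRow row = new MapBasedRow(0, event);
Assert.assertNull(row.getRaw("dim3"));                 // the value is null...
Assert.assertTrue(row.getEvent().containsKey("dim3")); // ...but the key is present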

Example 63 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

From class BufferHashGrouperTest, method testGrowing:

@Test
public void testGrowing() {
    final TestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory();
    final Grouper<Integer> grouper = makeGrouper(columnSelectorFactory, 10000, 2, 0.75f);
    final int expectedMaxSize = NullHandling.replaceWithDefault() ? 219 : 210;
    columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L)));
    for (int i = 0; i < expectedMaxSize; i++) {
        Assert.assertTrue(String.valueOf(i), grouper.aggregate(i).isOk());
    }
    Assert.assertFalse(grouper.aggregate(expectedMaxSize).isOk());
    // Aggregate slightly different row
    columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 11L)));
    for (int i = 0; i < expectedMaxSize; i++) {
        Assert.assertTrue(String.valueOf(i), grouper.aggregate(i).isOk());
    }
    Assert.assertFalse(grouper.aggregate(expectedMaxSize).isOk());
    final List<Grouper.Entry<Integer>> expected = new ArrayList<>();
    for (int i = 0; i < expectedMaxSize; i++) {
        expected.add(new Grouper.Entry<>(i, new Object[] { 21L, 2L }));
    }
    Assert.assertEquals(expected, Lists.newArrayList(grouper.iterator(true)));
}
Also used: MapBasedRow (org.apache.druid.data.input.MapBasedRow), ArrayList (java.util.ArrayList), Test (org.junit.Test)
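The pattern worth noting here is that aggregate(key).isOk() reports buffer exhaustion as a result value rather than an exception. A hedged sketch of that calling convention, assuming a grouper built as above:

// Keep aggregating until the buffer-backed table can no longer grow.
// AggregateResult signals failure via isOk(), so callers can stop or spill.
int key = 0;
while (grouper.aggregate(key).isOk()) {
    key++;
}
// 'key' is now the first key that did not fit; with default null handling
// the test above observes 219 successful inserts before the first failure.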

Example 64 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

From class BufferHashGrouperTest, method testSimple:

@Test
public void testSimple() {
    final TestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory();
    final Grouper<Integer> grouper = new BufferHashGrouper<>(
        Suppliers.ofInstance(ByteBuffer.allocate(1000)),
        GrouperTestUtil.intKeySerde(),
        AggregatorAdapters.factorizeBuffered(
            columnSelectorFactory,
            ImmutableList.of(
                new LongSumAggregatorFactory("valueSum", "value"),
                new CountAggregatorFactory("count")
            )
        ),
        Integer.MAX_VALUE,
        0,
        0,
        true
    );
    grouper.init();
    columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L)));
    grouper.aggregate(12);
    grouper.aggregate(6);
    grouper.aggregate(10);
    grouper.aggregate(6);
    grouper.aggregate(12);
    grouper.aggregate(12);
    final List<Grouper.Entry<Integer>> expected = ImmutableList.of(
        new Grouper.Entry<>(6, new Object[] { 20L, 2L }),
        new Grouper.Entry<>(10, new Object[] { 10L, 1L }),
        new Grouper.Entry<>(12, new Object[] { 30L, 3L })
    );
    final List<Grouper.Entry<Integer>> unsortedEntries = Lists.newArrayList(grouper.iterator(false));
    final List<Grouper.Entry<Integer>> sortedEntries = Lists.newArrayList(grouper.iterator(true));
    Assert.assertEquals(expected, sortedEntries);
    Assert.assertEquals(
        expected,
        Ordering.from((Comparator<Grouper.Entry<Integer>>) (o1, o2) -> Ints.compare(o1.getKey(), o2.getKey()))
                .sortedCopy(unsortedEntries)
    );
}
Also used: LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), MapBasedRow (org.apache.druid.data.input.MapBasedRow), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), Test (org.junit.Test)
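The expected entries follow directly from the aggregate calls: key 12 is aggregated three times (3 × 10 = 30), key 6 twice (20), and key 10 once (10). For comparison, an illustrative plain-map version of the same grouping:

// The same grouping with an ordinary sorted map instead of the buffer-backed
// grouper; every aggregated row carries value = 10 per the bound MapBasedRow.
final Map<Integer, long[]> groups = new TreeMap<>(); // key-sorted, like iterator(true)
for (int key : new int[] { 12, 6, 10, 6, 12, 12 }) {
    final long[] aggs = groups.computeIfAbsent(key, k -> new long[2]);
    aggs[0] += 10L; // valueSum
    aggs[1] += 1;   // count
}
// groups: {6=[20, 2], 10=[10, 1], 12=[30, 3]}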

Example 65 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

From class ExpressionTestHelper, method testFilter:

void testFilter(
    final SqlOperator op,
    final List<? extends RexNode> exprs,
    final List<VirtualColumn> expectedVirtualColumns,
    final DimFilter expectedFilter,
    final boolean expectedResult
) {
    final RexNode rexNode = rexBuilder.makeCall(op, exprs);
    final VirtualColumnRegistry virtualColumnRegistry = VirtualColumnRegistry.create(rowSignature, TestExprMacroTable.INSTANCE);
    final DimFilter filter = Expressions.toFilter(PLANNER_CONTEXT, rowSignature, virtualColumnRegistry, rexNode);
    Assert.assertEquals("Filter for: " + rexNode, expectedFilter, filter);
    final List<VirtualColumn> virtualColumns = filter
        .getRequiredColumns()
        .stream()
        .map(virtualColumnRegistry::getVirtualColumn)
        .filter(Objects::nonNull)
        .sorted(Comparator.comparing(VirtualColumn::getOutputName))
        .collect(Collectors.toList());
    Assert.assertEquals(
        "Virtual columns for: " + rexNode,
        expectedVirtualColumns.stream()
            .sorted(Comparator.comparing(VirtualColumn::getOutputName))
            .collect(Collectors.toList()),
        virtualColumns
    );
    final ValueMatcher matcher = expectedFilter.toFilter().makeMatcher(
        new VirtualizedColumnSelectorFactory(
            RowBasedColumnSelectorFactory.create(
                RowAdapters.standardRow(),
                () -> new MapBasedRow(0L, bindings),
                rowSignature,
                false
            ),
            VirtualColumns.create(virtualColumns)
        )
    );
    Assert.assertEquals("Result for: " + rexNode, expectedResult, matcher.matches());
}
Also used: MapBasedRow (org.apache.druid.data.input.MapBasedRow), VirtualColumnRegistry (org.apache.druid.sql.calcite.rel.VirtualColumnRegistry), ValueMatcher (org.apache.druid.query.filter.ValueMatcher), Objects (java.util.Objects), VirtualColumn (org.apache.druid.segment.VirtualColumn), DimFilter (org.apache.druid.query.filter.DimFilter), VirtualizedColumnSelectorFactory (org.apache.druid.segment.virtual.VirtualizedColumnSelectorFactory), RexNode (org.apache.calcite.rex.RexNode)
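The MapBasedRow here acts as a single in-memory row backing the column selector factory, so the filter's ValueMatcher can be evaluated without a real segment. A stripped-down sketch of the same pattern with a hypothetical filter and signature (no virtual columns involved):

// Evaluate a DimFilter against one in-memory row. The column name, filter,
// and signature are made up for illustration; the factory call mirrors the test.
final Map<String, Object> bindings = ImmutableMap.of("dim1", "foo");
final RowSignature signature = RowSignature.builder().add("dim1", ColumnType.STRING).build();
final DimFilter filter = new SelectorDimFilter("dim1", "foo", null);
final ValueMatcher matcher = filter.toFilter().makeMatcher(
    RowBasedColumnSelectorFactory.create(
        RowAdapters.standardRow(),
        () -> new MapBasedRow(0L, bindings),
        signature,
        false
    )
);
Assert.assertTrue(matcher.matches());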

Aggregations

MapBasedRow (org.apache.druid.data.input.MapBasedRow): 65
Test (org.junit.Test): 50
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 36
ArrayList (java.util.ArrayList): 21
Row (org.apache.druid.data.input.Row): 16
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 16
GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest): 16
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 15
HashMap (java.util.HashMap): 13
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 12
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 10
List (java.util.List): 9
ResultRow (org.apache.druid.query.groupby.ResultRow): 9
LongMeanAveragerFactory (org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory): 9
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 8
File (java.io.File): 7
ByteBuffer (java.nio.ByteBuffer): 6
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig): 6
TimeseriesResultValue (org.apache.druid.query.timeseries.TimeseriesResultValue): 6
IOException (java.io.IOException): 5