Use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.
From the class TestHashAggregationOperator, method testAdaptivePartialAggregationTriggeredOnlyOnFlush.
/**
 * Checks that adaptive partial aggregation is only evaluated when the operator flushes.
 *
 * <p>The first operator consumes both input pages before flushing, so it still emits a single
 * aggregated page; that flush then disables partial aggregation on the shared controller.
 * A second operator created from the same factory therefore passes its input through as raw pages.
 */
@Test
public void testAdaptivePartialAggregationTriggeredOnlyOnFlush() {
    List<Integer> groupByChannels = Ints.asList(0);
    // 5 rows threshold and 0.8 unique rows ratio threshold for adaptive partial aggregation
    PartialAggregationController controller = new PartialAggregationController(5, 0.8);
    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            groupByChannels,
            ImmutableList.of(),
            PARTIAL,
            ImmutableList.of(LONG_MIN.createAggregatorFactory(PARTIAL, ImmutableList.of(0), OptionalInt.empty())),
            Optional.empty(),
            Optional.empty(),
            10,
            // this setting makes the operator flush only after all pages
            Optional.of(DataSize.of(16, MEGABYTE)),
            joinCompiler,
            blockTypeOperators,
            // the controller is shared by every operator created from this factory
            Optional.of(controller));

    List<Page> firstOperatorInput = rowPagesBuilder(false, groupByChannels, BIGINT)
            // first page holds unique values, so it would trigger adaptation,
            // but it won't because flush is not called
            .addSequencePage(10, 0)
            // second page will be hashed to existing value 1
            .addBlocksPage(createRLEBlock(1, 2))
            .build();
    // the total unique rows ratio for the first operator will be 10/12, so > 0.8
    // (adaptive partial aggregation uniqueRowsRatioThreshold)
    List<Page> firstOperatorExpected = rowPagesBuilder(BIGINT, BIGINT)
            // the second page is expected to be squashed with the first
            .addSequencePage(10, 0, 0)
            .build();
    assertOperatorEquals(factory, firstOperatorInput, firstOperatorExpected);

    // the first operator flush disables partial aggregation
    assertTrue(controller.isPartialAggregationDisabled());

    // the second operator uses the same factory and so reuses the controller; partial aggregation
    // is disabled at this point, so it will only produce raw pages
    List<Page> secondOperatorInput = rowPagesBuilder(false, groupByChannels, BIGINT)
            .addBlocksPage(createRLEBlock(1, 10))
            .addBlocksPage(createRLEBlock(2, 10))
            .build();
    List<Page> secondOperatorExpected = rowPagesBuilder(BIGINT, BIGINT)
            .addBlocksPage(createRLEBlock(1, 10), createRLEBlock(1, 10))
            .addBlocksPage(createRLEBlock(2, 10), createRLEBlock(2, 10))
            .build();
    assertOperatorEquals(factory, secondOperatorInput, secondOperatorExpected);
}
Use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.
From the class TestHashAggregationOperator, method testAdaptivePartialAggregation.
/**
 * Checks that adaptive partial aggregation kicks in after a flush with mostly unique rows.
 *
 * <p>The operator is configured to flush after each page. The first, almost-unique page is
 * aggregated and flushed, which disables partial aggregation on the shared controller; every
 * later page (in this operator and in a second one from the same factory) is emitted raw.
 */
@Test
public void testAdaptivePartialAggregation() {
    List<Integer> groupByChannels = Ints.asList(0);
    // use 5 rows threshold to trigger adaptive partial aggregation after each page flush
    PartialAggregationController controller = new PartialAggregationController(5, 0.8);
    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            groupByChannels,
            ImmutableList.of(),
            PARTIAL,
            ImmutableList.of(LONG_MIN.createAggregatorFactory(PARTIAL, ImmutableList.of(0), OptionalInt.empty())),
            Optional.empty(),
            Optional.empty(),
            100,
            // this setting makes the operator flush after each page
            Optional.of(DataSize.ofBytes(1)),
            joinCompiler,
            blockTypeOperators,
            // the controller is shared by every operator created from this factory
            Optional.of(controller));

    // at the start partial aggregation is enabled
    assertFalse(controller.isPartialAggregationDisabled());

    // the first operator will trigger adaptive partial aggregation after the first page
    List<Page> firstOperatorInput = rowPagesBuilder(false, groupByChannels, BIGINT)
            // first page will be hashed, but the values are almost unique, so it will trigger adaptation
            .addBlocksPage(createLongsBlock(0, 1, 2, 3, 4, 5, 6, 7, 8, 8))
            // second page would be hashed to existing value 1,
            // but if adaptive PA kicks in, the raw values will be passed on
            .addBlocksPage(createRLEBlock(1, 10))
            .build();
    List<Page> firstOperatorExpected = rowPagesBuilder(BIGINT, BIGINT)
            // the last position of the first page was aggregated
            .addBlocksPage(
                    createLongsBlock(0, 1, 2, 3, 4, 5, 6, 7, 8),
                    createLongsBlock(0, 1, 2, 3, 4, 5, 6, 7, 8))
            // the second page is expected with raw values
            .addBlocksPage(createRLEBlock(1, 10), createRLEBlock(1, 10))
            .build();
    assertOperatorEquals(factory, firstOperatorInput, firstOperatorExpected);

    // the first operator flush disables partial aggregation
    assertTrue(controller.isPartialAggregationDisabled());

    // the second operator uses the same factory and so reuses the controller; partial aggregation
    // is disabled at this point, so it will only produce raw pages
    List<Page> secondOperatorInput = rowPagesBuilder(false, groupByChannels, BIGINT)
            .addBlocksPage(createRLEBlock(1, 10))
            .addBlocksPage(createRLEBlock(2, 10))
            .build();
    List<Page> secondOperatorExpected = rowPagesBuilder(BIGINT, BIGINT)
            .addBlocksPage(createRLEBlock(1, 10), createRLEBlock(1, 10))
            .addBlocksPage(createRLEBlock(2, 10), createRLEBlock(2, 10))
            .build();
    assertOperatorEquals(factory, secondOperatorInput, secondOperatorExpected);
}
Use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.
From the class TestHashAggregationOperator, method testHashAggregationWithGlobals.
/**
 * Runs a SINGLE-step hash aggregation over an empty input (no pages are added to the builder)
 * with global aggregation group ids 42 and 49, and expects exactly one row per global group id.
 *
 * <p>Parameters are supplied by the {@code hashEnabledAndMemoryLimitForMergeValues} data provider.
 */
@Test(dataProvider = "hashEnabledAndMemoryLimitForMergeValues")
public void testHashAggregationWithGlobals(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) {
    TestingAggregationFunction countVarchar = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("count"), fromTypes(VARCHAR));
    TestingAggregationFunction countBoolean = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("count"), fromTypes(BOOLEAN));
    TestingAggregationFunction maxVarchar = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("max"), fromTypes(VARCHAR));

    Optional<Integer> groupIdChannel = Optional.of(1);
    List<Integer> groupByChannels = Ints.asList(1, 2);
    List<Integer> globalAggregationGroupIds = Ints.asList(42, 49);

    // no pages are added: the input is intentionally empty
    RowPagesBuilder pagesBuilder = rowPagesBuilder(hashEnabled, groupByChannels, VARCHAR, VARCHAR, VARCHAR, BIGINT, BIGINT, BOOLEAN);
    List<Page> inputPages = pagesBuilder.build();

    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(VARCHAR, BIGINT),
            groupByChannels,
            globalAggregationGroupIds,
            SINGLE,
            true,
            ImmutableList.of(
                    COUNT.createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty()),
                    LONG_MIN.createAggregatorFactory(SINGLE, ImmutableList.of(4), OptionalInt.empty()),
                    LONG_AVERAGE.createAggregatorFactory(SINGLE, ImmutableList.of(4), OptionalInt.empty()),
                    maxVarchar.createAggregatorFactory(SINGLE, ImmutableList.of(2), OptionalInt.empty()),
                    countVarchar.createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty()),
                    countBoolean.createAggregatorFactory(SINGLE, ImmutableList.of(5), OptionalInt.empty())),
            pagesBuilder.getHashChannel(),
            groupIdChannel,
            100_000,
            Optional.of(DataSize.of(16, MEGABYTE)),
            spillEnabled,
            succinctBytes(memoryLimitForMerge),
            succinctBytes(memoryLimitForMergeWithMemory),
            spillerFactory,
            joinCompiler,
            blockTypeOperators,
            Optional.empty());

    DriverContext driverContext = createDriverContext(memoryLimitForMerge);
    // one output row per global aggregation group id (42 and 49)
    MaterializedResult expectedResult = resultBuilder(driverContext.getSession(), VARCHAR, BIGINT, BIGINT, BIGINT, DOUBLE, VARCHAR, BIGINT, BIGINT)
            .row(null, 42L, 0L, null, null, null, 0L, 0L)
            .row(null, 49L, 0L, null, null, null, 0L, 0L)
            .build();

    assertOperatorEqualsIgnoreOrder(
            factory,
            driverContext,
            inputPages,
            expectedResult,
            hashEnabled,
            Optional.of(groupByChannels.size()),
            revokeMemoryWhenAddingPages);
}
Use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.
From the class TestHashAggregationOperator, method testSpillerFailure.
/**
 * Checks that a failing spiller surfaces to the caller: running the operator with a
 * {@code FailingSpillerFactory} must throw a {@link RuntimeException} caused by an
 * {@link IOException}, with a message ending in "Failed to spill".
 */
@Test
public void testSpillerFailure() {
    TestingAggregationFunction maxVarchar = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("max"), fromTypes(VARCHAR));

    List<Integer> hashChannels = Ints.asList(1);
    ImmutableList<Type> types = ImmutableList.of(VARCHAR, BIGINT, VARCHAR, BIGINT);

    RowPagesBuilder pagesBuilder = rowPagesBuilder(false, hashChannels, types);
    List<Page> inputPages = pagesBuilder
            .addSequencePage(10, 100, 0, 100, 0)
            .addSequencePage(10, 100, 0, 200, 0)
            .addSequencePage(10, 100, 0, 300, 0)
            .build();

    DriverContext driverContext = TestingTaskContext.builder(executor, scheduledExecutor, TEST_SESSION)
            .setQueryMaxMemory(DataSize.valueOf("7MB"))
            .setMemoryPoolSize(DataSize.valueOf("1GB"))
            .build()
            .addPipelineContext(0, true, true, false)
            .addDriverContext();

    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            hashChannels,
            ImmutableList.of(),
            SINGLE,
            false,
            ImmutableList.of(
                    COUNT.createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty()),
                    LONG_MIN.createAggregatorFactory(SINGLE, ImmutableList.of(3), OptionalInt.empty()),
                    LONG_AVERAGE.createAggregatorFactory(SINGLE, ImmutableList.of(3), OptionalInt.empty()),
                    maxVarchar.createAggregatorFactory(SINGLE, ImmutableList.of(2), OptionalInt.empty())),
            pagesBuilder.getHashChannel(),
            Optional.empty(),
            100_000,
            Optional.of(DataSize.of(16, MEGABYTE)),
            true,
            succinctBytes(8),
            succinctBytes(Integer.MAX_VALUE),
            // the spiller factory under test: expected to make spilling fail
            new FailingSpillerFactory(),
            joinCompiler,
            blockTypeOperators,
            Optional.empty());

    assertThatThrownBy(() -> toPages(factory, driverContext, inputPages))
            .isInstanceOf(RuntimeException.class)
            .hasCauseInstanceOf(IOException.class)
            .hasMessageEndingWith("Failed to spill");
}
Use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.
From the class TestHashAggregationOperator, method testHashAggregationMemoryReservation.
/**
 * Feeds three pages through a SINGLE-step {@code array_agg} hash aggregation and then checks the
 * user and revocable memory reservations reported by the operator's nested operator stats.
 *
 * <p>Parameters are supplied by the {@code hashEnabledAndMemoryLimitForMergeValues} data provider.
 */
@Test(dataProvider = "hashEnabledAndMemoryLimitForMergeValues")
public void testHashAggregationMemoryReservation(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) {
    TestingAggregationFunction arrayAgg = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("array_agg"), fromTypes(BIGINT));

    List<Integer> hashChannels = Ints.asList(1);
    RowPagesBuilder pagesBuilder = rowPagesBuilder(hashEnabled, hashChannels, BIGINT, BIGINT);
    List<Page> inputPages = pagesBuilder
            .addSequencePage(10, 100, 0)
            .addSequencePage(10, 200, 0)
            .addSequencePage(10, 300, 0)
            .build();

    DriverContext driverContext = createTaskContext(executor, scheduledExecutor, TEST_SESSION, DataSize.of(11, Unit.MEGABYTE))
            .addPipelineContext(0, true, true, false)
            .addDriverContext();

    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            hashChannels,
            ImmutableList.of(),
            SINGLE,
            true,
            ImmutableList.of(arrayAgg.createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty())),
            pagesBuilder.getHashChannel(),
            Optional.empty(),
            100_000,
            Optional.of(DataSize.of(16, MEGABYTE)),
            spillEnabled,
            succinctBytes(memoryLimitForMerge),
            succinctBytes(memoryLimitForMergeWithMemory),
            spillerFactory,
            joinCompiler,
            blockTypeOperators,
            Optional.empty());

    Operator operator = factory.createOperator(driverContext);
    toPages(operator, inputPages.iterator(), revokeMemoryWhenAddingPages);

    // TODO (https://github.com/trinodb/trino/issues/10596): it should be 0, since operator is finished
    long expectedUserMemoryBytes = 0;
    if (spillEnabled && revokeMemoryWhenAddingPages) {
        expectedUserMemoryBytes = 5_322_192;
    }
    assertEquals(
            getOnlyElement(operator.getOperatorContext().getNestedOperatorStats()).getUserMemoryReservation().toBytes(),
            expectedUserMemoryBytes);
    // no revocable memory may remain reserved
    assertEquals(
            getOnlyElement(operator.getOperatorContext().getNestedOperatorStats()).getRevocableMemoryReservation().toBytes(),
            0);
}
Aggregations