Examples with HashAggregationOperatorFactory - io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory

Example 1 with HashAggregationOperatorFactory

use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.

the class TestHashAggregationOperator method testAdaptivePartialAggregationTriggeredOnlyOnFlush.

/**
 * Checks that adaptive partial aggregation is evaluated only when the operator flushes:
 * the first operator still aggregates both of its pages, and only after its flush does the
 * shared controller report partial aggregation as disabled for the second operator.
 */
@Test
public void testAdaptivePartialAggregationTriggeredOnlyOnFlush() {
    List<Integer> groupingChannels = Ints.asList(0);
    PartialAggregationController controller = new PartialAggregationController(5, 0.8);
    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            groupingChannels,
            ImmutableList.of(),
            PARTIAL,
            ImmutableList.of(LONG_MIN.createAggregatorFactory(PARTIAL, ImmutableList.of(0), OptionalInt.empty())),
            Optional.empty(),
            Optional.empty(),
            10,
            // this setting makes the operator flush only after all pages
            Optional.of(DataSize.of(16, MEGABYTE)),
            joinCompiler,
            blockTypeOperators,
            // use 5 rows threshold to trigger adaptive partial aggregation after each page flush
            Optional.of(controller));

    List<Page> firstOperatorInput = rowPagesBuilder(false, groupingChannels, BIGINT)
            // first page holds unique values, so it would trigger adaptation, but it won't because flush is not called
            .addSequencePage(10, 0)
            // second page will be hashed to existing value 1
            .addBlocksPage(createRLEBlock(1, 2))
            .build();
    // the total unique rows ratio for the first operator will be 10/12 so > 0.8 (adaptive partial aggregation uniqueRowsRatioThreshold)
    List<Page> firstOperatorExpected = rowPagesBuilder(BIGINT, BIGINT)
            // we are expecting the second page to be squashed with the first
            .addSequencePage(10, 0, 0)
            .build();
    assertOperatorEquals(factory, firstOperatorInput, firstOperatorExpected);

    // the first operator flush disables partial aggregation
    assertTrue(controller.isPartialAggregationDisabled());

    // the second operator comes from the same factory and therefore shares the controller,
    // so it will only produce raw pages (partial aggregation is disabled at this point)
    List<Page> secondOperatorInput = rowPagesBuilder(false, groupingChannels, BIGINT)
            .addBlocksPage(createRLEBlock(1, 10))
            .addBlocksPage(createRLEBlock(2, 10))
            .build();
    List<Page> secondOperatorExpected = rowPagesBuilder(BIGINT, BIGINT)
            .addBlocksPage(createRLEBlock(1, 10), createRLEBlock(1, 10))
            .addBlocksPage(createRLEBlock(2, 10), createRLEBlock(2, 10))
            .build();
    assertOperatorEquals(factory, secondOperatorInput, secondOperatorExpected);
}

Also used : PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) PartialAggregationController(io.trino.operator.aggregation.partial.PartialAggregationController) Page(io.trino.spi.Page) HashAggregationOperatorFactory(io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory) Test(org.testng.annotations.Test)

Example 2 with HashAggregationOperatorFactory

use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.

the class TestHashAggregationOperator method testAdaptivePartialAggregation.

/**
 * Checks that adaptive partial aggregation kicks in after the first flushed page of mostly
 * unique values, so later pages (and later operators created by the same factory) are
 * passed through as raw values.
 */
@Test
public void testAdaptivePartialAggregation() {
    List<Integer> groupingChannels = Ints.asList(0);
    PartialAggregationController controller = new PartialAggregationController(5, 0.8);
    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            groupingChannels,
            ImmutableList.of(),
            PARTIAL,
            ImmutableList.of(LONG_MIN.createAggregatorFactory(PARTIAL, ImmutableList.of(0), OptionalInt.empty())),
            Optional.empty(),
            Optional.empty(),
            100,
            // this setting makes the operator flush after each page
            Optional.of(DataSize.ofBytes(1)),
            joinCompiler,
            blockTypeOperators,
            // use 5 rows threshold to trigger adaptive partial aggregation after each page flush
            Optional.of(controller));

    // at the start partial aggregation is enabled
    assertFalse(controller.isPartialAggregationDisabled());

    // the first operator will trigger adaptive partial aggregation after the first page
    List<Page> firstOperatorInput = rowPagesBuilder(false, groupingChannels, BIGINT)
            // first page will be hashed but the values are almost unique, so it will trigger adaptation
            .addBlocksPage(createLongsBlock(0, 1, 2, 3, 4, 5, 6, 7, 8, 8))
            // second page would be hashed to existing value 1, but if adaptive PA kicks in, the raw values will be passed on
            .addBlocksPage(createRLEBlock(1, 10))
            .build();
    List<Page> firstOperatorExpected = rowPagesBuilder(BIGINT, BIGINT)
            // the last position of the first page was aggregated
            .addBlocksPage(
                    createLongsBlock(0, 1, 2, 3, 4, 5, 6, 7, 8),
                    createLongsBlock(0, 1, 2, 3, 4, 5, 6, 7, 8))
            // we are expecting the second page with raw values
            .addBlocksPage(
                    createRLEBlock(1, 10),
                    createRLEBlock(1, 10))
            .build();
    assertOperatorEquals(factory, firstOperatorInput, firstOperatorExpected);

    // the first operator flush disables partial aggregation
    assertTrue(controller.isPartialAggregationDisabled());

    // the second operator comes from the same factory and therefore shares the controller,
    // so it will only produce raw pages (partial aggregation is disabled at this point)
    List<Page> secondOperatorInput = rowPagesBuilder(false, groupingChannels, BIGINT)
            .addBlocksPage(createRLEBlock(1, 10))
            .addBlocksPage(createRLEBlock(2, 10))
            .build();
    List<Page> secondOperatorExpected = rowPagesBuilder(BIGINT, BIGINT)
            .addBlocksPage(createRLEBlock(1, 10), createRLEBlock(1, 10))
            .addBlocksPage(createRLEBlock(2, 10), createRLEBlock(2, 10))
            .build();
    assertOperatorEquals(factory, secondOperatorInput, secondOperatorExpected);
}

Example 3 with HashAggregationOperatorFactory

use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.

the class TestHashAggregationOperator method testHashAggregationWithGlobals.

/**
 * Runs a SINGLE-step hash aggregation configured with global aggregation group ids 42 and 49
 * over an input to which no pages were added, and expects one row of default aggregate
 * values per global group id.
 */
@Test(dataProvider = "hashEnabledAndMemoryLimitForMergeValues")
public void testHashAggregationWithGlobals(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) {
    TestingAggregationFunction countVarchar = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("count"), fromTypes(VARCHAR));
    TestingAggregationFunction countBoolean = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("count"), fromTypes(BOOLEAN));
    TestingAggregationFunction maxVarchar = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("max"), fromTypes(VARCHAR));

    Optional<Integer> groupIdChannel = Optional.of(1);
    List<Integer> groupByChannels = Ints.asList(1, 2);
    List<Integer> globalGroupIds = Ints.asList(42, 49);

    // no pages are added to the builder before build()
    RowPagesBuilder pagesBuilder = rowPagesBuilder(hashEnabled, groupByChannels, VARCHAR, VARCHAR, VARCHAR, BIGINT, BIGINT, BOOLEAN);
    List<Page> input = pagesBuilder.build();

    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(VARCHAR, BIGINT),
            groupByChannels,
            globalGroupIds,
            SINGLE,
            true,
            ImmutableList.of(
                    COUNT.createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty()),
                    LONG_MIN.createAggregatorFactory(SINGLE, ImmutableList.of(4), OptionalInt.empty()),
                    LONG_AVERAGE.createAggregatorFactory(SINGLE, ImmutableList.of(4), OptionalInt.empty()),
                    maxVarchar.createAggregatorFactory(SINGLE, ImmutableList.of(2), OptionalInt.empty()),
                    countVarchar.createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty()),
                    countBoolean.createAggregatorFactory(SINGLE, ImmutableList.of(5), OptionalInt.empty())),
            pagesBuilder.getHashChannel(),
            groupIdChannel,
            100_000,
            Optional.of(DataSize.of(16, MEGABYTE)),
            spillEnabled,
            succinctBytes(memoryLimitForMerge),
            succinctBytes(memoryLimitForMergeWithMemory),
            spillerFactory,
            joinCompiler,
            blockTypeOperators,
            Optional.empty());

    DriverContext driverContext = createDriverContext(memoryLimitForMerge);
    // one expected row per global aggregation group id (42 and 49)
    MaterializedResult expected = resultBuilder(driverContext.getSession(), VARCHAR, BIGINT, BIGINT, BIGINT, DOUBLE, VARCHAR, BIGINT, BIGINT)
            .row(null, 42L, 0L, null, null, null, 0L, 0L)
            .row(null, 49L, 0L, null, null, null, 0L, 0L)
            .build();

    assertOperatorEqualsIgnoreOrder(
            factory,
            driverContext,
            input,
            expected,
            hashEnabled,
            Optional.of(groupByChannels.size()),
            revokeMemoryWhenAddingPages);
}

Also used : PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) RowPagesBuilder(io.trino.RowPagesBuilder) Page(io.trino.spi.Page) MaterializedResult(io.trino.testing.MaterializedResult) OperatorAssertion.toMaterializedResult(io.trino.operator.OperatorAssertion.toMaterializedResult) TestingAggregationFunction(io.trino.operator.aggregation.TestingAggregationFunction) HashAggregationOperatorFactory(io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory) Test(org.testng.annotations.Test)

Example 4 with HashAggregationOperatorFactory

use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.

the class TestHashAggregationOperator method testSpillerFailure.

/**
 * Runs a spill-enabled hash aggregation wired to a {@code FailingSpillerFactory} and expects
 * a {@link RuntimeException} caused by an {@link IOException} whose message ends with
 * "Failed to spill".
 */
@Test
public void testSpillerFailure() {
    TestingAggregationFunction maxVarchar = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("max"), fromTypes(VARCHAR));

    List<Integer> groupingChannels = Ints.asList(1);
    ImmutableList<Type> columnTypes = ImmutableList.of(VARCHAR, BIGINT, VARCHAR, BIGINT);
    RowPagesBuilder pagesBuilder = rowPagesBuilder(false, groupingChannels, columnTypes);
    List<Page> pages = pagesBuilder
            .addSequencePage(10, 100, 0, 100, 0)
            .addSequencePage(10, 100, 0, 200, 0)
            .addSequencePage(10, 100, 0, 300, 0)
            .build();

    DriverContext context = TestingTaskContext.builder(executor, scheduledExecutor, TEST_SESSION)
            .setQueryMaxMemory(DataSize.valueOf("7MB"))
            .setMemoryPoolSize(DataSize.valueOf("1GB"))
            .build()
            .addPipelineContext(0, true, true, false)
            .addDriverContext();

    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            groupingChannels,
            ImmutableList.of(),
            SINGLE,
            false,
            ImmutableList.of(
                    COUNT.createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty()),
                    LONG_MIN.createAggregatorFactory(SINGLE, ImmutableList.of(3), OptionalInt.empty()),
                    LONG_AVERAGE.createAggregatorFactory(SINGLE, ImmutableList.of(3), OptionalInt.empty()),
                    maxVarchar.createAggregatorFactory(SINGLE, ImmutableList.of(2), OptionalInt.empty())),
            pagesBuilder.getHashChannel(),
            Optional.empty(),
            100_000,
            Optional.of(DataSize.of(16, MEGABYTE)),
            true,
            succinctBytes(8),
            succinctBytes(Integer.MAX_VALUE),
            new FailingSpillerFactory(),
            joinCompiler,
            blockTypeOperators,
            Optional.empty());

    assertThatThrownBy(() -> toPages(factory, context, pages))
            .isInstanceOf(RuntimeException.class)
            .hasCauseInstanceOf(IOException.class)
            .hasMessageEndingWith("Failed to spill");
}

Also used : RowPagesBuilder(io.trino.RowPagesBuilder) Page(io.trino.spi.Page) IOException(java.io.IOException) TestingAggregationFunction(io.trino.operator.aggregation.TestingAggregationFunction) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) Type(io.trino.spi.type.Type) HashAggregationOperatorFactory(io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory) Test(org.testng.annotations.Test)

Example 5 with HashAggregationOperatorFactory

use of io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory in project trino by trinodb.

the class TestHashAggregationOperator method testHashAggregationMemoryReservation.

/**
 * Feeds three sequence pages through a SINGLE-step {@code array_agg} hash aggregation and
 * then checks the user and revocable memory reservations reported by the operator's
 * nested operator stats.
 */
@Test(dataProvider = "hashEnabledAndMemoryLimitForMergeValues")
public void testHashAggregationMemoryReservation(boolean hashEnabled, boolean spillEnabled, boolean revokeMemoryWhenAddingPages, long memoryLimitForMerge, long memoryLimitForMergeWithMemory) {
    TestingAggregationFunction arrayAgg = FUNCTION_RESOLUTION.getAggregateFunction(QualifiedName.of("array_agg"), fromTypes(BIGINT));

    List<Integer> groupingChannels = Ints.asList(1);
    RowPagesBuilder pagesBuilder = rowPagesBuilder(hashEnabled, groupingChannels, BIGINT, BIGINT);
    List<Page> pages = pagesBuilder
            .addSequencePage(10, 100, 0)
            .addSequencePage(10, 200, 0)
            .addSequencePage(10, 300, 0)
            .build();

    DriverContext context = createTaskContext(executor, scheduledExecutor, TEST_SESSION, DataSize.of(11, Unit.MEGABYTE))
            .addPipelineContext(0, true, true, false)
            .addDriverContext();

    HashAggregationOperatorFactory factory = new HashAggregationOperatorFactory(
            0,
            new PlanNodeId("test"),
            ImmutableList.of(BIGINT),
            groupingChannels,
            ImmutableList.of(),
            SINGLE,
            true,
            ImmutableList.of(arrayAgg.createAggregatorFactory(SINGLE, ImmutableList.of(0), OptionalInt.empty())),
            pagesBuilder.getHashChannel(),
            Optional.empty(),
            100_000,
            Optional.of(DataSize.of(16, MEGABYTE)),
            spillEnabled,
            succinctBytes(memoryLimitForMerge),
            succinctBytes(memoryLimitForMergeWithMemory),
            spillerFactory,
            joinCompiler,
            blockTypeOperators,
            Optional.empty());

    Operator operator = factory.createOperator(context);
    toPages(operator, pages.iterator(), revokeMemoryWhenAddingPages);

    // TODO (https://github.com/trinodb/trino/issues/10596): it should be 0, since operator is finished
    long expectedUserMemory = spillEnabled && revokeMemoryWhenAddingPages ? 5_322_192 : 0;
    assertEquals(
            getOnlyElement(operator.getOperatorContext().getNestedOperatorStats()).getUserMemoryReservation().toBytes(),
            expectedUserMemory);
    assertEquals(
            getOnlyElement(operator.getOperatorContext().getNestedOperatorStats()).getRevocableMemoryReservation().toBytes(),
            0);
}

Also used : PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) RowPagesBuilder(io.trino.RowPagesBuilder) Page(io.trino.spi.Page) TestingAggregationFunction(io.trino.operator.aggregation.TestingAggregationFunction) HashAggregationOperatorFactory(io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory) Test(org.testng.annotations.Test)

Aggregations

HashAggregationOperatorFactory (io.trino.operator.HashAggregationOperator.HashAggregationOperatorFactory): 16; PlanNodeId (io.trino.sql.planner.plan.PlanNodeId): 16; Page (io.trino.spi.Page): 14; Test (org.testng.annotations.Test): 14; RowPagesBuilder (io.trino.RowPagesBuilder): 11; TestingAggregationFunction (io.trino.operator.aggregation.TestingAggregationFunction): 5; OperatorAssertion.toMaterializedResult (io.trino.operator.OperatorAssertion.toMaterializedResult): 4; MaterializedResult (io.trino.testing.MaterializedResult): 4; OperatorFactory (io.trino.operator.OperatorFactory): 2; PartialAggregationController (io.trino.operator.aggregation.partial.PartialAggregationController): 2; BlockBuilder (io.trino.spi.block.BlockBuilder): 2; Type (io.trino.spi.type.Type): 2; JoinCompiler (io.trino.sql.gen.JoinCompiler): 2; TpchQuery1OperatorFactory (io.trino.benchmark.HandTpchQuery1.TpchQuery1Operator.TpchQuery1OperatorFactory): 1; GroupByHashYieldResult (io.trino.operator.GroupByHashYieldAssertion.GroupByHashYieldResult): 1; IOException (java.io.IOException): 1; ArrayList (java.util.ArrayList): 1