use of io.trino.operator.OperatorFactory in project trino by trinodb.
the class TestHashJoinOperator method testProbeOuterJoinWithFilterFunction.
@Test(dataProvider = "hashJoinTestValues")
public void testProbeOuterJoinWithFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) {
TaskContext taskContext = createTaskContext();
InternalJoinFilterFunction filterFunction = new TestInternalJoinFilterFunction(((leftPosition, leftPage, rightPosition, rightPage) -> BIGINT.getLong(rightPage.getBlock(1), rightPosition) >= 1025));
// build factory
List<Type> buildTypes = ImmutableList.of(VARCHAR, BIGINT, BIGINT);
RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR, BIGINT, BIGINT)).addSequencePage(10, 20, 30, 40);
BuildSideSetup buildSideSetup = setupBuildSide(nodePartitioningManager, parallelBuild, taskContext, buildPages, Optional.of(filterFunction), false, SINGLE_STREAM_SPILLER_FACTORY);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactory = buildSideSetup.getLookupSourceFactoryManager();
// probe factory
List<Type> probeTypes = ImmutableList.of(VARCHAR, BIGINT, BIGINT);
RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
List<Page> probeInput = probePages.addSequencePage(15, 20, 1020, 2020).build();
OperatorFactory joinOperatorFactory = probeOuterJoinOperatorFactory(lookupSourceFactory, probePages, true);
// build drivers and operators
instantiateBuildDrivers(buildSideSetup, taskContext);
buildLookupSource(executor, buildSideSetup);
// expected
MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes)).row("20", 1020L, 2020L, null, null, null).row("21", 1021L, 2021L, null, null, null).row("22", 1022L, 2022L, null, null, null).row("23", 1023L, 2023L, null, null, null).row("24", 1024L, 2024L, null, null, null).row("25", 1025L, 2025L, "25", 35L, 45L).row("26", 1026L, 2026L, "26", 36L, 46L).row("27", 1027L, 2027L, "27", 37L, 47L).row("28", 1028L, 2028L, "28", 38L, 48L).row("29", 1029L, 2029L, "29", 39L, 49L).row("30", 1030L, 2030L, null, null, null).row("31", 1031L, 2031L, null, null, null).row("32", 1032L, 2032L, null, null, null).row("33", 1033L, 2033L, null, null, null).row("34", 1034L, 2034L, null, null, null).build();
assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages));
}
use of io.trino.operator.OperatorFactory in project trino by trinodb.
the class TestHashJoinOperator method testProbeOuterJoin.
@Test(dataProvider = "hashJoinTestValues")
public void testProbeOuterJoin(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) {
TaskContext taskContext = createTaskContext();
// build factory
List<Type> buildTypes = ImmutableList.of(VARCHAR, BIGINT, BIGINT);
RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR, BIGINT, BIGINT)).addSequencePage(10, 20, 30, 40);
BuildSideSetup buildSideSetup = setupBuildSide(nodePartitioningManager, parallelBuild, taskContext, buildPages, Optional.empty(), false, SINGLE_STREAM_SPILLER_FACTORY);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactory = buildSideSetup.getLookupSourceFactoryManager();
// probe factory
List<Type> probeTypes = ImmutableList.of(VARCHAR, BIGINT, BIGINT);
RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
List<Page> probeInput = probePages.addSequencePage(15, 20, 1020, 2020).build();
OperatorFactory joinOperatorFactory = probeOuterJoinOperatorFactory(lookupSourceFactory, probePages, false);
// build drivers and operators
instantiateBuildDrivers(buildSideSetup, taskContext);
buildLookupSource(executor, buildSideSetup);
// expected
MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes)).row("20", 1020L, 2020L, "20", 30L, 40L).row("21", 1021L, 2021L, "21", 31L, 41L).row("22", 1022L, 2022L, "22", 32L, 42L).row("23", 1023L, 2023L, "23", 33L, 43L).row("24", 1024L, 2024L, "24", 34L, 44L).row("25", 1025L, 2025L, "25", 35L, 45L).row("26", 1026L, 2026L, "26", 36L, 46L).row("27", 1027L, 2027L, "27", 37L, 47L).row("28", 1028L, 2028L, "28", 38L, 48L).row("29", 1029L, 2029L, "29", 39L, 49L).row("30", 1030L, 2030L, null, null, null).row("31", 1031L, 2031L, null, null, null).row("32", 1032L, 2032L, null, null, null).row("33", 1033L, 2033L, null, null, null).row("34", 1034L, 2034L, null, null, null).build();
assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages));
}
use of io.trino.operator.OperatorFactory in project trino by trinodb.
the class TestHashJoinOperator method testProbeOuterJoinWithEmptyLookupSource.
@Test(dataProvider = "hashJoinTestValues")
public void testProbeOuterJoinWithEmptyLookupSource(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) {
TaskContext taskContext = createTaskContext();
// build factory
List<Type> buildTypes = ImmutableList.of(VARCHAR);
RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), buildTypes);
BuildSideSetup buildSideSetup = setupBuildSide(nodePartitioningManager, parallelBuild, taskContext, buildPages, Optional.empty(), false, SINGLE_STREAM_SPILLER_FACTORY);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactoryManager = buildSideSetup.getLookupSourceFactoryManager();
// probe factory
List<Type> probeTypes = ImmutableList.of(VARCHAR);
RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
List<Page> probeInput = probePages.row("a").row("b").row((String) null).row("c").build();
OperatorFactory joinOperatorFactory = operatorFactories.probeOuterJoin(0, new PlanNodeId("test"), lookupSourceFactoryManager, false, false, probePages.getTypes(), Ints.asList(0), getHashChannelAsInt(probePages), Optional.empty(), OptionalInt.of(1), PARTITIONING_SPILLER_FACTORY, TYPE_OPERATOR_FACTORY);
// build drivers and operators
instantiateBuildDrivers(buildSideSetup, taskContext);
buildLookupSource(executor, buildSideSetup);
// expected
MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes)).row("a", null).row("b", null).row(null, null).row("c", null).build();
assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages));
}
use of io.trino.operator.OperatorFactory in project trino by trinodb.
the class TestHashJoinOperator method innerJoinWithSpill.
private void innerJoinWithSpill(boolean probeHashEnabled, List<WhenSpill> whenSpill, SingleStreamSpillerFactory buildSpillerFactory, PartitioningSpillerFactory joinSpillerFactory) throws Exception {
TaskStateMachine taskStateMachine = new TaskStateMachine(new TaskId(new StageId("query", 0), 0, 0), executor);
TaskContext taskContext = TestingTaskContext.createTaskContext(executor, scheduledExecutor, TEST_SESSION, taskStateMachine);
DriverContext joinDriverContext = taskContext.addPipelineContext(2, true, true, false).addDriverContext();
// force a yield for every match in LookupJoinOperator, set called to true after first
AtomicBoolean called = new AtomicBoolean(false);
InternalJoinFilterFunction filterFunction = new TestInternalJoinFilterFunction((leftPosition, leftPage, rightPosition, rightPage) -> {
called.set(true);
joinDriverContext.getYieldSignal().forceYieldForTesting();
return true;
});
// build factory
RowPagesBuilder buildPages = rowPagesBuilder(false, Ints.asList(0), ImmutableList.of(VARCHAR, BIGINT)).addSequencePage(4, 20, 200).addSequencePage(4, 20, 200).addSequencePage(4, 30, 300).addSequencePage(4, 40, 400);
BuildSideSetup buildSideSetup = setupBuildSide(nodePartitioningManager, true, taskContext, buildPages, Optional.of(filterFunction), true, buildSpillerFactory);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactoryManager = buildSideSetup.getLookupSourceFactoryManager();
// probe factory
RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR, BIGINT)).row("20", 123_000L).row("20", 123_000L).pageBreak().addSequencePage(20, 0, 123_000).addSequencePage(10, 30, 123_000);
OperatorFactory joinOperatorFactory = innerJoinOperatorFactory(operatorFactories, lookupSourceFactoryManager, probePages, joinSpillerFactory, true);
// build drivers and operators
instantiateBuildDrivers(buildSideSetup, taskContext);
List<Driver> buildDrivers = buildSideSetup.getBuildDrivers();
int buildOperatorCount = buildDrivers.size();
checkState(buildOperatorCount == whenSpill.size());
LookupSourceFactory lookupSourceFactory = lookupSourceFactoryManager.getJoinBridge(Lifespan.taskWide());
try (Operator joinOperator = joinOperatorFactory.createOperator(joinDriverContext)) {
// build lookup source
ListenableFuture<LookupSourceProvider> lookupSourceProvider = lookupSourceFactory.createLookupSourceProvider();
List<Boolean> revoked = new ArrayList<>(nCopies(buildOperatorCount, false));
while (!lookupSourceProvider.isDone()) {
for (int i = 0; i < buildOperatorCount; i++) {
checkErrors(taskStateMachine);
buildDrivers.get(i).process();
HashBuilderOperator buildOperator = buildSideSetup.getBuildOperators().get(i);
if (whenSpill.get(i) == WhenSpill.DURING_BUILD && buildOperator.getOperatorContext().getReservedRevocableBytes() > 0) {
checkState(!lookupSourceProvider.isDone(), "Too late, LookupSource already done");
revokeMemory(buildOperator);
revoked.set(i, true);
}
}
}
getFutureValue(lookupSourceProvider).close();
assertEquals(revoked, whenSpill.stream().map(WhenSpill.DURING_BUILD::equals).collect(toImmutableList()), "Some operators not spilled before LookupSource built");
for (int i = 0; i < buildOperatorCount; i++) {
if (whenSpill.get(i) == WhenSpill.AFTER_BUILD) {
revokeMemory(buildSideSetup.getBuildOperators().get(i));
}
}
for (Driver buildDriver : buildDrivers) {
runDriverInThread(executor, buildDriver);
}
ValuesOperatorFactory valuesOperatorFactory = new ValuesOperatorFactory(17, new PlanNodeId("values"), probePages.build());
PageBuffer pageBuffer = new PageBuffer(10);
PageBufferOperatorFactory pageBufferOperatorFactory = new PageBufferOperatorFactory(18, new PlanNodeId("pageBuffer"), pageBuffer);
Driver joinDriver = Driver.createDriver(joinDriverContext, valuesOperatorFactory.createOperator(joinDriverContext), joinOperator, pageBufferOperatorFactory.createOperator(joinDriverContext));
while (!called.get()) {
// process first row of first page of LookupJoinOperator
processRow(joinDriver, taskStateMachine);
}
for (int i = 0; i < buildOperatorCount; i++) {
if (whenSpill.get(i) == WhenSpill.DURING_USAGE) {
triggerMemoryRevokingAndWait(buildSideSetup.getBuildOperators().get(i), taskStateMachine);
}
}
// process remaining LookupJoinOperator pages
while (!joinDriver.isFinished()) {
checkErrors(taskStateMachine);
processRow(joinDriver, taskStateMachine);
}
checkErrors(taskStateMachine);
List<Page> actualPages = getPages(pageBuffer);
MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probePages.getTypesWithoutHash(), buildPages.getTypesWithoutHash())).row("20", 123_000L, "20", 200L).row("20", 123_000L, "20", 200L).row("20", 123_000L, "20", 200L).row("20", 123_000L, "20", 200L).row("30", 123_000L, "30", 300L).row("31", 123_001L, "31", 301L).row("32", 123_002L, "32", 302L).row("33", 123_003L, "33", 303L).build();
assertEqualsIgnoreOrder(getProperColumns(joinOperator, concat(probePages.getTypes(), buildPages.getTypes()), probePages, actualPages).getMaterializedRows(), expected.getMaterializedRows());
} finally {
joinOperatorFactory.noMoreOperators();
}
}
use of io.trino.operator.OperatorFactory in project trino by trinodb.
the class TestHashJoinOperator method testOuterJoinWithNullBuild.
@Test(dataProvider = "hashJoinTestValues")
public void testOuterJoinWithNullBuild(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) {
TaskContext taskContext = createTaskContext();
// build factory
List<Type> buildTypes = ImmutableList.of(VARCHAR);
RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR)).row("a").row((String) null).row((String) null).row("a").row("b");
BuildSideSetup buildSideSetup = setupBuildSide(nodePartitioningManager, parallelBuild, taskContext, buildPages, Optional.empty(), false, SINGLE_STREAM_SPILLER_FACTORY);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactory = buildSideSetup.getLookupSourceFactoryManager();
// probe factory
List<Type> probeTypes = ImmutableList.of(VARCHAR);
RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
List<Page> probeInput = probePages.row("a").row("b").row("c").build();
OperatorFactory joinOperatorFactory = probeOuterJoinOperatorFactory(lookupSourceFactory, probePages, false);
// build drivers and operators
instantiateBuildDrivers(buildSideSetup, taskContext);
buildLookupSource(executor, buildSideSetup);
// expected
MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes)).row("a", "a").row("a", "a").row("b", "b").row("c", null).build();
assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages));
}
Aggregations