Use of io.trino.operator.join.HashBuilderOperator.HashBuilderOperatorFactory in project trino by trinodb.
Class BenchmarkHashBuildAndJoinOperators, method buildHash.
/**
 * Builds the join lookup source from the pages held by {@code buildContext}.
 * <p>
 * One hash builder operator is created per partition. With a single partition every build page is
 * handed straight to that operator; otherwise each page is split with {@code partitionPages} and
 * slice {@code i} goes to operator {@code i}. All operators are then finished, after which the
 * lookup source provider is expected to be done and is closed.
 *
 * @param buildContext supplies the session, task contexts, build pages, types and hash channels
 * @param lookupSourceFactoryManager join bridge manager handed to the hash builder factory
 * @param outputChannels channels passed to the hash builder factory as its output channels
 * @param partitionCount number of hash builder operators to create
 * @throws AssertionError if the lookup source provider is not done once every operator has finished
 */
private static void buildHash(BuildContext buildContext, JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactoryManager, List<Integer> outputChannels, int partitionCount)
{
    HashBuilderOperatorFactory builderFactory = new HashBuilderOperatorFactory(
            HASH_BUILD_OPERATOR_ID,
            TEST_PLAN_NODE_ID,
            lookupSourceFactoryManager,
            outputChannels,
            buildContext.getHashChannels(),
            buildContext.getHashChannel(),
            Optional.empty(),
            Optional.empty(),
            ImmutableList.of(),
            10_000,
            new PagesIndex.TestingFactory(false),
            false,
            SingleStreamSpillerFactory.unsupportedSingleStreamSpillerFactory(),
            incrementalLoadFactorHashArraySizeSupplier(buildContext.getSession()));

    // Every operator gets a driver context from its own freshly created task context.
    boolean partitioned = partitionCount > 1;
    Operator[] builders = IntStream.range(0, partitionCount)
            .mapToObj(partition -> builderFactory.createOperator(
                    buildContext.createTaskContext()
                            .addPipelineContext(0, true, true, partitioned)
                            .addDriverContext()))
            .toArray(Operator[]::new);

    if (partitionCount != 1) {
        // Partitioned build: split each page and route slice i to operator i.
        InterpretedHashGenerator hashGenerator = new InterpretedHashGenerator(
                buildContext.getHashChannels().stream()
                        .map(channel -> buildContext.getTypes().get(channel))
                        .collect(toImmutableList()),
                buildContext.getHashChannels(),
                TYPE_OPERATOR_FACTORY);
        PartitionFunction partitionGenerator = new LocalPartitionGenerator(hashGenerator, partitionCount);
        for (Page page : buildContext.getBuildPages()) {
            Page[] slices = partitionPages(page, buildContext.getTypes(), partitionCount, partitionGenerator);
            for (int partition = 0; partition < partitionCount; partition++) {
                builders[partition].addInput(slices[partition]);
            }
        }
    }
    else {
        // Single operator: no partitioning step, pages are added as they are.
        Operator soleBuilder = builders[0];
        for (Page page : buildContext.getBuildPages()) {
            soleBuilder.addInput(page);
        }
    }

    // The provider future is requested before the operators are finished.
    ListenableFuture<LookupSourceProvider> lookupSourceProvider = lookupSourceFactoryManager
            .getJoinBridge(Lifespan.taskWide())
            .createLookupSourceProvider();
    for (int partition = 0; partition < builders.length; partition++) {
        builders[partition].finish();
    }

    if (lookupSourceProvider.isDone()) {
        getFutureValue(lookupSourceProvider).close();
        return;
    }
    throw new AssertionError("Expected lookup source provider to be ready");
}
Use of io.trino.operator.join.HashBuilderOperator.HashBuilderOperatorFactory in project trino by trinodb.
Class HashBuildAndJoinBenchmark, method createDrivers.
/**
 * Creates the hash build driver and the join driver for the benchmarked query:
 * <pre>
 * select orderkey, quantity, totalprice
 * from lineitem join orders using (orderkey)
 * </pre>
 * When {@code hashEnabled} is set, each side gets a hash projection operator after its table scan
 * and the appended {@code BIGINT} channel is passed on as the hash channel.
 *
 * @param taskContext task context that both pipeline contexts are added to
 * @return the hash build driver followed by the join driver
 */
@Override
protected List<Driver> createDrivers(TaskContext taskContext)
{
    // build side: orders scan, optional hash projection, hash builder
    List<Type> buildTypes = ordersTableTypes;
    OptionalInt buildHashChannel = OptionalInt.empty();
    ImmutableList.Builder<OperatorFactory> buildOperators = ImmutableList.builder();
    buildOperators.add(ordersTableScan);
    if (hashEnabled) {
        buildOperators.add(createHashProjectOperator(1, new PlanNodeId("test"), buildTypes));
        buildTypes = ImmutableList.<Type>builder().addAll(buildTypes).add(BIGINT).build();
        // the hash projection's output is the last channel
        buildHashChannel = OptionalInt.of(buildTypes.size() - 1);
    }

    BlockTypeOperators blockTypeOperators = new BlockTypeOperators(new TypeOperators());
    List<Type> lookupOutputTypes = ImmutableList.of(buildTypes.get(0), buildTypes.get(1));
    List<Type> lookupHashTypes = ImmutableList.of(buildTypes.get(0));
    JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactoryManager = JoinBridgeManager.lookupAllAtOnce(
            new PartitionedLookupSourceFactory(buildTypes, lookupOutputTypes, lookupHashTypes, 1, false, blockTypeOperators));

    buildOperators.add(new HashBuilderOperatorFactory(
            2,
            new PlanNodeId("test"),
            lookupSourceFactoryManager,
            ImmutableList.of(0, 1),
            Ints.asList(0),
            buildHashChannel,
            Optional.empty(),
            Optional.empty(),
            ImmutableList.of(),
            1_500_000,
            new PagesIndex.TestingFactory(false),
            false,
            SingleStreamSpillerFactory.unsupportedSingleStreamSpillerFactory(),
            incrementalLoadFactorHashArraySizeSupplier(session)));
    DriverFactory hashBuildDriverFactory = new DriverFactory(0, true, false, buildOperators.build(), OptionalInt.empty(), UNGROUPED_EXECUTION);

    // probe side: lineitem scan, optional hash projection, inner join, null output
    List<Type> probeTypes = lineItemTableTypes;
    OptionalInt probeHashChannel = OptionalInt.empty();
    ImmutableList.Builder<OperatorFactory> probeOperators = ImmutableList.builder();
    probeOperators.add(lineItemTableScan);
    if (hashEnabled) {
        probeOperators.add(createHashProjectOperator(1, new PlanNodeId("test"), probeTypes));
        probeTypes = ImmutableList.<Type>builder().addAll(probeTypes).add(BIGINT).build();
        probeHashChannel = OptionalInt.of(probeTypes.size() - 1);
    }
    probeOperators.add(operatorFactories.innerJoin(
            2,
            new PlanNodeId("test"),
            lookupSourceFactoryManager,
            false,
            false,
            false,
            probeTypes,
            Ints.asList(0),
            probeHashChannel,
            Optional.empty(),
            OptionalInt.empty(),
            unsupportedPartitioningSpillerFactory(),
            blockTypeOperators));
    probeOperators.add(new NullOutputOperatorFactory(3, new PlanNodeId("test")));
    DriverFactory joinDriverFactory = new DriverFactory(1, true, true, probeOperators.build(), OptionalInt.empty(), UNGROUPED_EXECUTION);

    // one driver per factory; each factory is told that no further drivers will follow
    Driver hashBuildDriver = hashBuildDriverFactory.createDriver(
            taskContext.addPipelineContext(0, true, false, false).addDriverContext());
    hashBuildDriverFactory.noMoreDrivers();
    Driver joinDriver = joinDriverFactory.createDriver(
            taskContext.addPipelineContext(1, true, true, false).addDriverContext());
    joinDriverFactory.noMoreDrivers();

    return ImmutableList.of(hashBuildDriver, joinDriver);
}
Use of io.trino.operator.join.HashBuilderOperator.HashBuilderOperatorFactory in project trino by trinodb.
Class JoinTestUtils, method setupBuildSide.
/**
 * Prepares the build side of a join test.
 * <p>
 * The pages produced by {@code buildPages} are pushed through a values operator into a
 * hash-distributed local exchange by a driver that is run until it reports finished. The method
 * then creates the exchange source factory, the join bridge manager and the hash builder factory,
 * and returns them together with the partition count.
 *
 * @param nodePartitioningManager passed to the local exchange factory
 * @param parallelBuild when true {@code PARTITION_COUNT} partitions are used, otherwise one
 * @param taskContext provides the session and the pipeline context for the collecting driver
 * @param buildPages source of the build pages, their types and hash channels; {@code orElseThrow()}
 *         is called on its hash channels, so they must be present
 * @param filterFunction optional join filter, wrapped in a {@code StandardJoinFilterFunction} factory
 * @param spillEnabled passed through to the hash builder factory
 * @param singleStreamSpillerFactory passed through to the hash builder factory
 * @return the lookup source factory manager, build and source operator factories, and partition count
 */
public static BuildSideSetup setupBuildSide(NodePartitioningManager nodePartitioningManager, boolean parallelBuild, TaskContext taskContext, RowPagesBuilder buildPages, Optional<InternalJoinFilterFunction> filterFunction, boolean spillEnabled, SingleStreamSpillerFactory singleStreamSpillerFactory)
{
    Optional<JoinFilterFunctionCompiler.JoinFilterFunctionFactory> filterFunctionFactory = filterFunction.map(
            function -> (ignoredSession, addresses, pages) -> new StandardJoinFilterFunction(function, addresses, pages));

    int partitionCount = 1;
    if (parallelBuild) {
        partitionCount = PARTITION_COUNT;
    }

    List<Integer> hashChannels = buildPages.getHashChannels().orElseThrow();
    LocalExchange.LocalExchangeFactory localExchangeFactory = new LocalExchange.LocalExchangeFactory(
            nodePartitioningManager,
            taskContext.getSession(),
            FIXED_HASH_DISTRIBUTION,
            partitionCount,
            buildPages.getTypes(),
            hashChannels,
            buildPages.getHashChannel(),
            UNGROUPED_EXECUTION,
            DataSize.of(32, DataSize.Unit.MEGABYTE),
            TYPE_OPERATOR_FACTORY);
    LocalExchange.LocalExchangeSinkFactoryId localExchangeSinkFactoryId = localExchangeFactory.newSinkFactoryId();
    localExchangeFactory.noMoreSinkFactories();

    // collect input data into the partitioned exchange
    DriverContext collectDriverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext();
    ValuesOperator.ValuesOperatorFactory valuesOperatorFactory = new ValuesOperator.ValuesOperatorFactory(
            0,
            new PlanNodeId("values"),
            buildPages.build());
    LocalExchangeSinkOperator.LocalExchangeSinkOperatorFactory sinkOperatorFactory = new LocalExchangeSinkOperator.LocalExchangeSinkOperatorFactory(
            localExchangeFactory,
            1,
            new PlanNodeId("sink"),
            localExchangeSinkFactoryId,
            Function.identity());
    Driver sourceDriver = Driver.createDriver(
            collectDriverContext,
            valuesOperatorFactory.createOperator(collectDriverContext),
            sinkOperatorFactory.createOperator(collectDriverContext));
    valuesOperatorFactory.noMoreOperators();
    sinkOperatorFactory.noMoreOperators();

    // run the collecting driver to completion before the build-side factories are created
    while (!sourceDriver.isFinished()) {
        sourceDriver.process();
    }

    // build side operator factories
    LocalExchangeSourceOperatorFactory sourceOperatorFactory = new LocalExchangeSourceOperatorFactory(
            0,
            new PlanNodeId("source"),
            localExchangeFactory);
    JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactoryManager = JoinBridgeManager.lookupAllAtOnce(
            new PartitionedLookupSourceFactory(
                    buildPages.getTypes(),
                    rangeList(buildPages.getTypes().size()).stream()
                            .map(buildPages.getTypes()::get)
                            .collect(toImmutableList()),
                    hashChannels.stream()
                            .map(buildPages.getTypes()::get)
                            .collect(toImmutableList()),
                    partitionCount,
                    false,
                    TYPE_OPERATOR_FACTORY));

    // the builder factory takes the hash channel as an OptionalInt
    OptionalInt precomputedHashChannel = buildPages.getHashChannel()
            .map(OptionalInt::of)
            .orElse(OptionalInt.empty());
    HashBuilderOperatorFactory buildOperatorFactory = new HashBuilderOperatorFactory(
            1,
            new PlanNodeId("build"),
            lookupSourceFactoryManager,
            rangeList(buildPages.getTypes().size()),
            hashChannels,
            precomputedHashChannel,
            filterFunctionFactory,
            Optional.empty(),
            ImmutableList.of(),
            100,
            new PagesIndex.TestingFactory(false),
            spillEnabled,
            singleStreamSpillerFactory,
            incrementalLoadFactorHashArraySizeSupplier(taskContext.getSession()));

    return new BuildSideSetup(lookupSourceFactoryManager, buildOperatorFactory, sourceOperatorFactory, partitionCount);
}
Use of io.trino.operator.join.HashBuilderOperator.HashBuilderOperatorFactory in project trino by trinodb.
Class HashBuildBenchmark, method createDrivers.
/**
 * Creates a hash build driver over the {@code orders} table ({@code orderkey}, {@code totalprice})
 * plus a join driver whose probe input is a values operator with no pages, so that the build side
 * can finish.
 *
 * @param taskContext task context that both pipeline contexts are added to
 * @return the hash build driver followed by the join driver
 */
@Override
protected List<Driver> createDrivers(TaskContext taskContext)
{
    // hash build
    List<Type> ordersTypes = getColumnTypes("orders", "orderkey", "totalprice");
    OperatorFactory ordersTableScan = createTableScanOperator(0, new PlanNodeId("test"), "orders", "orderkey", "totalprice");
    BlockTypeOperators blockTypeOperators = new BlockTypeOperators(new TypeOperators());

    List<Type> lookupOutputTypes = ImmutableList.of(ordersTypes.get(0), ordersTypes.get(1));
    List<Type> lookupHashTypes = ImmutableList.of(ordersTypes.get(0));
    JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactoryManager = JoinBridgeManager.lookupAllAtOnce(
            new PartitionedLookupSourceFactory(ordersTypes, lookupOutputTypes, lookupHashTypes, 1, false, blockTypeOperators));

    HashBuilderOperatorFactory hashBuilder = new HashBuilderOperatorFactory(
            1,
            new PlanNodeId("test"),
            lookupSourceFactoryManager,
            ImmutableList.of(0, 1),
            Ints.asList(0),
            OptionalInt.empty(),
            Optional.empty(),
            Optional.empty(),
            ImmutableList.of(),
            1_500_000,
            new PagesIndex.TestingFactory(false),
            false,
            SingleStreamSpillerFactory.unsupportedSingleStreamSpillerFactory(),
            incrementalLoadFactorHashArraySizeSupplier(session));
    DriverFactory hashBuildDriverFactory = new DriverFactory(
            0,
            true,
            true,
            ImmutableList.of(ordersTableScan, hashBuilder),
            OptionalInt.empty(),
            UNGROUPED_EXECUTION);

    // empty join so build finishes
    OperatorFactory emptyProbeSource = new ValuesOperatorFactory(0, new PlanNodeId("values"), ImmutableList.of());
    OperatorFactory joinOperator = operatorFactories.innerJoin(
            2,
            new PlanNodeId("test"),
            lookupSourceFactoryManager,
            false,
            false,
            false,
            ImmutableList.of(BIGINT),
            Ints.asList(0),
            OptionalInt.empty(),
            Optional.empty(),
            OptionalInt.empty(),
            unsupportedPartitioningSpillerFactory(),
            blockTypeOperators);
    OperatorFactory nullOutput = new NullOutputOperatorFactory(3, new PlanNodeId("test"));
    DriverFactory joinDriverFactory = new DriverFactory(
            1,
            true,
            true,
            ImmutableList.of(emptyProbeSource, joinOperator, nullOutput),
            OptionalInt.empty(),
            UNGROUPED_EXECUTION);

    // one driver per factory; each factory is told that no further drivers will follow
    Driver hashBuildDriver = hashBuildDriverFactory.createDriver(
            taskContext.addPipelineContext(0, true, true, false).addDriverContext());
    hashBuildDriverFactory.noMoreDrivers();
    Driver joinDriver = joinDriverFactory.createDriver(
            taskContext.addPipelineContext(1, true, true, false).addDriverContext());
    joinDriverFactory.noMoreDrivers();

    return ImmutableList.of(hashBuildDriver, joinDriver);
}
Use of io.trino.operator.join.HashBuilderOperator.HashBuilderOperatorFactory in project trino by trinodb.
Class HashJoinBenchmark, method createDrivers.
/**
 * Returns a single probe driver for the benchmarked query:
 * <pre>
 * select orderkey, quantity, totalprice
 * from lineitem join orders using (orderkey)
 * </pre>
 * While {@code probeDriverFactory} is still {@code null}, the call first builds the lookup source
 * from the {@code orders} table by processing the build driver until the lookup source provider is
 * done, and stores the probe driver factory in the field. Every call then creates one driver from
 * that factory.
 *
 * @param taskContext task context that the pipeline contexts are added to
 * @return a list holding the newly created probe driver
 */
@Override
protected List<Driver> createDrivers(TaskContext taskContext)
{
    if (probeDriverFactory == null) {
        // build side: orders scan feeding the hash builder
        List<Type> ordersTypes = getColumnTypes("orders", "orderkey", "totalprice");
        OperatorFactory ordersTableScan = createTableScanOperator(0, new PlanNodeId("test"), "orders", "orderkey", "totalprice");
        BlockTypeOperators blockTypeOperators = new BlockTypeOperators(new TypeOperators());

        List<Type> lookupOutputTypes = ImmutableList.of(ordersTypes.get(0), ordersTypes.get(1));
        List<Type> lookupHashTypes = ImmutableList.of(ordersTypes.get(0));
        JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactoryManager = JoinBridgeManager.lookupAllAtOnce(
                new PartitionedLookupSourceFactory(ordersTypes, lookupOutputTypes, lookupHashTypes, 1, false, blockTypeOperators));

        HashBuilderOperatorFactory hashBuilder = new HashBuilderOperatorFactory(
                1,
                new PlanNodeId("test"),
                lookupSourceFactoryManager,
                ImmutableList.of(0, 1),
                Ints.asList(0),
                OptionalInt.empty(),
                Optional.empty(),
                Optional.empty(),
                ImmutableList.of(),
                1_500_000,
                new PagesIndex.TestingFactory(false),
                false,
                SingleStreamSpillerFactory.unsupportedSingleStreamSpillerFactory(),
                incrementalLoadFactorHashArraySizeSupplier(session));

        DriverContext buildDriverContext = taskContext.addPipelineContext(0, false, false, false).addDriverContext();
        DriverFactory buildDriverFactory = new DriverFactory(
                0,
                false,
                false,
                ImmutableList.of(ordersTableScan, hashBuilder),
                OptionalInt.empty(),
                UNGROUPED_EXECUTION);

        // probe side: lineitem scan, inner join, null output
        List<Type> lineItemTypes = getColumnTypes("lineitem", "orderkey", "quantity");
        OperatorFactory lineItemTableScan = createTableScanOperator(0, new PlanNodeId("test"), "lineitem", "orderkey", "quantity");
        OperatorFactory joinOperator = operatorFactories.innerJoin(
                1,
                new PlanNodeId("test"),
                lookupSourceFactoryManager,
                false,
                false,
                false,
                lineItemTypes,
                Ints.asList(0),
                OptionalInt.empty(),
                Optional.empty(),
                OptionalInt.empty(),
                unsupportedPartitioningSpillerFactory(),
                blockTypeOperators);
        NullOutputOperatorFactory output = new NullOutputOperatorFactory(2, new PlanNodeId("test"));
        this.probeDriverFactory = new DriverFactory(
                1,
                true,
                true,
                ImmutableList.of(lineItemTableScan, joinOperator, output),
                OptionalInt.empty(),
                UNGROUPED_EXECUTION);

        // process the build driver until the lookup source provider is done
        Driver buildDriver = buildDriverFactory.createDriver(buildDriverContext);
        Future<LookupSourceProvider> lookupSourceProvider = lookupSourceFactoryManager
                .getJoinBridge(Lifespan.taskWide())
                .createLookupSourceProvider();
        while (!lookupSourceProvider.isDone()) {
            buildDriver.process();
        }
        getFutureValue(lookupSourceProvider).close();
    }

    DriverContext probeDriverContext = taskContext.addPipelineContext(1, true, true, false).addDriverContext();
    return ImmutableList.of(probeDriverFactory.createDriver(probeDriverContext));
}
Aggregations