Search in sources :

Example 1 with PartitionFunction

Use of io.trino.operator.PartitionFunction in the trino project by trinodb.

Class BenchmarkHashBuildAndJoinOperators, method buildHash.

/**
 * Pushes every build page of {@code buildContext} through hash builder operators and then
 * closes the resulting lookup source provider.
 *
 * <p>One operator is created per partition. With a single partition the pages are fed to that
 * operator unchanged; otherwise each page is first split by {@code partitionPages} using a
 * hash-based {@link LocalPartitionGenerator} and every slice goes to the operator with the
 * matching index.
 *
 * @param buildContext source of the build pages, types, hash channels and session
 * @param lookupSourceFactoryManager bridge manager handed to the operator factory and later
 *        queried for the task-wide join bridge
 * @param outputChannels output channels passed to the hash builder operator factory
 * @param partitionCount number of hash builder operators to create
 * @throws AssertionError if the lookup source provider future is not done after all operators
 *         have been finished
 */
private static void buildHash(BuildContext buildContext, JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactoryManager, List<Integer> outputChannels, int partitionCount) {
    HashBuilderOperatorFactory operatorFactory = new HashBuilderOperatorFactory(
            HASH_BUILD_OPERATOR_ID,
            TEST_PLAN_NODE_ID,
            lookupSourceFactoryManager,
            outputChannels,
            buildContext.getHashChannels(),
            buildContext.getHashChannel(),
            Optional.empty(),
            Optional.empty(),
            ImmutableList.of(),
            10_000,
            new PagesIndex.TestingFactory(false),
            false,
            SingleStreamSpillerFactory.unsupportedSingleStreamSpillerFactory(),
            incrementalLoadFactorHashArraySizeSupplier(buildContext.getSession()));

    // Each operator gets its own task/pipeline/driver context.
    Operator[] builders = IntStream.range(0, partitionCount)
            .mapToObj(ignored -> buildContext.createTaskContext()
                    .addPipelineContext(0, true, true, partitionCount > 1)
                    .addDriverContext())
            .map(driverContext -> operatorFactory.createOperator(driverContext))
            .toArray(Operator[]::new);

    if (partitionCount != 1) {
        // Multiple partitions: route every page slice to the operator of its partition.
        PartitionFunction partitioner = new LocalPartitionGenerator(
                new InterpretedHashGenerator(
                        buildContext.getHashChannels().stream()
                                .map(channel -> buildContext.getTypes().get(channel))
                                .collect(toImmutableList()),
                        buildContext.getHashChannels(),
                        TYPE_OPERATOR_FACTORY),
                partitionCount);
        for (Page buildPage : buildContext.getBuildPages()) {
            Page[] slices = partitionPages(buildPage, buildContext.getTypes(), partitionCount, partitioner);
            for (int partition = 0; partition < partitionCount; partition++) {
                builders[partition].addInput(slices[partition]);
            }
        }
    } else {
        // Single partition: no splitting needed.
        Operator soleBuilder = builders[0];
        for (Page buildPage : buildContext.getBuildPages()) {
            soleBuilder.addInput(buildPage);
        }
    }

    // The provider future is requested before the operators are finished, as in the original flow.
    ListenableFuture<LookupSourceProvider> providerFuture = lookupSourceFactoryManager
            .getJoinBridge(Lifespan.taskWide())
            .createLookupSourceProvider();
    for (int index = 0; index < builders.length; index++) {
        builders[index].finish();
    }
    if (!providerFuture.isDone()) {
        throw new AssertionError("Expected lookup source provider to be ready");
    }
    getFutureValue(providerFuture).close();
}
Also used : Operator(io.trino.operator.Operator) PartitionFunction(io.trino.operator.PartitionFunction) JoinBridgeManager.lookupAllAtOnce(io.trino.operator.join.JoinBridgeManager.lookupAllAtOnce) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Arrays(java.util.Arrays) Test(org.testng.annotations.Test) Random(java.util.Random) AverageTime(org.openjdk.jmh.annotations.Mode.AverageTime) TypeOperators(io.trino.spi.type.TypeOperators) Warmup(org.openjdk.jmh.annotations.Warmup) HashBuilderOperatorFactory(io.trino.operator.join.HashBuilderOperator.HashBuilderOperatorFactory) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) TEST_SESSION(io.trino.SessionTestUtils.TEST_SESSION) RowPagesBuilder.rowPagesBuilder(io.trino.RowPagesBuilder.rowPagesBuilder) RunnerException(org.openjdk.jmh.runner.RunnerException) Setup(org.openjdk.jmh.annotations.Setup) Operator(io.trino.operator.Operator) Param(org.openjdk.jmh.annotations.Param) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Benchmarks.benchmark(io.trino.jmh.Benchmarks.benchmark) TrinoOperatorFactories(io.trino.operator.TrinoOperatorFactories) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) HashArraySizeSupplier.incrementalLoadFactorHashArraySizeSupplier(io.trino.operator.HashArraySizeSupplier.incrementalLoadFactorHashArraySizeSupplier) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) List(java.util.List) DriverContext(io.trino.operator.DriverContext) BIGINT(io.trino.spi.type.BigintType.BIGINT) RowPagesBuilder(io.trino.RowPagesBuilder) Optional(java.util.Optional) Session(io.trino.Session) IntStream(java.util.stream.IntStream) OperatorFactories(io.trino.operator.OperatorFactories) 
ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Measurement(org.openjdk.jmh.annotations.Measurement) PageBuilder(io.trino.spi.PageBuilder) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) Scope(org.openjdk.jmh.annotations.Scope) OptionalInt(java.util.OptionalInt) LocalPartitionGenerator(io.trino.operator.exchange.LocalPartitionGenerator) TestingTaskContext(io.trino.testing.TestingTaskContext) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) GIGABYTE(io.airlift.units.DataSize.Unit.GIGABYTE) ImmutableList(com.google.common.collect.ImmutableList) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TaskContext(io.trino.operator.TaskContext) Lifespan(io.trino.execution.Lifespan) Threads(org.openjdk.jmh.annotations.Threads) ExecutorService(java.util.concurrent.ExecutorService) PartitioningSpillerFactory.unsupportedPartitioningSpillerFactory(io.trino.spiller.PartitioningSpillerFactory.unsupportedPartitioningSpillerFactory) OperatorFactory(io.trino.operator.OperatorFactory) PagesIndex(io.trino.operator.PagesIndex) InterpretedHashGenerator(io.trino.operator.InterpretedHashGenerator) PartitionFunction(io.trino.operator.PartitionFunction) BlockTypeOperators(io.trino.type.BlockTypeOperators) Iterator(java.util.Iterator) SingleStreamSpillerFactory(io.trino.spiller.SingleStreamSpillerFactory) State(org.openjdk.jmh.annotations.State) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) Ints(com.google.common.primitives.Ints) Benchmark(org.openjdk.jmh.annotations.Benchmark) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) Fork(org.openjdk.jmh.annotations.Fork) SECONDS(java.util.concurrent.TimeUnit.SECONDS) InterpretedHashGenerator(io.trino.operator.InterpretedHashGenerator) LocalPartitionGenerator(io.trino.operator.exchange.LocalPartitionGenerator) 
HashBuilderOperatorFactory(io.trino.operator.join.HashBuilderOperator.HashBuilderOperatorFactory) Page(io.trino.spi.Page) PagesIndex(io.trino.operator.PagesIndex)

Example 2 with PartitionFunction

Use of io.trino.operator.PartitionFunction in the trino project by trinodb.

Class LocalExecutionPlanner, method plan.

/**
 * Plans the given node and selects the output operator factory from the partitioning scheme.
 *
 * <p>For the broadcast, arbitrary, scaled-writer, single and coordinator distribution handles
 * the plan is built with a {@link TaskOutputFactory}. For any other handle a
 * {@link PartitionedOutputFactory} is used: the partition channels, constants and channel types
 * are derived either from the scheme's hash column (when present) or from the partitioning
 * arguments, where a constant argument is represented by channel {@code -1}.
 *
 * @param taskContext task context; its session is used to look up the partition function
 * @param plan root plan node to be planned
 * @param types symbol types, used to resolve the types of column partitioning arguments
 * @param partitioningScheme output partitioning scheme
 * @param stageExecutionDescriptor forwarded unchanged to the delegate {@code plan} overload
 * @param partitionedSourceOrder forwarded unchanged to the delegate {@code plan} overload
 * @param outputBuffer buffer handed to the chosen output factory
 * @return the result of the delegate {@code plan} overload
 */
public LocalExecutionPlan plan(TaskContext taskContext, PlanNode plan, TypeProvider types, PartitioningScheme partitioningScheme, StageExecutionDescriptor stageExecutionDescriptor, List<PlanNodeId> partitionedSourceOrder, OutputBuffer outputBuffer) {
    List<Symbol> outputLayout = partitioningScheme.getOutputLayout();

    if (partitioningScheme.getPartitioning().getHandle().equals(FIXED_BROADCAST_DISTRIBUTION)
            || partitioningScheme.getPartitioning().getHandle().equals(FIXED_ARBITRARY_DISTRIBUTION)
            || partitioningScheme.getPartitioning().getHandle().equals(SCALED_WRITER_DISTRIBUTION)
            || partitioningScheme.getPartitioning().getHandle().equals(SINGLE_DISTRIBUTION)
            || partitioningScheme.getPartitioning().getHandle().equals(COORDINATOR_DISTRIBUTION)) {
        return plan(taskContext, stageExecutionDescriptor, plan, outputLayout, types, partitionedSourceOrder, new TaskOutputFactory(outputBuffer));
    }

    // We can convert the symbols directly into channels, because the root must be a sink and therefore the layout is fixed
    List<Integer> channels;
    List<Optional<NullableValue>> constants;
    List<Type> channelTypes;
    Optional<Symbol> hashColumn = partitioningScheme.getHashColumn();
    if (hashColumn.isPresent()) {
        // A precomputed hash column is the only partitioning input.
        channels = ImmutableList.of(outputLayout.indexOf(hashColumn.get()));
        constants = ImmutableList.of(Optional.empty());
        channelTypes = ImmutableList.of(BIGINT);
    } else {
        // Constant arguments have no channel; -1 marks them and the value travels in `constants`.
        channels = partitioningScheme.getPartitioning().getArguments().stream()
                .map(argument -> argument.isConstant() ? -1 : outputLayout.indexOf(argument.getColumn()))
                .collect(toImmutableList());
        constants = partitioningScheme.getPartitioning().getArguments().stream()
                .map(argument -> argument.isConstant() ? Optional.of(argument.getConstant()) : Optional.<NullableValue>empty())
                .collect(toImmutableList());
        channelTypes = partitioningScheme.getPartitioning().getArguments().stream()
                .map(argument -> argument.isConstant() ? argument.getConstant().getType() : types.get(argument.getColumn()))
                .collect(toImmutableList());
    }

    PartitionFunction partitionFunction = nodePartitioningManager.getPartitionFunction(taskContext.getSession(), partitioningScheme, channelTypes);

    Set<Symbol> partitioningColumns = partitioningScheme.getPartitioning().getColumns();
    boolean replicateNullsAndAny = partitioningScheme.isReplicateNullsAndAny();
    // partitioningColumns expected to have one column in the normal case, and zero columns when partitioning on a constant
    checkArgument(!replicateNullsAndAny || partitioningColumns.size() <= 1);
    OptionalInt nullChannel = (replicateNullsAndAny && partitioningColumns.size() == 1)
            ? OptionalInt.of(outputLayout.indexOf(getOnlyElement(partitioningColumns)))
            : OptionalInt.empty();

    PartitionedOutputFactory outputFactory = new PartitionedOutputFactory(
            partitionFunction,
            channels,
            constants,
            replicateNullsAndAny,
            nullChannel,
            outputBuffer,
            maxPagePartitioningBufferSize,
            positionsAppenderFactory);
    return plan(taskContext, stageExecutionDescriptor, plan, outputLayout, types, partitionedSourceOrder, outputFactory);
}
Also used : PartitionFunction(io.trino.operator.PartitionFunction) Optional(java.util.Optional) NullableValue(io.trino.spi.predicate.NullableValue) OptionalInt(java.util.OptionalInt) PartitionedOutputFactory(io.trino.operator.output.PartitionedOutputOperator.PartitionedOutputFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FunctionType(io.trino.type.FunctionType) RowType(io.trino.spi.type.RowType) Type(io.trino.spi.type.Type) TaskOutputFactory(io.trino.operator.output.TaskOutputOperator.TaskOutputFactory)

Aggregations

PartitionFunction (io.trino.operator.PartitionFunction)2 Type (io.trino.spi.type.Type)2 Preconditions.checkState (com.google.common.base.Preconditions.checkState)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 Ints (com.google.common.primitives.Ints)1 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)1 MoreFutures.getFutureValue (io.airlift.concurrent.MoreFutures.getFutureValue)1 Threads.daemonThreadsNamed (io.airlift.concurrent.Threads.daemonThreadsNamed)1 DataSize (io.airlift.units.DataSize)1 GIGABYTE (io.airlift.units.DataSize.Unit.GIGABYTE)1 RowPagesBuilder (io.trino.RowPagesBuilder)1 RowPagesBuilder.rowPagesBuilder (io.trino.RowPagesBuilder.rowPagesBuilder)1 Session (io.trino.Session)1 TEST_SESSION (io.trino.SessionTestUtils.TEST_SESSION)1 Lifespan (io.trino.execution.Lifespan)1 Benchmarks.benchmark (io.trino.jmh.Benchmarks.benchmark)1 DriverContext (io.trino.operator.DriverContext)1 HashArraySizeSupplier.incrementalLoadFactorHashArraySizeSupplier (io.trino.operator.HashArraySizeSupplier.incrementalLoadFactorHashArraySizeSupplier)1 InterpretedHashGenerator (io.trino.operator.InterpretedHashGenerator)1