Search in sources:

Example 1 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class ExtractSpatialJoins method loadKdbTree.

/**
 * Reads the KDB tree used for spatial partitioning out of the given table.
 * <p>
 * The table name is resolved against the session's catalog and schema. The table is expected to
 * expose exactly one visible (non-hidden) column and, across all of its splits, exactly one row;
 * that single value is read as VARCHAR and parsed as KDB tree JSON. Violations are reported
 * through {@code checkSpatialPartitioningTable}, which is defined outside this method.
 *
 * @param tableName name of the table holding the serialized KDB tree
 * @param session session used for name resolution, metadata lookups and reading; its catalog and
 *         schema are taken with an unchecked {@code Optional.get()}, so both must be set
 * @param metadata used to resolve the table handle and its column handles
 * @param splitManager supplies the splits of the table
 * @param pageSourceManager creates a page source for every split
 * @return the KDB tree parsed from the only row of the table
 * @throws TrinoException with {@code INVALID_SPATIAL_PARTITIONING} when the table is not found
 * @throws UncheckedIOException when the page source try-with-resources block raises an {@link IOException}
 */
private static KdbTree loadKdbTree(String tableName, Session session, Metadata metadata, SplitManager splitManager, PageSourceManager pageSourceManager) {
    QualifiedObjectName name = toQualifiedObjectName(tableName, session.getCatalog().get(), session.getSchema().get());
    TableHandle tableHandle = metadata.getTableHandle(session, name).orElseThrow(() -> new TrinoException(INVALID_SPATIAL_PARTITIONING, format("Table not found: %s", name)));
    Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
    List<ColumnHandle> visibleColumnHandles = columnHandles.values().stream().filter(handle -> !metadata.getColumnMetadata(session, tableHandle, handle).isHidden()).collect(toImmutableList());
    // Report the number of visible columns: that is the quantity the condition tests. Counting
    // hidden columns as well would produce messages such as "expected single column, found 1".
    checkSpatialPartitioningTable(visibleColumnHandles.size() == 1, "Expected single column for table %s, but found %s columns", name, visibleColumnHandles.size());
    ColumnHandle kdbTreeColumn = Iterables.getOnlyElement(visibleColumnHandles);
    Optional<KdbTree> kdbTree = Optional.empty();
    try (SplitSource splitSource = splitManager.getSplits(session, tableHandle, UNGROUPED_SCHEDULING, EMPTY, alwaysTrue())) {
        // NOTE(review): when the thread is interrupted this loop simply stops fetching batches and
        // falls through to the "got none" check below - confirm that this is the intended outcome.
        while (!Thread.currentThread().isInterrupted()) {
            SplitBatch splitBatch = getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, Lifespan.taskWide(), 1000));
            List<Split> splits = splitBatch.getSplits();
            for (Split split : splits) {
                try (ConnectorPageSource pageSource = pageSourceManager.createPageSource(session, split, tableHandle, ImmutableList.of(kdbTreeColumn), DynamicFilter.EMPTY)) {
                    // do/while: at least one page is requested from every page source
                    do {
                        // wait until the page source reports it is no longer blocked
                        getFutureValue(pageSource.isBlocked());
                        Page page = pageSource.getNextPage();
                        if (page != null && page.getPositionCount() > 0) {
                            // a tree found earlier means this page carries a second row
                            checkSpatialPartitioningTable(kdbTree.isEmpty(), "Expected exactly one row for table %s, but found more", name);
                            checkSpatialPartitioningTable(page.getPositionCount() == 1, "Expected exactly one row for table %s, but found %s rows", name, page.getPositionCount());
                            String kdbTreeJson = VARCHAR.getSlice(page.getBlock(0), 0).toStringUtf8();
                            try {
                                kdbTree = Optional.of(KdbTreeUtils.fromJson(kdbTreeJson));
                            } catch (IllegalArgumentException e) {
                                // NOTE(review): only the message of the parse failure is forwarded; the cause is dropped
                                checkSpatialPartitioningTable(false, "Invalid JSON string for KDB tree: %s", e.getMessage());
                            }
                        }
                    } while (!pageSource.isFinished());
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            if (splitBatch.isLastBatch()) {
                break;
            }
        }
    }
    checkSpatialPartitioningTable(kdbTree.isPresent(), "Expected exactly one row for table %s, but got none", name);
    return kdbTree.get();
}
Also used : EMPTY(io.trino.spi.connector.DynamicFilter.EMPTY) SpatialJoinUtils.extractSupportedSpatialComparisons(io.trino.util.SpatialJoinUtils.extractSupportedSpatialComparisons) SymbolsExtractor.extractUnique(io.trino.sql.planner.SymbolsExtractor.extractUnique) SplitBatch(io.trino.split.SplitSource.SplitBatch) SplitManager(io.trino.split.SplitManager) SystemSessionProperties.getSpatialPartitioningTableName(io.trino.SystemSessionProperties.getSpatialPartitioningTableName) FilterNode(io.trino.sql.planner.plan.FilterNode) PlanNode(io.trino.sql.planner.plan.PlanNode) LEFT(io.trino.sql.planner.plan.JoinNode.Type.LEFT) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) Map(java.util.Map) SpatialJoinNode(io.trino.sql.planner.plan.SpatialJoinNode) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) JoinNode(io.trino.sql.planner.plan.JoinNode) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Splitter(com.google.common.base.Splitter) FunctionCall(io.trino.sql.tree.FunctionCall) Patterns.join(io.trino.sql.planner.plan.Patterns.join) TypeSignature(io.trino.spi.type.TypeSignature) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TypeSignatureTranslator.toSqlType(io.trino.sql.analyzer.TypeSignatureTranslator.toSqlType) KdbTree(io.trino.geospatial.KdbTree) Assignments(io.trino.sql.planner.plan.Assignments) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) SplitSource(io.trino.split.SplitSource) Context(io.trino.sql.planner.iterative.Rule.Context) ComparisonExpression(io.trino.sql.tree.ComparisonExpression) String.format(java.lang.String.format) Constraint.alwaysTrue(io.trino.spi.connector.Constraint.alwaysTrue) LESS_THAN_OR_EQUAL(io.trino.sql.tree.ComparisonExpression.Operator.LESS_THAN_OR_EQUAL) 
UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) INVALID_SPATIAL_PARTITIONING(io.trino.spi.StandardErrorCode.INVALID_SPATIAL_PARTITIONING) NOT_PARTITIONED(io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) Pattern(io.trino.matching.Pattern) SymbolReference(io.trino.sql.tree.SymbolReference) Split(io.trino.metadata.Split) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) ExpressionNodeInliner.replaceExpression(io.trino.sql.planner.ExpressionNodeInliner.replaceExpression) Expression(io.trino.sql.tree.Expression) Session(io.trino.Session) PlannerContext(io.trino.sql.PlannerContext) Iterables(com.google.common.collect.Iterables) INNER(io.trino.sql.planner.plan.JoinNode.Type.INNER) Type(io.trino.spi.type.Type) Patterns.filter(io.trino.sql.planner.plan.Patterns.filter) Page(io.trino.spi.Page) Capture.newCapture(io.trino.matching.Capture.newCapture) Cast(io.trino.sql.tree.Cast) KdbTreeUtils(io.trino.geospatial.KdbTreeUtils) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) FunctionCallBuilder(io.trino.sql.planner.FunctionCallBuilder) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) UNGROUPED_SCHEDULING(io.trino.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) Objects.requireNonNull(java.util.Objects.requireNonNull) Result(io.trino.sql.planner.iterative.Rule.Result) ColumnHandle(io.trino.spi.connector.ColumnHandle) Rule(io.trino.sql.planner.iterative.Rule) Lifespan(io.trino.execution.Lifespan) ProjectNode(io.trino.sql.planner.plan.ProjectNode) Symbol(io.trino.sql.planner.Symbol) StringLiteral(io.trino.sql.tree.StringLiteral) SystemSessionProperties.isSpatialJoinEnabled(io.trino.SystemSessionProperties.isSpatialJoinEnabled) IOException(java.io.IOException) PageSourceManager(io.trino.split.PageSourceManager) LESS_THAN(io.trino.sql.tree.ComparisonExpression.Operator.LESS_THAN) 
MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) UnnestNode(io.trino.sql.planner.plan.UnnestNode) Capture(io.trino.matching.Capture) QualifiedName(io.trino.sql.tree.QualifiedName) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) TableHandle(io.trino.metadata.TableHandle) TypeAnalyzer(io.trino.sql.planner.TypeAnalyzer) QualifiedObjectName(io.trino.metadata.QualifiedObjectName) Patterns.source(io.trino.sql.planner.plan.Patterns.source) Captures(io.trino.matching.Captures) Metadata(io.trino.metadata.Metadata) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TypeManager(io.trino.spi.type.TypeManager) SpatialJoinUtils.extractSupportedSpatialFunctions(io.trino.util.SpatialJoinUtils.extractSupportedSpatialFunctions) ColumnHandle(io.trino.spi.connector.ColumnHandle) KdbTree(io.trino.geospatial.KdbTree) Page(io.trino.spi.Page) UncheckedIOException(java.io.UncheckedIOException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) QualifiedObjectName(io.trino.metadata.QualifiedObjectName) SplitBatch(io.trino.split.SplitSource.SplitBatch) TrinoException(io.trino.spi.TrinoException) TableHandle(io.trino.metadata.TableHandle) SplitSource(io.trino.split.SplitSource) Split(io.trino.metadata.Split)

Example 2 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class Driver method processNewSources.

/**
 * Applies the pending split assignment update, if there is one, to the source operator.
 * <p>
 * Does nothing when the driver is not alive, when no update is pending, or when merging the
 * update yields the very same assignment object. Otherwise every split that the merged assignment
 * has and the current one lacks is added to the source operator, and the resulting page source
 * supplier is handed to the delete and update operators when they are present.
 */
@GuardedBy("exclusiveLock")
private void processNewSources() {
    checkLockHeld("Lock must be held to call processNewSources");

    // updates are ignored once the driver has left the ALIVE state
    if (state.get() != State.ALIVE) {
        return;
    }

    // claim the pending update and leave nothing behind for the next call
    SplitAssignment pendingUpdate = pendingSplitAssignmentUpdates.getAndSet(null);
    if (pendingUpdate == null) {
        return;
    }

    // fold the update into the assignment applied so far; same instance back means nothing new
    SplitAssignment previousAssignment = currentSplitAssignment;
    SplitAssignment mergedAssignment = previousAssignment.update(pendingUpdate);
    if (mergedAssignment == previousAssignment) {
        return;
    }

    // splits present after the merge but absent before it
    Set<ScheduledSplit> addedSplits = Sets.difference(mergedAssignment.getSplits(), previousAssignment.getSplits());

    SourceOperator source = this.sourceOperator.orElseThrow(VerifyException::new);
    for (ScheduledSplit addedSplit : addedSplits) {
        Supplier<Optional<UpdatablePageSource>> pageSource = source.addSplit(addedSplit.getSplit());
        deleteOperator.ifPresent(operator -> operator.setPageSource(pageSource));
        updateOperator.ifPresent(operator -> operator.setPageSource(pageSource));
    }

    // forward the end-of-splits signal carried by the merged assignment
    if (mergedAssignment.isNoMoreSplits()) {
        source.noMoreSplits();
    }

    currentSplitAssignment = mergedAssignment;
}
Also used : ScheduledSplit(io.trino.execution.ScheduledSplit) Optional(java.util.Optional) VerifyException(com.google.common.base.VerifyException) SplitAssignment(io.trino.execution.SplitAssignment) ScheduledSplit(io.trino.execution.ScheduledSplit) Split(io.trino.metadata.Split) GuardedBy(javax.annotation.concurrent.GuardedBy)

Example 3 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class FaultTolerantStageScheduler method schedule.

/**
 * Runs one scheduling pass for this stage.
 * <p>
 * The pass returns early when the scheduler is closed, finished, or still waiting on the current
 * {@code blocked} future. On first use it creates the task source once the source handles of all
 * source exchanges are available. It then repeatedly pulls task descriptors from the task source,
 * queues their partitions, and for the partition at the head of the queue acquires a node lease
 * and creates and starts a remote task. Whenever it has to wait (for source handles, for a node,
 * or for a task to finish) it stores the corresponding future in {@code blocked}.
 *
 * @throws Exception the recorded {@code failure}, rethrown as-is when {@code propagateIfPossible}
 *         can do so and wrapped in a {@link RuntimeException} otherwise
 */
public synchronized void schedule() throws Exception {
    // a previously recorded failure is rethrown on every call
    if (failure != null) {
        propagateIfPossible(failure, Exception.class);
        throw new RuntimeException(failure);
    }
    if (closed) {
        return;
    }
    if (isFinished()) {
        return;
    }
    // still waiting on whatever the previous pass blocked on
    if (!blocked.isDone()) {
        return;
    }
    // lazily create the task source once every source exchange has produced its source handles
    if (taskSource == null) {
        Map<PlanFragmentId, ListenableFuture<List<ExchangeSourceHandle>>> sourceHandles = sourceExchanges.entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> toListenableFuture(entry.getValue().getSourceHandles())));
        List<ListenableFuture<List<ExchangeSourceHandle>>> blockedFutures = sourceHandles.values().stream().filter(future -> !future.isDone()).collect(toImmutableList());
        if (!blockedFutures.isEmpty()) {
            // block until all outstanding source handle futures complete
            blocked = asVoid(allAsList(blockedFutures));
            return;
        }
        // every future is done at this point, so getFutureValue only unwraps the results
        Multimap<PlanFragmentId, ExchangeSourceHandle> exchangeSources = sourceHandles.entrySet().stream().collect(flatteningToImmutableListMultimap(Map.Entry::getKey, entry -> getFutureValue(entry.getValue()).stream()));
        taskSource = taskSourceFactory.create(session, stage.getFragment(), sourceExchanges, exchangeSources, stage::recordGetSplitTime, sourceBucketToPartitionMap, sourceBucketNodeMap);
    }
    // one iteration schedules at most one partition
    while (!queuedPartitions.isEmpty() || !taskSource.isFinished()) {
        // refill the queue from the task source whenever it runs empty
        while (queuedPartitions.isEmpty() && !taskSource.isFinished()) {
            List<TaskDescriptor> tasks = taskSource.getMoreTasks();
            for (TaskDescriptor task : tasks) {
                queuedPartitions.add(task.getPartitionId());
                allPartitions.add(task.getPartitionId());
                taskDescriptorStorage.put(stage.getStageId(), task);
                // register a sink for the partition when this stage writes to a sink exchange
                sinkExchange.ifPresent(exchange -> {
                    ExchangeSinkHandle exchangeSinkHandle = exchange.addSink(task.getPartitionId());
                    partitionToExchangeSinkHandleMap.put(task.getPartitionId(), exchangeSinkHandle);
                });
            }
            if (taskSource.isFinished()) {
                sinkExchange.ifPresent(Exchange::noMoreSinks);
            }
        }
        // the task source finished without producing anything further to schedule
        if (queuedPartitions.isEmpty()) {
            break;
        }
        // peek only: the partition is removed from the queue after a node has been obtained
        int partition = queuedPartitions.peek();
        Optional<TaskDescriptor> taskDescriptorOptional = taskDescriptorStorage.get(stage.getStageId(), partition);
        if (taskDescriptorOptional.isEmpty()) {
            // query has been terminated
            return;
        }
        TaskDescriptor taskDescriptor = taskDescriptorOptional.get();
        MemoryRequirements memoryRequirements = partitionMemoryRequirements.computeIfAbsent(partition, ignored -> partitionMemoryEstimator.getInitialMemoryRequirements(session, taskDescriptor.getNodeRequirements().getMemory()));
        // a lease kept from an earlier pass is reused; otherwise request one with the partition's memory requirement
        if (nodeLease == null) {
            NodeRequirements nodeRequirements = taskDescriptor.getNodeRequirements();
            nodeRequirements = nodeRequirements.withMemory(memoryRequirements.getRequiredMemory());
            nodeLease = nodeAllocator.acquire(nodeRequirements);
        }
        if (!nodeLease.getNode().isDone()) {
            // node not available yet: keep the lease in the field and wait for it
            blocked = asVoid(nodeLease.getNode());
            return;
        }
        NodeInfo node = getFutureValue(nodeLease.getNode());
        queuedPartitions.poll();
        // the task input is the table scan splits plus splits built from the exchange source handles
        Multimap<PlanNodeId, Split> tableScanSplits = taskDescriptor.getSplits();
        Multimap<PlanNodeId, Split> remoteSplits = createRemoteSplits(taskDescriptor.getExchangeSourceHandles());
        Multimap<PlanNodeId, Split> taskSplits = ImmutableListMultimap.<PlanNodeId, Split>builder().putAll(tableScanSplits).putAll(remoteSplits).build();
        int attemptId = getNextAttemptIdForPartition(partition);
        OutputBuffers outputBuffers;
        Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle;
        if (sinkExchange.isPresent()) {
            // instantiate the sink registered for this partition for the current attempt
            ExchangeSinkHandle sinkHandle = partitionToExchangeSinkHandleMap.get(partition);
            exchangeSinkInstanceHandle = Optional.of(sinkExchange.get().instantiateSink(sinkHandle, attemptId));
            outputBuffers = createSpoolingExchangeOutputBuffers(exchangeSinkInstanceHandle.get());
        } else {
            exchangeSinkInstanceHandle = Optional.empty();
            // stage will be consumed by the coordinator using direct exchange
            outputBuffers = createInitialEmptyOutputBuffers(PARTITIONED).withBuffer(new OutputBuffers.OutputBufferId(0), 0).withNoMoreBufferIds();
        }
        // ids of all partitioned sources and remote source nodes of the fragment
        Set<PlanNodeId> allSourcePlanNodeIds = ImmutableSet.<PlanNodeId>builder().addAll(stage.getFragment().getPartitionedSources()).addAll(stage.getFragment().getRemoteSourceNodes().stream().map(RemoteSourceNode::getId).iterator()).build();
        RemoteTask task = stage.createTask(node.getNode(), partition, attemptId, sinkBucketToPartitionMap, outputBuffers, taskSplits, allSourcePlanNodeIds.stream().collect(toImmutableListMultimap(Function.identity(), planNodeId -> Lifespan.taskWide())), allSourcePlanNodeIds).orElseThrow(() -> new VerifyException("stage execution is expected to be active"));
        partitionToRemoteTaskMap.put(partition, task);
        runningTasks.put(task.getTaskId(), task);
        runningNodes.put(task.getTaskId(), nodeLease);
        // the lease is now tracked in runningNodes; clear the field so the next partition acquires its own
        nodeLease = null;
        if (taskFinishedFuture == null) {
            taskFinishedFuture = SettableFuture.create();
        }
        taskLifecycleListener.taskCreated(stage.getFragment().getId(), task);
        task.addStateChangeListener(taskStatus -> updateTaskStatus(taskStatus, exchangeSinkInstanceHandle));
        task.start();
    }
    // nothing more to schedule in this pass: wait on the task-finished future when it is still pending
    if (taskFinishedFuture != null && !taskFinishedFuture.isDone()) {
        blocked = taskFinishedFuture;
    }
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) SettableFuture(com.google.common.util.concurrent.SettableFuture) RemoteSourceNode(io.trino.sql.planner.plan.RemoteSourceNode) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Throwables.propagateIfPossible(com.google.common.base.Throwables.propagateIfPossible) ImmutableListMultimap.toImmutableListMultimap(com.google.common.collect.ImmutableListMultimap.toImmutableListMultimap) MemoryRequirements(io.trino.execution.scheduler.PartitionMemoryEstimator.MemoryRequirements) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) Map(java.util.Map) SpoolingExchangeInput(io.trino.split.RemoteSplit.SpoolingExchangeInput) REMOTE_HOST_GONE(io.trino.spi.StandardErrorCode.REMOTE_HOST_GONE) Futures.immediateVoidFuture(com.google.common.util.concurrent.Futures.immediateVoidFuture) ImmutableSet(com.google.common.collect.ImmutableSet) ExchangeSinkInstanceHandle(io.trino.spi.exchange.ExchangeSinkInstanceHandle) OutputBuffers.createSpoolingExchangeOutputBuffers(io.trino.execution.buffer.OutputBuffers.createSpoolingExchangeOutputBuffers) ImmutableMap(com.google.common.collect.ImmutableMap) ExecutionFailureInfo(io.trino.execution.ExecutionFailureInfo) Futures.allAsList(com.google.common.util.concurrent.Futures.allAsList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) MoreFutures.toListenableFuture(io.airlift.concurrent.MoreFutures.toListenableFuture) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) GONE(io.trino.failuredetector.FailureDetector.State.GONE) GuardedBy(javax.annotation.concurrent.GuardedBy) TaskId(io.trino.execution.TaskId) ExchangeSinkHandle(io.trino.spi.exchange.ExchangeSinkHandle) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Split(io.trino.metadata.Split) ImmutableListMultimap(com.google.common.collect.ImmutableListMultimap) Optional(java.util.Optional) 
Queue(java.util.Queue) PlanFragmentId(io.trino.sql.planner.plan.PlanFragmentId) OutputBuffers.createInitialEmptyOutputBuffers(io.trino.execution.buffer.OutputBuffers.createInitialEmptyOutputBuffers) ExchangeSourceHandle(io.trino.spi.exchange.ExchangeSourceHandle) Session(io.trino.Session) ImmutableListMultimap.flatteningToImmutableListMultimap(com.google.common.collect.ImmutableListMultimap.flatteningToImmutableListMultimap) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) StageId(io.trino.execution.StageId) Logger(io.airlift.log.Logger) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) ErrorCode(io.trino.spi.ErrorCode) Function(java.util.function.Function) Failures.toFailure(io.trino.util.Failures.toFailure) RemoteSplit(io.trino.split.RemoteSplit) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) USER_ERROR(io.trino.spi.ErrorType.USER_ERROR) Objects.requireNonNull(java.util.Objects.requireNonNull) TaskState(io.trino.execution.TaskState) Lifespan(io.trino.execution.Lifespan) Exchange(io.trino.spi.exchange.Exchange) VerifyException(com.google.common.base.VerifyException) SqlStage(io.trino.execution.SqlStage) FailureDetector(io.trino.failuredetector.FailureDetector) RemoteTask(io.trino.execution.RemoteTask) TaskStatus(io.trino.execution.TaskStatus) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) MoreFutures.asVoid(io.airlift.concurrent.MoreFutures.asVoid) PARTITIONED(io.trino.execution.buffer.OutputBuffers.BufferType.PARTITIONED) Futures.nonCancellationPropagating(com.google.common.util.concurrent.Futures.nonCancellationPropagating) OutputBuffers(io.trino.execution.buffer.OutputBuffers) ArrayDeque(java.util.ArrayDeque) REMOTE_CONNECTOR_ID(io.trino.operator.ExchangeOperator.REMOTE_CONNECTOR_ID) 
MemoryRequirements(io.trino.execution.scheduler.PartitionMemoryEstimator.MemoryRequirements) ExchangeSourceHandle(io.trino.spi.exchange.ExchangeSourceHandle) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) OutputBuffers.createSpoolingExchangeOutputBuffers(io.trino.execution.buffer.OutputBuffers.createSpoolingExchangeOutputBuffers) OutputBuffers.createInitialEmptyOutputBuffers(io.trino.execution.buffer.OutputBuffers.createInitialEmptyOutputBuffers) OutputBuffers(io.trino.execution.buffer.OutputBuffers) PlanFragmentId(io.trino.sql.planner.plan.PlanFragmentId) ExchangeSinkInstanceHandle(io.trino.spi.exchange.ExchangeSinkInstanceHandle) RemoteTask(io.trino.execution.RemoteTask) Exchange(io.trino.spi.exchange.Exchange) ExchangeSinkHandle(io.trino.spi.exchange.ExchangeSinkHandle) VerifyException(com.google.common.base.VerifyException) MoreFutures.toListenableFuture(io.airlift.concurrent.MoreFutures.toListenableFuture) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Split(io.trino.metadata.Split) RemoteSplit(io.trino.split.RemoteSplit) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HashMap(java.util.HashMap)

Example 4 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class MergeOperator method addSplit.

/**
 * Adds the exchange location described by a remote split as one more page producer of this
 * operator.
 * <p>
 * A direct exchange client is created for the split and registered with {@code closer}. It is
 * pointed at the single location carried by the split and then told that no more locations
 * follow. Pages coming from the client are deserialized, and their size and position count are
 * recorded as network input.
 *
 * @param split split whose connector split must be a {@link RemoteSplit}
 * @return a supplier that always yields an empty {@link Optional}
 * @throws NullPointerException if {@code split} is null
 * @throws IllegalArgumentException if the connector split is not a {@link RemoteSplit}
 * @throws IllegalStateException if {@code blockedOnSplits} is already done
 */
@Override
public Supplier<Optional<UpdatablePageSource>> addSplit(Split split) {
    requireNonNull(split, "split is null");
    checkArgument(split.getConnectorSplit() instanceof RemoteSplit, "split is not a remote split");
    checkState(!blockedOnSplits.isDone(), "noMoreSplits has been called already");

    TaskContext task = operatorContext.getDriverContext().getPipelineContext().getTaskContext();
    DirectExchangeClient exchangeClient = closer.register(directExchangeClientSupplier.get(
            task.getTaskId().getQueryId(),
            new ExchangeId(format("direct-exchange-merge-%s-%s", task.getTaskId().getStageId().getId(), sourceId)),
            operatorContext.localUserMemoryContext(),
            task::sourceTaskFailed,
            RetryPolicy.NONE));

    // External exchanges are expected only in fault tolerant execution mode. MergeOperator serves
    // distributed sort only, which is not compatible with (and is disabled in) that mode, so the
    // exchange input is cast to a direct exchange input here.
    DirectExchangeInput directInput = (DirectExchangeInput) ((RemoteSplit) split.getConnectorSplit()).getExchangeInput();
    exchangeClient.addLocation(directInput.getTaskId(), URI.create(directInput.getLocation()));
    exchangeClient.noMoreLocations();

    pageProducers.add(exchangeClient.pages().map(serialized -> {
        Page deserialized = pagesSerde.deserialize(serialized);
        operatorContext.recordNetworkInput(serialized.length(), deserialized.getPositionCount());
        return deserialized;
    }));

    return () -> Optional.empty();
}
Also used : MergeSortedPages.mergeSortedPages(io.trino.util.MergeSortedPages.mergeSortedPages) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) OrderingCompiler(io.trino.sql.gen.OrderingCompiler) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) SettableFuture(com.google.common.util.concurrent.SettableFuture) Supplier(java.util.function.Supplier) DirectExchangeInput(io.trino.split.RemoteSplit.DirectExchangeInput) ArrayList(java.util.ArrayList) RemoteSplit(io.trino.split.RemoteSplit) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) PagesSerdeFactory(io.trino.execution.buffer.PagesSerdeFactory) Closer(com.google.common.io.Closer) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) Objects.requireNonNull(java.util.Objects.requireNonNull) URI(java.net.URI) IOException(java.io.IOException) MoreLists.mappedCopy(io.trino.util.MoreLists.mappedCopy) PagesSerde(io.trino.execution.buffer.PagesSerde) String.format(java.lang.String.format) SortOrder(io.trino.spi.connector.SortOrder) Preconditions.checkState(com.google.common.base.Preconditions.checkState) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) Split(io.trino.metadata.Split) UpdatablePageSource(io.trino.spi.connector.UpdatablePageSource) Optional(java.util.Optional) ExchangeId(io.trino.spi.exchange.ExchangeId) RemoteSplit(io.trino.split.RemoteSplit) Page(io.trino.spi.Page) DirectExchangeInput(io.trino.split.RemoteSplit.DirectExchangeInput) ExchangeId(io.trino.spi.exchange.ExchangeId)

Example 5 with Split

use of io.trino.metadata.Split in project trino by trinodb.

the class TopologyAwareNodeSelector method computeAssignments.

/**
 * Assigns the given splits to nodes, taking the network location of each split's addresses into
 * account.
 * <p>
 * A split that is not remotely accessible is placed only on one of the nodes selected for its
 * addresses. Any other split is tried against the network locations of its addresses, starting
 * at the deepest level and moving towards the root, until a node accepts it. Splits for which no
 * node is chosen are left out of the result.
 *
 * @param splits splits to place
 * @param existingTasks tasks used for the assignment statistics and for the returned future
 * @return the split-to-node assignment together with a future built from the split queue space
 *         of the existing tasks
 * @throws TrinoException with {@code NO_NODES_AVAILABLE} when a split that is not remotely
 *         accessible has no candidate node
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks) {
    NodeMap nodeMap = this.nodeMap.get().get();
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    // per-depth count of splits placed by this call; flushed into topologicalSplitCounters at the end
    int[] topologicCounters = new int[topologicalSplitCounters.size()];
    // locations at which bestNodeSplitCount returned no node during this call
    Set<NetworkLocation> filledLocations = new HashSet<>();
    // candidate nodes of exact-placement splits that could not be assigned
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    for (Split split : splits) {
        SplitWeight splitWeight = split.getSplitWeight();
        // splits that are not remotely accessible go only to the nodes selected for their addresses
        if (!split.isRemotelyAccessible()) {
            List<InternalNode> candidateNodes = selectExactNodes(nodeMap, split.getAddresses(), includeCoordinator);
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getNodesByHost().keys());
                throw new TrinoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }
            InternalNode chosenNode = bestNodeSplitCount(splitWeight, candidateNodes.iterator(), minCandidates, maxPendingSplitsWeightPerTask, assignmentStats);
            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            } else // Exact node set won't matter, if a split is waiting for any node
            if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
            continue;
        }
        InternalNode chosenNode = null;
        // deepest level to try first; reset to 0 below when the split has no addresses
        int depth = topologicalSplitCounters.size() - 1;
        int chosenDepth = 0;
        Set<NetworkLocation> locations = new HashSet<>();
        for (HostAddress host : split.getAddresses()) {
            locations.add(networkTopology.locate(host));
        }
        if (locations.isEmpty()) {
            // Add the root location
            locations.add(ROOT_LOCATION);
            depth = 0;
        }
        // Try each address at progressively shallower network locations
        for (int i = depth; i >= 0 && chosenNode == null; i--) {
            for (NetworkLocation location : locations) {
                // For example, locations which couldn't be located will be at the "root" location
                if (location.getSegments().size() < i) {
                    continue;
                }
                // truncate the location to the level currently being tried
                location = location.subLocation(0, i);
                if (filledLocations.contains(location)) {
                    continue;
                }
                Set<InternalNode> nodes = nodeMap.getWorkersByNetworkPath().get(location);
                chosenNode = bestNodeSplitCount(splitWeight, new ResettableRandomizedIterator<>(nodes), minCandidates, calculateMaxPendingSplitsWeightPerTask(i, depth), assignmentStats);
                if (chosenNode != null) {
                    chosenDepth = i;
                    break;
                }
                // no node chosen at this location: skip it for the rest of this call
                filledLocations.add(location);
            }
        }
        if (chosenNode != null) {
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            topologicCounters[chosenDepth]++;
        } else {
            splitWaitingForAnyNode = true;
        }
    }
    // publish the per-depth counts, skipping depths with no placements
    for (int i = 0; i < topologicCounters.length; i++) {
        if (topologicCounters[i] > 0) {
            topologicalSplitCounters.get(i).update(topologicCounters[i]);
        }
    }
    ListenableFuture<Void> blocked;
    long maxPendingForWildcardNetworkAffinity = calculateMaxPendingSplitsWeightPerTask(0, topologicalSplitCounters.size() - 1);
    // the blocked-exact-nodes set is passed only when no split is waiting for any node
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    } else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    }
    return new SplitPlacementResult(blocked, assignment);
}
Also used : HostAddress(io.trino.spi.HostAddress) SplitWeight(io.trino.spi.SplitWeight) TrinoException(io.trino.spi.TrinoException) InternalNode(io.trino.metadata.InternalNode) Split(io.trino.metadata.Split) HashSet(java.util.HashSet)

Aggregations

Split (io.trino.metadata.Split): 56, Test (org.testng.annotations.Test): 32, InternalNode (io.trino.metadata.InternalNode): 26, ConnectorSplit (io.trino.spi.connector.ConnectorSplit): 24, PlanNodeId (io.trino.sql.planner.plan.PlanNodeId): 22, LinkedHashSet (java.util.LinkedHashSet): 17, ImmutableList (com.google.common.collect.ImmutableList): 16, HashSet (java.util.HashSet): 16, CatalogName (io.trino.connector.CatalogName): 14, Page (io.trino.spi.Page): 12, List (java.util.List): 11, Optional (java.util.Optional): 11, ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 9, ImmutableSet (com.google.common.collect.ImmutableSet): 9, ArrayList (java.util.ArrayList): 9, Lifespan (io.trino.execution.Lifespan): 8, RemoteTask (io.trino.execution.RemoteTask): 8, TestingSplit (io.trino.testing.TestingSplit): 8, Objects.requireNonNull (java.util.Objects.requireNonNull): 8, Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 6