Search in sources :

Example 11 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class TestHiveIntegrationSmokeTest method testRuseExchangeSplitsGroupNotMatchingBetweenProducerConsumer.

/**
 * Verifies that computing assignments for a reuse-exchange consumer stage throws a
 * {@link PrestoException} with the message "Producer &amp; consumer splits are not same" when the
 * consumer is given a different split set than the producer.
 *
 * <p>A producer stage and a consumer stage are built from table-scan fragments that share the same
 * reuse UUID. Assignments are computed first for the producer (splits from
 * {@code createAndGetSplits(10)}) and then for the consumer (splits from
 * {@code createAndGetSplits(50)}); the second call is expected to fail.
 */
@Test
public void testRuseExchangeSplitsGroupNotMatchingBetweenProducerConsumer() {
    setUpNodes();
    NodeTaskMap nodeTasks = new NodeTaskMap(new FinalizerService());
    StageId stageId = new StageId(new QueryId("query"), 0);
    // Producer and consumer fragments are tied together by this shared reuse id.
    UUID uuid = UUID.randomUUID();

    // Producer side.
    PlanFragment testFragmentProducer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_PRODUCER, uuid, 1);
    PlanNodeId tableScanNodeId = new PlanNodeId("plan_id");
    StageExecutionPlan producerStageExecutionPlan = new StageExecutionPlan(testFragmentProducer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution producerStage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), producerStageExecutionPlan.getFragment(), producerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, nodeTasks, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Set<Split> producerSplits = createAndGetSplits(10);
    // The returned assignment is not inspected; the call only has to run before the consumer's.
    // NOTE(review): presumably this records the producer's splits for the consumer check - confirm in the node selector.
    nodeSelector.computeAssignments(producerSplits, ImmutableList.copyOf(taskMap.values()), Optional.of(producerStage));

    // Consumer side: same reuse id, different split set.
    PlanFragment testFragmentConsumer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_CONSUMER, uuid, 1);
    StageExecutionPlan consumerStageExecutionPlan = new StageExecutionPlan(testFragmentConsumer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution stage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), consumerStageExecutionPlan.getFragment(), consumerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, nodeTasks, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Set<Split> consumerSplits = createAndGetSplits(50);
    try {
        nodeSelector.computeAssignments(consumerSplits, ImmutableList.copyOf(taskMap.values()), Optional.of(stage));
    } catch (PrestoException e) {
        assertEquals("Producer & consumer splits are not same", e.getMessage());
        return;
    }
    // Reaching this point means the mismatch was not detected.
    throw new AssertionError("Expected a PrestoException because producer and consumer splits differ, but none was thrown");
}
Also used : NoOpFailureDetector(io.prestosql.failuredetector.NoOpFailureDetector) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) StageId(io.prestosql.execution.StageId) PrestoException(io.prestosql.spi.PrestoException) TestPhasedExecutionSchedule.createTableScanPlanFragment(io.prestosql.execution.scheduler.TestPhasedExecutionSchedule.createTableScanPlanFragment) PlanFragment(io.prestosql.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.prestosql.split.ConnectorAwareSplitSource) SqlStageExecution.createSqlStageExecution(io.prestosql.execution.SqlStageExecution.createSqlStageExecution) SqlStageExecution(io.prestosql.execution.SqlStageExecution) QuerySnapshotManager(io.prestosql.snapshot.QuerySnapshotManager) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) LocalStateStoreProvider(io.prestosql.statestore.LocalStateStoreProvider) SeedStoreManager(io.prestosql.seedstore.SeedStoreManager) TableInfo(io.prestosql.execution.TableInfo) DynamicFilterService(io.prestosql.dynamicfilter.DynamicFilterService) UUID(java.util.UUID) NodeTaskMap(io.prestosql.execution.NodeTaskMap) QueryId(io.prestosql.spi.QueryId) QualifiedObjectName(io.prestosql.spi.connector.QualifiedObjectName) FileSystemClientManager(io.prestosql.filesystem.FileSystemClientManager) SplitSchedulerStats(io.prestosql.execution.scheduler.SplitSchedulerStats) FinalizerService(io.prestosql.util.FinalizerService) InternalNode(io.prestosql.metadata.InternalNode) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) MockRemoteTaskFactory(io.prestosql.execution.MockRemoteTaskFactory) Test(org.testng.annotations.Test) AbstractTestIntegrationSmokeTest(io.prestosql.tests.AbstractTestIntegrationSmokeTest)

Example 12 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class SqlQueryScheduler method createStages.

/**
 * Recursively creates the {@link SqlStageExecution} for {@code plan} and for every sub-stage whose
 * fragment has not been visited yet, registering a scheduler and a {@code StageLinkage} for each
 * stage it creates.
 *
 * <p>The scheduler registered for a stage depends on the fragment's partitioning handle:
 * <ul>
 * <li>{@code SOURCE_DISTRIBUTION}: a source-partitioned scheduler using a {@code DynamicSplitPlacementPolicy};</li>
 * <li>{@code SCALED_WRITER_DISTRIBUTION}: a {@code ScaledWriterScheduler}, registered after the child stages are built;</li>
 * <li>anything else: a {@code FixedSourcePartitionedScheduler} when the plan has split sources,
 * otherwise a {@code FixedCountScheduler}.</li>
 * </ul>
 *
 * <p>Besides its parameters, the method reads and updates state declared outside it
 * ({@code queryStateMachine}, {@code feederScheduledNodes}, {@code visitedPlanFrags} and others).
 *
 * @param parent exchange-location consumer stored in this stage's {@code StageLinkage}
 * @param nextStageId counter from which the id of each new stage is taken (incremented per stage)
 * @param locationFactory used to create the stage location
 * @param plan execution plan of the stage to create; its sub-stages are created recursively
 * @param nodeScheduler source of the node selectors used by the schedulers
 * @param remoteTaskFactory passed through to the created stage executions
 * @param session session passed to the stage executions and schedulers
 * @param splitBatchSize passed to the split-driven schedulers
 * @param partitioningCache resolves a partitioning handle (plus an optional task count) to a {@code NodePartitionMap}
 * @param nodePartitioningManager used to list partition handles and to obtain bucket-to-node maps
 * @param queryExecutor passed through to the created stage executions
 * @param schedulerExecutor passed to the {@code ScaledWriterScheduler}
 * @param failureDetector passed through to the created stage executions
 * @param nodeTaskMap passed through to the created stage executions
 * @param stageSchedulers builder that receives the scheduler of every created stage
 * @param stageLinkages builder that receives the linkage of every created stage
 * @param isSnapshotEnabled whether snapshot handling applies (locks down node selection, fixes task counts)
 * @param snapshotManager passed through to the created stage executions
 * @param stageTaskCounts task count per stage to reproduce when resuming, or {@code null} otherwise
 * @return the created stages: this plan's own stage first, followed by the stages of each sub-stage subtree
 */
private List<SqlStageExecution> createStages(ExchangeLocationsConsumer parent, AtomicInteger nextStageId, LocationFactory locationFactory, StageExecutionPlan plan, NodeScheduler nodeScheduler, RemoteTaskFactory remoteTaskFactory, Session session, int splitBatchSize, BiFunction<PartitioningHandle, Integer, NodePartitionMap> partitioningCache, NodePartitioningManager nodePartitioningManager, ExecutorService queryExecutor, ScheduledExecutorService schedulerExecutor, FailureDetector failureDetector, NodeTaskMap nodeTaskMap, ImmutableMap.Builder<StageId, StageScheduler> stageSchedulers, ImmutableMap.Builder<StageId, StageLinkage> stageLinkages, boolean isSnapshotEnabled, QuerySnapshotManager snapshotManager, Map<StageId, Integer> stageTaskCounts) {
    ImmutableList.Builder<SqlStageExecution> localStages = ImmutableList.builder();
    StageId stageId = new StageId(queryStateMachine.getQueryId(), nextStageId.getAndIncrement());
    SqlStageExecution stageExecution = createSqlStageExecution(stageId, locationFactory.createStageLocation(stageId), plan.getFragment(), plan.getTables(), remoteTaskFactory, session, summarizeTaskInfo, nodeTaskMap, queryExecutor, failureDetector, schedulerStats, dynamicFilterService, snapshotManager);
    // This stage is added before any sub-stage, so it is always element 0 of the returned list.
    localStages.add(stageExecution);
    Optional<int[]> bucketToPartition;
    PartitioningHandle partitioningHandle = plan.getFragment().getPartitioning();
    // True only when the fragment has a feeder CTE parent id but no feeder CTE id of its own.
    boolean keepConsumerOnFeederNodes = !plan.getFragment().getFeederCTEId().isPresent() && plan.getFragment().getFeederCTEParentId().isPresent();
    if (partitioningHandle.equals(SOURCE_DISTRIBUTION)) {
        // nodes are selected dynamically based on the constraints of the splits and the system load
        // getOnlyElement fails unless the plan has exactly one split source.
        Entry<PlanNodeId, SplitSource> entry = Iterables.getOnlyElement(plan.getSplitSources().entrySet());
        PlanNodeId planNodeId = entry.getKey();
        SplitSource splitSource = entry.getValue();
        CatalogName catalogName = splitSource.getCatalogName();
        if (isInternalSystemConnector(catalogName)) {
            // Internal system connectors get a node selector created without a catalog.
            catalogName = null;
        }
        NodeSelector nodeSelector = nodeScheduler.createNodeSelector(catalogName, keepConsumerOnFeederNodes, feederScheduledNodes);
        if (isSnapshotEnabled) {
            // When snapshot is enabled, then no task can be added after the query started running,
            // otherwise assumptions about how many "input channels" may be broken.
            nodeSelector.lockDownNodes();
        }
        SplitPlacementPolicy placementPolicy = new DynamicSplitPlacementPolicy(nodeSelector, stageExecution::getAllTasks);
        // Grouped execution is rejected for source-distributed stages.
        checkArgument(!plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution());
        stageSchedulers.put(stageId, newSourcePartitionedSchedulerAsStageScheduler(stageExecution, planNodeId, splitSource, placementPolicy, splitBatchSize, session, heuristicIndexerManager));
        // One-element array whose only entry is 0.
        bucketToPartition = Optional.of(new int[1]);
    } else if (partitioningHandle.equals(SCALED_WRITER_DISTRIBUTION)) {
        // The scaled-writer scheduler itself is registered at the end of the method, once child stages exist.
        bucketToPartition = Optional.of(new int[1]);
    } else {
        Map<PlanNodeId, SplitSource> splitSources = plan.getSplitSources();
        if (!splitSources.isEmpty()) {
            // contains local source
            List<PlanNodeId> schedulingOrder = plan.getFragment().getPartitionedSources();
            CatalogName catalogName = partitioningHandle.getConnectorId().orElseThrow(IllegalStateException::new);
            List<ConnectorPartitionHandle> connectorPartitionHandles;
            boolean groupedExecutionForStage = plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution();
            if (groupedExecutionForStage) {
                connectorPartitionHandles = nodePartitioningManager.listPartitionHandles(session, partitioningHandle);
                // Grouped execution requires something other than the single NOT_PARTITIONED handle.
                checkState(!ImmutableList.of(NOT_PARTITIONED).equals(connectorPartitionHandles));
            } else {
                connectorPartitionHandles = ImmutableList.of(NOT_PARTITIONED);
            }
            BucketNodeMap bucketNodeMap;
            List<InternalNode> stageNodeList;
            if (plan.getFragment().getRemoteSourceNodes().stream().allMatch(node -> node.getExchangeType() == REPLICATE)) {
                // no remote source, or every remote source is a REPLICATE exchange
                boolean dynamicLifespanSchedule = plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule();
                if (isSnapshotEnabled) {
                    NodeSelector nodeSelector = nodeScheduler.createNodeSelector(catalogName, keepConsumerOnFeederNodes, feederScheduledNodes);
                    int nodeCount;
                    if (stageTaskCounts != null) {
                        // Resuming: need to create same number of tasks as old stage.
                        // NOTE(review): unboxing throws NullPointerException if the map has no entry for stageId - confirm callers always populate it.
                        nodeCount = stageTaskCounts.get(stageId);
                    } else {
                        // Scheduling: reserve some nodes for resuming
                        nodeCount = calculateTaskCount(nodeSelector.selectableNodeCount());
                    }
                    stageNodeList = new ArrayList<>(nodeSelector.selectRandomNodes(nodeCount));
                    checkCondition(stageNodeList.size() == nodeCount, NO_NODES_AVAILABLE, "Snapshot: not enough worker nodes to resume expected number of tasks: " + nodeCount);
                    // Make sure bucketNodeMap uses the same node list
                    bucketNodeMap = nodePartitioningManager.getBucketNodeMap(session, partitioningHandle, dynamicLifespanSchedule, stageNodeList);
                } else {
                    bucketNodeMap = nodePartitioningManager.getBucketNodeMap(session, partitioningHandle, dynamicLifespanSchedule);
                    stageNodeList = new ArrayList<>(nodeScheduler.createNodeSelector(catalogName, keepConsumerOnFeederNodes, feederScheduledNodes).allNodes());
                }
                // verify execution is consistent with planner's decision on dynamic lifespan schedule
                verify(bucketNodeMap.isDynamic() == dynamicLifespanSchedule);
                Collections.shuffle(stageNodeList);
                bucketToPartition = Optional.empty();
            } else {
                // cannot use dynamic lifespan schedule
                verify(!plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule());
                // remote source requires nodePartitionMap
                NodePartitionMap nodePartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning(), stageTaskCounts == null ? null : stageTaskCounts.get(stageId));
                if (groupedExecutionForStage) {
                    // One connector partition handle is expected per bucket.
                    checkState(connectorPartitionHandles.size() == nodePartitionMap.getBucketToPartition().length);
                }
                stageNodeList = nodePartitionMap.getPartitionToNode();
                bucketNodeMap = nodePartitionMap.asBucketNodeMap();
                bucketToPartition = Optional.of(nodePartitionMap.getBucketToPartition());
            }
            stageSchedulers.put(stageId, new FixedSourcePartitionedScheduler(stageExecution, splitSources, plan.getFragment().getStageExecutionDescriptor(), schedulingOrder, stageNodeList, bucketNodeMap, splitBatchSize, getConcurrentLifespansPerNode(session), nodeScheduler.createNodeSelector(catalogName, keepConsumerOnFeederNodes, feederScheduledNodes), connectorPartitionHandles, session, heuristicIndexerManager));
        } else {
            // all sources are remote
            NodePartitionMap nodePartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning(), stageTaskCounts == null ? null : stageTaskCounts.get(stageId));
            List<InternalNode> partitionToNode = nodePartitionMap.getPartitionToNode();
            // todo this should asynchronously wait a standard timeout period before failing
            checkCondition(!partitionToNode.isEmpty(), NO_NODES_AVAILABLE, "No worker nodes available");
            stageSchedulers.put(stageId, new FixedCountScheduler(stageExecution, partitionToNode));
            bucketToPartition = Optional.of(nodePartitionMap.getBucketToPartition());
        }
    }
    ImmutableSet.Builder<SqlStageExecution> childStagesBuilder = ImmutableSet.builder();
    for (StageExecutionPlan subStagePlan : plan.getSubStages()) {
        // Each plan fragment is turned into a stage at most once; already-visited fragments are skipped.
        if (visitedPlanFrags.contains(subStagePlan.getFragment().getId())) {
            continue;
        }
        visitedPlanFrags.add(subStagePlan.getFragment().getId());
        // Recurse with this stage as the exchange-location consumer of the child.
        List<SqlStageExecution> subTree = createStages(stageExecution::addExchangeLocations, nextStageId, locationFactory, subStagePlan.withBucketToPartition(bucketToPartition), nodeScheduler, remoteTaskFactory, session, splitBatchSize, partitioningCache, nodePartitioningManager, queryExecutor, schedulerExecutor, failureDetector, nodeTaskMap, stageSchedulers, stageLinkages, isSnapshotEnabled, snapshotManager, stageTaskCounts);
        localStages.addAll(subTree);
        // Element 0 of a subtree is the direct child stage (each invocation adds its own stage first).
        SqlStageExecution childStage = subTree.get(0);
        childStagesBuilder.add(childStage);
        // Find the remote source node of this fragment that reads from the child's fragment.
        Optional<RemoteSourceNode> parentNode = plan.getFragment().getRemoteSourceNodes().stream().filter(x -> x.getSourceFragmentIds().contains(childStage.getFragment().getId())).findAny();
        checkArgument(parentNode.isPresent(), "Couldn't find parent of a CTE node");
        childStage.setParentId(parentNode.get().getId());
    }
    Set<SqlStageExecution> childStages = childStagesBuilder.build();
    // Cancel the child stages once this stage reaches a done state other than RESCHEDULING.
    stageExecution.addStateChangeListener(newState -> {
        if (newState.isDone() && newState != StageState.RESCHEDULING) {
            // Snapshot: For "rescheduling", tasks are already cancelled (for resume)
            childStages.forEach(SqlStageExecution::cancel);
        }
    });
    stageLinkages.put(stageId, new StageLinkage(plan.getFragment().getId(), parent, childStages));
    if (partitioningHandle.equals(SCALED_WRITER_DISTRIBUTION)) {
        // The scaled-writer scheduler is given suppliers of the child (source) task statuses and of this stage's (writer) task statuses.
        Supplier<Collection<TaskStatus>> sourceTasksProvider = () -> childStages.stream().map(SqlStageExecution::getAllTasks).flatMap(Collection::stream).map(RemoteTask::getTaskStatus).collect(toList());
        Supplier<Collection<TaskStatus>> writerTasksProvider = () -> stageExecution.getAllTasks().stream().map(RemoteTask::getTaskStatus).collect(toList());
        ScaledWriterScheduler scheduler = new ScaledWriterScheduler(stageExecution, sourceTasksProvider, writerTasksProvider, nodeScheduler.createNodeSelector(null, keepConsumerOnFeederNodes, feederScheduledNodes), schedulerExecutor, getWriterMinSize(session), isSnapshotEnabled, stageTaskCounts != null ? stageTaskCounts.get(stageId) : null);
        // Finish the writer scheduler once every child stage is done.
        whenAllStages(childStages, StageState::isDone).addListener(scheduler::finish, directExecutor());
        stageSchedulers.put(stageId, scheduler);
    }
    return localStages.build();
}
Also used : CANCELED(io.prestosql.execution.StageState.CANCELED) SCHEDULED(io.prestosql.execution.StageState.SCHEDULED) PlanFragmentId(io.prestosql.sql.planner.plan.PlanFragmentId) NO_NODES_AVAILABLE(io.prestosql.spi.StandardErrorCode.NO_NODES_AVAILABLE) FIXED_BROADCAST_DISTRIBUTION(io.prestosql.sql.planner.SystemPartitioningHandle.FIXED_BROADCAST_DISTRIBUTION) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) Map(java.util.Map) SystemSessionProperties.getWriterMinSize(io.prestosql.SystemSessionProperties.getWriterMinSize) HeuristicIndexerManager(io.prestosql.heuristicindex.HeuristicIndexerManager) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) TaskStatus(io.prestosql.execution.TaskStatus) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) LocationFactory(io.prestosql.execution.LocationFactory) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) StageState(io.prestosql.execution.StageState) ConnectorPartitionHandle(io.prestosql.spi.connector.ConnectorPartitionHandle) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) SetThreadName(io.airlift.concurrent.SetThreadName) Iterables(com.google.common.collect.Iterables) RESUMABLE_FAILURE(io.prestosql.execution.StageState.RESUMABLE_FAILURE) Supplier(java.util.function.Supplier) SCALED_WRITER_DISTRIBUTION(io.prestosql.sql.planner.SystemPartitioningHandle.SCALED_WRITER_DISTRIBUTION) QueryStateMachine(io.prestosql.execution.QueryStateMachine) ArrayList(java.util.ArrayList) CatalogName.isInternalSystemConnector(io.prestosql.spi.connector.CatalogName.isInternalSystemConnector) Session(io.prestosql.Session) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) 
ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) NodeTaskMap(io.prestosql.execution.NodeTaskMap) SplitSource(io.prestosql.split.SplitSource) FINISHED(io.prestosql.execution.StageState.FINISHED) StageId(io.prestosql.execution.StageId) InternalNode(io.prestosql.metadata.InternalNode) Sets.newConcurrentHashSet(com.google.common.collect.Sets.newConcurrentHashSet) QuerySnapshotManager(io.prestosql.snapshot.QuerySnapshotManager) ResourceGroupInfo(io.prestosql.server.ResourceGroupInfo) PartitioningHandle(io.prestosql.sql.planner.PartitioningHandle) QueryState(io.prestosql.execution.QueryState) NodePartitionMap(io.prestosql.sql.planner.NodePartitionMap) SqlStageExecution.createSqlStageExecution(io.prestosql.execution.SqlStageExecution.createSqlStageExecution) SqlStageExecution(io.prestosql.execution.SqlStageExecution) ABORTED(io.prestosql.execution.StageState.ABORTED) FailureDetector(io.prestosql.failuredetector.FailureDetector) RemoteTask(io.prestosql.execution.RemoteTask) FAILED(io.prestosql.execution.StageState.FAILED) SystemSessionProperties(io.prestosql.SystemSessionProperties) BiFunction(java.util.function.BiFunction) SettableFuture(com.google.common.util.concurrent.SettableFuture) SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler(io.prestosql.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) URI(java.net.URI) Collectors.toSet(java.util.stream.Collectors.toSet) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Collection(java.util.Collection) CatalogName(io.prestosql.spi.connector.CatalogName) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) 
RemoteTaskFactory(io.prestosql.execution.RemoteTaskFactory) UUID(java.util.UUID) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) SOURCE_DISTRIBUTION(io.prestosql.sql.planner.SystemPartitioningHandle.SOURCE_DISTRIBUTION) StageInfo(io.prestosql.execution.StageInfo) Entry(java.util.Map.Entry) HttpUriBuilder.uriBuilderFrom(io.airlift.http.client.HttpUriBuilder.uriBuilderFrom) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) MoreFutures.whenAnyComplete(io.airlift.concurrent.MoreFutures.whenAnyComplete) BasicStageStats(io.prestosql.execution.BasicStageStats) NodePartitioningManager(io.prestosql.sql.planner.NodePartitioningManager) NOT_PARTITIONED(io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) RUNNING(io.prestosql.execution.StageState.RUNNING) TaskId(io.prestosql.execution.TaskId) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Logger(io.airlift.log.Logger) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RemoteSourceNode(io.prestosql.sql.planner.plan.RemoteSourceNode) HashMap(java.util.HashMap) OutputBuffers(io.prestosql.execution.buffer.OutputBuffers) TaskLocation(io.prestosql.operator.TaskLocation) HashSet(java.util.HashSet) SnapshotConfig.calculateTaskCount(io.prestosql.snapshot.SnapshotConfig.calculateTaskCount) ImmutableList(com.google.common.collect.ImmutableList) SystemSessionProperties.isReuseTableScanEnabled(io.prestosql.SystemSessionProperties.isReuseTableScanEnabled) OutputBufferId(io.prestosql.execution.buffer.OutputBuffers.OutputBufferId) Verify.verify(com.google.common.base.Verify.verify) Failures.checkCondition(io.prestosql.util.Failures.checkCondition) Objects.requireNonNull(java.util.Objects.requireNonNull) TimeStat(io.airlift.stats.TimeStat) REPLICATE(io.prestosql.sql.planner.plan.ExchangeNode.Type.REPLICATE) ExecutorService(java.util.concurrent.ExecutorService) 
Ints(com.google.common.primitives.Ints) DynamicFilterService(io.prestosql.dynamicfilter.DynamicFilterService) MoreFutures.tryGetFutureValue(io.airlift.concurrent.MoreFutures.tryGetFutureValue) SystemSessionProperties.getConcurrentLifespansPerNode(io.prestosql.SystemSessionProperties.getConcurrentLifespansPerNode) Collectors.toList(java.util.stream.Collectors.toList) Collections(java.util.Collections) SECONDS(java.util.concurrent.TimeUnit.SECONDS) BasicStageStats.aggregateBasicStageStats(io.prestosql.execution.BasicStageStats.aggregateBasicStageStats) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) StageId(io.prestosql.execution.StageId) ArrayList(java.util.ArrayList) SqlStageExecution.createSqlStageExecution(io.prestosql.execution.SqlStageExecution.createSqlStageExecution) SqlStageExecution(io.prestosql.execution.SqlStageExecution) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) RemoteSourceNode(io.prestosql.sql.planner.plan.RemoteSourceNode) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) NodePartitionMap(io.prestosql.sql.planner.NodePartitionMap) Collection(java.util.Collection) CatalogName(io.prestosql.spi.connector.CatalogName) PartitioningHandle(io.prestosql.sql.planner.PartitioningHandle) SplitSource(io.prestosql.split.SplitSource) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) NodeTaskMap(io.prestosql.execution.NodeTaskMap) NodePartitionMap(io.prestosql.sql.planner.NodePartitionMap) 
ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 13 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class TestNodeScheduler method testRuseExchangeComputeAssignments.

/**
 * Verifies that, for a reuse-exchange producer stage and consumer stage sharing the same reuse UUID,
 * computing assignments for the same single split yields equal {@code SplitKey}s on both sides.
 *
 * <p>Each side is expected to produce exactly one assignment entry
 * ({@code Iterables.getOnlyElement} fails otherwise).
 */
@Test
public void testRuseExchangeComputeAssignments() {
    setUpNodes();
    Split split = new Split(CONNECTOR_ID, new TestSplitLocallyAccessible(), Lifespan.taskWide());
    Set<Split> splits = ImmutableSet.of(split);
    NodeTaskMap newNodeTaskMap = new NodeTaskMap(new FinalizerService());
    StageId stageId = new StageId(new QueryId("query"), 0);
    // Producer and consumer fragments are tied together by this shared reuse id.
    UUID uuid = UUID.randomUUID();

    // Producer side.
    PlanFragment testFragmentProducer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_PRODUCER, uuid, 1);
    PlanNodeId tableScanNodeId = new PlanNodeId("plan_id");
    StageExecutionPlan producerStageExecutionPlan = new StageExecutionPlan(testFragmentProducer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution producerStage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), producerStageExecutionPlan.getFragment(), producerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, newNodeTaskMap, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Map.Entry<InternalNode, Split> producerAssignment = Iterables.getOnlyElement(nodeSelector.computeAssignments(splits, ImmutableList.copyOf(this.taskMap.values()), Optional.of(producerStage)).getAssignments().entries());

    // Consumer side: same reuse id and the same split set.
    PlanFragment testFragmentConsumer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_CONSUMER, uuid, 1);
    StageExecutionPlan consumerStageExecutionPlan = new StageExecutionPlan(testFragmentConsumer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution stage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), consumerStageExecutionPlan.getFragment(), consumerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, newNodeTaskMap, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Map.Entry<InternalNode, Split> consumerAssignment = Iterables.getOnlyElement(nodeSelector.computeAssignments(splits, ImmutableList.copyOf(this.taskMap.values()), Optional.of(stage)).getAssignments().entries());

    Split producerSplit = producerAssignment.getValue();
    Split consumerSplit = consumerAssignment.getValue();
    SplitKey splitKeyProducer = new SplitKey(producerSplit, producerSplit.getCatalogName().getCatalogName(), TEST_SCHEMA, "test");
    // The consumer key must be built from the consumer's own split; building it from the
    // producer's split would make the comparison below trivially true.
    SplitKey splitKeyConsumer = new SplitKey(consumerSplit, consumerSplit.getCatalogName().getCatalogName(), TEST_SCHEMA, "test");
    assertEquals(splitKeyConsumer, splitKeyProducer);
}
Also used : NoOpFailureDetector(io.prestosql.failuredetector.NoOpFailureDetector) SplitKey(io.prestosql.execution.SplitKey) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) StageId(io.prestosql.execution.StageId) TestPhasedExecutionSchedule.createTableScanPlanFragment(io.prestosql.execution.scheduler.TestPhasedExecutionSchedule.createTableScanPlanFragment) PlanFragment(io.prestosql.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.prestosql.split.ConnectorAwareSplitSource) SqlStageExecution.createSqlStageExecution(io.prestosql.execution.SqlStageExecution.createSqlStageExecution) SqlStageExecution(io.prestosql.execution.SqlStageExecution) QuerySnapshotManager(io.prestosql.snapshot.QuerySnapshotManager) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) LocalStateStoreProvider(io.prestosql.statestore.LocalStateStoreProvider) SeedStoreManager(io.prestosql.seedstore.SeedStoreManager) TableInfo(io.prestosql.execution.TableInfo) DynamicFilterService(io.prestosql.dynamicfilter.DynamicFilterService) UUID(java.util.UUID) NodeTaskMap(io.prestosql.execution.NodeTaskMap) QueryId(io.prestosql.spi.QueryId) QualifiedObjectName(io.prestosql.spi.connector.QualifiedObjectName) FileSystemClientManager(io.prestosql.filesystem.FileSystemClientManager) FinalizerService(io.prestosql.util.FinalizerService) InternalNode(io.prestosql.metadata.InternalNode) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) SplitCacheMap(io.prestosql.execution.SplitCacheMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) NodeTaskMap(io.prestosql.execution.NodeTaskMap) MockRemoteTaskFactory(io.prestosql.execution.MockRemoteTaskFactory) Test(org.testng.annotations.Test)

Example 14 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class TestNodeScheduler method testSplitCacheAwareScheduling.

/**
 * Exercises split scheduling through a {@code SplitCacheAwareNodeSelector}.
 *
 * <p>The test runs three scheduling rounds over the same three splits:
 * <ol>
 *   <li>with no cache predicate registered, no split-to-node mapping is recorded;</li>
 *   <li>after registering the predicate {@code a = 23}, the splits flagged as cacheable
 *       ({@code split} and {@code split3}) get a cached node id while {@code split2} does not;</li>
 *   <li>scheduling again must place the cached splits on the nodes recorded in the cache.</li>
 * </ol>
 */
@Test
public void testSplitCacheAwareScheduling() {
    setUpNodes();
    PropertyService.setProperty(HetuConstant.SPLIT_CACHE_MAP_ENABLED, true);
    SplitCacheMap splitCacheMap = SplitCacheMap.getInstance();
    QualifiedName tableQN = QualifiedName.of(CONNECTOR_ID.toString(), TEST_SCHEMA, TEST_TABLE);
    MockSplit mock = new MockSplit("hdfs://hacluster/user/hive/warehouse/test_schema.db/test_table/a=23/000000_0", 0, 10, System.currentTimeMillis(), true);
    MockSplit mock2 = new MockSplit("hdfs://hacluster/user/hive/warehouse/test_schema.db/test_table/b=33/000000_0", 0, 10, System.currentTimeMillis(), false);
    MockSplit mock3 = new MockSplit("hdfs://hacluster/user/hive/warehouse/test_schema.db/test_table/a=23/000001_0", 0, 10, System.currentTimeMillis(), true);
    Split split = new Split(CONNECTOR_ID, mock, Lifespan.taskWide());
    Split split2 = new Split(CONNECTOR_ID, mock2, Lifespan.taskWide());
    Split split3 = new Split(CONNECTOR_ID, mock3, Lifespan.taskWide());
    Set<Split> splits = ImmutableSet.of(split, split2, split3);
    assertFalse(splitCacheMap.cacheExists(tableQN));

    // Build each cache key from the info map of its OWN split (previously all three keys
    // read the table name from the first split's info map).
    Map<?, ?> splitInfoMap = (Map<?, ?>) split.getConnectorSplit().getInfo();
    SplitKey splitKey = new SplitKey(split, split.getCatalogName().getCatalogName(), TEST_SCHEMA, splitInfoMap.get("table").toString());
    assertFalse(splitCacheMap.getCachedNodeId(splitKey).isPresent());
    Map<?, ?> split2InfoMap = (Map<?, ?>) split2.getConnectorSplit().getInfo();
    SplitKey split2Key = new SplitKey(split2, split2.getCatalogName().getCatalogName(), TEST_SCHEMA, split2InfoMap.get("table").toString());
    Map<?, ?> split3InfoMap = (Map<?, ?>) split3.getConnectorSplit().getInfo();
    SplitKey split3Key = new SplitKey(split3, split3.getCatalogName().getCatalogName(), TEST_SCHEMA, split3InfoMap.get("table").toString());

    TestNetworkTopology topology = new TestNetworkTopology();
    NetworkLocationCache locationCache = new NetworkLocationCache(topology);
    // contents of taskMap indicate the node-task map for the current stage
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(20).setIncludeCoordinator(false).setMaxPendingSplitsPerTask(10);
    NodeScheduler nodeScheduler = new NodeScheduler(locationCache, topology, nodeManager, nodeSchedulerConfig, nodeTaskMap);
    NodeSelector selector = nodeScheduler.createNodeSelector(CONNECTOR_ID, false, null);
    assertTrue(selector instanceof SplitCacheAwareNodeSelector);

    // Round 1: no cache predicates defined, thus the split to worker mapping will not be saved
    Multimap<InternalNode, Split> assignment1 = selector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    // TestNG's assertEquals takes (actual, expected)
    assertEquals(assignment1.size(), 3);
    assertFalse(splitCacheMap.getCachedNodeId(splitKey).isPresent());

    // Add cache predicate
    ColumnMetadata columnMetadataA = new ColumnMetadata("a", BIGINT);
    TupleDomain<ColumnMetadata> tupleDomainA = TupleDomain.withColumnDomains(ImmutableMap.of(columnMetadataA, Domain.singleValue(BIGINT, 23L)));
    splitCacheMap.addCache(tableQN, tupleDomainA, "a = 23");
    assertFalse(splitCacheMap.getCachedNodeId(splitKey).isPresent());

    // Round 2: split will be assigned by default node selector and the mapping cached
    Multimap<InternalNode, Split> assignment2 = selector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    assertTrue(assignment2.containsValue(split));
    assertTrue(assignment2.containsValue(split2));
    assertTrue(assignment2.containsValue(split3));
    assertFalse(splitCacheMap.getCachedNodeId(split2Key).isPresent());
    Multimap<String, Split> nodeIdToSplits = ArrayListMultimap.create();
    assignment2.forEach((node, spl) -> nodeIdToSplits.put(node.getNodeIdentifier(), spl));
    assertTrue(nodeIdToSplits.get(splitCacheMap.getCachedNodeId(splitKey).get()).contains(split));
    assertTrue(nodeIdToSplits.get(splitCacheMap.getCachedNodeId(split3Key).get()).contains(split3));

    // Round 3: schedule the splits again; cached splits must land on their cached nodes
    Multimap<InternalNode, Split> assignment3 = selector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    assertTrue(assignment3.containsValue(split));
    assertTrue(assignment3.containsValue(split2));
    assertTrue(assignment3.containsValue(split3));
    assertFalse(splitCacheMap.getCachedNodeId(split2Key).isPresent());
    Multimap<String, Split> nodeIdToSplits3 = ArrayListMultimap.create();
    assignment3.forEach((node, spl) -> nodeIdToSplits3.put(node.getNodeIdentifier(), spl));
    // Verify against the third round's placement (the second round's map was checked before,
    // which left this round's result unverified).
    assertTrue(nodeIdToSplits3.get(splitCacheMap.getCachedNodeId(splitKey).get()).contains(split));
    assertTrue(nodeIdToSplits3.get(splitCacheMap.getCachedNodeId(split3Key).get()).contains(split3));
}
Also used : SplitCacheMap(io.prestosql.execution.SplitCacheMap) SplitKey(io.prestosql.execution.SplitKey) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) QualifiedName(io.prestosql.sql.tree.QualifiedName) MockSplit(io.prestosql.MockSplit) InternalNode(io.prestosql.metadata.InternalNode) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) SplitCacheMap(io.prestosql.execution.SplitCacheMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) NodeTaskMap(io.prestosql.execution.NodeTaskMap) Test(org.testng.annotations.Test)

Example 15 with NodeTaskMap

use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.

the class TestNodeScheduler method testRuseExchangeComputeAssignmentsSplitsNotMatchProdConsumer.

/**
 * Verifies that computing assignments for a reuse-exchange consumer stage fails with a
 * {@code PrestoException} when the consumer's splits differ from the splits previously
 * scheduled for the producer stage sharing the same reuse-exchange UUID.
 */
@Test
public void testRuseExchangeComputeAssignmentsSplitsNotMatchProdConsumer() {
    setUpNodes();
    Split split = new Split(CONNECTOR_ID, new TestSplitLocallyAccessible(), Lifespan.taskWide());
    Set<Split> splits = ImmutableSet.of(split);
    NodeTaskMap newNodeTaskMap = new NodeTaskMap(new FinalizerService());
    StageId stageId = new StageId(new QueryId("query"), 0);
    // Producer and consumer fragments are tied together by this shared UUID.
    UUID uuid = UUID.randomUUID();

    // Producer
    PlanFragment testFragmentProducer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_PRODUCER, uuid, 1);
    PlanNodeId tableScanNodeId = new PlanNodeId("plan_id");
    StageExecutionPlan producerStageExecutionPlan = new StageExecutionPlan(testFragmentProducer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution producerStage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), producerStageExecutionPlan.getFragment(), producerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, newNodeTaskMap, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    // Schedule the producer first; only the call itself matters here, its result is not inspected.
    nodeSelector.computeAssignments(splits, ImmutableList.copyOf(this.taskMap.values()), Optional.of(producerStage));

    // Consumer
    Split splitConsumer = new Split(CONNECTOR_ID, new TestSplitLocallyAccessibleDifferentIndex(), Lifespan.taskWide());
    Set<Split> splitConsumers = ImmutableSet.of(splitConsumer);
    PlanFragment testFragmentConsumer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_CONSUMER, uuid, 1);
    StageExecutionPlan consumerStageExecutionPlan = new StageExecutionPlan(testFragmentConsumer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution stage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), consumerStageExecutionPlan.getFragment(), consumerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, newNodeTaskMap, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    try {
        nodeSelector.computeAssignments(splitConsumers, ImmutableList.copyOf(this.taskMap.values()), Optional.of(stage));
    } catch (PrestoException e) {
        // TestNG's assertEquals takes (actual, expected)
        assertEquals(e.getMessage(), "Producer & consumer splits are not same");
        return;
    }
    // Reaching this point means the mismatch went undetected; fail with a descriptive message
    // instead of the opaque assertEquals(false, true).
    throw new AssertionError("Expected PrestoException because producer and consumer splits differ");
}
Also used : NoOpFailureDetector(io.prestosql.failuredetector.NoOpFailureDetector) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) StageId(io.prestosql.execution.StageId) PrestoException(io.prestosql.spi.PrestoException) TestPhasedExecutionSchedule.createTableScanPlanFragment(io.prestosql.execution.scheduler.TestPhasedExecutionSchedule.createTableScanPlanFragment) PlanFragment(io.prestosql.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.prestosql.split.ConnectorAwareSplitSource) SqlStageExecution.createSqlStageExecution(io.prestosql.execution.SqlStageExecution.createSqlStageExecution) SqlStageExecution(io.prestosql.execution.SqlStageExecution) QuerySnapshotManager(io.prestosql.snapshot.QuerySnapshotManager) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) LocalStateStoreProvider(io.prestosql.statestore.LocalStateStoreProvider) SeedStoreManager(io.prestosql.seedstore.SeedStoreManager) TableInfo(io.prestosql.execution.TableInfo) DynamicFilterService(io.prestosql.dynamicfilter.DynamicFilterService) UUID(java.util.UUID) NodeTaskMap(io.prestosql.execution.NodeTaskMap) QueryId(io.prestosql.spi.QueryId) QualifiedObjectName(io.prestosql.spi.connector.QualifiedObjectName) FileSystemClientManager(io.prestosql.filesystem.FileSystemClientManager) FinalizerService(io.prestosql.util.FinalizerService) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) MockRemoteTaskFactory(io.prestosql.execution.MockRemoteTaskFactory) Test(org.testng.annotations.Test)

Aggregations

NodeTaskMap (io.prestosql.execution.NodeTaskMap)19 SqlStageExecution (io.prestosql.execution.SqlStageExecution)16 Test (org.testng.annotations.Test)15 StageExecutionPlan (io.prestosql.sql.planner.StageExecutionPlan)14 InternalNode (io.prestosql.metadata.InternalNode)11 Split (io.prestosql.metadata.Split)10 PlanNodeId (io.prestosql.spi.plan.PlanNodeId)10 RemoteTask (io.prestosql.execution.RemoteTask)9 TestingSplit (io.prestosql.testing.TestingSplit)9 SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler (io.prestosql.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler)8 ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)8 DynamicFilterService (io.prestosql.dynamicfilter.DynamicFilterService)7 MockRemoteTaskFactory (io.prestosql.execution.MockRemoteTaskFactory)7 SqlStageExecution.createSqlStageExecution (io.prestosql.execution.SqlStageExecution.createSqlStageExecution)7 StageId (io.prestosql.execution.StageId)7 TableInfo (io.prestosql.execution.TableInfo)7 QuerySnapshotManager (io.prestosql.snapshot.QuerySnapshotManager)7 QualifiedObjectName (io.prestosql.spi.connector.QualifiedObjectName)7 FinalizerService (io.prestosql.util.FinalizerService)7 SplitKey (io.prestosql.execution.SplitKey)6