
Example 6 with SplitKey

Use of io.prestosql.execution.SplitKey in the project hetu-core by openlookeng.

From the class SimpleNodeSelector, the method createConsumerScanNodeAssignment:

private Multimap<InternalNode, Split> createConsumerScanNodeAssignment(QualifiedObjectName tableName, Set<Split> splits, Set<SplitKey> splitKeySet, HashMap<SplitKey, InternalNode> splitKeyNodeAssignment) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    for (Split split : splits) {
        Split aSplit;
        if (split.getConnectorSplit().getSplitCount() > 1) {
            aSplit = split.getSplits().get(0);
        } else {
            aSplit = split;
        }
        boolean matched = false;
        SplitKey splitKey = new SplitKey(aSplit, tableName.getCatalogName(), tableName.getSchemaName(), tableName.getObjectName());
        for (Iterator<SplitKey> it = splitKeySet.iterator(); it.hasNext(); ) {
            SplitKey producerSplitKey = it.next();
            if (splitKey.equals(producerSplitKey)) {
                InternalNode node = splitKeyNodeAssignment.get(producerSplitKey);
                assignment.put(node, split);
                matched = true;
                break;
            }
        }
        if (!matched) {
            log.debug("split not matched: " + aSplit);
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Producer & consumer splits are not same");
        }
    }
    return assignment;
}
Also used : SplitKey(io.prestosql.execution.SplitKey) PrestoException(io.prestosql.spi.PrestoException) InternalNode(io.prestosql.metadata.InternalNode) Split(io.prestosql.metadata.Split)
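
Since the method above uses SplitKey as a HashMap key, its equals/hashCode must be consistent; under that assumption, the producer-split matching can be expressed as a direct map lookup instead of a linear scan over splitKeySet. The following is a minimal sketch of such a hypothetical helper (it is not part of SimpleNodeSelector) and relies only on the constructor and accessors already shown above:

// Hypothetical consumer-side matcher; a sketch, not the hetu-core implementation.
// It reuses the producer's SplitKey -> InternalNode map directly.
private Multimap<InternalNode, Split> assignConsumerSplitsByLookup(QualifiedObjectName tableName, Set<Split> splits, Map<SplitKey, InternalNode> splitKeyNodeAssignment) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    for (Split split : splits) {
        // For grouped splits, key on the first sub-split, exactly as createConsumerScanNodeAssignment does
        Split aSplit = split.getConnectorSplit().getSplitCount() > 1 ? split.getSplits().get(0) : split;
        SplitKey splitKey = new SplitKey(aSplit, tableName.getCatalogName(), tableName.getSchemaName(), tableName.getObjectName());
        InternalNode node = splitKeyNodeAssignment.get(splitKey);
        if (node == null) {
            log.debug("split not matched: " + aSplit);
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Producer & consumer splits are not same");
        }
        assignment.put(node, split);
    }
    return assignment;
}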

Example 7 with SplitKey

Use of io.prestosql.execution.SplitKey in the project hetu-core by openlookeng.

From the class TestNodeScheduler, the method testRuseExchangeComputeAssignments:

@Test
public void testRuseExchangeComputeAssignments() {
    setUpNodes();
    Split split = new Split(CONNECTOR_ID, new TestSplitLocallyAccessible(), Lifespan.taskWide());
    Set<Split> splits = ImmutableSet.of(split);
    NodeTaskMap newNodeTaskMap = new NodeTaskMap(new FinalizerService());
    StageId stageId = new StageId(new QueryId("query"), 0);
    UUID uuid = UUID.randomUUID();
    PlanFragment testFragmentProducer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_PRODUCER, uuid, 1);
    PlanNodeId tableScanNodeId = new PlanNodeId("plan_id");
    StageExecutionPlan producerStageExecutionPlan = new StageExecutionPlan(testFragmentProducer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution producerStage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), producerStageExecutionPlan.getFragment(), producerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, newNodeTaskMap, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Map.Entry<InternalNode, Split> producerAssignment = Iterables.getOnlyElement(nodeSelector.computeAssignments(splits, ImmutableList.copyOf(this.taskMap.values()), Optional.of(producerStage)).getAssignments().entries());
    PlanFragment testFragmentConsumer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_CONSUMER, uuid, 1);
    StageExecutionPlan consumerStageExecutionPlan = new StageExecutionPlan(testFragmentConsumer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution stage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), consumerStageExecutionPlan.getFragment(), consumerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, newNodeTaskMap, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Map.Entry<InternalNode, Split> consumerAssignment = Iterables.getOnlyElement(nodeSelector.computeAssignments(splits, ImmutableList.copyOf(this.taskMap.values()), Optional.of(stage)).getAssignments().entries());
    Split producerSplit = producerAssignment.getValue();
    Split consumerSplit = consumerAssignment.getValue();
    SplitKey splitKeyProducer = new SplitKey(producerSplit, producerSplit.getCatalogName().getCatalogName(), TEST_SCHEMA, "test");
    SplitKey splitKeyConsumer = new SplitKey(consumerSplit, consumerSplit.getCatalogName().getCatalogName(), TEST_SCHEMA, "test");
    assertEquals(splitKeyConsumer, splitKeyProducer);
}
Also used : NoOpFailureDetector(io.prestosql.failuredetector.NoOpFailureDetector) SplitKey(io.prestosql.execution.SplitKey) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) StageId(io.prestosql.execution.StageId) TestPhasedExecutionSchedule.createTableScanPlanFragment(io.prestosql.execution.scheduler.TestPhasedExecutionSchedule.createTableScanPlanFragment) PlanFragment(io.prestosql.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.prestosql.split.ConnectorAwareSplitSource) SqlStageExecution.createSqlStageExecution(io.prestosql.execution.SqlStageExecution.createSqlStageExecution) SqlStageExecution(io.prestosql.execution.SqlStageExecution) QuerySnapshotManager(io.prestosql.snapshot.QuerySnapshotManager) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) LocalStateStoreProvider(io.prestosql.statestore.LocalStateStoreProvider) SeedStoreManager(io.prestosql.seedstore.SeedStoreManager) TableInfo(io.prestosql.execution.TableInfo) DynamicFilterService(io.prestosql.dynamicfilter.DynamicFilterService) UUID(java.util.UUID) NodeTaskMap(io.prestosql.execution.NodeTaskMap) QueryId(io.prestosql.spi.QueryId) QualifiedObjectName(io.prestosql.spi.connector.QualifiedObjectName) FileSystemClientManager(io.prestosql.filesystem.FileSystemClientManager) FinalizerService(io.prestosql.util.FinalizerService) InternalNode(io.prestosql.metadata.InternalNode) MockSplit(io.prestosql.MockSplit) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) SplitCacheMap(io.prestosql.execution.SplitCacheMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) MockRemoteTaskFactory(io.prestosql.execution.MockRemoteTaskFactory) Test(org.testng.annotations.Test)
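
As a hedged follow-up, the property this test exercises could be asserted slightly more strongly: under the reuse-exchange strategy the consumer's split is expected to land on the same node as the producer's, so the node of each assignment entry can be compared as well. These are hypothetical extra assertions, not part of the original test, and assume the variables from the test above are in scope:

    // Hypothetical stronger checks: same node and same split key for producer and consumer
    assertEquals(consumerAssignment.getKey(), producerAssignment.getKey());
    assertEquals(splitKeyConsumer, splitKeyProducer);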

Example 8 with SplitKey

Use of io.prestosql.execution.SplitKey in the project hetu-core by openlookeng.

From the class TestNodeScheduler, the method testSplitCacheAwareScheduling:

@Test
public void testSplitCacheAwareScheduling() {
    setUpNodes();
    PropertyService.setProperty(HetuConstant.SPLIT_CACHE_MAP_ENABLED, true);
    SplitCacheMap splitCacheMap = SplitCacheMap.getInstance();
    QualifiedName tableQN = QualifiedName.of(CONNECTOR_ID.toString(), TEST_SCHEMA, TEST_TABLE);
    MockSplit mock = new MockSplit("hdfs://hacluster/user/hive/warehouse/test_schema.db/test_table/a=23/000000_0", 0, 10, System.currentTimeMillis(), true);
    MockSplit mock2 = new MockSplit("hdfs://hacluster/user/hive/warehouse/test_schema.db/test_table/b=33/000000_0", 0, 10, System.currentTimeMillis(), false);
    MockSplit mock3 = new MockSplit("hdfs://hacluster/user/hive/warehouse/test_schema.db/test_table/a=23/000001_0", 0, 10, System.currentTimeMillis(), true);
    Split split = new Split(CONNECTOR_ID, mock, Lifespan.taskWide());
    Split split2 = new Split(CONNECTOR_ID, mock2, Lifespan.taskWide());
    Split split3 = new Split(CONNECTOR_ID, mock3, Lifespan.taskWide());
    Set<Split> splits = ImmutableSet.of(split, split2, split3);
    assertFalse(splitCacheMap.cacheExists(tableQN));
    Map splitInfoMap = (Map) split.getConnectorSplit().getInfo();
    SplitKey splitKey = new SplitKey(split, split.getCatalogName().getCatalogName(), TEST_SCHEMA, splitInfoMap.get("table").toString());
    assertFalse(splitCacheMap.getCachedNodeId(splitKey).isPresent());
    Map split2InfoMap = (Map) split2.getConnectorSplit().getInfo();
    SplitKey split2Key = new SplitKey(split2, split2.getCatalogName().getCatalogName(), TEST_SCHEMA, split2InfoMap.get("table").toString());
    Map split3InfoMap = (Map) split3.getConnectorSplit().getInfo();
    SplitKey split3Key = new SplitKey(split3, split3.getCatalogName().getCatalogName(), TEST_SCHEMA, split3InfoMap.get("table").toString());
    TestNetworkTopology topology = new TestNetworkTopology();
    NetworkLocationCache locationCache = new NetworkLocationCache(topology);
    // contents of taskMap indicate the node-task map for the current stage
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(20).setIncludeCoordinator(false).setMaxPendingSplitsPerTask(10);
    NodeScheduler nodeScheduler = new NodeScheduler(locationCache, topology, nodeManager, nodeSchedulerConfig, nodeTaskMap);
    NodeSelector selector = nodeScheduler.createNodeSelector(CONNECTOR_ID, false, null);
    assertTrue(selector instanceof SplitCacheAwareNodeSelector);
    Multimap<InternalNode, Split> assignment1 = selector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    assertEquals(3, assignment1.size());
    // No cache predicates defined, thus the split to worker mapping will not be saved
    assertFalse(splitCacheMap.getCachedNodeId(splitKey).isPresent());
    // Add cache predicate
    ColumnMetadata columnMetadataA = new ColumnMetadata("a", BIGINT);
    TupleDomain<ColumnMetadata> tupleDomainA = TupleDomain.withColumnDomains(ImmutableMap.of(columnMetadataA, Domain.singleValue(BIGINT, 23L)));
    splitCacheMap.addCache(tableQN, tupleDomainA, "a = 23");
    assertFalse(splitCacheMap.getCachedNodeId(splitKey).isPresent());
    Multimap<InternalNode, Split> assignment2 = selector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    // Split will be assigned by default node selector and the mapping cached
    assertTrue(assignment2.containsValue(split));
    assertTrue(assignment2.containsValue(split2));
    assertTrue(assignment2.containsValue(split3));
    assertFalse(splitCacheMap.getCachedNodeId(split2Key).isPresent());
    Multimap<String, Split> nodeIdToSplits = ArrayListMultimap.create();
    assignment2.forEach((node, spl) -> nodeIdToSplits.put(node.getNodeIdentifier(), spl));
    assertTrue(nodeIdToSplits.get(splitCacheMap.getCachedNodeId(splitKey).get()).contains(split));
    assertTrue(nodeIdToSplits.get(splitCacheMap.getCachedNodeId(split3Key).get()).contains(split3));
    // Schedule split again and the same assignments should be returned
    Multimap<InternalNode, Split> assignment3 = selector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    // Split will be assigned by default node selector and the mapping cached
    assertTrue(assignment3.containsValue(split));
    assertTrue(assignment3.containsValue(split2));
    assertTrue(assignment3.containsValue(split3));
    assertFalse(splitCacheMap.getCachedNodeId(split2Key).isPresent());
    Multimap<String, Split> nodeIdToSplits3 = ArrayListMultimap.create();
    assignment3.forEach((node, spl) -> nodeIdToSplits3.put(node.getNodeIdentifier(), spl));
    assertTrue(nodeIdToSplits3.get(splitCacheMap.getCachedNodeId(splitKey).get()).contains(split));
    assertTrue(nodeIdToSplits3.get(splitCacheMap.getCachedNodeId(split3Key).get()).contains(split3));
}
Also used : SplitCacheMap(io.prestosql.execution.SplitCacheMap) SplitKey(io.prestosql.execution.SplitKey) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) QualifiedName(io.prestosql.sql.tree.QualifiedName) MockSplit(io.prestosql.MockSplit) InternalNode(io.prestosql.metadata.InternalNode) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) NodeTaskMap(io.prestosql.execution.NodeTaskMap) Test(org.testng.annotations.Test)
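
For intuition, here is a simplified, self-contained model of the caching behaviour this test verifies, written with plain java.util types and hypothetical names (it is not the SplitCacheAwareNodeSelector or SplitCacheMap API): once a split matching a cache predicate has been scheduled, its key-to-node mapping is remembered, and later scheduling rounds return the same node.

import java.util.HashMap;
import java.util.Map;
import java.util.function.Predicate;

// Toy model of split-cache-aware assignment; all names are illustrative only.
final class ToySplitCache {
    private final Map<String, String> keyToNodeId = new HashMap<>();  // split key -> cached node id
    private final Predicate<String> cachePredicate;                   // e.g. key -> key.contains("a=23")

    ToySplitCache(Predicate<String> cachePredicate) {
        this.cachePredicate = cachePredicate;
    }

    // Returns the cached node if one exists, otherwise falls back to the default
    // assignment and remembers it when the split matches the cache predicate.
    String assign(String splitKey, String defaultNodeId) {
        String cached = keyToNodeId.get(splitKey);
        if (cached != null) {
            return cached;
        }
        if (cachePredicate.test(splitKey)) {
            keyToNodeId.put(splitKey, defaultNodeId);
        }
        return defaultNodeId;
    }
}

With cache = new ToySplitCache(key -> key.contains("a=23")), the first assign("…/a=23/000000_0", "worker1") records worker1, and repeated calls keep returning worker1 even if a different default node is offered, mirroring the assertions on assignment2 and assignment3 above.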

Example 9 with SplitKey

Use of io.prestosql.execution.SplitKey in the project hetu-core by openlookeng.

From the class TestHiveIntegrationSmokeTest, the method testRuseExchangeGroupSplitsMatchingBetweenProducerConsumer:

@Test
public void testRuseExchangeGroupSplitsMatchingBetweenProducerConsumer() {
    setUpNodes();
    NodeTaskMap nodeTasks = new NodeTaskMap(new FinalizerService());
    StageId stageId = new StageId(new QueryId("query"), 0);
    UUID uuid = UUID.randomUUID();
    PlanFragment testFragmentProducer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_PRODUCER, uuid, 1);
    PlanNodeId tableScanNodeId = new PlanNodeId("plan_id");
    StageExecutionPlan producerStageExecutionPlan = new StageExecutionPlan(testFragmentProducer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution producerStage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), producerStageExecutionPlan.getFragment(), producerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, nodeTasks, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Set<Split> splits = createAndGetSplits(10);
    Multimap<InternalNode, Split> producerAssignment = nodeSelector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.of(producerStage)).getAssignments();
    PlanFragment testFragmentConsumer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_CONSUMER, uuid, 1);
    StageExecutionPlan consumerStageExecutionPlan = new StageExecutionPlan(testFragmentConsumer, ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))), ImmutableList.of(), ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution stage = createSqlStageExecution(stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId), consumerStageExecutionPlan.getFragment(), consumerStageExecutionPlan.getTables(), new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor), TEST_SESSION_REUSE, true, nodeTasks, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(), new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))), new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Multimap<InternalNode, Split> consumerAssignment = nodeSelector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.of(stage)).getAssignments();
    assertEquals(producerAssignment.size(), consumerAssignment.size());
    for (InternalNode node : consumerAssignment.keySet()) {
        List<Split> splitList = new ArrayList<>();
        List<Split> splitList2 = new ArrayList<>();
        Collection<Split> producerSplits = producerAssignment.get(node);
        Collection<Split> consumerSplits = consumerAssignment.get(node);
        producerSplits.forEach(splitList::add);
        consumerSplits.forEach(splitList2::add);
        // The splits are grouped; compare the producer's and consumer's sub-splits pairwise via their SplitKeys
        List<Split> producerSubSplits = splitList.get(0).getSplits();
        List<Split> consumerSubSplits = splitList2.get(0).getSplits();
        assertEquals(producerSubSplits.size(), consumerSubSplits.size());
        int i = 0;
        for (Split producerSubSplit : producerSubSplits) {
            SplitKey splitKey1 = new SplitKey(producerSubSplit, TEST_CATALOG, TEST_SCHEMA, TEST_TABLE);
            SplitKey splitKey2 = new SplitKey(consumerSubSplits.get(i), TEST_CATALOG, TEST_SCHEMA, TEST_TABLE);
            assertEquals(splitKey1, splitKey2);
            i++;
        }
    }
}
Also used : NoOpFailureDetector(io.prestosql.failuredetector.NoOpFailureDetector) SplitKey(io.prestosql.execution.SplitKey) StageExecutionPlan(io.prestosql.sql.planner.StageExecutionPlan) StageId(io.prestosql.execution.StageId) ArrayList(java.util.ArrayList) TestPhasedExecutionSchedule.createTableScanPlanFragment(io.prestosql.execution.scheduler.TestPhasedExecutionSchedule.createTableScanPlanFragment) PlanFragment(io.prestosql.sql.planner.PlanFragment) ConnectorAwareSplitSource(io.prestosql.split.ConnectorAwareSplitSource) SqlStageExecution.createSqlStageExecution(io.prestosql.execution.SqlStageExecution.createSqlStageExecution) SqlStageExecution(io.prestosql.execution.SqlStageExecution) QuerySnapshotManager(io.prestosql.snapshot.QuerySnapshotManager) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) LocalStateStoreProvider(io.prestosql.statestore.LocalStateStoreProvider) SeedStoreManager(io.prestosql.seedstore.SeedStoreManager) TableInfo(io.prestosql.execution.TableInfo) DynamicFilterService(io.prestosql.dynamicfilter.DynamicFilterService) UUID(java.util.UUID) NodeTaskMap(io.prestosql.execution.NodeTaskMap) QueryId(io.prestosql.spi.QueryId) QualifiedObjectName(io.prestosql.spi.connector.QualifiedObjectName) ColumnConstraint(io.prestosql.sql.planner.planprinter.IoPlanPrinter.ColumnConstraint) Constraint(io.prestosql.spi.connector.Constraint) FileSystemClientManager(io.prestosql.filesystem.FileSystemClientManager) SplitSchedulerStats(io.prestosql.execution.scheduler.SplitSchedulerStats) FinalizerService(io.prestosql.util.FinalizerService) InternalNode(io.prestosql.metadata.InternalNode) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Split(io.prestosql.metadata.Split) TestingSplit(io.prestosql.testing.TestingSplit) MockRemoteTaskFactory(io.prestosql.execution.MockRemoteTaskFactory) Test(org.testng.annotations.Test) AbstractTestIntegrationSmokeTest(io.prestosql.tests.AbstractTestIntegrationSmokeTest)
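
A more compact, hypothetical way to state the matching property checked in the loop above (assuming the surrounding test class's TEST_CATALOG, TEST_SCHEMA, and TEST_TABLE constants and imports) is to collect the sub-split keys per node and compare them as sets:

// Hypothetical helper for the test class above; a sketch only.
private static Set<SplitKey> subSplitKeys(Collection<Split> assignedSplits) {
    Set<SplitKey> keys = new HashSet<>();
    for (Split grouped : assignedSplits) {
        for (Split subSplit : grouped.getSplits()) {
            keys.add(new SplitKey(subSplit, TEST_CATALOG, TEST_SCHEMA, TEST_TABLE));
        }
    }
    return keys;
}

// The per-node check then reduces to:
// assertEquals(subSplitKeys(producerAssignment.get(node)), subSplitKeys(consumerAssignment.get(node)));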

Aggregations

SplitKey (io.prestosql.execution.SplitKey) 9
InternalNode (io.prestosql.metadata.InternalNode) 8
Split (io.prestosql.metadata.Split) 8
NodeTaskMap (io.prestosql.execution.NodeTaskMap) 7
SqlStageExecution (io.prestosql.execution.SqlStageExecution) 5
PlanNodeId (io.prestosql.spi.plan.PlanNodeId) 5
HashMap (java.util.HashMap) 5
Map (java.util.Map) 5
SplitCacheMap (io.prestosql.execution.SplitCacheMap) 4
TableInfo (io.prestosql.execution.TableInfo) 4
QualifiedObjectName (io.prestosql.spi.connector.QualifiedObjectName) 4
DynamicFilterService (io.prestosql.dynamicfilter.DynamicFilterService) 3
MockRemoteTaskFactory (io.prestosql.execution.MockRemoteTaskFactory) 3
SqlStageExecution.createSqlStageExecution (io.prestosql.execution.SqlStageExecution.createSqlStageExecution) 3
StageId (io.prestosql.execution.StageId) 3
ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit) 3
TestingSplit (io.prestosql.testing.TestingSplit) 3
Test (org.testng.annotations.Test) 3
Supplier (com.google.common.base.Supplier) 2
Suppliers (com.google.common.base.Suppliers) 2