Search in sources:

Example 6 with SplitCacheMap

Use of io.prestosql.execution.SplitCacheMap in project hetu-core by openlookeng.

In the class TestOrcCache, method testDropCacheWithPredicates.

/**
 * Caches two predicates on each of two tables, then drops one predicate per
 * table and checks that only the dropped predicate disappears from the
 * predicates reported by the {@link SplitCacheMap}.
 */
@Test
public void testDropCacheWithPredicates() {
    final String cache3Key = "hive.tpch.test_drop_cache_3";
    final String cache4Key = "hive.tpch.test_drop_cache_4";
    SplitCacheMap cacheMap = SplitCacheMap.getInstance();

    // Register the predicates, verifying each one right after its CACHE TABLE statement.
    assertQuerySucceeds("CACHE TABLE test_drop_cache_3 WHERE p1 = 1");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache3Key).showPredicates().contains("(p1 = 1)"));

    assertQuerySucceeds("CACHE TABLE test_drop_cache_4 WHERE p3 = 3");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache4Key).showPredicates().contains("(p3 = 3)"));

    assertQuerySucceeds("CACHE TABLE test_drop_cache_4 WHERE p3 = 4");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache4Key).showPredicates().contains("(p3 = 4)"));

    assertQuerySucceeds("CACHE TABLE test_drop_cache_3 WHERE p2 = 2");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache3Key).showPredicates().contains("(p2 = 2)"));

    // Dropping p1 = 1 must leave p2 = 2 in place on the same table.
    assertQuerySucceeds("DROP CACHE test_drop_cache_3 WHERE p1 = 1");
    assertFalse(cacheMap.tableCacheInfoMap().get(cache3Key).showPredicates().contains("(p1 = 1)"));
    assertTrue(cacheMap.tableCacheInfoMap().get(cache3Key).showPredicates().contains("(p2 = 2)"));

    // Dropping p3 = 4 must leave p3 = 3 in place on the same table.
    assertQuerySucceeds("DROP CACHE test_drop_cache_4 WHERE p3 = 4");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache4Key).showPredicates().contains("(p3 = 3)"));
    assertFalse(cacheMap.tableCacheInfoMap().get(cache4Key).showPredicates().contains("(p3 = 4)"));
}
Also used : SplitCacheMap(io.prestosql.execution.SplitCacheMap) Test(org.testng.annotations.Test)

Example 7 with SplitCacheMap

Use of io.prestosql.execution.SplitCacheMap in project hetu-core by openlookeng.

In the class SplitCacheAwareNodeSelector, method computeAssignments.

/**
 * Computes node assignments for the given splits, sending a split back to the
 * node recorded for it in the {@link SplitCacheMap} whenever that node is still
 * one of the active connector nodes of the split's catalog.
 *
 * <p>Splits whose connector split is not cacheable, splits for which
 * {@code createSplitKey} returns {@code null}, and cacheable splits without a
 * usable recorded node are handed to {@code defaultNodeSelector}. For the
 * cacheable ones among them, the node picked by the default selector is then
 * recorded in the cache map.
 *
 * @param splits the splits to place
 * @param existingTasks tasks forwarded to the assignment stats and to the default selector
 * @param stage the stage being scheduled, if any; when it is present and its fragment
 *        has a feeder CTE id, {@code updateFeederNodeAndSplitCount} is invoked
 * @return a placement result carrying the default selector's blocked value and the
 *         combined cached plus default assignments
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    Map<CatalogName, Map<String, InternalNode>> activeByCatalog = new HashMap<>();
    Set<Split> notCacheable = new HashSet<>();
    Set<Split> toBeCached = new HashSet<>();
    NodeMap currentNodeMap = this.nodeMap.get().get();
    NodeAssignmentStats stats = new NodeAssignmentStats(nodeTaskMap, currentNodeMap, existingTasks);
    SplitCacheMap cacheMap = SplitCacheMap.getInstance();

    for (Split candidate : splits) {
        SplitKey key = createSplitKey(candidate);
        Optional<String> cachedNodeId = (key != null)
                ? cacheMap.getCachedNodeId(key)
                : Optional.<String>empty();

        boolean cacheable = candidate.getConnectorSplit().isCacheable() && key != null;
        if (!cacheable) {
            // the default node selector takes care of splits that cannot be cached
            notCacheable.add(candidate);
            continue;
        }

        Map<String, InternalNode> activeNodes = activeByCatalog.computeIfAbsent(
                candidate.getCatalogName(),
                catalog -> nodeManager.getActiveConnectorNodes(catalog).stream()
                        .collect(Collectors.toMap(InternalNode::getNodeIdentifier, Function.identity())));

        // resolves to null when no node was recorded or the recorded node is no longer active
        InternalNode cachedNode = cachedNodeId.map(activeNodes::get).orElse(null);
        if (cachedNode == null) {
            // split has not been cached before, or its recorded node is now inactive
            toBeCached.add(candidate);
        } else {
            // keep the split on the node it was assigned to previously
            assignment.put(cachedNode, candidate);
            stats.addAssignedSplit(cachedNode);
        }
    }

    log.info("%d out of %d splits already cached. %d new splits to be cached. %d splits cannot be cached.", assignment.size(), splits.size(), toBeCached.size(), notCacheable.size());

    // Everything without a usable cached node goes through the default selector:
    // uncacheable splits, newly cacheable splits, and splits whose cached worker is inactive.
    Set<Split> remaining = new HashSet<>();
    remaining.addAll(toBeCached);
    remaining.addAll(notCacheable);
    SplitPlacementResult fallbackPlacement = defaultNodeSelector.computeAssignments(remaining, existingTasks, stage);

    fallbackPlacement.getAssignments().forEach((node, placedSplit) -> {
        // record (or refresh) the cached node id for cacheable splits only
        if (toBeCached.contains(placedSplit)) {
            SplitKey placedKey = createSplitKey(placedSplit);
            if (placedKey != null) {
                cacheMap.addCachedNode(placedKey, node.getNodeIdentifier());
            }
        }
        stats.addAssignedSplit(node);
    });
    assignment.putAll(fallbackPlacement.getAssignments());

    // A stage whose fragment carries a feeder CTE id gets its feeder node and split count updated
    stage.filter(execution -> execution.getFragment().getFeederCTEId().isPresent())
            .ifPresent(execution -> updateFeederNodeAndSplitCount(execution, assignment));

    return new SplitPlacementResult(fallbackPlacement.getBlocked(), assignment);
}
Also used : SplitCacheMap(io.prestosql.execution.SplitCacheMap) NodeScheduler.randomizedNodes(io.prestosql.execution.scheduler.NodeScheduler.randomizedNodes) Logger(io.airlift.log.Logger) Supplier(com.google.common.base.Supplier) HashMap(java.util.HashMap) Split(io.prestosql.metadata.Split) Multimap(com.google.common.collect.Multimap) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) SplitCacheMap(io.prestosql.execution.SplitCacheMap) HashSet(java.util.HashSet) HashMultimap(com.google.common.collect.HashMultimap) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NodeTaskMap(io.prestosql.execution.NodeTaskMap) InternalNodeManager(io.prestosql.metadata.InternalNodeManager) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) InternalNode(io.prestosql.metadata.InternalNode) CatalogName(io.prestosql.spi.connector.CatalogName) Set(java.util.Set) Collectors(java.util.stream.Collectors) NodeScheduler.selectNodes(io.prestosql.execution.scheduler.NodeScheduler.selectNodes) List(java.util.List) SplitKey(io.prestosql.execution.SplitKey) Optional(java.util.Optional) NodeScheduler.selectDistributionNodes(io.prestosql.execution.scheduler.NodeScheduler.selectDistributionNodes) SqlStageExecution(io.prestosql.execution.SqlStageExecution) RemoteTask(io.prestosql.execution.RemoteTask) SplitKey(io.prestosql.execution.SplitKey) HashMap(java.util.HashMap) CatalogName(io.prestosql.spi.connector.CatalogName) InternalNode(io.prestosql.metadata.InternalNode) Split(io.prestosql.metadata.Split) HashMap(java.util.HashMap) SplitCacheMap(io.prestosql.execution.SplitCacheMap) Map(java.util.Map) NodeTaskMap(io.prestosql.execution.NodeTaskMap) HashSet(java.util.HashSet)

Example 8 with SplitCacheMap

Use of io.prestosql.execution.SplitCacheMap in project boostkit-bigdata by kunpengcompute.

In the class TestOrcCache, method testCacheTableWithComplexPredicate.

/**
 * Caches the employee table under two compound predicates, checks the ordered
 * result of the matching SELECT for each, and verifies that each predicate is
 * reported by the {@link SplitCacheMap} for that table.
 */
@Test
public void testCacheTableWithComplexPredicate() {
    final String employeeKey = "hive.tpch.employee";
    SplitCacheMap cacheMap = SplitCacheMap.getInstance();

    // First predicate: date range combined with a performance bound.
    assertQuerySucceeds("CACHE TABLE employee WHERE dob BETWEEN DATE '1980-01-01' AND DATE '2000-01-01' AND perf < DOUBLE '9.0'");
    assertQueryOrdered(
            "SELECT * FROM employee WHERE dob BETWEEN DATE '1980-01-01' AND DATE '2000-01-01' AND perf < DOUBLE '9.0' ORDER BY id",
            "VALUES (0, 'Alice', '1995-10-09', 8.0), (4, 'Lenard', '1980-06-24', 8.8)");
    assertTrue(cacheMap.tableCacheInfoMap().get(employeeKey).showPredicates()
            .contains("((dob BETWEEN DATE '1980-01-01' AND DATE '2000-01-01') AND (perf < DOUBLE '9.0'))"));

    // Second predicate: upper date bound combined with a performance bound.
    assertQuerySucceeds("CACHE TABLE employee WHERE dob < DATE '1980-01-01' AND perf < DOUBLE '8.0'");
    assertQueryOrdered(
            "SELECT * FROM employee WHERE dob < DATE '1980-01-01' AND perf < DOUBLE '8.0' ORDER BY id",
            "VALUES (6, 'Trump', '1945-08-15', 2.5)");
    assertTrue(cacheMap.tableCacheInfoMap().get(employeeKey).showPredicates()
            .contains("((dob < DATE '1980-01-01') AND (perf < DOUBLE '8.0'))"));
}
Also used : SplitCacheMap(io.prestosql.execution.SplitCacheMap) Test(org.testng.annotations.Test)

Example 9 with SplitCacheMap

Use of io.prestosql.execution.SplitCacheMap in project boostkit-bigdata by kunpengcompute.

In the class TestOrcCache, method testDropCacheWithPredicates.

/**
 * Caches two predicates on each of two tables, then drops one predicate per
 * table and checks that only the dropped predicate disappears from the
 * predicates reported by the {@link SplitCacheMap}.
 */
@Test
public void testDropCacheWithPredicates() {
    final String cache3Key = "hive.tpch.test_drop_cache_3";
    final String cache4Key = "hive.tpch.test_drop_cache_4";
    SplitCacheMap cacheMap = SplitCacheMap.getInstance();

    // Register the predicates, verifying each one right after its CACHE TABLE statement.
    assertQuerySucceeds("CACHE TABLE test_drop_cache_3 WHERE p1 = 1");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache3Key).showPredicates().contains("(p1 = 1)"));

    assertQuerySucceeds("CACHE TABLE test_drop_cache_4 WHERE p3 = 3");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache4Key).showPredicates().contains("(p3 = 3)"));

    assertQuerySucceeds("CACHE TABLE test_drop_cache_4 WHERE p3 = 4");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache4Key).showPredicates().contains("(p3 = 4)"));

    assertQuerySucceeds("CACHE TABLE test_drop_cache_3 WHERE p2 = 2");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache3Key).showPredicates().contains("(p2 = 2)"));

    // Dropping p1 = 1 must leave p2 = 2 in place on the same table.
    assertQuerySucceeds("DROP CACHE test_drop_cache_3 WHERE p1 = 1");
    assertFalse(cacheMap.tableCacheInfoMap().get(cache3Key).showPredicates().contains("(p1 = 1)"));
    assertTrue(cacheMap.tableCacheInfoMap().get(cache3Key).showPredicates().contains("(p2 = 2)"));

    // Dropping p3 = 4 must leave p3 = 3 in place on the same table.
    assertQuerySucceeds("DROP CACHE test_drop_cache_4 WHERE p3 = 4");
    assertTrue(cacheMap.tableCacheInfoMap().get(cache4Key).showPredicates().contains("(p3 = 3)"));
    assertFalse(cacheMap.tableCacheInfoMap().get(cache4Key).showPredicates().contains("(p3 = 4)"));
}
Also used : SplitCacheMap(io.prestosql.execution.SplitCacheMap) Test(org.testng.annotations.Test)

Example 10 with SplitCacheMap

Use of io.prestosql.execution.SplitCacheMap in project boostkit-bigdata by kunpengcompute.

In the class TestOrcCache, method testShowCache.

@Test
public void testShowCache() {
    SplitCacheMap splitCacheMap = SplitCacheMap.getInstance();
    assertQuerySucceeds("CACHE TABLE employee WHERE dob BETWEEN DATE '1980-01-01' AND DATE '2000-01-01'");
    assertTrue(splitCacheMap.tableCacheInfoMap().get("hive.tpch.employee").showPredicates().contains("(dob BETWEEN DATE '1980-01-01' AND DATE '2000-01-01')"));
    assertQuerySucceeds("CACHE TABLE employee WHERE perf > DOUBLE '9.0'");
    assertTrue(splitCacheMap.tableCacheInfoMap().get("hive.tpch.employee").showPredicates().contains("(dob BETWEEN DATE '1980-01-01' AND DATE '2000-01-01')"));
    assertTrue(splitCacheMap.tableCacheInfoMap().get("hive.tpch.employee").showPredicates().contains("(perf > DOUBLE '9.0')"));
}
Also used : SplitCacheMap(io.prestosql.execution.SplitCacheMap) Test(org.testng.annotations.Test)

Aggregations

SplitCacheMap (io.prestosql.execution.SplitCacheMap)20 Test (org.testng.annotations.Test)19 NodeTaskMap (io.prestosql.execution.NodeTaskMap)2 SplitKey (io.prestosql.execution.SplitKey)2 InternalNode (io.prestosql.metadata.InternalNode)2 Split (io.prestosql.metadata.Split)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 Supplier (com.google.common.base.Supplier)1 Suppliers (com.google.common.base.Suppliers)1 HashMultimap (com.google.common.collect.HashMultimap)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Multimap (com.google.common.collect.Multimap)1 Logger (io.airlift.log.Logger)1 MockSplit (io.prestosql.MockSplit)1 RemoteTask (io.prestosql.execution.RemoteTask)1 SqlStageExecution (io.prestosql.execution.SqlStageExecution)1 NodeScheduler.randomizedNodes (io.prestosql.execution.scheduler.NodeScheduler.randomizedNodes)1 NodeScheduler.selectDistributionNodes (io.prestosql.execution.scheduler.NodeScheduler.selectDistributionNodes)1