
Example 11 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

In class TestNodeScheduler, method testTopologyAwareScheduling:

@Test(timeOut = 60 * 1000)
public void testTopologyAwareScheduling() throws Exception {
    NodeTaskMap nodeMap = new NodeTaskMap(finalizerService);
    InMemoryNodeManager memoryNodeManager = new InMemoryNodeManager();
    ImmutableList.Builder<InternalNode> nodeBuilder = ImmutableList.builder();
    nodeBuilder.add(new InternalNode("node1", URI.create("http://host1.rack1:11"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node2", URI.create("http://host2.rack1:12"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node3", URI.create("http://host3.rack2:13"), NodeVersion.UNKNOWN, false));
    ImmutableList<InternalNode> nodes = nodeBuilder.build();
    memoryNodeManager.addNode(CONNECTOR_ID, nodes);
    // contents of taskMap indicate the node-task map for the current stage
    Map<InternalNode, RemoteTask> nodeRemoteTaskHashMap = new HashMap<>();
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(25).setIncludeCoordinator(false).setNetworkTopology("test").setMaxPendingSplitsPerTask(20);
    TestNetworkTopology topology = new TestNetworkTopology();
    NetworkLocationCache locationCache = new NetworkLocationCache(topology) {

        @Override
        public NetworkLocation get(HostAddress host) {
            // Bypass the cache for workers, since we only look them up once and they would all be unresolved otherwise
            if (host.getHostText().startsWith("host")) {
                return topology.locate(host);
            } else {
                return super.get(host);
            }
        }
    };
    NodeScheduler nodeScheduler = new NodeScheduler(locationCache, topology, memoryNodeManager, nodeSchedulerConfig, nodeMap);
    NodeSelector selector = nodeScheduler.createNodeSelector(CONNECTOR_ID, false, null);
    // Fill up the nodes with non-local data
    ImmutableSet.Builder<Split> nonRackLocalBuilder = ImmutableSet.builder();
    for (int i = 0; i < (25 + 11) * 3; i++) {
        nonRackLocalBuilder.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("data.other_rack", 1)), Lifespan.taskWide()));
    }
    Set<Split> nonRackLocalSplits = nonRackLocalBuilder.build();
    Multimap<InternalNode, Split> assignments = selector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    int task = 0;
    for (InternalNode node : assignments.keySet()) {
        TaskId taskId = new TaskId("test", 1, task);
        task++;
        MockRemoteTaskFactory.MockRemoteTask remoteTask = remoteTaskFactory.createTableScanTask(taskId, node, ImmutableList.copyOf(assignments.get(node)), nodeMap.createPartitionedSplitCountTracker(node, taskId));
        remoteTask.startSplits(25);
        nodeMap.addTask(node, remoteTask);
        nodeRemoteTaskHashMap.put(node, remoteTask);
    }
    // Continue assigning to fill up part of the queue
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    assignments = selector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    // Check that 3 of the splits were rejected, since they're non-local
    assertEquals(nonRackLocalSplits.size(), 3);
    // Assign rack-local splits
    ImmutableSet.Builder<Split> rackLocalSplits = ImmutableSet.builder();
    HostAddress dataHost1 = HostAddress.fromParts("data.rack1", 1);
    HostAddress dataHost2 = HostAddress.fromParts("data.rack2", 1);
    for (int i = 0; i < 6 * 2; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost1), Lifespan.taskWide()));
    }
    for (int i = 0; i < 6; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost2), Lifespan.taskWide()));
    }
    assignments = selector.computeAssignments(rackLocalSplits.build(), ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    Set<Split> unassigned = Sets.difference(rackLocalSplits.build(), new HashSet<>(assignments.values()));
    // Compute the assignments a second time to account for the fact that some splits may not have been assigned due to asynchronous
    // loading of the NetworkLocationCache
    boolean cacheRefreshed = false;
    while (!cacheRefreshed) {
        cacheRefreshed = true;
        if (locationCache.get(dataHost1).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        if (locationCache.get(dataHost2).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        MILLISECONDS.sleep(10);
    }
    assignments = selector.computeAssignments(unassigned, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    unassigned = Sets.difference(unassigned, new HashSet<>(assignments.values()));
    assertEquals(unassigned.size(), 3);
    int rack1 = 0;
    int rack2 = 0;
    for (Split split : unassigned) {
        String rack = topology.locate(split.getAddresses().get(0)).getSegments().get(0);
        switch(rack) {
            case "rack1":
                rack1++;
                break;
            case "rack2":
                rack2++;
                break;
            default:
                fail();
        }
    }
    assertEquals(rack1, 2);
    assertEquals(rack2, 1);
    // Assign local splits
    ImmutableSet.Builder<Split> localSplits = ImmutableSet.builder();
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host1.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host2.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host3.rack2", 1)), Lifespan.taskWide()));
    assignments = selector.computeAssignments(localSplits.build(), ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    assertEquals(assignments.size(), 3);
    assertEquals(assignments.keySet().size(), 3);
}
Also used: TaskId (io.prestosql.execution.TaskId), HashMap (java.util.HashMap), ImmutableList (com.google.common.collect.ImmutableList), HostAddress (io.prestosql.spi.HostAddress), PlanNodeId (io.prestosql.spi.plan.PlanNodeId), ImmutableSet (com.google.common.collect.ImmutableSet), HashSet (java.util.HashSet), LinkedHashSet (java.util.LinkedHashSet), NodeTaskMap (io.prestosql.execution.NodeTaskMap), RemoteTask (io.prestosql.execution.RemoteTask), InMemoryNodeManager (io.prestosql.metadata.InMemoryNodeManager), InternalNode (io.prestosql.metadata.InternalNode), MockSplit (io.prestosql.MockSplit), ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit), Split (io.prestosql.metadata.Split), TestingSplit (io.prestosql.testing.TestingSplit), MockRemoteTaskFactory (io.prestosql.execution.MockRemoteTaskFactory), Test (org.testng.annotations.Test)
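
The test above drives topology-aware scheduling entirely through HostAddress values whose host text encodes the rack (host1.rack1, data.rack1, and so on), which the test's TestNetworkTopology turns into network locations. A minimal standalone sketch of that HostAddress usage, relying only on the factory and accessor methods already visible in these examples (fromParts, getHostText, hasPort) plus getPort, which is assumed to exist alongside hasPort:

import io.prestosql.spi.HostAddress;

public final class HostAddressSketch {
    private HostAddressSketch() {}

    public static void main(String[] args) {
        // Build an address the same way the test builds its rack-local split addresses.
        HostAddress rackLocal = HostAddress.fromParts("data.rack1", 1);
        System.out.println(rackLocal.getHostText()); // "data.rack1"
        System.out.println(rackLocal.hasPort());     // true
        System.out.println(rackLocal.getPort());     // 1 (assumed accessor, see lead-in)
    }
}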

Example 12 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

In class NodeScheduler, method selectExactNodes:

public static List<InternalNode> selectExactNodes(NodeMap nodeMap, List<HostAddress> hosts, boolean includeCoordinator) {
    Set<InternalNode> chosen = new LinkedHashSet<>();
    for (HostAddress host : hosts) {
        nodeMap.getNodesByHostAndPort().get(host).stream().filter(InternalNode::isWorker).forEach(chosen::add);
        InetAddress address;
        try {
            address = host.toInetAddress();
        } catch (UnknownHostException e) {
            // skip hosts that don't resolve
            continue;
        }
        // consider a split with a host without a port as being accessible by all nodes in that host
        if (!host.hasPort()) {
            nodeMap.getNodesByHost().get(address).stream().filter(InternalNode::isWorker).forEach(chosen::add);
        }
    }
    // if the chosen set is empty and the host is the coordinator, force pick the coordinator
    if (chosen.isEmpty() && !includeCoordinator) {
        for (HostAddress host : hosts) {
            // In the code below, before calling `chosen::add`, it could have been checked that
            // `coordinatorIds.contains(node.getNodeIdentifier())`. But checking the condition isn't necessary
            // because every node satisfies it. Otherwise, `chosen` wouldn't have been empty.
            chosen.addAll(nodeMap.getNodesByHostAndPort().get(host));
            InetAddress address;
            try {
                address = host.toInetAddress();
            } catch (UnknownHostException e) {
                // skip hosts that don't resolve
                continue;
            }
            // consider a split with a host without a port as being accessible by all nodes in that host
            if (!host.hasPort()) {
                chosen.addAll(nodeMap.getNodesByHost().get(address));
            }
        }
    }
    return ImmutableList.copyOf(chosen);
}
Also used: LinkedHashSet (java.util.LinkedHashSet), UnknownHostException (java.net.UnknownHostException), InternalNode (io.prestosql.metadata.InternalNode), HostAddress (io.prestosql.spi.HostAddress), InetAddress (java.net.InetAddress)
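
selectExactNodes above distinguishes two address forms: every address is matched against getNodesByHostAndPort, and a port-less address is additionally matched against all nodes on that host via getNodesByHost (after resolving it with toInetAddress). A short sketch of the two forms, assuming HostAddress.fromString exists and parses an optional port the way fromParts supplies one; the host name is a placeholder and would not resolve:

import io.prestosql.spi.HostAddress;

public final class ExactNodeAddressForms {
    private ExactNodeAddressForms() {}

    public static void main(String[] args) {
        // With an explicit port: only nodes registered under exactly this host-and-port match.
        HostAddress withPort = HostAddress.fromParts("worker1.example", 8080);
        // Without a port: every worker on the host is considered accessible for the split.
        HostAddress hostOnly = HostAddress.fromString("worker1.example"); // fromString is an assumption, see lead-in
        System.out.println(withPort.hasPort()); // true
        System.out.println(hostOnly.hasPort()); // false
    }
}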

Example 13 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

In class InformationSchemaSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy) {
    List<HostAddress> localAddress = ImmutableList.of(nodeManager.getCurrentNode().getHostAndPort());
    ConnectorSplit split = new InformationSchemaSplit(localAddress);
    return new FixedSplitSource(ImmutableList.of(split));
}
Also used: FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource), HostAddress (io.prestosql.spi.HostAddress), ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)
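
FixedSplitSource simply hands back the splits it was constructed with, so the information_schema connector ends up with exactly one split pinned to the coordinator's own HostAddress. A hedged sketch of reading that split back out of the returned source, assuming this SPI generation's ConnectorSplitSource.getNextBatch(partitionHandle, maxSize) signature and the NOT_PARTITIONED handle in io.prestosql.spi.connector:

import static io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

import io.prestosql.spi.connector.ConnectorSplit;
import io.prestosql.spi.connector.ConnectorSplitSource;
import java.util.List;

public final class ReadBackSplits {
    private ReadBackSplits() {}

    // Reads one batch, which is everything for the single-split FixedSplitSource above.
    // getNextBatch/NOT_PARTITIONED are assumptions about this SPI version; adjust if the signature differs.
    public static List<ConnectorSplit> readBatch(ConnectorSplitSource splitSource) {
        return splitSource.getNextBatch(NOT_PARTITIONED, 1000).join().getSplits();
    }
}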

Example 14 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

In class SystemSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
    SystemTableHandle table = (SystemTableHandle) tableHandle;
    TupleDomain<ColumnHandle> constraint = table.getConstraint();
    SystemTable systemTable = tables.getSystemTable(session, table.getSchemaTableName()).orElseThrow(() -> new TableNotFoundException(table.getSchemaTableName()));
    Distribution tableDistributionMode = systemTable.getDistribution();
    if (tableDistributionMode == SINGLE_COORDINATOR) {
        HostAddress address = nodeManager.getCurrentNode().getHostAndPort();
        ConnectorSplit split = new SystemSplit(address, constraint);
        return new FixedSplitSource(ImmutableList.of(split));
    }
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    ImmutableSet.Builder<InternalNode> nodes = ImmutableSet.builder();
    if (tableDistributionMode == ALL_COORDINATORS) {
        nodes.addAll(nodeManager.getCoordinators());
    } else if (tableDistributionMode == ALL_NODES) {
        nodes.addAll(nodeManager.getNodes(ACTIVE));
    }
    Set<InternalNode> nodeSet = nodes.build();
    for (InternalNode node : nodeSet) {
        splits.add(new SystemSplit(node.getHostAndPort(), constraint));
    }
    return new FixedSplitSource(splits.build());
}
Also used: ColumnHandle (io.prestosql.spi.connector.ColumnHandle), ImmutableList (com.google.common.collect.ImmutableList), HostAddress (io.prestosql.spi.HostAddress), TableNotFoundException (io.prestosql.spi.connector.TableNotFoundException), ImmutableSet (com.google.common.collect.ImmutableSet), FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource), Distribution (io.prestosql.spi.connector.SystemTable.Distribution), SystemTable (io.prestosql.spi.connector.SystemTable), InternalNode (io.prestosql.metadata.InternalNode), ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)
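
The three Distribution branches above decide which nodes receive a SystemSplit: SINGLE_COORDINATOR pins one split to the current coordinator, ALL_COORDINATORS fans out to every coordinator, and ALL_NODES fans out to every active node. A hypothetical restatement of just that branch logic (not part of hetu-core), assuming the nodeManager above is an io.prestosql.metadata.InternalNodeManager; the active-node set is passed in so the sketch does not need the NodeState import:

import com.google.common.collect.ImmutableSet;
import io.prestosql.metadata.InternalNode;
import io.prestosql.metadata.InternalNodeManager;
import io.prestosql.spi.connector.SystemTable.Distribution;
import java.util.Set;

public final class DistributionNodes {
    private DistributionNodes() {}

    // Mirrors the branches in SystemSplitManager.getSplits above; illustrative only.
    public static Set<InternalNode> nodesFor(Distribution distribution, InternalNodeManager nodeManager, Set<InternalNode> activeNodes) {
        switch (distribution) {
            case ALL_COORDINATORS:
                return ImmutableSet.copyOf(nodeManager.getCoordinators());
            case ALL_NODES:
                // The real code fetches these with nodeManager.getNodes(ACTIVE).
                return ImmutableSet.copyOf(activeNodes);
            case SINGLE_COORDINATOR:
            default:
                return ImmutableSet.of(nodeManager.getCurrentNode());
        }
    }
}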

Example 15 with HostAddress

Use of io.prestosql.spi.HostAddress in project boostkit-bigdata by kunpengcompute.

In class TestHiveSplitSource, method testGroupSmallSplit:

@Test
public void testGroupSmallSplit() {
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(10);
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(HiveTestUtils.SESSION, "database", "table", 10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE, new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);
    List<HostAddress> hostAddress = new ArrayList<>();
    hostAddress.add(new HostAddress("vm1", 1));
    hostAddress.add(new HostAddress("vm3", 1));
    hostAddress.add(new HostAddress("vm2", 1));
    for (int i = 0; i < 12; i++) {
        hiveSplitSource.addToQueue(new TestSplit(i, hostAddress));
        assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), i + 1);
    }
    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
    assertEquals(groupedConnectorSplits.size(), 3);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
    assertEquals(hiveSplitWrappers.get(0).getSplits().size(), 4);
    assertEquals(hiveSplitWrappers.get(1).getSplits().size(), 4);
    assertEquals(hiveSplitWrappers.get(2).getSplits().size(), 4);
}
Also used: CounterStat (io.airlift.stats.CounterStat), ArrayList (java.util.ArrayList), HostAddress (io.prestosql.spi.HostAddress), DataSize (io.airlift.units.DataSize), ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit), Test (org.testng.annotations.Test)
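
The assertions above pin down the shape of the grouping: 12 buffered splits, each readable from the three hosts vm1..vm3, come back as 3 HiveSplitWrapper groups of 4 splits. As a follow-up sanity check (hypothetical, continuing the same test method and reusing its hiveSplitWrappers variable and TestNG's assertEquals), the wrappers together should still carry all 12 underlying splits:

    // Hypothetical extra assertion: grouping must not drop or duplicate splits.
    int totalGroupedSplits = hiveSplitWrappers.stream()
            .mapToInt(wrapper -> wrapper.getSplits().size())
            .sum();
    assertEquals(totalGroupedSplits, 12);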

Aggregations

HostAddress (io.prestosql.spi.HostAddress): 38
ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit): 25
Test (org.testng.annotations.Test): 25
ArrayList (java.util.ArrayList): 23
CounterStat (io.airlift.stats.CounterStat): 18
DataSize (io.airlift.units.DataSize): 18
ImmutableList (com.google.common.collect.ImmutableList): 5
InternalNode (io.prestosql.metadata.InternalNode): 5
FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource): 5
HashMap (java.util.HashMap): 5
List (java.util.List): 5
HBaseSplit (io.hetu.core.plugin.hbase.split.HBaseSplit): 3
PrestoException (io.prestosql.spi.PrestoException): 3
ImmutableSet (com.google.common.collect.ImmutableSet): 2
HBaseColumnHandle (io.hetu.core.plugin.hbase.connector.HBaseColumnHandle): 2
HBaseTableHandle (io.hetu.core.plugin.hbase.connector.HBaseTableHandle): 2
HBaseRecordSet (io.hetu.core.plugin.hbase.query.HBaseRecordSet): 2
Split (io.prestosql.metadata.Split): 2
Node (io.prestosql.spi.Node): 2
ColumnHandle (io.prestosql.spi.connector.ColumnHandle): 2