Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
The class TestNodeScheduler, method testTopologyAwareScheduling.
@Test(timeOut = 60 * 1000)
public void testTopologyAwareScheduling()
        throws Exception
{
    NodeTaskMap nodeMap = new NodeTaskMap(finalizerService);
    InMemoryNodeManager memoryNodeManager = new InMemoryNodeManager();

    ImmutableList.Builder<InternalNode> nodeBuilder = ImmutableList.builder();
    nodeBuilder.add(new InternalNode("node1", URI.create("http://host1.rack1:11"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node2", URI.create("http://host2.rack1:12"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node3", URI.create("http://host3.rack2:13"), NodeVersion.UNKNOWN, false));
    ImmutableList<InternalNode> nodes = nodeBuilder.build();
    memoryNodeManager.addNode(CONNECTOR_ID, nodes);

    // contents of taskMap indicate the node-task map for the current stage
    Map<InternalNode, RemoteTask> nodeRemoteTaskHashMap = new HashMap<>();
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig()
            .setMaxSplitsPerNode(25)
            .setIncludeCoordinator(false)
            .setNetworkTopology("test")
            .setMaxPendingSplitsPerTask(20);

    TestNetworkTopology topology = new TestNetworkTopology();
    NetworkLocationCache locationCache = new NetworkLocationCache(topology)
    {
        @Override
        public NetworkLocation get(HostAddress host)
        {
            // Bypass the cache for workers, since we only look them up once and they would all be unresolved otherwise
            if (host.getHostText().startsWith("host")) {
                return topology.locate(host);
            }
            else {
                return super.get(host);
            }
        }
    };
    NodeScheduler nodeScheduler = new NodeScheduler(locationCache, topology, memoryNodeManager, nodeSchedulerConfig, nodeMap);
    NodeSelector selector = nodeScheduler.createNodeSelector(CONNECTOR_ID, false, null);

    // Fill up the nodes with non-local data
    ImmutableSet.Builder<Split> nonRackLocalBuilder = ImmutableSet.builder();
    for (int i = 0; i < (25 + 11) * 3; i++) {
        nonRackLocalBuilder.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("data.other_rack", 1)), Lifespan.taskWide()));
    }
    Set<Split> nonRackLocalSplits = nonRackLocalBuilder.build();
    Multimap<InternalNode, Split> assignments = selector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    int task = 0;
    for (InternalNode node : assignments.keySet()) {
        TaskId taskId = new TaskId("test", 1, task);
        task++;
        MockRemoteTaskFactory.MockRemoteTask remoteTask = remoteTaskFactory.createTableScanTask(taskId, node, ImmutableList.copyOf(assignments.get(node)), nodeMap.createPartitionedSplitCountTracker(node, taskId));
        remoteTask.startSplits(25);
        nodeMap.addTask(node, remoteTask);
        nodeRemoteTaskHashMap.put(node, remoteTask);
    }

    // Continue assigning to fill up part of the queue
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    assignments = selector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder()
                .putAll(new PlanNodeId("sourceId"), assignments.get(node))
                .build());
    }
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    // Check that 3 of the splits were rejected, since they're non-local
    assertEquals(nonRackLocalSplits.size(), 3);

    // Assign rack-local splits
    ImmutableSet.Builder<Split> rackLocalSplits = ImmutableSet.builder();
    HostAddress dataHost1 = HostAddress.fromParts("data.rack1", 1);
    HostAddress dataHost2 = HostAddress.fromParts("data.rack2", 1);
    for (int i = 0; i < 6 * 2; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost1), Lifespan.taskWide()));
    }
    for (int i = 0; i < 6; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(dataHost2), Lifespan.taskWide()));
    }
    assignments = selector.computeAssignments(rackLocalSplits.build(), ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder()
                .putAll(new PlanNodeId("sourceId"), assignments.get(node))
                .build());
    }
    Set<Split> unassigned = Sets.difference(rackLocalSplits.build(), new HashSet<>(assignments.values()));

    // Compute the assignments a second time to account for the fact that some splits may not have been assigned
    // due to asynchronous loading of the NetworkLocationCache
    boolean cacheRefreshed = false;
    while (!cacheRefreshed) {
        cacheRefreshed = true;
        if (locationCache.get(dataHost1).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        if (locationCache.get(dataHost2).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        MILLISECONDS.sleep(10);
    }
    assignments = selector.computeAssignments(unassigned, ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = nodeRemoteTaskHashMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder()
                .putAll(new PlanNodeId("sourceId"), assignments.get(node))
                .build());
    }
    unassigned = Sets.difference(unassigned, new HashSet<>(assignments.values()));
    assertEquals(unassigned.size(), 3);
    int rack1 = 0;
    int rack2 = 0;
    for (Split split : unassigned) {
        String rack = topology.locate(split.getAddresses().get(0)).getSegments().get(0);
        switch (rack) {
            case "rack1":
                rack1++;
                break;
            case "rack2":
                rack2++;
                break;
            default:
                fail();
        }
    }
    assertEquals(rack1, 2);
    assertEquals(rack2, 1);

    // Assign local splits
    ImmutableSet.Builder<Split> localSplits = ImmutableSet.builder();
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host1.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host2.rack1", 1)), Lifespan.taskWide()));
    localSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(HostAddress.fromParts("host3.rack2", 1)), Lifespan.taskWide()));
    assignments = selector.computeAssignments(localSplits.build(), ImmutableList.copyOf(nodeRemoteTaskHashMap.values()), Optional.empty()).getAssignments();
    assertEquals(assignments.size(), 3);
    assertEquals(assignments.keySet().size(), 3);
}
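The test's locality assertions hinge on host names encoding their rack ("host1.rack1", "data.other_rack"). TestNetworkTopology's actual locate logic is not shown on this page, so the following is a hypothetical sketch in that spirit; the class name RackNames and the "default_rack" fallback are assumptions for illustration only.

import io.prestosql.spi.HostAddress;

public final class RackNames
{
    private RackNames() {}

    public static String rackOf(HostAddress host)
    {
        // "host1.rack1" -> "rack1"; names without a dot fall into a default rack
        String name = host.getHostText();
        int dot = name.indexOf('.');
        return (dot < 0) ? "default_rack" : name.substring(dot + 1);
    }
}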
Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
The class NodeScheduler, method selectExactNodes.
public static List<InternalNode> selectExactNodes(NodeMap nodeMap, List<HostAddress> hosts, boolean includeCoordinator)
{
    Set<InternalNode> chosen = new LinkedHashSet<>();
    for (HostAddress host : hosts) {
        nodeMap.getNodesByHostAndPort().get(host).stream()
                .filter(InternalNode::isWorker)
                .forEach(chosen::add);

        InetAddress address;
        try {
            address = host.toInetAddress();
        }
        catch (UnknownHostException e) {
            // skip hosts that don't resolve
            continue;
        }

        // consider a split with a host without a port as being accessible by all nodes in that host
        if (!host.hasPort()) {
            nodeMap.getNodesByHost().get(address).stream()
                    .filter(InternalNode::isWorker)
                    .forEach(chosen::add);
        }
    }

    // if the chosen set is empty and the host is the coordinator, force pick the coordinator
    if (chosen.isEmpty() && !includeCoordinator) {
        for (HostAddress host : hosts) {
            // In the code below, before calling `chosen::add`, it could have been checked that
            // `coordinatorIds.contains(node.getNodeIdentifier())`. But checking the condition
            // isn't necessary because every node satisfies it. Otherwise, `chosen` wouldn't
            // have been empty.
            chosen.addAll(nodeMap.getNodesByHostAndPort().get(host));

            InetAddress address;
            try {
                address = host.toInetAddress();
            }
            catch (UnknownHostException e) {
                // skip hosts that don't resolve
                continue;
            }

            // consider a split with a host without a port as being accessible by all nodes in that host
            if (!host.hasPort()) {
                chosen.addAll(nodeMap.getNodesByHost().get(address));
            }
        }
    }
    return ImmutableList.copyOf(chosen);
}
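selectExactNodes treats a HostAddress carrying a port as an exact node key and a port-less one as "any node on that machine". A small sketch of both forms, assuming io.prestosql.spi.HostAddress mirrors Guava's HostAndPort factory methods (which the Presto SPI version is modeled on):

import io.prestosql.spi.HostAddress;

public final class HostAddressForms
{
    private HostAddressForms() {}

    public static void main(String[] args)
    {
        HostAddress withPort = HostAddress.fromParts("host1.rack1", 8080);
        HostAddress noPort = HostAddress.fromString("host1.rack1");

        System.out.println(withPort.hasPort());   // true -> matched via getNodesByHostAndPort()
        System.out.println(noPort.hasPort());     // false -> matched via getNodesByHost() after DNS resolution
        System.out.println(noPort.getHostText()); // "host1.rack1"
    }
}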
Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
The class InformationSchemaSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy)
{
    List<HostAddress> localAddress = ImmutableList.of(nodeManager.getCurrentNode().getHostAndPort());
    ConnectorSplit split = new InformationSchemaSplit(localAddress);
    return new FixedSplitSource(ImmutableList.of(split));
}
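Here the split is pinned to the coordinator's own HostAddress, which the scheduler later reads back through ConnectorSplit.getAddresses(). A minimal single-node split sketch along those lines, assuming the io.prestosql ConnectorSplit interface (isRemotelyAccessible/getAddresses/getInfo); InformationSchemaSplit itself carries more state, and the class name SingleNodeSplit is hypothetical:

import java.util.List;

import com.google.common.collect.ImmutableList;
import io.prestosql.spi.HostAddress;
import io.prestosql.spi.connector.ConnectorSplit;

public class SingleNodeSplit
        implements ConnectorSplit
{
    private final List<HostAddress> addresses;

    public SingleNodeSplit(HostAddress address)
    {
        this.addresses = ImmutableList.of(address);
    }

    @Override
    public boolean isRemotelyAccessible()
    {
        // false pins the split to the host(s) in getAddresses();
        // the scheduler must not place it on any other node
        return false;
    }

    @Override
    public List<HostAddress> getAddresses()
    {
        return addresses;
    }

    @Override
    public Object getInfo()
    {
        return this;
    }
}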
Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
The class SystemSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy)
{
    SystemTableHandle table = (SystemTableHandle) tableHandle;
    TupleDomain<ColumnHandle> constraint = table.getConstraint();
    SystemTable systemTable = tables.getSystemTable(session, table.getSchemaTableName())
            .orElseThrow(() -> new TableNotFoundException(table.getSchemaTableName()));

    Distribution tableDistributionMode = systemTable.getDistribution();
    if (tableDistributionMode == SINGLE_COORDINATOR) {
        HostAddress address = nodeManager.getCurrentNode().getHostAndPort();
        ConnectorSplit split = new SystemSplit(address, constraint);
        return new FixedSplitSource(ImmutableList.of(split));
    }

    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    ImmutableSet.Builder<InternalNode> nodes = ImmutableSet.builder();
    if (tableDistributionMode == ALL_COORDINATORS) {
        nodes.addAll(nodeManager.getCoordinators());
    }
    else if (tableDistributionMode == ALL_NODES) {
        nodes.addAll(nodeManager.getNodes(ACTIVE));
    }
    Set<InternalNode> nodeSet = nodes.build();
    for (InternalNode node : nodeSet) {
        splits.add(new SystemSplit(node.getHostAndPort(), constraint));
    }
    return new FixedSplitSource(splits.build());
}
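Depending on the table's Distribution, the method emits one split per selected node, each addressed by that node's HostAddress, wrapped in a FixedSplitSource. A hedged sketch of draining such a source batch by batch, assuming the io.prestosql ConnectorSplitSource API and its NOT_PARTITIONED handle; the helper class name is hypothetical:

import java.util.concurrent.ExecutionException;

import io.prestosql.spi.connector.ConnectorSplit;
import io.prestosql.spi.connector.ConnectorSplitSource;

import static io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

public final class SplitSourceDrainer
{
    private SplitSourceDrainer() {}

    public static void printSplitAddresses(ConnectorSplitSource source)
            throws ExecutionException, InterruptedException
    {
        while (!source.isFinished()) {
            // getNextBatch returns a future; FixedSplitSource completes it immediately
            for (ConnectorSplit split : source.getNextBatch(NOT_PARTITIONED, 100).get().getSplits()) {
                System.out.println(split.getAddresses()); // one HostAddress per selected node
            }
        }
        source.close();
    }
}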
Use of io.prestosql.spi.HostAddress in project boostkit-bigdata by kunpengcompute.
The class TestHiveSplitSource, method testGroupSmallSplit.
@Test
public void testGroupSmallSplit()
{
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(10);
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(
            HiveTestUtils.SESSION,
            "database",
            "table",
            10,
            10,
            new DataSize(1, MEGABYTE),
            Integer.MAX_VALUE,
            new TestingHiveSplitLoader(),
            Executors.newFixedThreadPool(5),
            new CounterStat(),
            null,
            null,
            null,
            hiveConfig,
            HiveStorageFormat.ORC);

    // io.prestosql.spi.HostAddress has no public constructor; instances are built via fromParts
    List<HostAddress> hostAddress = new ArrayList<>();
    hostAddress.add(HostAddress.fromParts("vm1", 1));
    hostAddress.add(HostAddress.fromParts("vm3", 1));
    hostAddress.add(HostAddress.fromParts("vm2", 1));

    for (int i = 0; i < 12; i++) {
        hiveSplitSource.addToQueue(new TestSplit(i, hostAddress));
        assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), i + 1);
    }

    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
    assertEquals(groupedConnectorSplits.size(), 3);

    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
    assertEquals(hiveSplitWrappers.get(0).getSplits().size(), 4);
    assertEquals(hiveSplitWrappers.get(1).getSplits().size(), 4);
    assertEquals(hiveSplitWrappers.get(2).getSplits().size(), 4);
}
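The assertions expect 12 equally small splits sharing one replica list to collapse into 3 wrappers of 4 splits each. The following chunking helper is a hypothetical illustration of that effect only; it is not HiveSplitSource.groupSmallSplits' actual algorithm, which also honors the configured maxSplitsToGroup and size limits:

import java.util.ArrayList;
import java.util.List;

public final class SplitChunker
{
    private SplitChunker() {}

    // Chunk `splits` into `groups` nearly equal, contiguous groups:
    // 12 splits with groups = 3 yields three lists of 4, matching the test above.
    public static <T> List<List<T>> chunk(List<T> splits, int groups)
    {
        List<List<T>> result = new ArrayList<>();
        int size = (splits.size() + groups - 1) / groups; // ceiling division
        for (int i = 0; i < splits.size(); i += size) {
            result.add(splits.subList(i, Math.min(i + size, splits.size())));
        }
        return result;
    }
}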