use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class TestHiveSplitSource method testGroupSmallSplitAlternativeFileSize.
@Test
public void testGroupSmallSplitAlternativeFileSize() {
// alternative big and small size total 100 files
HiveConfig hiveConfig = new HiveConfig();
hiveConfig.setMaxSplitsToGroup(100);
HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(HiveTestUtils.SESSION, "database", "table", 10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE, new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);
for (int i = 0; i < 100; i++) {
List<HostAddress> hostAddress = new ArrayList<>();
hostAddress.add(new HostAddress("vm1", 1));
hiveSplitSource.addToQueue(new TestSplit(i, OptionalInt.empty(), 67108864 / (((i + 1) % 2) + 1), hostAddress));
}
List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
System.out.println("hiveSplitWrappers.get(i).getSplits().size() " + groupedConnectorSplits.size());
for (int i = 0; i < 50; i++) {
assertEquals(hiveSplitWrappers.get(i).getSplits().size(), 1);
}
for (int i = 50; i < groupedConnectorSplits.size(); i++) {
System.out.println(hiveSplitWrappers.get(i).getSplits().size());
assertEquals(hiveSplitWrappers.get(i).getSplits().size(), 2);
}
}
use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class KafkaSplitManager method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy) {
KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
try {
SimpleConsumer simpleConsumer = consumerManager.getConsumer(selectRandom(nodes));
TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(ImmutableList.of(kafkaTableHandle.getTopicName()));
TopicMetadataResponse topicMetadataResponse = simpleConsumer.send(topicMetadataRequest);
ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
for (TopicMetadata metadata : topicMetadataResponse.topicsMetadata()) {
for (PartitionMetadata part : metadata.partitionsMetadata()) {
log.debug("Adding Partition %s/%s", metadata.topic(), part.partitionId());
BrokerEndPoint leader = part.leader();
if (leader == null) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Leader election in progress for Kafka topic '%s' partition %s", metadata.topic(), part.partitionId()));
}
HostAddress partitionLeader = HostAddress.fromParts(leader.host(), leader.port());
SimpleConsumer leaderConsumer = consumerManager.getConsumer(partitionLeader);
// Kafka contains a reverse list of "end - start" pairs for the splits
long[] offsets = findAllOffsets(leaderConsumer, metadata.topic(), part.partitionId());
for (int i = offsets.length - 1; i > 0; i--) {
KafkaSplit split = new KafkaSplit(metadata.topic(), kafkaTableHandle.getKeyDataFormat(), kafkaTableHandle.getMessageDataFormat(), kafkaTableHandle.getKeyDataSchemaLocation().map(KafkaSplitManager::readSchema), kafkaTableHandle.getMessageDataSchemaLocation().map(KafkaSplitManager::readSchema), part.partitionId(), offsets[i], offsets[i - 1], partitionLeader);
splits.add(split);
}
}
}
return new FixedSplitSource(splits.build());
} catch (Exception e) {
// Catch all exceptions because Kafka library is written in scala and checked exceptions are not declared in method signature.
if (e instanceof PrestoException) {
throw e;
}
throw new PrestoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
}
}
use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class TestJmxSplitManager method testNoPredicate.
@Test
public void testNoPredicate() throws Exception {
JmxTableHandle tableHandle = new JmxTableHandle(new SchemaTableName("schema", "tableName"), ImmutableList.of("objectName"), ImmutableList.of(columnHandle), true, TupleDomain.all());
ConnectorSplitSource splitSource = splitManager.getSplits(JmxTransactionHandle.INSTANCE, SESSION, tableHandle, UNGROUPED_SCHEDULING);
List<ConnectorSplit> allSplits = getAllSplits(splitSource);
assertEquals(allSplits.size(), nodes.size());
Set<String> actualNodes = nodes.stream().map(Node::getNodeIdentifier).collect(toSet());
Set<String> expectedNodes = new HashSet<>();
for (ConnectorSplit split : allSplits) {
List<HostAddress> addresses = split.getAddresses();
assertEquals(addresses.size(), 1);
expectedNodes.add(addresses.get(0).getHostText());
}
assertEquals(actualNodes, expectedNodes);
}
use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class TopologyAwareNodeSelector method computeAssignments.
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage) {
NodeMap nodeMapSlice = this.nodeMap.get().get();
Multimap<InternalNode, Split> assignment = HashMultimap.create();
NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMapSlice, existingTasks);
int[] topologicCounters = new int[topologicalSplitCounters.size()];
Set<NetworkLocation> filledLocations = new HashSet<>();
Set<InternalNode> blockedExactNodes = new HashSet<>();
boolean splitWaitingForAnyNode = false;
for (Split split : splits) {
if (!split.isRemotelyAccessible()) {
List<InternalNode> candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
if (candidateNodes.isEmpty()) {
log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMapSlice.getNodesByHost().keys());
throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
}
InternalNode chosenNode = bestNodeSplitCount(candidateNodes.iterator(), minCandidates, maxPendingSplitsPerTask, assignmentStats);
if (chosenNode != null) {
assignment.put(chosenNode, split);
assignmentStats.addAssignedSplit(chosenNode);
} else // Exact node set won't matter, if a split is waiting for any node
if (!splitWaitingForAnyNode) {
blockedExactNodes.addAll(candidateNodes);
}
continue;
}
InternalNode chosenNode = null;
int depth = networkLocationSegmentNames.size();
int chosenDepth = 0;
Set<NetworkLocation> locations = new HashSet<>();
for (HostAddress host : split.getAddresses()) {
locations.add(networkLocationCache.get(host));
}
if (locations.isEmpty()) {
// Add the root location
locations.add(ROOT_LOCATION);
depth = 0;
}
// Try each address at progressively shallower network locations
for (int i = depth; i >= 0 && chosenNode == null; i--) {
for (NetworkLocation location : locations) {
// For example, locations which couldn't be located will be at the "root" location
if (location.getSegments().size() < i) {
continue;
}
location = location.subLocation(0, i);
if (filledLocations.contains(location)) {
continue;
}
Set<InternalNode> nodes = nodeMapSlice.getWorkersByNetworkPath().get(location);
chosenNode = bestNodeSplitCount(new ResettableRandomizedIterator<>(nodes), minCandidates, calculateMaxPendingSplits(i, depth), assignmentStats);
if (chosenNode != null) {
chosenDepth = i;
break;
}
filledLocations.add(location);
}
}
if (chosenNode != null) {
assignment.put(chosenNode, split);
assignmentStats.addAssignedSplit(chosenNode);
topologicCounters[chosenDepth]++;
} else {
splitWaitingForAnyNode = true;
}
}
for (int i = 0; i < topologicCounters.length; i++) {
if (topologicCounters[i] > 0) {
topologicalSplitCounters.get(i).update(topologicCounters[i]);
}
}
ListenableFuture<?> blocked;
int maxPendingForWildcardNetworkAffinity = calculateMaxPendingSplits(0, networkLocationSegmentNames.size());
if (splitWaitingForAnyNode) {
blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
} else {
blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
}
// Check if its CTE node and its feeder
if (stage.isPresent() && stage.get().getFragment().getFeederCTEId().isPresent()) {
updateFeederNodeAndSplitCount(stage.get(), assignment);
}
return new SplitPlacementResult(blocked, assignment);
}
use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class NodeScheduler method createNodeSelector.
public NodeSelector createNodeSelector(CatalogName catalogName, boolean keepConsumerOnFeederNodes, Map<PlanNodeId, FixedNodeScheduleData> feederScheduledNodes) {
// this supplier is thread-safe. TODO: this logic should probably move to the scheduler since the choice of which node to run in should be
// done as close to when the the split is about to be scheduled
Supplier<NodeMap> nodeMap = Suppliers.memoizeWithExpiration(() -> {
ImmutableSetMultimap.Builder<HostAddress, InternalNode> byHostAndPort = ImmutableSetMultimap.builder();
ImmutableSetMultimap.Builder<InetAddress, InternalNode> byHost = ImmutableSetMultimap.builder();
ImmutableSetMultimap.Builder<NetworkLocation, InternalNode> workersByNetworkPath = ImmutableSetMultimap.builder();
Set<InternalNode> nodes;
if (catalogName != null) {
nodes = nodeManager.getActiveConnectorNodes(catalogName);
} else {
nodes = nodeManager.getNodes(ACTIVE);
}
Set<String> coordinatorNodeIds = nodeManager.getCoordinators().stream().map(InternalNode::getNodeIdentifier).collect(toImmutableSet());
for (InternalNode node : nodes) {
if (useNetworkTopology && (includeCoordinator || !coordinatorNodeIds.contains(node.getNodeIdentifier()))) {
NetworkLocation location = networkLocationCache.get(node.getHostAndPort());
for (int i = 0; i <= location.getSegments().size(); i++) {
workersByNetworkPath.put(location.subLocation(0, i), node);
}
}
try {
byHostAndPort.put(node.getHostAndPort(), node);
InetAddress host = InetAddress.getByName(node.getInternalUri().getHost());
byHost.put(host, node);
} catch (UnknownHostException e) {
if (inaccessibleNodeLogCache.getIfPresent(node) == null) {
inaccessibleNodeLogCache.put(node, true);
LOG.warn(e, "Unable to resolve host name for node: %s", node);
}
}
}
// //TODO duration will be reverted to 5 seconds after implementing discovery Service AA mode(now with AP mode)
return new NodeMap(byHostAndPort.build(), byHost.build(), workersByNetworkPath.build(), coordinatorNodeIds);
}, 2, TimeUnit.SECONDS);
if (keepConsumerOnFeederNodes) {
return new SimpleFixedNodeSelector(nodeManager, nodeTaskMap, includeCoordinator, nodeMap, minCandidates, maxSplitsPerNode, maxPendingSplitsPerTask, optimizedLocalScheduling, feederScheduledNodes);
}
NodeSelector defaultNodeSelector = null;
if (useNetworkTopology) {
defaultNodeSelector = new TopologyAwareNodeSelector(nodeManager, nodeTaskMap, includeCoordinator, nodeMap, minCandidates, maxSplitsPerNode, maxPendingSplitsPerTask, topologicalSplitCounters, networkLocationSegmentNames, networkLocationCache, feederScheduledNodes);
} else {
defaultNodeSelector = new SimpleNodeSelector(nodeManager, nodeTaskMap, includeCoordinator, nodeMap, minCandidates, maxSplitsPerNode, maxPendingSplitsPerTask, optimizedLocalScheduling, feederScheduledNodes);
}
if (PropertyService.getBooleanProperty(HetuConstant.SPLIT_CACHE_MAP_ENABLED)) {
return new SplitCacheAwareNodeSelector(nodeManager, nodeTaskMap, includeCoordinator, nodeMap, minCandidates, maxSplitsPerNode, maxPendingSplitsPerTask, defaultNodeSelector, feederScheduledNodes);
} else {
return defaultNodeSelector;
}
}
Aggregations