
Example 26 with HostAddress

use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

the class TestHiveSplitSource method testGroupSmallSplitAlternativeFileSize.

@Test
public void testGroupSmallSplitAlternativeFileSize() {
    // alternate big and small file sizes, 100 files in total
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(100);
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(
            HiveTestUtils.SESSION,
            "database",
            "table",
            10,
            10,
            new DataSize(1, MEGABYTE),
            Integer.MAX_VALUE,
            new TestingHiveSplitLoader(),
            Executors.newFixedThreadPool(5),
            new CounterStat(),
            null,
            null,
            null,
            hiveConfig,
            HiveStorageFormat.ORC);
    for (int i = 0; i < 100; i++) {
        List<HostAddress> hostAddress = new ArrayList<>();
        hostAddress.add(new HostAddress("vm1", 1));
        hiveSplitSource.addToQueue(new TestSplit(i, OptionalInt.empty(), 67108864 / (((i + 1) % 2) + 1), hostAddress));
    }
    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
    System.out.println("groupedConnectorSplits.size() " + groupedConnectorSplits.size());
    for (int i = 0; i < 50; i++) {
        assertEquals(hiveSplitWrappers.get(i).getSplits().size(), 1);
    }
    for (int i = 50; i < groupedConnectorSplits.size(); i++) {
        System.out.println(hiveSplitWrappers.get(i).getSplits().size());
        assertEquals(hiveSplitWrappers.get(i).getSplits().size(), 2);
    }
}
Also used : CounterStat(io.airlift.stats.CounterStat) ArrayList(java.util.ArrayList) HostAddress(io.prestosql.spi.HostAddress) DataSize(io.airlift.units.DataSize) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Test(org.testng.annotations.Test)
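
For reference, the HostAddress values attached to each split above can be built either with the constructor shown in the test or with the fromParts factory used in the Kafka example below, and they expose the host and port back to the scheduler. A minimal standalone sketch of that API (host name and port are illustrative):

import io.prestosql.spi.HostAddress;

import java.util.ArrayList;
import java.util.List;

public class HostAddressSketch
{
    public static void main(String[] args)
    {
        // Build the kind of address list the test attaches to every queued split
        List<HostAddress> addresses = new ArrayList<>();
        addresses.add(HostAddress.fromParts("vm1", 1));

        // Host and port can be read back when the split is scheduled
        HostAddress address = addresses.get(0);
        System.out.println(address.getHostText() + ":" + address.getPort());
    }
}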

Example 27 with HostAddress

use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

the class KafkaSplitManager method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy) {
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try {
        SimpleConsumer simpleConsumer = consumerManager.getConsumer(selectRandom(nodes));
        TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(ImmutableList.of(kafkaTableHandle.getTopicName()));
        TopicMetadataResponse topicMetadataResponse = simpleConsumer.send(topicMetadataRequest);
        ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
        for (TopicMetadata metadata : topicMetadataResponse.topicsMetadata()) {
            for (PartitionMetadata part : metadata.partitionsMetadata()) {
                log.debug("Adding Partition %s/%s", metadata.topic(), part.partitionId());
                BrokerEndPoint leader = part.leader();
                if (leader == null) {
                    throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Leader election in progress for Kafka topic '%s' partition %s", metadata.topic(), part.partitionId()));
                }
                HostAddress partitionLeader = HostAddress.fromParts(leader.host(), leader.port());
                SimpleConsumer leaderConsumer = consumerManager.getConsumer(partitionLeader);
                // Kafka contains a reverse list of "end - start" pairs for the splits
                long[] offsets = findAllOffsets(leaderConsumer, metadata.topic(), part.partitionId());
                for (int i = offsets.length - 1; i > 0; i--) {
                    KafkaSplit split = new KafkaSplit(
                            metadata.topic(),
                            kafkaTableHandle.getKeyDataFormat(),
                            kafkaTableHandle.getMessageDataFormat(),
                            kafkaTableHandle.getKeyDataSchemaLocation().map(KafkaSplitManager::readSchema),
                            kafkaTableHandle.getMessageDataSchemaLocation().map(KafkaSplitManager::readSchema),
                            part.partitionId(),
                            offsets[i],
                            offsets[i - 1],
                            partitionLeader);
                    splits.add(split);
                }
            }
        }
        return new FixedSplitSource(splits.build());
    } catch (Exception e) {
        // Catch all exceptions because the Kafka library is written in Scala and checked exceptions are not declared in method signatures.
        if (e instanceof PrestoException) {
            throw e;
        }
        throw new PrestoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) TopicMetadataRequest(kafka.javaapi.TopicMetadataRequest) TopicMetadataResponse(kafka.javaapi.TopicMetadataResponse) PrestoException(io.prestosql.spi.PrestoException) HostAddress(io.prestosql.spi.HostAddress) BrokerEndPoint(kafka.cluster.BrokerEndPoint) PrestoException(io.prestosql.spi.PrestoException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) TopicMetadata(kafka.javaapi.TopicMetadata) FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource) PartitionMetadata(kafka.javaapi.PartitionMetadata) BrokerEndPoint(kafka.cluster.BrokerEndPoint) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) SimpleConsumer(kafka.javaapi.consumer.SimpleConsumer)
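
The inner loop above walks the descending offset boundaries returned by findAllOffsets and turns each adjacent pair into one split range ending at the previous boundary. A standalone sketch of that pairing logic with made-up offsets:

public class OffsetPairingSketch
{
    public static void main(String[] args)
    {
        // findAllOffsets returns boundaries in descending order (latest first, earliest last)
        long[] offsets = {900, 600, 300, 0};

        // Walk from the end so each split covers [offsets[i], offsets[i - 1])
        for (int i = offsets.length - 1; i > 0; i--) {
            long start = offsets[i];
            long end = offsets[i - 1];
            System.out.printf("split range [%d, %d)%n", start, end);
        }
    }
}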

Example 28 with HostAddress

use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

the class TestJmxSplitManager method testNoPredicate.

@Test
public void testNoPredicate() throws Exception {
    JmxTableHandle tableHandle = new JmxTableHandle(new SchemaTableName("schema", "tableName"), ImmutableList.of("objectName"), ImmutableList.of(columnHandle), true, TupleDomain.all());
    ConnectorSplitSource splitSource = splitManager.getSplits(JmxTransactionHandle.INSTANCE, SESSION, tableHandle, UNGROUPED_SCHEDULING);
    List<ConnectorSplit> allSplits = getAllSplits(splitSource);
    assertEquals(allSplits.size(), nodes.size());
    Set<String> actualNodes = nodes.stream().map(Node::getNodeIdentifier).collect(toSet());
    Set<String> expectedNodes = new HashSet<>();
    for (ConnectorSplit split : allSplits) {
        List<HostAddress> addresses = split.getAddresses();
        assertEquals(addresses.size(), 1);
        expectedNodes.add(addresses.get(0).getHostText());
    }
    assertEquals(actualNodes, expectedNodes);
}
Also used : ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) HostAddress(io.prestosql.spi.HostAddress) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)
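
The loop above expects exactly one HostAddress per JMX split and collects host names via getHostText for comparison with the registered nodes. A minimal sketch of that collection step (worker names are illustrative):

import io.prestosql.spi.HostAddress;

import java.util.Set;
import java.util.stream.Stream;

import static java.util.stream.Collectors.toSet;

public class SplitAddressSketch
{
    public static void main(String[] args)
    {
        // Collect the host name of each split's single address, as the test does per split
        Set<String> hosts = Stream.of(
                        HostAddress.fromParts("worker-1", 8080),
                        HostAddress.fromParts("worker-2", 8080))
                .map(HostAddress::getHostText)
                .collect(toSet());
        System.out.println(hosts);
    }
}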

Example 29 with HostAddress

use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

the class TopologyAwareNodeSelector method computeAssignments.

@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage) {
    NodeMap nodeMapSlice = this.nodeMap.get().get();
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMapSlice, existingTasks);
    int[] topologicCounters = new int[topologicalSplitCounters.size()];
    Set<NetworkLocation> filledLocations = new HashSet<>();
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    for (Split split : splits) {
        if (!split.isRemotelyAccessible()) {
            List<InternalNode> candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMapSlice.getNodesByHost().keys());
                throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }
            InternalNode chosenNode = bestNodeSplitCount(candidateNodes.iterator(), minCandidates, maxPendingSplitsPerTask, assignmentStats);
            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode);
            }
            // Exact node set won't matter, if a split is waiting for any node
            else if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
            continue;
        }
        InternalNode chosenNode = null;
        int depth = networkLocationSegmentNames.size();
        int chosenDepth = 0;
        Set<NetworkLocation> locations = new HashSet<>();
        for (HostAddress host : split.getAddresses()) {
            locations.add(networkLocationCache.get(host));
        }
        if (locations.isEmpty()) {
            // Add the root location
            locations.add(ROOT_LOCATION);
            depth = 0;
        }
        // Try each address at progressively shallower network locations
        for (int i = depth; i >= 0 && chosenNode == null; i--) {
            for (NetworkLocation location : locations) {
                // Skip locations shallower than the current depth; for example, hosts that couldn't be located end up at the "root" location
                if (location.getSegments().size() < i) {
                    continue;
                }
                location = location.subLocation(0, i);
                if (filledLocations.contains(location)) {
                    continue;
                }
                Set<InternalNode> nodes = nodeMapSlice.getWorkersByNetworkPath().get(location);
                chosenNode = bestNodeSplitCount(new ResettableRandomizedIterator<>(nodes), minCandidates, calculateMaxPendingSplits(i, depth), assignmentStats);
                if (chosenNode != null) {
                    chosenDepth = i;
                    break;
                }
                filledLocations.add(location);
            }
        }
        if (chosenNode != null) {
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode);
            topologicCounters[chosenDepth]++;
        } else {
            splitWaitingForAnyNode = true;
        }
    }
    for (int i = 0; i < topologicCounters.length; i++) {
        if (topologicCounters[i] > 0) {
            topologicalSplitCounters.get(i).update(topologicCounters[i]);
        }
    }
    ListenableFuture<?> blocked;
    int maxPendingForWildcardNetworkAffinity = calculateMaxPendingSplits(0, networkLocationSegmentNames.size());
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    } else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    }
    // If this stage is a CTE node and its feeder is present, update the feeder node and split count
    if (stage.isPresent() && stage.get().getFragment().getFeederCTEId().isPresent()) {
        updateFeederNodeAndSplitCount(stage.get(), assignment);
    }
    return new SplitPlacementResult(blocked, assignment);
}
Also used : PrestoException(io.prestosql.spi.PrestoException) HostAddress(io.prestosql.spi.HostAddress) InternalNode(io.prestosql.metadata.InternalNode) Split(io.prestosql.metadata.Split) HashSet(java.util.HashSet)
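
The placement loop above resolves each split address to a NetworkLocation and then retries at progressively shallower prefixes via subLocation(0, i), falling back to the root when nothing deeper has capacity. A simplified sketch of that truncation order, using a plain segment list as a stand-in for NetworkLocation (segment names are illustrative):

import java.util.List;

public class TopologyFallbackSketch
{
    public static void main(String[] args)
    {
        // Stand-in for a NetworkLocation: path segments from most to least specific
        List<String> location = List.of("dc1", "pod2", "rack7");

        // Try the full path first, then progressively shallower prefixes, ending at the root
        for (int i = location.size(); i >= 0; i--) {
            List<String> candidate = location.subList(0, i);
            System.out.println("trying location: " + (candidate.isEmpty() ? "<root>" : String.join("/", candidate)));
        }
    }
}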

Example 30 with HostAddress

use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

the class NodeScheduler method createNodeSelector.

public NodeSelector createNodeSelector(CatalogName catalogName, boolean keepConsumerOnFeederNodes, Map<PlanNodeId, FixedNodeScheduleData> feederScheduledNodes) {
    // this supplier is thread-safe. TODO: this logic should probably move to the scheduler since the choice of which node to run in should be
    // done as close as possible to when the split is about to be scheduled
    Supplier<NodeMap> nodeMap = Suppliers.memoizeWithExpiration(() -> {
        ImmutableSetMultimap.Builder<HostAddress, InternalNode> byHostAndPort = ImmutableSetMultimap.builder();
        ImmutableSetMultimap.Builder<InetAddress, InternalNode> byHost = ImmutableSetMultimap.builder();
        ImmutableSetMultimap.Builder<NetworkLocation, InternalNode> workersByNetworkPath = ImmutableSetMultimap.builder();
        Set<InternalNode> nodes;
        if (catalogName != null) {
            nodes = nodeManager.getActiveConnectorNodes(catalogName);
        } else {
            nodes = nodeManager.getNodes(ACTIVE);
        }
        Set<String> coordinatorNodeIds = nodeManager.getCoordinators().stream().map(InternalNode::getNodeIdentifier).collect(toImmutableSet());
        for (InternalNode node : nodes) {
            if (useNetworkTopology && (includeCoordinator || !coordinatorNodeIds.contains(node.getNodeIdentifier()))) {
                NetworkLocation location = networkLocationCache.get(node.getHostAndPort());
                for (int i = 0; i <= location.getSegments().size(); i++) {
                    workersByNetworkPath.put(location.subLocation(0, i), node);
                }
            }
            try {
                byHostAndPort.put(node.getHostAndPort(), node);
                InetAddress host = InetAddress.getByName(node.getInternalUri().getHost());
                byHost.put(host, node);
            } catch (UnknownHostException e) {
                if (inaccessibleNodeLogCache.getIfPresent(node) == null) {
                    inaccessibleNodeLogCache.put(node, true);
                    LOG.warn(e, "Unable to resolve host name for node: %s", node);
                }
            }
        }
        // TODO: duration will be reverted to 5 seconds after implementing discovery service AA mode (currently AP mode)
        return new NodeMap(byHostAndPort.build(), byHost.build(), workersByNetworkPath.build(), coordinatorNodeIds);
    }, 2, TimeUnit.SECONDS);
    if (keepConsumerOnFeederNodes) {
        return new SimpleFixedNodeSelector(nodeManager, nodeTaskMap, includeCoordinator, nodeMap, minCandidates, maxSplitsPerNode, maxPendingSplitsPerTask, optimizedLocalScheduling, feederScheduledNodes);
    }
    NodeSelector defaultNodeSelector = null;
    if (useNetworkTopology) {
        defaultNodeSelector = new TopologyAwareNodeSelector(nodeManager, nodeTaskMap, includeCoordinator, nodeMap, minCandidates, maxSplitsPerNode, maxPendingSplitsPerTask, topologicalSplitCounters, networkLocationSegmentNames, networkLocationCache, feederScheduledNodes);
    } else {
        defaultNodeSelector = new SimpleNodeSelector(nodeManager, nodeTaskMap, includeCoordinator, nodeMap, minCandidates, maxSplitsPerNode, maxPendingSplitsPerTask, optimizedLocalScheduling, feederScheduledNodes);
    }
    if (PropertyService.getBooleanProperty(HetuConstant.SPLIT_CACHE_MAP_ENABLED)) {
        return new SplitCacheAwareNodeSelector(nodeManager, nodeTaskMap, includeCoordinator, nodeMap, minCandidates, maxSplitsPerNode, maxPendingSplitsPerTask, defaultNodeSelector, feederScheduledNodes);
    } else {
        return defaultNodeSelector;
    }
}
Also used : UnknownHostException(java.net.UnknownHostException) ImmutableSetMultimap(com.google.common.collect.ImmutableSetMultimap) HostAddress(io.prestosql.spi.HostAddress) InternalNode(io.prestosql.metadata.InternalNode) InetAddress(java.net.InetAddress)
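
The NodeMap above is recomputed lazily and cached for two seconds through Guava's Suppliers.memoizeWithExpiration, so repeated scheduling calls share a recent snapshot instead of querying the node manager each time. A minimal sketch of that caching pattern (the node set returned here is illustrative):

import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;

import java.util.Set;
import java.util.concurrent.TimeUnit;

public class MemoizedNodeMapSketch
{
    public static void main(String[] args) throws InterruptedException
    {
        // Recompute at most once per expiration window; calls between refreshes share the cached value
        Supplier<Set<String>> activeNodes = Suppliers.memoizeWithExpiration(
                () -> {
                    System.out.println("rebuilding node set");
                    return Set.of("worker-1", "worker-2");
                },
                2, TimeUnit.SECONDS);

        activeNodes.get();  // first call builds the set
        activeNodes.get();  // served from the cached copy
        Thread.sleep(2500);
        activeNodes.get();  // cache expired, builds again
    }
}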

Aggregations

HostAddress (io.prestosql.spi.HostAddress): 38 usages
ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit): 25 usages
Test (org.testng.annotations.Test): 25 usages
ArrayList (java.util.ArrayList): 23 usages
CounterStat (io.airlift.stats.CounterStat): 18 usages
DataSize (io.airlift.units.DataSize): 18 usages
ImmutableList (com.google.common.collect.ImmutableList): 5 usages
InternalNode (io.prestosql.metadata.InternalNode): 5 usages
FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource): 5 usages
HashMap (java.util.HashMap): 5 usages
List (java.util.List): 5 usages
HBaseSplit (io.hetu.core.plugin.hbase.split.HBaseSplit): 3 usages
PrestoException (io.prestosql.spi.PrestoException): 3 usages
ImmutableSet (com.google.common.collect.ImmutableSet): 2 usages
HBaseColumnHandle (io.hetu.core.plugin.hbase.connector.HBaseColumnHandle): 2 usages
HBaseTableHandle (io.hetu.core.plugin.hbase.connector.HBaseTableHandle): 2 usages
HBaseRecordSet (io.hetu.core.plugin.hbase.query.HBaseRecordSet): 2 usages
Split (io.prestosql.metadata.Split): 2 usages
Node (io.prestosql.spi.Node): 2 usages
ColumnHandle (io.prestosql.spi.connector.ColumnHandle): 2 usages