use of io.trino.spi.HostAddress in project trino by trinodb.
the class CassandraSplitManager method getSplitsForPartitions.
/**
 * Builds the connector splits for the given partitions of a Cassandra table.
 * <p>
 * If the tuple domain of the first partition holds exactly one column domain, the table is treated
 * as having a single partition key column: partitions whose replicas resolve to the same set of
 * host texts are grouped, and each group is emitted as one or more splits whose partition id is a
 * {@code <column> in (<v1>,<v2>,...)} predicate. Otherwise every partition gets its own split.
 *
 * @param cassTableHandle table handle; only its schema name is read here
 * @param partitions partitions to convert; an empty list produces an empty result
 * @param clusteringPredicates forwarded unchanged to {@code createSplitForClusteringPredicates}
 * @return immutable list of the created splits
 */
private List<ConnectorSplit> getSplitsForPartitions(CassandraTableHandle cassTableHandle, List<CassandraPartition> partitions, String clusteringPredicates)
{
    String schema = cassTableHandle.getSchemaName();
    HostAddressFactory hostAddressFactory = new HostAddressFactory();
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();

    // Only the first partition is inspected to decide between the two strategies.
    boolean singlePartitionKeyColumn = partitions.isEmpty()
            || partitions.get(0).getTupleDomain().getDomains().get().size() == 1;

    // Multiple partition key columns: partitions cannot be merged into one select, so emit one split each.
    if (!singlePartitionKeyColumn) {
        for (CassandraPartition partition : partitions) {
            Set<Host> replicas = cassandraSession.getReplicas(schema, partition.getKeyAsByteBuffer());
            List<HostAddress> addresses = hostAddressFactory.toHostAddressList(replicas);
            splits.add(createSplitForClusteringPredicates(partition.getPartitionId(), addresses, clusteringPredicates));
        }
        return splits.build();
    }

    // Single partition key column: the column name is everything before the last '=' (minus one character).
    // NOTE(review): presumably the partition id looks like "<column> = <value>" - confirm against CassandraPartition.
    String partitionKeyColumnName = null;
    if (!partitions.isEmpty()) {
        String firstPartitionId = partitions.get(0).getPartitionId();
        partitionKeyColumnName = firstPartitionId.substring(0, firstPartitionId.lastIndexOf('=') - 1);
    }

    // Group the partition key values by the set of host texts of their replicas.
    Map<Set<String>, Set<String>> keyValuesByHosts = new HashMap<>();
    Map<Set<String>, List<HostAddress>> addressesByHosts = new HashMap<>();
    for (CassandraPartition partition : partitions) {
        Set<Host> replicas = cassandraSession.getReplicas(schema, partition.getKeyAsByteBuffer());
        List<HostAddress> addresses = hostAddressFactory.toHostAddressList(replicas);

        ImmutableSet.Builder<String> hostTexts = ImmutableSet.builder();
        for (HostAddress address : addresses) {
            hostTexts.add(address.getHostText());
        }
        Set<String> hostKey = hostTexts.build();

        // The key value is the text starting two characters after the last '='.
        String partitionId = partition.getPartitionId();
        String keyValue = partitionId.substring(partitionId.lastIndexOf('=') + 2);
        keyValuesByHosts.computeIfAbsent(hostKey, ignored -> new HashSet<>()).add(keyValue);
        // The address list of the most recently seen partition wins for a given host set.
        addressesByHosts.put(hostKey, addresses);
    }

    // Emit IN-clause splits per host group.
    for (Map.Entry<Set<String>, Set<String>> group : keyValuesByHosts.entrySet()) {
        List<HostAddress> groupAddresses = addressesByHosts.get(group.getKey());
        StringBuilder inList = new StringBuilder(partitionSizeForBatchSelect);
        int batched = 0;
        for (String keyValue : group.getValue()) {
            if (batched != 0) {
                inList.append(',');
            }
            inList.append(keyValue);
            batched++;

            // NOTE(review): the flush happens only once the count exceeds partitionSizeForBatchSelect,
            // so a full batch carries partitionSizeForBatchSelect + 1 values - confirm this is intended.
            if (batched <= partitionSizeForBatchSelect) {
                continue;
            }
            String inPredicate = format("%s in (%s)", partitionKeyColumnName, inList);
            splits.add(createSplitForClusteringPredicates(inPredicate, groupAddresses, clusteringPredicates));
            batched = 0;
            inList.setLength(0);
            inList.trimToSize();
        }
        // Flush the trailing, partially filled batch.
        if (batched > 0) {
            String inPredicate = format("%s in (%s)", partitionKeyColumnName, inList);
            splits.add(createSplitForClusteringPredicates(inPredicate, groupAddresses, clusteringPredicates));
        }
    }
    return splits.build();
}
use of io.trino.spi.HostAddress in project trino by trinodb.
the class HostAddressFactory method toHostAddress.
/**
 * Returns the {@link HostAddress} for the given name, reusing the instance stored in
 * {@code hostMap} when one exists and otherwise parsing the name with
 * {@link HostAddress#fromString} and storing the result for later calls.
 *
 * @param hostAddressName textual host address used both as the lookup key and as parser input
 * @return the stored or newly parsed address
 */
public HostAddress toHostAddress(String hostAddressName)
{
    HostAddress known = hostMap.get(hostAddressName);
    if (known != null) {
        return known;
    }

    // First time this name is seen: parse it once and remember the result.
    HostAddress parsed = HostAddress.fromString(hostAddressName);
    hostMap.put(hostAddressName, parsed);
    return parsed;
}
use of io.trino.spi.HostAddress in project trino by trinodb.
the class ConfluentModule method createSchemaRegistryClient.
/**
 * Provider method for the singleton {@link SchemaRegistryClient}.
 * <p>
 * Builds a {@code CachedSchemaRegistryClient} from the configured registry URLs, cache size,
 * schema providers and the merged client properties of all property providers, then wraps it in a
 * {@code ClassLoaderSafeSchemaRegistryClient} bound to the given class loader.
 *
 * @param confluentConfig source of the registry URLs and the client cache size
 * @param schemaProviders schema providers handed to the cached client
 * @param propertiesProviders providers whose property maps are merged into one map
 * @param classLoader class loader passed to the class-loader-safe wrapper
 * @return the wrapped schema registry client
 * @throws NullPointerException if {@code confluentConfig}, {@code schemaProviders} or
 *         {@code propertiesProviders} is null
 */
@Provides
@Singleton
public static SchemaRegistryClient createSchemaRegistryClient(
        ConfluentSchemaRegistryConfig confluentConfig,
        Set<SchemaProvider> schemaProviders,
        Set<SchemaRegistryClientPropertiesProvider> propertiesProviders,
        ClassLoader classLoader)
{
    requireNonNull(confluentConfig, "confluentConfig is null");
    requireNonNull(schemaProviders, "schemaProviders is null");
    requireNonNull(propertiesProviders, "propertiesProviders is null");

    // The client is given the host text of every configured registry address.
    ImmutableList.Builder<String> registryUrls = ImmutableList.builder();
    for (HostAddress registryAddress : confluentConfig.getConfluentSchemaRegistryUrls()) {
        registryUrls.add(registryAddress.getHostText());
    }
    List<String> baseUrl = registryUrls.build();

    // Flatten the property maps of all providers into a single immutable map.
    Map<String, ?> clientProperties = propertiesProviders.stream()
            .map(SchemaRegistryClientPropertiesProvider::getSchemaRegistryClientProperties)
            .flatMap(properties -> properties.entrySet().stream())
            .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));

    CachedSchemaRegistryClient cachingClient = new CachedSchemaRegistryClient(
            baseUrl,
            confluentConfig.getConfluentSchemaRegistryClientCacheSize(),
            ImmutableList.copyOf(schemaProviders),
            clientProperties);
    return new ClassLoaderSafeSchemaRegistryClient(cachingClient, classLoader);
}
use of io.trino.spi.HostAddress in project trino by trinodb.
the class KafkaSplitManager method getSplits.
/**
 * Creates the splits for a Kafka table.
 * <p>
 * Looks up the partitions of the table's topic together with their beginning and end offsets,
 * lets {@code kafkaFilterManager} narrow partitions and offsets, and then emits one
 * {@link KafkaSplit} per offset range returned by {@code Range.partition(messagesPerSplit)} for
 * every remaining partition. Each split carries the partition leader as its host address.
 *
 * @return a {@link FixedSplitSource} over all created splits
 * @throws TrinoException with {@code KAFKA_SPLIT_ERROR} when anything other than a
 *         {@link TrinoException} is thrown while listing splits; a {@link TrinoException} is
 *         rethrown unchanged
 */
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle table,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try (KafkaConsumer<byte[], byte[]> kafkaConsumer = consumerFactory.create(session)) {
        List<PartitionInfo> allPartitions = kafkaConsumer.partitionsFor(kafkaTableHandle.getTopicName());
        List<TopicPartition> topicPartitions = allPartitions.stream()
                .map(KafkaSplitManager::toTopicPartition)
                .collect(toImmutableList());
        Map<TopicPartition, Long> rawBeginOffsets = kafkaConsumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> rawEndOffsets = kafkaConsumer.endOffsets(topicPartitions);

        // From here on only the filtered partitions and offsets are used.
        KafkaFilteringResult filtered = kafkaFilterManager.getKafkaFilterResult(session, kafkaTableHandle, allPartitions, rawBeginOffsets, rawEndOffsets);
        List<PartitionInfo> partitionInfos = filtered.getPartitionInfos();
        Map<TopicPartition, Long> partitionBeginOffsets = filtered.getPartitionBeginOffsets();
        Map<TopicPartition, Long> partitionEndOffsets = filtered.getPartitionEndOffsets();

        ImmutableList.Builder<KafkaSplit> splits = ImmutableList.builder();
        Optional<String> keyDataSchemaContents = contentSchemaReader.readKeyContentSchema(kafkaTableHandle);
        Optional<String> messageDataSchemaContents = contentSchemaReader.readValueContentSchema(kafkaTableHandle);

        for (PartitionInfo partitionInfo : partitionInfos) {
            TopicPartition topicPartition = toTopicPartition(partitionInfo);
            HostAddress leader = HostAddress.fromParts(partitionInfo.leader().host(), partitionInfo.leader().port());
            Range partitionOffsets = new Range(partitionBeginOffsets.get(topicPartition), partitionEndOffsets.get(topicPartition));
            for (Range range : partitionOffsets.partition(messagesPerSplit)) {
                splits.add(new KafkaSplit(
                        kafkaTableHandle.getTopicName(),
                        kafkaTableHandle.getKeyDataFormat(),
                        kafkaTableHandle.getMessageDataFormat(),
                        keyDataSchemaContents,
                        messageDataSchemaContents,
                        partitionInfo.partition(),
                        range,
                        leader));
            }
        }
        return new FixedSplitSource(splits.build());
    }
    catch (TrinoException e) {
        // Already a Trino error: pass it through untouched.
        throw e;
    }
    catch (Exception e) {
        // Catch all exceptions because Kafka library is written in scala and checked exceptions are not declared in method signature.
        throw new TrinoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
use of io.trino.spi.HostAddress in project trino by trinodb.
the class TestJmxSplitManager method testNoPredicate.
/**
 * Verifies that, for a table handle constrained only by {@code TupleDomain.all()}, the split
 * manager returns exactly one split per known node, that every split carries a single address,
 * and that the host texts of those addresses equal the node identifiers.
 * <p>
 * The other {@code assertEquals} calls in this test pass the observed value first and the
 * expected value second, so the final comparison follows the same order: hosts taken from the
 * produced splits are the actual value, identifiers taken from {@code nodes} the expected one.
 */
@Test
public void testNoPredicate()
        throws Exception
{
    JmxTableHandle tableHandle = new JmxTableHandle(new SchemaTableName("schema", "tableName"), ImmutableList.of("objectName"), ImmutableList.of(columnHandle), true, TupleDomain.all());
    ConnectorSplitSource splitSource = splitManager.getSplits(JmxTransactionHandle.INSTANCE, SESSION, tableHandle, UNGROUPED_SCHEDULING, DynamicFilter.EMPTY);
    List<ConnectorSplit> allSplits = getAllSplits(splitSource);
    assertEquals(allSplits.size(), nodes.size());

    // Expected: the identifiers of the nodes the test was set up with.
    Set<String> expectedNodes = nodes.stream().map(Node::getNodeIdentifier).collect(toSet());

    // Actual: the single host each produced split is addressed to.
    Set<String> actualNodes = new HashSet<>();
    for (ConnectorSplit split : allSplits) {
        List<HostAddress> addresses = split.getAddresses();
        assertEquals(addresses.size(), 1);
        actualNodes.add(addresses.get(0).getHostText());
    }
    assertEquals(actualNodes, expectedNodes);
}
Aggregations