Search in sources :

Example 6 with HostAddress

Use of io.trino.spi.HostAddress in the trino project by trinodb.

From class CassandraSplitManager, method getSplitsForPartitions.

/**
 * Builds connector splits for the given Cassandra partitions.
 * <p>
 * For a table with a single partition key column, partitions that share the same replica
 * host set are merged into one split whose partition id is an {@code IN (...)} clause,
 * batched by {@code partitionSizeForBatchSelect}. For tables with multiple partition key
 * columns the predicates cannot be merged, so each partition becomes its own split.
 *
 * @param cassTableHandle table the partitions belong to
 * @param partitions partitions to produce splits for; may be empty
 * @param clusteringPredicates clustering-key predicate text appended to each split
 * @return immutable list of splits covering all given partitions
 */
private List<ConnectorSplit> getSplitsForPartitions(CassandraTableHandle cassTableHandle, List<CassandraPartition> partitions, String clusteringPredicates) {
    String schema = cassTableHandle.getSchemaName();
    HostAddressFactory hostAddressFactory = new HostAddressFactory();
    ImmutableList.Builder<ConnectorSplit> builder = ImmutableList.builder();
    // For single partition key column table, we can merge multiple partitions into a single split
    // by using IN CLAUSE in a single select query if the partitions have the same host list.
    // For multiple partition key columns table, we can't merge them into a single select query, so
    // keep them in a separate split.
    boolean singlePartitionKeyColumn = true;
    String partitionKeyColumnName = null;
    if (!partitions.isEmpty()) {
        singlePartitionKeyColumn = partitions.get(0).getTupleDomain().getDomains().get().size() == 1;
        if (singlePartitionKeyColumn) {
            // partitionId has the form "<column> = <value>"; strip " = <value>" to get the column name.
            // NOTE(review): assumes the partition id contains exactly one '=' with a space before it — confirm.
            String partitionId = partitions.get(0).getPartitionId();
            partitionKeyColumnName = partitionId.substring(0, partitionId.lastIndexOf('=') - 1);
        }
    }
    // Group partitions by their replica host set so co-located partitions can share a split.
    Map<Set<String>, Set<String>> hostsToPartitionKeys = new HashMap<>();
    Map<Set<String>, List<HostAddress>> hostMap = new HashMap<>();
    for (CassandraPartition cassandraPartition : partitions) {
        Set<Host> hosts = cassandraSession.getReplicas(schema, cassandraPartition.getKeyAsByteBuffer());
        List<HostAddress> addresses = hostAddressFactory.toHostAddressList(hosts);
        if (singlePartitionKeyColumn) {
            // host ip addresses
            ImmutableSet.Builder<String> hostTexts = ImmutableSet.builder();
            for (HostAddress address : addresses) {
                hostTexts.add(address.getHostText());
            }
            Set<String> hostAddresses = hostTexts.build();
            // partition key value: the text after "= " in "<column> = <value>"
            String partitionId = cassandraPartition.getPartitionId();
            hostsToPartitionKeys.computeIfAbsent(hostAddresses, ignored -> new HashSet<>())
                    .add(partitionId.substring(partitionId.lastIndexOf('=') + 2));
            hostMap.put(hostAddresses, addresses);
        }
        else {
            builder.add(createSplitForClusteringPredicates(cassandraPartition.getPartitionId(), addresses, clusteringPredicates));
        }
    }
    if (singlePartitionKeyColumn) {
        for (Map.Entry<Set<String>, Set<String>> entry : hostsToPartitionKeys.entrySet()) {
            StringBuilder sb = new StringBuilder(partitionSizeForBatchSelect);
            int size = 0;
            for (String value : entry.getValue()) {
                if (size > 0) {
                    sb.append(",");
                }
                sb.append(value);
                size++;
                // NOTE(review): flushing on "> partitionSizeForBatchSelect" lets each batch hold
                // partitionSizeForBatchSelect + 1 values — confirm whether ">=" was intended.
                if (size > partitionSizeForBatchSelect) {
                    String partitionId = format("%s in (%s)", partitionKeyColumnName, sb);
                    builder.add(createSplitForClusteringPredicates(partitionId, hostMap.get(entry.getKey()), clusteringPredicates));
                    size = 0;
                    sb.setLength(0);
                    sb.trimToSize();
                }
            }
            // Emit the final (possibly partial) batch for this host set.
            if (size > 0) {
                String partitionId = format("%s in (%s)", partitionKeyColumnName, sb);
                builder.add(createSplitForClusteringPredicates(partitionId, hostMap.get(entry.getKey()), clusteringPredicates));
            }
        }
    }
    return builder.build();
}
Also used : HostAddressFactory(io.trino.plugin.cassandra.util.HostAddressFactory) HashSet(java.util.HashSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) Host(com.datastax.driver.core.Host) HostAddress(io.trino.spi.HostAddress) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) HashMap(java.util.HashMap) Map(java.util.Map)

Example 7 with HostAddress

Use of io.trino.spi.HostAddress in the trino project by trinodb.

From class HostAddressFactory, method toHostAddress.

/**
 * Resolves a host address string to a {@link HostAddress}, caching parsed results so
 * repeated lookups of the same string do not re-parse it.
 *
 * @param hostAddressName host (optionally with port) in the format accepted by
 *        {@code HostAddress.fromString}
 * @return the cached or newly parsed address
 */
public HostAddress toHostAddress(String hostAddressName) {
    // check-null-then-put cache idiom replaced with the equivalent computeIfAbsent
    return hostMap.computeIfAbsent(hostAddressName, HostAddress::fromString);
}
Also used : HostAddress(io.trino.spi.HostAddress)

Example 8 with HostAddress

Use of io.trino.spi.HostAddress in the trino project by trinodb.

From class ConfluentModule, method createSchemaRegistryClient.

/**
 * Creates the Confluent schema registry client, wrapped so that all calls run with the
 * plugin class loader.
 *
 * @param confluentConfig registry URLs and cache sizing
 * @param schemaProviders schema format providers to register with the client
 * @param propertiesProviders contributors of additional client properties
 * @param classLoader class loader to activate around every client call
 * @return a class-loader-safe, caching schema registry client
 */
@Provides
@Singleton
public static SchemaRegistryClient createSchemaRegistryClient(ConfluentSchemaRegistryConfig confluentConfig, Set<SchemaProvider> schemaProviders, Set<SchemaRegistryClientPropertiesProvider> propertiesProviders, ClassLoader classLoader) {
    requireNonNull(confluentConfig, "confluentConfig is null");
    requireNonNull(schemaProviders, "schemaProviders is null");
    requireNonNull(propertiesProviders, "propertiesProviders is null");
    // Registry endpoints are configured as HostAddresses; the client wants bare host strings.
    List<String> registryUrls = confluentConfig.getConfluentSchemaRegistryUrls().stream()
            .map(HostAddress::getHostText)
            .collect(toImmutableList());
    // Merge the client properties contributed by every provider into a single map.
    Map<String, ?> clientProperties = propertiesProviders.stream()
            .map(SchemaRegistryClientPropertiesProvider::getSchemaRegistryClientProperties)
            .flatMap(properties -> properties.entrySet().stream())
            .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));
    CachedSchemaRegistryClient delegate = new CachedSchemaRegistryClient(
            registryUrls,
            confluentConfig.getConfluentSchemaRegistryClientCacheSize(),
            ImmutableList.copyOf(schemaProviders),
            clientProperties);
    return new ClassLoaderSafeSchemaRegistryClient(delegate, classLoader);
}
Also used : DummyRowDecoder(io.trino.decoder.dummy.DummyRowDecoder) Module(com.google.inject.Module) DispatchingRowEncoderFactory(io.trino.plugin.kafka.encoder.DispatchingRowEncoderFactory) SchemaRegistryClient(io.confluent.kafka.schemaregistry.client.SchemaRegistryClient) Singleton(javax.inject.Singleton) SINGLETON(com.google.inject.Scopes.SINGLETON) EncoderModule.encoderFactory(io.trino.plugin.kafka.encoder.EncoderModule.encoderFactory) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) AvroRowDecoderFactory(io.trino.decoder.avro.AvroRowDecoderFactory) AbstractConfigurationAwareModule(io.airlift.configuration.AbstractConfigurationAwareModule) SessionPropertiesProvider(io.trino.plugin.base.session.SessionPropertiesProvider) ImmutableList(com.google.common.collect.ImmutableList) Binder(com.google.inject.Binder) SchemaProvider(io.confluent.kafka.schemaregistry.SchemaProvider) TableDescriptionSupplier(io.trino.plugin.kafka.schema.TableDescriptionSupplier) DispatchingRowDecoderFactory(io.trino.decoder.DispatchingRowDecoderFactory) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) MapBinder(com.google.inject.multibindings.MapBinder) MapBinder.newMapBinder(com.google.inject.multibindings.MapBinder.newMapBinder) RowEncoderFactory(io.trino.plugin.kafka.encoder.RowEncoderFactory) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) AvroSchemaProvider(io.confluent.kafka.schemaregistry.avro.AvroSchemaProvider) TrinoException(io.trino.spi.TrinoException) AvroBytesDeserializer(io.trino.decoder.avro.AvroBytesDeserializer) AvroDeserializer(io.trino.decoder.avro.AvroDeserializer) Scopes(com.google.inject.Scopes) AvroReaderSupplier(io.trino.decoder.avro.AvroReaderSupplier) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Provides(com.google.inject.Provides) 
AvroRowEncoder(io.trino.plugin.kafka.encoder.avro.AvroRowEncoder) DummyRowDecoderFactory(io.trino.decoder.dummy.DummyRowDecoderFactory) ContentSchemaReader(io.trino.plugin.kafka.schema.ContentSchemaReader) ConfigBinder.configBinder(io.airlift.configuration.ConfigBinder.configBinder) CachedSchemaRegistryClient(io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient) RowDecoderFactory(io.trino.decoder.RowDecoderFactory) HostAddress(io.trino.spi.HostAddress) CachedSchemaRegistryClient(io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 9 with HostAddress

Use of io.trino.spi.HostAddress in the trino project by trinodb.

From class KafkaSplitManager, method getSplits.

/**
 * Produces splits for a Kafka table: one or more splits per topic partition, each covering
 * a contiguous offset range of at most {@code messagesPerSplit} messages and pinned to the
 * partition leader's address.
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    // try-with-resources: the consumer is only needed to fetch partition metadata and offsets
    try (KafkaConsumer<byte[], byte[]> kafkaConsumer = consumerFactory.create(session)) {
        List<PartitionInfo> partitionInfos = kafkaConsumer.partitionsFor(kafkaTableHandle.getTopicName());
        List<TopicPartition> topicPartitions = partitionInfos.stream().map(KafkaSplitManager::toTopicPartition).collect(toImmutableList());
        Map<TopicPartition, Long> partitionBeginOffsets = kafkaConsumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> partitionEndOffsets = kafkaConsumer.endOffsets(topicPartitions);
        // Apply pushed-down filters; this may narrow the partition list and tighten offset ranges,
        // so the three locals are deliberately replaced with the filtered results below.
        KafkaFilteringResult kafkaFilteringResult = kafkaFilterManager.getKafkaFilterResult(session, kafkaTableHandle, partitionInfos, partitionBeginOffsets, partitionEndOffsets);
        partitionInfos = kafkaFilteringResult.getPartitionInfos();
        partitionBeginOffsets = kafkaFilteringResult.getPartitionBeginOffsets();
        partitionEndOffsets = kafkaFilteringResult.getPartitionEndOffsets();
        ImmutableList.Builder<KafkaSplit> splits = ImmutableList.builder();
        // Content schemas are fetched once and shared by every split of this table.
        Optional<String> keyDataSchemaContents = contentSchemaReader.readKeyContentSchema(kafkaTableHandle);
        Optional<String> messageDataSchemaContents = contentSchemaReader.readValueContentSchema(kafkaTableHandle);
        for (PartitionInfo partitionInfo : partitionInfos) {
            TopicPartition topicPartition = toTopicPartition(partitionInfo);
            HostAddress leader = HostAddress.fromParts(partitionInfo.leader().host(), partitionInfo.leader().port());
            // Chop the partition's [begin, end) offset range into ranges of at most messagesPerSplit,
            // emitting one split per range, all addressed to the partition leader.
            new Range(partitionBeginOffsets.get(topicPartition), partitionEndOffsets.get(topicPartition)).partition(messagesPerSplit).stream().map(range -> new KafkaSplit(kafkaTableHandle.getTopicName(), kafkaTableHandle.getKeyDataFormat(), kafkaTableHandle.getMessageDataFormat(), keyDataSchemaContents, messageDataSchemaContents, partitionInfo.partition(), range, leader)).forEach(splits::add);
        }
        return new FixedSplitSource(splits.build());
    } catch (Exception e) {
        // Catch all exceptions because Kafka library is written in scala and checked exceptions are not declared in method signature.
        if (e instanceof TrinoException) {
            // Re-throw Trino's own errors unchanged so callers see the original error code.
            throw e;
        }
        throw new TrinoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorSession(io.trino.spi.connector.ConnectorSession) PartitionInfo(org.apache.kafka.common.PartitionInfo) String.format(java.lang.String.format) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) Inject(javax.inject.Inject) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) DynamicFilter(io.trino.spi.connector.DynamicFilter) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ContentSchemaReader(io.trino.plugin.kafka.schema.ContentSchemaReader) Optional(java.util.Optional) KAFKA_SPLIT_ERROR(io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR) HostAddress(io.trino.spi.HostAddress) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) HostAddress(io.trino.spi.HostAddress) TrinoException(io.trino.spi.TrinoException) TopicPartition(org.apache.kafka.common.TopicPartition) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) TrinoException(io.trino.spi.TrinoException) PartitionInfo(org.apache.kafka.common.PartitionInfo)

Example 10 with HostAddress

Use of io.trino.spi.HostAddress in the trino project by trinodb.

From class TestJmxSplitManager, method testNoPredicate.

/**
 * Verifies that without any predicate the JMX split manager creates exactly one
 * single-address split per cluster node, covering every node.
 */
@Test
public void testNoPredicate() throws Exception {
    JmxTableHandle tableHandle = new JmxTableHandle(new SchemaTableName("schema", "tableName"), ImmutableList.of("objectName"), ImmutableList.of(columnHandle), true, TupleDomain.all());
    ConnectorSplitSource splitSource = splitManager.getSplits(JmxTransactionHandle.INSTANCE, SESSION, tableHandle, UNGROUPED_SCHEDULING, DynamicFilter.EMPTY);
    List<ConnectorSplit> splits = getAllSplits(splitSource);
    // One split per node is expected when no predicate narrows the table.
    assertEquals(splits.size(), nodes.size());
    Set<String> actualNodes = nodes.stream().map(Node::getNodeIdentifier).collect(toSet());
    // Each split must be addressed to exactly one host; collect those hosts.
    Set<String> splitHosts = new HashSet<>();
    for (ConnectorSplit split : splits) {
        List<HostAddress> addresses = split.getAddresses();
        assertEquals(addresses.size(), 1);
        splitHosts.add(addresses.get(0).getHostText());
    }
    assertEquals(actualNodes, splitHosts);
}
Also used : ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) HostAddress(io.trino.spi.HostAddress) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Aggregations

HostAddress (io.trino.spi.HostAddress)24 ImmutableList (com.google.common.collect.ImmutableList)9 ConnectorSplit (io.trino.spi.connector.ConnectorSplit)8 Test (org.testng.annotations.Test)7 InternalNode (io.trino.metadata.InternalNode)6 HashSet (java.util.HashSet)5 List (java.util.List)5 Map (java.util.Map)5 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 ImmutableSet (com.google.common.collect.ImmutableSet)4 HashMap (java.util.HashMap)4 Objects.requireNonNull (java.util.Objects.requireNonNull)4 Split (io.trino.metadata.Split)3 TrinoException (io.trino.spi.TrinoException)3 FixedSplitSource (io.trino.spi.connector.FixedSplitSource)3 Objects (java.util.Objects)3 Properties (java.util.Properties)3 Host (com.datastax.driver.core.Host)2 HashMultimap (com.google.common.collect.HashMultimap)2 ImmutableMultimap (com.google.common.collect.ImmutableMultimap)2