Search in sources:

Example 21 with HostAddress

Usage of io.prestosql.spi.HostAddress in the hetu-core project by openLooKeng.

Source: class TestHiveSplitSource, method testGroupSmallSplit.

@Test
public void testGroupSmallSplit() {
    // Allow up to 10 splits to be merged into a single grouped split.
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(10);
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(HiveTestUtils.SESSION, "database", "table", 10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE, new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);

    // Every split is replicated on the same three hosts.
    List<HostAddress> addresses = new ArrayList<>();
    addresses.add(new HostAddress("vm1", 1));
    addresses.add(new HostAddress("vm3", 1));
    addresses.add(new HostAddress("vm2", 1));

    // Enqueue 12 splits; the buffered count must grow by one per add.
    for (int splitIndex = 0; splitIndex < 12; splitIndex++) {
        hiveSplitSource.addToQueue(new TestSplit(splitIndex, addresses));
        assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), splitIndex + 1);
    }

    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);

    // 12 small splits collapse into 3 grouped splits of 4 each.
    assertEquals(groupedConnectorSplits.size(), 3);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    for (ConnectorSplit groupedSplit : groupedConnectorSplits) {
        hiveSplitWrappers.add((HiveSplitWrapper) groupedSplit);
    }
    for (int groupIndex = 0; groupIndex < 3; groupIndex++) {
        assertEquals(hiveSplitWrappers.get(groupIndex).getSplits().size(), 4);
    }
}
Also used : CounterStat(io.airlift.stats.CounterStat) ArrayList(java.util.ArrayList) HostAddress(io.prestosql.spi.HostAddress) DataSize(io.airlift.units.DataSize) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Test(org.testng.annotations.Test)

Example 22 with HostAddress

Usage of io.prestosql.spi.HostAddress in the hetu-core project by openLooKeng.

Source: class TestHiveSplitSource, method testGroupSmallSplitBucket.

@Test
public void testGroupSmallSplitBucket() {
    // test with 4 different bucket values
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(100);
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(HiveTestUtils.SESSION, "database", "table", 10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE, new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);

    // Enqueue 100 splits cycling through bucket ids 0..3, all on one host.
    for (int splitIndex = 0; splitIndex < 100; splitIndex++) {
        List<HostAddress> addresses = new ArrayList<>();
        addresses.add(new HostAddress("vm1", 1));
        hiveSplitSource.addToQueue(new TestSplit(splitIndex, OptionalInt.of(splitIndex % 4), 100, addresses));
    }

    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);

    // Grouping must not mix buckets: one grouped split per bucket value,
    // each holding the 25 splits of that bucket.
    assertEquals(groupedConnectorSplits.size(), 4);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    for (ConnectorSplit groupedSplit : groupedConnectorSplits) {
        hiveSplitWrappers.add((HiveSplitWrapper) groupedSplit);
    }
    for (int bucket = 0; bucket < 4; bucket++) {
        assertEquals(hiveSplitWrappers.get(bucket).getSplits().size(), 25);
    }
}
Also used : CounterStat(io.airlift.stats.CounterStat) ArrayList(java.util.ArrayList) HostAddress(io.prestosql.spi.HostAddress) DataSize(io.airlift.units.DataSize) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Test(org.testng.annotations.Test)

Example 23 with HostAddress

Usage of io.prestosql.spi.HostAddress in the hetu-core project by openLooKeng.

Source: class TestHiveSplitSource, method testGroupSmallSplitAlternativeFileSize.

@Test
public void testGroupSmallSplitAlternativeFileSize() {
    // 100 files alternating between a "big" size (67108864 = 64 MiB) and a
    // "small" size (half of that), to verify only small splits get grouped.
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(100);
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(HiveTestUtils.SESSION, "database", "table", 10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE, new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);
    for (int i = 0; i < 100; i++) {
        List<HostAddress> hostAddress = new ArrayList<>();
        hostAddress.add(new HostAddress("vm1", 1));
        // Even i -> 64 MiB file, odd i -> 32 MiB file.
        hiveSplitSource.addToQueue(new TestSplit(i, OptionalInt.empty(), 67108864 / (((i + 1) % 2) + 1), hostAddress));
    }
    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
    // The 50 big files stay as singleton splits; the 50 small files are
    // expected to be paired up into groups of two.
    for (int i = 0; i < 50; i++) {
        assertEquals(hiveSplitWrappers.get(i).getSplits().size(), 1);
    }
    for (int i = 50; i < groupedConnectorSplits.size(); i++) {
        assertEquals(hiveSplitWrappers.get(i).getSplits().size(), 2);
    }
}
Also used : CounterStat(io.airlift.stats.CounterStat) ArrayList(java.util.ArrayList) HostAddress(io.prestosql.spi.HostAddress) DataSize(io.airlift.units.DataSize) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Test(org.testng.annotations.Test)

Example 24 with HostAddress

Usage of io.prestosql.spi.HostAddress in the hetu-core project by openLooKeng.

Source: class KafkaSplitManager, method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy) {
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try {
        // Ask a random broker for the topic's partition layout.
        SimpleConsumer simpleConsumer = consumerManager.getConsumer(selectRandom(nodes));
        TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(ImmutableList.of(kafkaTableHandle.getTopicName()));
        TopicMetadataResponse topicMetadataResponse = simpleConsumer.send(topicMetadataRequest);
        ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
        for (TopicMetadata metadata : topicMetadataResponse.topicsMetadata()) {
            for (PartitionMetadata part : metadata.partitionsMetadata()) {
                log.debug("Adding Partition %s/%s", metadata.topic(), part.partitionId());
                BrokerEndPoint leader = part.leader();
                if (leader == null) {
                    // No leader yet (e.g. mid-election): fail fast rather than read from a stale replica.
                    throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Leader election in progress for Kafka topic '%s' partition %s", metadata.topic(), part.partitionId()));
                }
                HostAddress partitionLeader = HostAddress.fromParts(leader.host(), leader.port());
                SimpleConsumer leaderConsumer = consumerManager.getConsumer(partitionLeader);
                // Kafka contains a reverse list of "end - start" pairs for the splits
                long[] offsets = findAllOffsets(leaderConsumer, metadata.topic(), part.partitionId());
                // Walk the reversed offsets so each split covers [offsets[i], offsets[i - 1]).
                for (int i = offsets.length - 1; i > 0; i--) {
                    KafkaSplit split = new KafkaSplit(metadata.topic(), kafkaTableHandle.getKeyDataFormat(), kafkaTableHandle.getMessageDataFormat(), kafkaTableHandle.getKeyDataSchemaLocation().map(KafkaSplitManager::readSchema), kafkaTableHandle.getMessageDataSchemaLocation().map(KafkaSplitManager::readSchema), part.partitionId(), offsets[i], offsets[i - 1], partitionLeader);
                    splits.add(split);
                }
            }
        }
        return new FixedSplitSource(splits.build());
    } catch (PrestoException e) {
        // Already carries a meaningful error code and message; propagate as-is.
        throw e;
    } catch (Exception e) {
        // Catch all other exceptions because the Kafka library is written in Scala
        // and checked exceptions are not declared in method signatures.
        throw new PrestoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) TopicMetadataRequest(kafka.javaapi.TopicMetadataRequest) TopicMetadataResponse(kafka.javaapi.TopicMetadataResponse) PrestoException(io.prestosql.spi.PrestoException) HostAddress(io.prestosql.spi.HostAddress) BrokerEndPoint(kafka.cluster.BrokerEndPoint) PrestoException(io.prestosql.spi.PrestoException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) TopicMetadata(kafka.javaapi.TopicMetadata) FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource) PartitionMetadata(kafka.javaapi.PartitionMetadata) BrokerEndPoint(kafka.cluster.BrokerEndPoint) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) SimpleConsumer(kafka.javaapi.consumer.SimpleConsumer)

Example 25 with HostAddress

Usage of io.prestosql.spi.HostAddress in the hetu-core project by openLooKeng.

Source: class TestJmxSplitManager, method testNoPredicate.

@Test
public void testNoPredicate() throws Exception {
    // With an all-accepting predicate, the JMX connector must produce exactly
    // one split per cluster node, each pinned to that node's address.
    JmxTableHandle tableHandle = new JmxTableHandle(new SchemaTableName("schema", "tableName"), ImmutableList.of("objectName"), ImmutableList.of(columnHandle), true, TupleDomain.all());
    ConnectorSplitSource splitSource = splitManager.getSplits(JmxTransactionHandle.INSTANCE, SESSION, tableHandle, UNGROUPED_SCHEDULING);
    List<ConnectorSplit> allSplits = getAllSplits(splitSource);
    assertEquals(allSplits.size(), nodes.size());

    Set<String> actualNodes = nodes.stream().map(Node::getNodeIdentifier).collect(toSet());
    Set<String> expectedNodes = new HashSet<>();
    for (ConnectorSplit split : allSplits) {
        List<HostAddress> splitAddresses = split.getAddresses();
        // Each split must be addressable by exactly one host.
        assertEquals(splitAddresses.size(), 1);
        HostAddress onlyAddress = splitAddresses.get(0);
        expectedNodes.add(onlyAddress.getHostText());
    }
    assertEquals(actualNodes, expectedNodes);
}
Also used : ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) HostAddress(io.prestosql.spi.HostAddress) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Aggregations

HostAddress (io.prestosql.spi.HostAddress)28 ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit)16 Test (org.testng.annotations.Test)15 ArrayList (java.util.ArrayList)14 CounterStat (io.airlift.stats.CounterStat)9 DataSize (io.airlift.units.DataSize)9 ImmutableList (com.google.common.collect.ImmutableList)5 InternalNode (io.prestosql.metadata.InternalNode)5 FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource)5 HashMap (java.util.HashMap)5 List (java.util.List)5 HBaseSplit (io.hetu.core.plugin.hbase.split.HBaseSplit)3 PrestoException (io.prestosql.spi.PrestoException)3 ImmutableSet (com.google.common.collect.ImmutableSet)2 HBaseColumnHandle (io.hetu.core.plugin.hbase.connector.HBaseColumnHandle)2 HBaseTableHandle (io.hetu.core.plugin.hbase.connector.HBaseTableHandle)2 HBaseRecordSet (io.hetu.core.plugin.hbase.query.HBaseRecordSet)2 Split (io.prestosql.metadata.Split)2 Node (io.prestosql.spi.Node)2 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)2