Use of io.prestosql.spi.HostAddress in the project hetu-core by openlookeng: class TestHiveSplitSource, method testGroupSmallSplit.
@Test
public void testGroupSmallSplit() {
    // Allow up to 10 small splits to be merged into a single grouped split.
    HiveConfig config = new HiveConfig();
    config.setMaxSplitsToGroup(10);
    HiveSplitSource splitSource = HiveSplitSource.allAtOnce(HiveTestUtils.SESSION, "database", "table", 10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE, new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null, null, null, config, HiveStorageFormat.ORC);

    // Every split advertises the same three candidate hosts.
    List<HostAddress> addresses = new ArrayList<>();
    addresses.add(new HostAddress("vm1", 1));
    addresses.add(new HostAddress("vm3", 1));
    addresses.add(new HostAddress("vm2", 1));

    // Queue 12 splits and check the buffered count grows with each addition.
    for (int index = 0; index < 12; index++) {
        splitSource.addToQueue(new TestSplit(index, addresses));
        assertEquals(splitSource.getBufferedInternalSplitCount(), index + 1);
    }

    List<ConnectorSplit> splits = getSplits(splitSource, 100);
    List<ConnectorSplit> grouped = splitSource.groupSmallSplits(splits, 1);
    // The 12 small splits collapse into 3 grouped splits of 4 splits each.
    assertEquals(grouped.size(), 3);
    List<HiveSplitWrapper> wrappers = new ArrayList<>();
    for (ConnectorSplit split : grouped) {
        wrappers.add((HiveSplitWrapper) split);
    }
    assertEquals(wrappers.get(0).getSplits().size(), 4);
    assertEquals(wrappers.get(1).getSplits().size(), 4);
    assertEquals(wrappers.get(2).getSplits().size(), 4);
}
Use of io.prestosql.spi.HostAddress in the project hetu-core by openlookeng: class TestHiveSplitSource, method testGroupSmallSplitBucket.
@Test
public void testGroupSmallSplitBucket() {
    // Queue 100 splits spread round-robin across 4 bucket numbers; grouping
    // must keep splits of different buckets in separate groups.
    HiveConfig config = new HiveConfig();
    config.setMaxSplitsToGroup(100);
    HiveSplitSource splitSource = HiveSplitSource.allAtOnce(HiveTestUtils.SESSION, "database", "table", 10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE, new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null, null, null, config, HiveStorageFormat.ORC);
    for (int index = 0; index < 100; index++) {
        List<HostAddress> addresses = new ArrayList<>();
        addresses.add(new HostAddress("vm1", 1));
        splitSource.addToQueue(new TestSplit(index, OptionalInt.of(index % 4), 100, addresses));
    }
    List<ConnectorSplit> splits = getSplits(splitSource, 100);
    List<ConnectorSplit> grouped = splitSource.groupSmallSplits(splits, 1);
    // One grouped split per bucket value.
    assertEquals(grouped.size(), 4);
    List<HiveSplitWrapper> wrappers = new ArrayList<>();
    grouped.forEach(split -> wrappers.add((HiveSplitWrapper) split));
    // Each bucket group contains exactly 25 of the 100 original splits.
    for (int bucket = 0; bucket < 4; bucket++) {
        assertEquals(wrappers.get(bucket).getSplits().size(), 25);
    }
}
Use of io.prestosql.spi.HostAddress in the project hetu-core by openlookeng: class TestHiveSplitSource, method testGroupSmallSplitAlternativeFileSize.
@Test
public void testGroupSmallSplitAlternativeFileSize() {
    // 100 files whose sizes alternate between 64 MB (67108864 bytes) and
    // 32 MB: even indices get the small size, odd indices the large size.
    // Large files must remain as singleton groups; the small files are
    // expected to be paired up, two per group.
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(100);
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(HiveTestUtils.SESSION, "database", "table", 10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE, new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);
    for (int i = 0; i < 100; i++) {
        List<HostAddress> hostAddress = new ArrayList<>();
        hostAddress.add(new HostAddress("vm1", 1));
        // Divisor is 2 for even i (small file) and 1 for odd i (large file).
        hiveSplitSource.addToQueue(new TestSplit(i, OptionalInt.empty(), 67108864 / (((i + 1) % 2) + 1), hostAddress));
    }
    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
    // The 50 large files are too big to merge: one split per group.
    for (int i = 0; i < 50; i++) {
        assertEquals(hiveSplitWrappers.get(i).getSplits().size(), 1);
    }
    // The 50 small files are merged pairwise: two splits per group.
    for (int i = 50; i < groupedConnectorSplits.size(); i++) {
        assertEquals(hiveSplitWrappers.get(i).getSplits().size(), 2);
    }
}
Use of io.prestosql.spi.HostAddress in the project hetu-core by openlookeng: class KafkaSplitManager, method getSplits.
/**
 * Enumerates splits for the given Kafka-backed table: one split per
 * (partition, offset-range) pair, each anchored to its partition leader.
 *
 * @param transaction the current transaction handle (unused here)
 * @param session the current session (unused here)
 * @param table the table to enumerate; must be a {@link KafkaTableHandle}
 * @param splitSchedulingStrategy scheduling strategy (unused here)
 * @return a fixed split source containing every discovered split
 * @throws PrestoException if a partition leader election is in progress, or
 *         wrapping any other failure while listing splits
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy) {
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try {
        SimpleConsumer simpleConsumer = consumerManager.getConsumer(selectRandom(nodes));
        TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(ImmutableList.of(kafkaTableHandle.getTopicName()));
        TopicMetadataResponse topicMetadataResponse = simpleConsumer.send(topicMetadataRequest);
        ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
        for (TopicMetadata metadata : topicMetadataResponse.topicsMetadata()) {
            for (PartitionMetadata part : metadata.partitionsMetadata()) {
                log.debug("Adding Partition %s/%s", metadata.topic(), part.partitionId());
                BrokerEndPoint leader = part.leader();
                if (leader == null) {
                    // A partition with no leader cannot be read right now.
                    throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Leader election in progress for Kafka topic '%s' partition %s", metadata.topic(), part.partitionId()));
                }
                HostAddress partitionLeader = HostAddress.fromParts(leader.host(), leader.port());
                SimpleConsumer leaderConsumer = consumerManager.getConsumer(partitionLeader);
                // Kafka contains a reverse list of "end - start" pairs for the splits
                long[] offsets = findAllOffsets(leaderConsumer, metadata.topic(), part.partitionId());
                // Walk the reversed offset list, pairing adjacent entries as
                // (start = offsets[i], end = offsets[i - 1]) ranges.
                for (int i = offsets.length - 1; i > 0; i--) {
                    KafkaSplit split = new KafkaSplit(metadata.topic(), kafkaTableHandle.getKeyDataFormat(), kafkaTableHandle.getMessageDataFormat(), kafkaTableHandle.getKeyDataSchemaLocation().map(KafkaSplitManager::readSchema), kafkaTableHandle.getMessageDataSchemaLocation().map(KafkaSplitManager::readSchema), part.partitionId(), offsets[i], offsets[i - 1], partitionLeader);
                    splits.add(split);
                }
            }
        }
        return new FixedSplitSource(splits.build());
    }
    catch (PrestoException e) {
        // Already carries the right error code and message; propagate as-is.
        throw e;
    }
    catch (Exception e) {
        // Catch all other exceptions because the Kafka library is written in
        // Scala and checked exceptions are not declared in method signatures.
        throw new PrestoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
Use of io.prestosql.spi.HostAddress in the project hetu-core by openlookeng: class TestJmxSplitManager, method testNoPredicate.
@Test
public void testNoPredicate() throws Exception {
    // With an all-accepting predicate, the JMX split manager must produce
    // exactly one single-host split per cluster node.
    JmxTableHandle tableHandle = new JmxTableHandle(new SchemaTableName("schema", "tableName"), ImmutableList.of("objectName"), ImmutableList.of(columnHandle), true, TupleDomain.all());
    ConnectorSplitSource splitSource = splitManager.getSplits(JmxTransactionHandle.INSTANCE, SESSION, tableHandle, UNGROUPED_SCHEDULING);
    List<ConnectorSplit> allSplits = getAllSplits(splitSource);
    assertEquals(allSplits.size(), nodes.size());
    // Fixed naming: the set derived from the known cluster nodes is the
    // expectation; the hosts harvested from the splits are the actual result.
    Set<String> expectedNodes = nodes.stream().map(Node::getNodeIdentifier).collect(toSet());
    Set<String> actualNodes = new HashSet<>();
    for (ConnectorSplit split : allSplits) {
        List<HostAddress> addresses = split.getAddresses();
        // Each split must be pinned to exactly one host.
        assertEquals(addresses.size(), 1);
        actualNodes.add(addresses.get(0).getHostText());
    }
    assertEquals(actualNodes, expectedNodes);
}
Aggregations