Use of io.prestosql.spi.HostAddress in project boostkit-bigdata by kunpengcompute.
The class TestHiveSplitSource, method testGroupSmallSplitReplicationFactor1.
@Test
public void testGroupSmallSplitReplicationFactor1()
{
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(10);
    // replication factor 1: all splits share the same location, so every
    // 10 small splits should collapse into one grouped split
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(
            HiveTestUtils.SESSION, "database", "table", 10, 10,
            new DataSize(1, MEGABYTE), Integer.MAX_VALUE,
            new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5),
            new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);
    List<HostAddress> hostAddress = new ArrayList<>();
    hostAddress.add(HostAddress.fromParts("vm1", 1));
    for (int i = 0; i < 30; i++) {
        hiveSplitSource.addToQueue(new TestSplit(i, hostAddress));
        assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), i + 1);
    }
    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
    // 30 co-located splits grouped 10 at a time yield 3 wrappers
    assertEquals(groupedConnectorSplits.size(), 3);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
    assertEquals(hiveSplitWrappers.get(0).getSplits().size(), 10);
    assertEquals(hiveSplitWrappers.get(1).getSplits().size(), 10);
    assertEquals(hiveSplitWrappers.get(2).getSplits().size(), 10);
}
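For context, io.prestosql.spi.HostAddress is a small immutable host/port value class; a minimal standalone sketch of its factory methods and accessors (independent of the test above):

import io.prestosql.spi.HostAddress;

public class HostAddressDemo
{
    public static void main(String[] args)
    {
        // fromParts builds an address from an explicit host and port
        HostAddress address = HostAddress.fromParts("vm1", 1);
        System.out.println(address.getHostText()); // vm1
        System.out.println(address.getPort()); // 1
        // fromString parses the "host:port" form produced by toString()
        HostAddress parsed = HostAddress.fromString("vm1:1");
        System.out.println(parsed.equals(address)); // true: value semantics
    }
}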
Use of io.prestosql.spi.HostAddress in project boostkit-bigdata by kunpengcompute.
The class TestHiveSplitSource, method testGroupSmallSplitDifferentFileSize.
@Test
public void testGroupSmallSplitDifferentFileSize()
{
    // a mix of large and small files, 16 in total (67108864 bytes = 64 MB)
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(100);
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(
            HiveTestUtils.SESSION, "database", "table", 10, 10,
            new DataSize(1, MEGABYTE), Integer.MAX_VALUE,
            new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5),
            new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);
    List<HostAddress> hostAddress = new ArrayList<>();
    hostAddress.add(HostAddress.fromParts("vm1", 1));
    hiveSplitSource.addToQueue(new TestSplit(1, OptionalInt.empty(), 67108864 / 2, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(2, OptionalInt.empty(), 67108864 / 100, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(3, OptionalInt.empty(), 67108864 / 10, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(4, OptionalInt.empty(), 67108864 / 2, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(5, OptionalInt.empty(), 67108864 / 4, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(6, OptionalInt.empty(), 67108864 / 100, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(7, OptionalInt.empty(), 67108864 / 20, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(8, OptionalInt.empty(), 67108864 / 100, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(9, OptionalInt.empty(), 67108864 / 2, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(10, OptionalInt.empty(), 67108864 / 4, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(11, OptionalInt.empty(), 67108864 / 4, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(12, OptionalInt.empty(), 67108864 / 4, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(13, OptionalInt.empty(), 67108864 / 5, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(14, OptionalInt.empty(), 67108864 * 2, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(15, OptionalInt.empty(), 7000, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(16, OptionalInt.empty(), 20000, hostAddress));
    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
    // the 16 mixed-size splits pack into 6 grouped splits
    assertEquals(groupedConnectorSplits.size(), 6);
}
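The packing policy itself lives in HiveSplitSource.groupSmallSplits and is not shown here. As a rough mental model only (a hypothetical sketch, not the shipped algorithm), co-located small splits can be accumulated greedily until a byte threshold such as 64 MB is reached:

import java.util.ArrayList;
import java.util.List;

public class GreedyGroupingSketch
{
    // Hypothetical greedy size-based grouping; the real groupSmallSplits
    // may order, cap, and pack splits differently.
    static List<List<Long>> groupBySize(List<Long> splitSizes, long thresholdBytes)
    {
        List<List<Long>> groups = new ArrayList<>();
        List<Long> current = new ArrayList<>();
        long currentBytes = 0;
        for (long size : splitSizes) {
            // close the current group once adding this split would exceed the threshold
            if (!current.isEmpty() && currentBytes + size > thresholdBytes) {
                groups.add(current);
                current = new ArrayList<>();
                currentBytes = 0;
            }
            current.add(size);
            currentBytes += size;
        }
        if (!current.isEmpty()) {
            groups.add(current);
        }
        return groups;
    }
}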
Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
The class HBaseSplitManager, method getSplitsForScan.
/**
 * Get splits by slicing the rowKeys according to the first character of the rowKey. The user can specify the
 * character set when creating the table; the default value is "0~9,a~z,A~Z", and if your rowKeys are Chinese
 * you should set split_by_char = '一~锯'. This generates many start/end key pairs.
 *
 * @param tupleDomain tupleDomain
 * @param tableHandle tableHandle
 * @return splits
 */
private List<HBaseSplit> getSplitsForScan(TupleDomain<ColumnHandle> tupleDomain, HBaseTableHandle tableHandle)
{
    List<HBaseSplit> splits = new ArrayList<>();
    TableName hbaseTableName = TableName.valueOf(tableHandle.getHbaseTableName().get());
    Map<Integer, List<Range>> ranges = predicateTransferToRanges(tupleDomain.getDomains().get());
    List<HostAddress> hostAddresses = new ArrayList<>();
    // splitByChar is read from hetuMetastore; the default value is "0~9,a~z,A~Z"
    String splitByChar = hbaseConnection.getTable(tableHandle.getTableName()).getSplitByChar().get();
    LOG.debug("Create multi-splits by the first char of rowKey, table is " + hbaseTableName.getNameAsString() + ", the range of first char is : " + splitByChar);
    List<StartAndEndKey> startAndEndRowKeys = getStartAndEndKeys(splitByChar, Constants.START_END_KEYS_COUNT);
    for (StartAndEndKey startAndEndRowKey : startAndEndRowKeys) {
        // each start/end pair becomes one split covering that slice of the rowkey space
        splits.add(new HBaseSplit(tableHandle.getRowId(), tableHandle, hostAddresses,
                String.valueOf(startAndEndRowKey.getStart()), startAndEndRowKey.getEnd() + Constants.ROWKEY_TAIL,
                ranges, -1, false, null));
    }
    printSplits("Scan", splits);
    return splits;
}
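getStartAndEndKeys is internal to the connector, but the splitByChar format is simple. Below is a hypothetical sketch of expanding a value like "0~9,a~z" into per-segment character ranges; the real method additionally subdivides the ranges into Constants.START_END_KEYS_COUNT slices, and KeyRange is a stand-in for the connector's StartAndEndKey:

import java.util.ArrayList;
import java.util.List;

public class SplitByCharSketch
{
    // Hypothetical stand-in for the connector's StartAndEndKey
    static class KeyRange
    {
        final char start;
        final char end;

        KeyRange(char start, char end)
        {
            this.start = start;
            this.end = end;
        }
    }

    // Expand "0~9,a~z" into one character range per comma-separated segment
    static List<KeyRange> parse(String splitByChar)
    {
        List<KeyRange> ranges = new ArrayList<>();
        for (String segment : splitByChar.split(",")) {
            String[] bounds = segment.split("~");
            ranges.add(new KeyRange(bounds[0].charAt(0), bounds[1].charAt(0)));
        }
        return ranges;
    }
}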
Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
The class TestQuery, method testHBaseRecordSetCursorIsBatchGet.
/**
 * testHBaseRecordSetCursorIsBatchGet
 */
@Test
public void testHBaseRecordSetCursorIsBatchGet()
{
    HBaseTableHandle tableHandle = new HBaseTableHandle("hbase", "test_table", "rowkey", false,
            "io.hetu.core.plugin.hbase.utils.serializers.StringRowSerializer", Optional.of("test_table"), "",
            TestUtils.createTupleDomain(1), TestUtils.createColumnList(), 0, OptionalLong.empty());
    HBaseSplit hBasesplit = new HBaseSplit("rowKey", tableHandle, new ArrayList<HostAddress>(1),
            "startrow", "endrow", new HashMap<>(), -1, false, null);
    HBaseRecordSet rSet = new HBaseRecordSet(hconn, session, hBasesplit, tableHandle, TestUtils.createColumnList());
    rSet.cursor();
}
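The test name refers to the cursor's read path: when the rowkey constraint enumerates discrete values, the cursor can issue a batched Get instead of a range Scan over startrow/endrow. A hypothetical sketch of that decision (the actual check lives inside HBaseRecordSet and may differ):

import java.util.List;
import java.util.Map;
import io.prestosql.spi.predicate.Range;

public class BatchGetSketch
{
    // Hypothetical: a batched Get applies only when every range on the
    // rowkey column is a single discrete value; otherwise a Scan is used.
    static boolean isBatchGet(Map<Integer, List<Range>> ranges, int rowKeyOrdinal)
    {
        List<Range> rowKeyRanges = ranges.get(rowKeyOrdinal);
        return rowKeyRanges != null && rowKeyRanges.stream().allMatch(Range::isSingleValue);
    }
}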
Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
The class TestHiveSplit, method testJsonRoundTrip.
@Test
public void testJsonRoundTrip()
{
    Properties schema = new Properties();
    schema.setProperty("foo", "bar");
    schema.setProperty("bar", "baz");
    ImmutableList<HivePartitionKey> partitionKeys = ImmutableList.of(new HivePartitionKey("a", "apple"), new HivePartitionKey("b", "42"));
    ImmutableList<HostAddress> addresses = ImmutableList.of(HostAddress.fromParts("127.0.0.1", 44), HostAddress.fromParts("127.0.0.1", 45));
    DeleteDeltaLocations.Builder deleteDeltaLocationsBuilder = DeleteDeltaLocations.builder(new Path("file:///data/fullacid"));
    deleteDeltaLocationsBuilder.addDeleteDelta(new Path("file:///data/fullacid/delete_delta_0000004_0000004_0000"), 4L, 4L, 0);
    deleteDeltaLocationsBuilder.addDeleteDelta(new Path("file:///data/fullacid/delete_delta_0000007_0000007_0000"), 7L, 7L, 0);
    DeleteDeltaLocations deleteDeltaLocations = deleteDeltaLocationsBuilder.build().get();
    Map<String, String> customSplitInfo = ImmutableMap.of("key", "value");
    HiveSplit expected = new HiveSplit(
            "db", "table", "partitionId", "path", 42, 87, 88, 0,
            schema, partitionKeys, addresses, OptionalInt.empty(), true,
            ImmutableMap.of(1, HIVE_STRING),
            Optional.of(new HiveSplit.BucketConversion(BUCKETING_V1, 32, 16,
                    ImmutableList.of(new HiveColumnHandle("col", HIVE_LONG, BIGINT.getTypeSignature(), 5, ColumnType.REGULAR, Optional.of("comment"))))),
            false, Optional.of(deleteDeltaLocations), Optional.empty(), false, customSplitInfo);
    String json = codec.toJson(expected);
    HiveSplit actual = codec.fromJson(json);
    assertEquals(actual.getDatabase(), expected.getDatabase());
    assertEquals(actual.getTable(), expected.getTable());
    assertEquals(actual.getPartitionName(), expected.getPartitionName());
    assertEquals(actual.getPath(), expected.getPath());
    assertEquals(actual.getStart(), expected.getStart());
    assertEquals(actual.getLength(), expected.getLength());
    assertEquals(actual.getFileSize(), expected.getFileSize());
    assertEquals(actual.getSchema(), expected.getSchema());
    assertEquals(actual.getPartitionKeys(), expected.getPartitionKeys());
    assertEquals(actual.getAddresses(), expected.getAddresses());
    assertEquals(actual.getColumnCoercions(), expected.getColumnCoercions());
    assertEquals(actual.getBucketConversion(), expected.getBucketConversion());
    assertEquals(actual.isForceLocalScheduling(), expected.isForceLocalScheduling());
    assertEquals(actual.isS3SelectPushdownEnabled(), expected.isS3SelectPushdownEnabled());
    assertEquals(actual.getDeleteDeltaLocations().get(), expected.getDeleteDeltaLocations().get());
    assertEquals(actual.getCustomSplitInfo(), expected.getCustomSplitInfo());
}
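The codec field is declared outside the snippet; a plausible construction with Airlift's JSON support is shown below (an assumption, not the test's verbatim setup — the real test class may register extra deserializers, for example for Type, before HiveSplit round-trips cleanly):

import io.airlift.json.JsonCodec;
import io.airlift.json.JsonCodecFactory;
import io.airlift.json.ObjectMapperProvider;

// Hypothetical codec setup for the round-trip test above
private final JsonCodec<HiveSplit> codec =
        new JsonCodecFactory(new ObjectMapperProvider()).jsonCodec(HiveSplit.class);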