
Example 16 with HostAddress

Use of io.prestosql.spi.HostAddress in project boostkit-bigdata by kunpengcompute.

Class TestHiveSplitSource, method testGroupSmallSplitReplicationFactor1.

@Test
public void testGroupSmallSplitReplicationFactor1() {
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(10);
    // replication factor 1: all splits share the same location
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(
            HiveTestUtils.SESSION, "database", "table",
            10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE,
            new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5),
            new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);
    List<HostAddress> hostAddress = new ArrayList<>();
    hostAddress.add(HostAddress.fromParts("vm1", 1));
    for (int i = 0; i < 30; i++) {
        hiveSplitSource.addToQueue(new TestSplit(i, hostAddress));
        assertEquals(hiveSplitSource.getBufferedInternalSplitCount(), i + 1);
    }
    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
    assertEquals(groupedConnectorSplits.size(), 3);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
    assertEquals(hiveSplitWrappers.get(0).getSplits().size(), 10);
    assertEquals(hiveSplitWrappers.get(1).getSplits().size(), 10);
    assertEquals(hiveSplitWrappers.get(2).getSplits().size(), 10);
}
Also used: CounterStat(io.airlift.stats.CounterStat) ArrayList(java.util.ArrayList) HostAddress(io.prestosql.spi.HostAddress) DataSize(io.airlift.units.DataSize) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Test(org.testng.annotations.Test)
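
The three assertions follow from maxSplitsToGroup = 10: with every split on the same host, the 30 buffered splits pack into ceil(30 / 10) = 3 wrappers of 10 splits each. A minimal sketch of that count-capped packing (illustrative only, not the actual HiveSplitSource code):

import java.util.ArrayList;
import java.util.List;

// Illustrative only: when all splits share one location, grouping
// degenerates to packing at most maxSplitsToGroup splits per wrapper.
static <T> List<List<T>> packByCount(List<T> splits, int maxSplitsToGroup) {
    List<List<T>> groups = new ArrayList<>();
    for (int i = 0; i < splits.size(); i += maxSplitsToGroup) {
        groups.add(new ArrayList<>(splits.subList(i, Math.min(i + maxSplitsToGroup, splits.size()))));
    }
    return groups;
}

Applied to the 30 splits above with a cap of 10, packByCount yields exactly the 3 groups of 10 that the assertions check.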

Example 17 with HostAddress

Use of io.prestosql.spi.HostAddress in project boostkit-bigdata by kunpengcompute.

Class TestHiveSplitSource, method testGroupSmallSplitDifferentFileSize.

@Test
public void testGroupSmallSplitDifferentFileSize() {
    // a mix of large and small file sizes; up to 100 splits may be grouped together
    HiveConfig hiveConfig = new HiveConfig();
    hiveConfig.setMaxSplitsToGroup(100);
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(
            HiveTestUtils.SESSION, "database", "table",
            10, 10, new DataSize(1, MEGABYTE), Integer.MAX_VALUE,
            new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5),
            new CounterStat(), null, null, null, hiveConfig, HiveStorageFormat.ORC);
    List<HostAddress> hostAddress = new ArrayList<>();
    hostAddress.add(HostAddress.fromParts("vm1", 1));
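    // 67108864 bytes = 64 MB; each size below is a fraction or multiple of it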
    hiveSplitSource.addToQueue(new TestSplit(1, OptionalInt.empty(), 67108864 / 2, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(2, OptionalInt.empty(), 67108864 / 100, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(3, OptionalInt.empty(), 67108864 / 10, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(4, OptionalInt.empty(), 67108864 / 2, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(5, OptionalInt.empty(), 67108864 / 4, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(6, OptionalInt.empty(), 67108864 / 100, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(7, OptionalInt.empty(), 67108864 / 20, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(8, OptionalInt.empty(), 67108864 / 100, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(9, OptionalInt.empty(), 67108864 / 2, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(10, OptionalInt.empty(), 67108864 / 4, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(11, OptionalInt.empty(), 67108864 / 4, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(12, OptionalInt.empty(), 67108864 / 4, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(13, OptionalInt.empty(), 67108864 / 5, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(14, OptionalInt.empty(), 67108864 * 2, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(15, OptionalInt.empty(), 7000, hostAddress));
    hiveSplitSource.addToQueue(new TestSplit(16, OptionalInt.empty(), 20000, hostAddress));
    List<ConnectorSplit> connectorSplits = getSplits(hiveSplitSource, 100);
    List<ConnectorSplit> groupedConnectorSplits = hiveSplitSource.groupSmallSplits(connectorSplits, 1);
    List<HiveSplitWrapper> hiveSplitWrappers = new ArrayList<>();
    groupedConnectorSplits.forEach(pendingSplit -> hiveSplitWrappers.add((HiveSplitWrapper) pendingSplit));
    assertEquals(groupedConnectorSplits.size(), 6);
}
Also used: CounterStat(io.airlift.stats.CounterStat) ArrayList(java.util.ArrayList) HostAddress(io.prestosql.spi.HostAddress) DataSize(io.airlift.units.DataSize) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) Test(org.testng.annotations.Test)
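
One packing rule that reproduces the expected count of 6 is sketched below, assuming splits at or above half the 64 MB target are left ungrouped while smaller ones are packed greedily up to the target; the actual groupSmallSplits heuristic may differ.

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch, not the real implementation: splits >= 32 MB stay
// single, smaller splits accumulate until the next one would exceed 64 MB.
static List<List<Long>> packBySize(List<Long> sizes, long targetBytes) {
    List<List<Long>> groups = new ArrayList<>();
    List<Long> current = new ArrayList<>();
    long currentBytes = 0;
    for (long size : sizes) {
        if (size >= targetBytes / 2) {
            groups.add(List.of(size)); // too large to group
            continue;
        }
        if (currentBytes + size > targetBytes) {
            groups.add(current);
            current = new ArrayList<>();
            currentBytes = 0;
        }
        current.add(size);
        currentBytes += size;
    }
    if (!current.isEmpty()) {
        groups.add(current);
    }
    return groups;
}

Under this rule the four splits of 32 MB or more (numbers 1, 4, 9 and 14) stay single and the remaining twelve pack into two groups, giving the 6 groups the test expects.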

Example 18 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

Class HBaseSplitManager, method getSplitsForScan.

/**
 * Get splits by slicing the rowKeys according to the first character of the rowKey. The range of first
 * characters can be specified when creating the table; the default is "0~9,a~z,A~Z" (if your rowKeys are
 * Chinese, set split_by_char = '一~锯'). This generates many start/end key pairs.
 *
 * @param tupleDomain tupleDomain
 * @param tableHandle tableHandle
 * @return splits
 */
private List<HBaseSplit> getSplitsForScan(TupleDomain<ColumnHandle> tupleDomain, HBaseTableHandle tableHandle) {
    List<HBaseSplit> splits = new ArrayList<>();
    TableName hbaseTableName = TableName.valueOf(tableHandle.getHbaseTableName().get());
    Map<Integer, List<Range>> ranges = predicateTransferToRanges(tupleDomain.getDomains().get());
    List<HostAddress> hostAddresses = new ArrayList<>();
    // splitByChar is read from the hetu metastore; the default value is "0~9,a~z,A~Z"
    String splitByChar = hbaseConnection.getTable(tableHandle.getTableName()).getSplitByChar().get();
    LOG.debug("Creating splits by the first char of the rowKey; table: " + hbaseTableName.getNameAsString() + ", first-char range: " + splitByChar);
    List<StartAndEndKey> startAndEndRowKeys = getStartAndEndKeys(splitByChar, Constants.START_END_KEYS_COUNT);
    for (StartAndEndKey startAndEndRowKey : startAndEndRowKeys) {
        splits.add(new HBaseSplit(
                tableHandle.getRowId(), tableHandle, hostAddresses,
                String.valueOf(startAndEndRowKey.getStart()),
                startAndEndRowKey.getEnd() + Constants.ROWKEY_TAIL,
                ranges, -1, false, null));
    }
    printSplits("Scan", splits);
    return splits;
}
Also used: TableName(org.apache.hadoop.hbase.TableName) ArrayList(java.util.ArrayList) StartAndEndKey(io.hetu.core.plugin.hbase.utils.StartAndEndKey) List(java.util.List) HostAddress(io.prestosql.spi.HostAddress)
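
For reference, a hypothetical sketch of how a splitByChar spec such as "0~9,a~z,A~Z" could expand into (start, end) character pairs; the real getStartAndEndKeys additionally subdivides each range into Constants.START_END_KEYS_COUNT slices, which this sketch omits.

import java.util.ArrayList;
import java.util.List;

// Hypothetical parser, for illustration only: each comma-separated entry
// is a "start~end" pair of single characters.
static List<char[]> parseSplitByChar(String splitByChar) {
    List<char[]> ranges = new ArrayList<>();
    for (String range : splitByChar.split(",")) {
        String[] bounds = range.split("~");
        ranges.add(new char[] {bounds[0].charAt(0), bounds[1].charAt(0)});
    }
    return ranges;
}

parseSplitByChar("0~9,a~z,A~Z") returns the pairs ('0','9'), ('a','z') and ('A','Z'), one slice of the rowKey space per pair.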

Example 19 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

Class TestQuery, method testHBaseRecordSetCursorIsBatchGet.

/**
 * testHBaseRecordSetCursorIsBatchGet
 */
@Test
public void testHBaseRecordSetCursorIsBatchGet() {
    HBaseTableHandle tableHandle = new HBaseTableHandle(
            "hbase", "test_table", "rowkey", false,
            "io.hetu.core.plugin.hbase.utils.serializers.StringRowSerializer",
            Optional.of("test_table"), "", TestUtils.createTupleDomain(1),
            TestUtils.createColumnList(), 0, OptionalLong.empty());
    HBaseSplit hBaseSplit = new HBaseSplit(
            "rowKey", tableHandle, new ArrayList<HostAddress>(1),
            "startrow", "endrow", new HashMap<>(), -1, false, null);
    HBaseRecordSet rSet = new HBaseRecordSet(hconn, session, hBaseSplit, tableHandle, TestUtils.createColumnList());
    rSet.cursor();
}
Also used: HBaseRecordSet(io.hetu.core.plugin.hbase.query.HBaseRecordSet) HBaseSplit(io.hetu.core.plugin.hbase.split.HBaseSplit) HostAddress(io.prestosql.spi.HostAddress) HBaseTableHandle(io.hetu.core.plugin.hbase.connector.HBaseTableHandle) Test(org.testng.annotations.Test)

Example 20 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

Class TestHiveSplit, method testJsonRoundTrip.

@Test
public void testJsonRoundTrip() {
    Properties schema = new Properties();
    schema.setProperty("foo", "bar");
    schema.setProperty("bar", "baz");
    ImmutableList<HivePartitionKey> partitionKeys = ImmutableList.of(new HivePartitionKey("a", "apple"), new HivePartitionKey("b", "42"));
    ImmutableList<HostAddress> addresses = ImmutableList.of(HostAddress.fromParts("127.0.0.1", 44), HostAddress.fromParts("127.0.0.1", 45));
    DeleteDeltaLocations.Builder deleteDeltaLocationsBuilder = DeleteDeltaLocations.builder(new Path("file:///data/fullacid"));
    deleteDeltaLocationsBuilder.addDeleteDelta(new Path("file:///data/fullacid/delete_delta_0000004_0000004_0000"), 4L, 4L, 0);
    deleteDeltaLocationsBuilder.addDeleteDelta(new Path("file:///data/fullacid/delete_delta_0000007_0000007_0000"), 7L, 7L, 0);
    DeleteDeltaLocations deleteDeltaLocations = deleteDeltaLocationsBuilder.build().get();
    Map<String, String> customSplitInfo = ImmutableMap.of("key", "value");
    HiveSplit expected = new HiveSplit(
            "db", "table", "partitionId", "path",
            42, 87, 88, 0, schema, partitionKeys, addresses,
            OptionalInt.empty(), true, ImmutableMap.of(1, HIVE_STRING),
            Optional.of(new HiveSplit.BucketConversion(
                    BUCKETING_V1, 32, 16,
                    ImmutableList.of(new HiveColumnHandle("col", HIVE_LONG, BIGINT.getTypeSignature(), 5, ColumnType.REGULAR, Optional.of("comment"))))),
            false, Optional.of(deleteDeltaLocations), Optional.empty(), false, customSplitInfo);
    String json = codec.toJson(expected);
    HiveSplit actual = codec.fromJson(json);
    assertEquals(actual.getDatabase(), expected.getDatabase());
    assertEquals(actual.getTable(), expected.getTable());
    assertEquals(actual.getPartitionName(), expected.getPartitionName());
    assertEquals(actual.getPath(), expected.getPath());
    assertEquals(actual.getStart(), expected.getStart());
    assertEquals(actual.getLength(), expected.getLength());
    assertEquals(actual.getFileSize(), expected.getFileSize());
    assertEquals(actual.getSchema(), expected.getSchema());
    assertEquals(actual.getPartitionKeys(), expected.getPartitionKeys());
    assertEquals(actual.getAddresses(), expected.getAddresses());
    assertEquals(actual.getColumnCoercions(), expected.getColumnCoercions());
    assertEquals(actual.getBucketConversion(), expected.getBucketConversion());
    assertEquals(actual.isForceLocalScheduling(), expected.isForceLocalScheduling());
    assertEquals(actual.isS3SelectPushdownEnabled(), expected.isS3SelectPushdownEnabled());
    assertEquals(actual.getDeleteDeltaLocations().get(), expected.getDeleteDeltaLocations().get());
    assertEquals(actual.getCustomSplitInfo(), expected.getCustomSplitInfo());
}
Also used: Path(org.apache.hadoop.fs.Path) Properties(java.util.Properties) HostAddress(io.prestosql.spi.HostAddress) Test(org.testng.annotations.Test)
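
The codec field is not shown in the excerpt; assuming it is an airlift JsonCodec<HiveSplit>, the round trip reduces to the standard pattern below (the real test presumably customizes the underlying ObjectMapper so Hive type signatures deserialize).

import io.airlift.json.JsonCodec;

// Minimal sketch of the JSON round-trip pattern used by the test.
JsonCodec<HiveSplit> codec = JsonCodec.jsonCodec(HiveSplit.class);
String json = codec.toJson(expected);    // object -> JSON string
HiveSplit actual = codec.fromJson(json); // JSON string -> object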

Aggregations

HostAddress (io.prestosql.spi.HostAddress) 38
ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit) 25
Test (org.testng.annotations.Test) 25
ArrayList (java.util.ArrayList) 23
CounterStat (io.airlift.stats.CounterStat) 18
DataSize (io.airlift.units.DataSize) 18
ImmutableList (com.google.common.collect.ImmutableList) 5
InternalNode (io.prestosql.metadata.InternalNode) 5
FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource) 5
HashMap (java.util.HashMap) 5
List (java.util.List) 5
HBaseSplit (io.hetu.core.plugin.hbase.split.HBaseSplit) 3
PrestoException (io.prestosql.spi.PrestoException) 3
ImmutableSet (com.google.common.collect.ImmutableSet) 2
HBaseColumnHandle (io.hetu.core.plugin.hbase.connector.HBaseColumnHandle) 2
HBaseTableHandle (io.hetu.core.plugin.hbase.connector.HBaseTableHandle) 2
HBaseRecordSet (io.hetu.core.plugin.hbase.query.HBaseRecordSet) 2
Split (io.prestosql.metadata.Split) 2
Node (io.prestosql.spi.Node) 2
ColumnHandle (io.prestosql.spi.connector.ColumnHandle) 2