use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class TpchSplitManager method getSplits.
/**
 * Produces a fixed split source holding {@code splitsPerNode} splits for every required worker node.
 * Each split carries its part number and the total number of parts.
 *
 * When snapshots are enabled for the session, every split is given the addresses of all nodes;
 * otherwise each split is given only the address of the node it was generated for.
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
    Set<Node> workers = nodeManager.getRequiredWorkerNodes();
    int totalParts = workers.size() * splitsPerNode;
    ImmutableList.Builder<ConnectorSplit> builder = ImmutableList.builder();

    if (!session.isSnapshotEnabled()) {
        // Split the data evenly across the available nodes, pinning each split to one node.
        int part = 0;
        for (Node worker : workers) {
            for (int n = 0; n < splitsPerNode; n++) {
                builder.add(new TpchSplit(part, totalParts, ImmutableList.of(worker.getHostAndPort())));
                part++;
            }
        }
        return new FixedSplitSource(builder.build());
    }

    // Snapshot: modify splits as needed to allow them to be scheduled on any node.
    // This allows them to be processed by a different worker after resume.
    List<HostAddress> allAddresses = workers.stream()
            .map(Node::getHostAndPort)
            .collect(Collectors.toList());
    for (int part = 0; part < totalParts; part++) {
        builder.add(new TpchSplit(part, totalParts, allAddresses));
    }
    return new FixedSplitSource(builder.build());
}
use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class MemoryMetadata method updateRowsOnHosts.
/**
 * Merges the given data fragments into the table's per-host fragment map and stores a new
 * {@link TableInfo} carrying the merged map and the current time.
 *
 * @param tableId id of the table whose fragments are being updated
 * @param fragments serialized fragments, each decoded with {@link MemoryDataFragment#fromSlice}
 */
private void updateRowsOnHosts(long tableId, Collection<Slice> fragments) {
    TableInfo info = getTableInfo(tableId);
    // The message arguments must not dereference info: they are evaluated before checkState
    // runs, so a null info would fail with a bare NullPointerException instead of this message.
    checkState(info != null, "Uninitialized tableId [%s]", tableId);

    // Work on a copy so the fragments of the existing TableInfo are left untouched.
    Map<HostAddress, MemoryDataFragment> dataFragments = new HashMap<>(info.getDataFragments());
    for (Slice fragment : fragments) {
        MemoryDataFragment memoryDataFragment = MemoryDataFragment.fromSlice(fragment);
        // Fragments reported for the same host are combined via MemoryDataFragment::merge.
        dataFragments.merge(memoryDataFragment.getHostAddress(), memoryDataFragment, MemoryDataFragment::merge);
    }
    updateTableInfo(tableId, new TableInfo(tableId, info.getSchemaName(), info.getTableName(), info.getColumns(), dataFragments, System.currentTimeMillis()));
}
use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class TestHBase method testGetSRecordSet.
/**
 * Requests a record set for a split over {@code hbase.test_table} and checks that it
 * reports five column types.
 */
@Test
public void testGetSRecordSet() {
    Map<Integer, List<Range>> noRanges = new HashMap<>();
    List<HostAddress> noHosts = new ArrayList<>(1);
    HBaseSplit hbaseSplit = new HBaseSplit("rowkey", TestUtils.createHBaseTableHandle(), noHosts, null, null, noRanges, 0, false, null);

    HBaseRecordSetProvider provider = new HBaseRecordSetProvider(hconn);
    RecordSet recordSet = provider.getRecordSet(
            new HBaseTransactionHandle(),
            session,
            hbaseSplit,
            TestUtils.createHBaseTableHandle(),
            hconn.getTable("hbase.test_table").getColumns());

    int columnTypeCount = recordSet.getColumnTypes().size();
    assertEquals(5, columnTypeCount);
}
use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class HBaseSplitManager method getSplitsForClientSide.
/**
 * Use client side mode: a snapshot of the table is created and one split is generated per region of
 * that snapshot. The client opens the regions to scan HDFS files without sending scan requests to the
 * RegionServer.
 * Reasons for performance improvement:
 * 1. The scanning process saves one network transmission. For scanning a large amount of data,
 * network transmission takes a large amount of time, which may involve data serialization and
 * deserialization overheads.
 * 2. RegionServer may become a bottleneck in normal scanning, but clientSide scanning does not.
 *
 * If the first snapshot attempt fails, creation is retried (with a fresh snapshot name and a one second
 * pause before each attempt) up to the configured retry count.
 *
 * @param tupleDomain tupleDomain whose domains are converted to ranges for every split
 * @param tableHandle tableHandle of the table to snapshot and scan
 * @return one split per region of the snapshot
 * @throws PrestoException if the table is not available, or if the thread is interrupted while waiting to retry
 */
private List<HBaseSplit> getSplitsForClientSide(TupleDomain<ColumnHandle> tupleDomain, HBaseTableHandle tableHandle) {
    List<HBaseSplit> splits = new ArrayList<>();
    TableName hbaseTableName = TableName.valueOf(tableHandle.getHbaseTableName().get());
    Map<Integer, List<Range>> ranges = predicateTransferToRanges(tupleDomain.getDomains().get());
    long startTime = System.currentTimeMillis();
    String snapshotName = tableHandle.getTableName() + "-" + startTime;
    try {
        if (!hbaseConnection.getHbaseAdmin().isTableAvailable(hbaseTableName)) {
            // Pass the table name as an argument: using it as (part of) the format pattern breaks on names containing '%'.
            throw new PrestoException(HBaseErrorCode.UNEXPECTED_HBASE_ERROR, format("%s is not available.", tableHandle.getHbaseTableName().get()));
        }
        hbaseConnection.getHbaseAdmin().snapshot(snapshotName, hbaseTableName);
        LOG.info("Create Snapshot " + snapshotName + " finished, spend " + (System.currentTimeMillis() - startTime) + " mill seconds.");
    } catch (PrestoException e) {
        // Same error code and message as before, but keep the original exception as the cause.
        throw new PrestoException(HBaseErrorCode.UNEXPECTED_HBASE_ERROR, format("%s is not available.", tableHandle.getHbaseTableName().get()), e);
    } catch (Exception e) {
        // Any other failure is treated as a failed snapshot attempt and retried under a new name.
        int retryCreateSnapshotNumber = hbaseConnection.getHbaseConfig().getRetryCreateSnapshotNumber();
        for (int retry = 0; retry < retryCreateSnapshotNumber; retry++) {
            try {
                Thread.sleep(1000);
                snapshotName = tableHandle.getTableName() + "-" + System.currentTimeMillis();
                hbaseConnection.getHbaseAdmin().snapshot(snapshotName, hbaseTableName);
                LOG.info("Recreate snapshot success! snapshotName is " + snapshotName + ", retried :" + (retry + 1) + " times, using " + (System.currentTimeMillis() - startTime) + " mill seconds.");
                break;
            } catch (InterruptedException interrupted) {
                // Do not keep retrying once interrupted: restore the flag and abort split generation.
                Thread.currentThread().interrupt();
                throw new PrestoException(HBaseErrorCode.UNEXPECTED_HBASE_ERROR, format("Interrupted while retrying to create a snapshot of %s", tableHandle.getHbaseTableName().get()), interrupted);
            } catch (Exception ee) {
                if (retry == retryCreateSnapshotNumber - 1) {
                    LOG.error("Too many people create snapshot for the same table, maybe you should set 'hbase.client.side.snapshot.retry' more larger.");
                    LOG.error(ee, "Retry: create snapshot failed, snapshotName is " + snapshotName + ", retried :" + retryCreateSnapshotNumber + " times, track:" + ee.getMessage());
                }
            }
        }
        // NOTE(review): when every retry fails, execution still continues below with a snapshotName
        // that was never created; confirm how Utils.getRegionInfos behaves in that case.
    }
    // get regions from snapshot
    List<RegionInfo> regionInfos = Utils.getRegionInfos(snapshotName, hbaseConnection);
    // Client-side splits carry no host addresses; the same empty list is shared by all of them.
    List<HostAddress> hostAddresses = new ArrayList<>();
    // create splits
    for (int index = 0; index < regionInfos.size(); index++) {
        // Client side region scanner using no startKey and endKey.
        splits.add(new HBaseSplit(tableHandle.getRowId(), tableHandle, hostAddresses, null, null, ranges, index, false, snapshotName));
    }
    printSplits("Client Side", splits);
    return splits;
}
use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.
the class HBaseSplitManager method getSplitsForBatchGet.
/**
 * If the predicate of sql includes "rowKey='xxx'" or "rowKey in ('xxx','xxx')",
 * we can specify rowkey values in each split, then performance will be good.
 *
 * The ordered ranges of the row id column's domain are cut into consecutive batches and one split
 * is created per batch. No split is created when there is no domain for the row id column.
 *
 * @param tupleDomain tupleDomain whose domains are searched for the row id column
 * @param tableHandle tableHandle providing the row id name and ordinal
 * @return splits, each holding one batch of row id ranges
 */
private List<HBaseSplit> getSplitsForBatchGet(TupleDomain<ColumnHandle> tupleDomain, HBaseTableHandle tableHandle) {
    List<HBaseSplit> splits = new ArrayList<>();
    Domain rowIdDomain = null;
    Map<ColumnHandle, Domain> domains = tupleDomain.getDomains().get();
    // Locate the domain of the row id column by comparing column ordinals.
    for (Map.Entry<ColumnHandle, Domain> entry : domains.entrySet()) {
        ColumnHandle handle = entry.getKey();
        if (handle instanceof HBaseColumnHandle) {
            HBaseColumnHandle columnHandle = (HBaseColumnHandle) handle;
            if (columnHandle.getOrdinal() == tableHandle.getRowIdOrdinal()) {
                rowIdDomain = entry.getValue();
            }
        }
    }
    List<Range> rowIds = rowIdDomain != null ? rowIdDomain.getValues().getRanges().getOrderedRanges() : new ArrayList<>();
    int rangeSize = rowIds.size();
    // Each full split holds at least BATCHGET_SPLIT_RECORD_COUNT row ids, and the number of splits
    // never exceeds BATCHGET_SPLIT_MAX_COUNT. The batch size needed to respect the split limit is
    // rounded up: rounding down (plain integer division) can leave a remainder that spills into
    // one split more than the limit.
    int sizeForMaxCount = (rangeSize + Constants.BATCHGET_SPLIT_MAX_COUNT - 1) / Constants.BATCHGET_SPLIT_MAX_COUNT;
    int maxSplitSize = Math.max(Constants.BATCHGET_SPLIT_RECORD_COUNT, sizeForMaxCount);
    // Batch-get splits carry no host addresses; the same empty list is shared by all of them.
    List<HostAddress> hostAddresses = new ArrayList<>();
    int currentIndex = 0;
    while (currentIndex < rangeSize) {
        int endIndex = Math.min(currentIndex + maxSplitSize, rangeSize);
        Map<Integer, List<Range>> splitRange = new HashMap<>();
        splitRange.put(tableHandle.getRowIdOrdinal(), rowIds.subList(currentIndex, endIndex));
        splits.add(new HBaseSplit(tableHandle.getRowId(), tableHandle, hostAddresses, null, null, splitRange, -1, false, null));
        currentIndex = endIndex;
    }
    printSplits("Batch Get", splits);
    return splits;
}
Aggregations