
Example 1 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

The class TpchSplitManager, method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
    Set<Node> nodes = nodeManager.getRequiredWorkerNodes();
    int totalParts = nodes.size() * splitsPerNode;
    int partNumber = 0;
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    if (session.isSnapshotEnabled()) {
        // Snapshot: Modify splits as needed to allow them to be scheduled on any node.
        // This allows them to be processed by a different worker after resume.
        List<HostAddress> addresses = nodes.stream().map(Node::getHostAndPort).collect(Collectors.toList());
        for (int i = 0; i < totalParts; i++) {
            splits.add(new TpchSplit(partNumber, totalParts, addresses));
            partNumber++;
        }
    } else {
        // Split the data across the available worker nodes, pinning each split to a single node.
        for (Node node : nodes) {
            for (int i = 0; i < splitsPerNode; i++) {
                splits.add(new TpchSplit(partNumber, totalParts, ImmutableList.of(node.getHostAndPort())));
                partNumber++;
            }
        }
    }
    return new FixedSplitSource(splits.build());
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource) Node(io.prestosql.spi.Node) HostAddress(io.prestosql.spi.HostAddress) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit)
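
HostAddress values are usually obtained from a Node, as above via Node::getHostAndPort, or built directly from a host and port. The short sketch below is not part of hetu-core; the host names and ports are invented, and it only illustrates the two addressing strategies the method chooses between: listing every worker (snapshot mode, so a split can be rescheduled anywhere) versus pinning a split to a single worker.

import io.prestosql.spi.HostAddress;

import java.util.List;

public final class HostAddressSketch
{
    private HostAddressSketch() {}

    public static void main(String[] args)
    {
        // Hypothetical worker addresses; real code would take them from the NodeManager.
        List<HostAddress> workers = List.of(
                HostAddress.fromParts("worker-1.example.com", 8080),
                HostAddress.fromParts("worker-2.example.com", 8080));

        // Snapshot mode: a split lists every worker, so it can run (or resume) on any of them.
        List<HostAddress> anyNode = workers;

        // Normal mode: a split is pinned to exactly one worker.
        List<HostAddress> pinned = List.of(workers.get(0));

        System.out.println("any node: " + anyNode);
        System.out.println("pinned: " + pinned);
    }
}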

Example 2 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

The class MemoryMetadata, method updateRowsOnHosts.

private void updateRowsOnHosts(long tableId, Collection<Slice> fragments) {
    TableInfo info = getTableInfo(tableId);
    checkState(info != null, "Uninitialized tableId [%s]", tableId);
    Map<HostAddress, MemoryDataFragment> dataFragments = new HashMap<>(info.getDataFragments());
    for (Slice fragment : fragments) {
        MemoryDataFragment memoryDataFragment = MemoryDataFragment.fromSlice(fragment);
        dataFragments.merge(memoryDataFragment.getHostAddress(), memoryDataFragment, MemoryDataFragment::merge);
    }
    updateTableInfo(tableId, new TableInfo(tableId, info.getSchemaName(), info.getTableName(), info.getColumns(), dataFragments, System.currentTimeMillis()));
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) Slice(io.airlift.slice.Slice) HostAddress(io.prestosql.spi.HostAddress)
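
The key step above is Map.merge, which folds each incoming fragment into the per-host map. The sketch below is illustrative only: Fragment is a hypothetical stand-in for MemoryDataFragment (the real class also handles Slice serialization), but it shows the same accumulate-by-HostAddress pattern in isolation.

import io.prestosql.spi.HostAddress;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public final class MergeByHostSketch
{
    // Hypothetical fragment: a host plus a row count (stand-in for MemoryDataFragment).
    record Fragment(HostAddress host, long rows)
    {
        static Fragment merge(Fragment a, Fragment b)
        {
            // Fragments for the same host are combined by summing their row counts.
            return new Fragment(a.host(), a.rows() + b.rows());
        }
    }

    public static void main(String[] args)
    {
        HostAddress worker = HostAddress.fromParts("worker-1.example.com", 8080);
        List<Fragment> incoming = List.of(new Fragment(worker, 10), new Fragment(worker, 5));

        Map<HostAddress, Fragment> byHost = new HashMap<>();
        for (Fragment fragment : incoming) {
            // Same pattern as updateRowsOnHosts: accumulate fragments per host.
            byHost.merge(fragment.host(), fragment, Fragment::merge);
        }
        System.out.println(byHost.get(worker).rows()); // prints 15
    }
}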

Example 3 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

The class TestHBase, method testGetSRecordSet.

/**
 * testGetSRecordSet
 */
@Test
public void testGetSRecordSet() {
    List<HostAddress> hostAddressList = new ArrayList<>(1);
    Map<Integer, List<Range>> ranges = new HashMap<>();
    HBaseSplit split = new HBaseSplit("rowkey", TestUtils.createHBaseTableHandle(), hostAddressList, null, null, ranges, 0, false, null);
    HBaseRecordSetProvider hrsp = new HBaseRecordSetProvider(hconn);
    RecordSet rs = hrsp.getRecordSet(new HBaseTransactionHandle(), session, split, TestUtils.createHBaseTableHandle(), hconn.getTable("hbase.test_table").getColumns());
    assertEquals(5, rs.getColumnTypes().size());
}
Also used : HashMap(java.util.HashMap) HBaseRecordSetProvider(io.hetu.core.plugin.hbase.query.HBaseRecordSetProvider) ArrayList(java.util.ArrayList) HBaseTransactionHandle(io.hetu.core.plugin.hbase.connector.HBaseTransactionHandle) List(java.util.List) HBaseSplit(io.hetu.core.plugin.hbase.split.HBaseSplit) RecordSet(io.prestosql.spi.connector.RecordSet) HostAddress(io.prestosql.spi.HostAddress) Test(org.testng.annotations.Test)
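
For context, a RecordSet obtained from a record-set provider (such as rs in the test above) is consumed through a RecordCursor. The sketch below is not from hetu-core; it is a minimal illustration of that cursor loop.

import io.prestosql.spi.connector.RecordCursor;
import io.prestosql.spi.connector.RecordSet;

public final class RecordSetSketch
{
    private RecordSetSketch() {}

    // Counts rows; real readers would also pull column values for each field.
    public static long countRows(RecordSet recordSet)
    {
        RecordCursor cursor = recordSet.cursor();
        try {
            long rows = 0;
            while (cursor.advanceNextPosition()) {
                // Values would be read here with cursor.getLong/getSlice/etc.,
                // after checking cursor.isNull(field).
                rows++;
            }
            return rows;
        }
        finally {
            cursor.close();
        }
    }
}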

Example 4 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

The class HBaseSplitManager, method getSplitsForClientSide.

/**
 * Client-side mode creates a snapshot of the table in HDFS. The client then opens the regions and
 * scans the snapshot's HDFS files directly, without sending scan requests to the RegionServer.
 * Reasons for the performance improvement:
 * 1. Scanning saves one network transfer. For large scans, network transfer takes a significant
 *    amount of time and may involve serialization and deserialization overhead.
 * 2. The RegionServer can become a bottleneck during normal scanning; client-side scanning avoids it.
 *
 * @param tupleDomain tupleDomain
 * @param tableHandle tableHandle
 * @return splits
 */
private List<HBaseSplit> getSplitsForClientSide(TupleDomain<ColumnHandle> tupleDomain, HBaseTableHandle tableHandle) {
    List<HBaseSplit> splits = new ArrayList<>();
    TableName hbaseTableName = TableName.valueOf(tableHandle.getHbaseTableName().get());
    Map<Integer, List<Range>> ranges = predicateTransferToRanges(tupleDomain.getDomains().get());
    long startTime = System.currentTimeMillis();
    String snapshotName = tableHandle.getTableName() + "-" + startTime;
    try {
        if (!hbaseConnection.getHbaseAdmin().isTableAvailable(hbaseTableName)) {
            throw new PrestoException(HBaseErrorCode.UNEXPECTED_HBASE_ERROR, format("%s is not available.", tableHandle.getHbaseTableName().get()));
        }
        hbaseConnection.getHbaseAdmin().snapshot(snapshotName, hbaseTableName);
        LOG.info("Create Snapshot " + snapshotName + " finished, spend " + (System.currentTimeMillis() - startTime) + " mill seconds.");
    } catch (Exception e) {
        if (e instanceof PrestoException) {
            throw new PrestoException(HBaseErrorCode.UNEXPECTED_HBASE_ERROR, format("%s is not available.", tableHandle.getHbaseTableName().get()));
        }
        int retryCreateSnapshotNumber = hbaseConnection.getHbaseConfig().getRetryCreateSnapshotNumber();
        for (int retry = 0; retry < retryCreateSnapshotNumber; retry++) {
            try {
                Thread.sleep(1000);
                snapshotName = tableHandle.getTableName() + "-" + System.currentTimeMillis();
                hbaseConnection.getHbaseAdmin().snapshot(snapshotName, hbaseTableName);
                LOG.info("Recreate snapshot success! snapshotName is " + snapshotName + ", retried :" + (retry + 1) + " times, using " + (System.currentTimeMillis() - startTime) + " mill seconds.");
                break;
            } catch (Exception ee) {
                if (retry == retryCreateSnapshotNumber - 1) {
                    LOG.error("Too many people create snapshot for the same table, maybe you should set 'hbase.client.side.snapshot.retry' more larger.");
                    LOG.error(ee, "Retry: create snapshot failed, snapshotName is " + snapshotName + ", retried :" + retryCreateSnapshotNumber + " times, track:" + ee.getMessage());
                }
            }
        }
    }
    // get regions from snapshot
    List<RegionInfo> regionInfos = Utils.getRegionInfos(snapshotName, hbaseConnection);
    List<HostAddress> hostAddresses = new ArrayList<>();
    // create splits
    for (int index = 0; index < regionInfos.size(); index++) {
        // The client-side region scanner does not use a startKey or endKey.
        splits.add(new HBaseSplit(tableHandle.getRowId(), tableHandle, hostAddresses, null, null, ranges, index, false, snapshotName));
    }
    printSplits("Client Side", splits);
    return splits;
}
Also used : ArrayList(java.util.ArrayList) PrestoException(io.prestosql.spi.PrestoException) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) HostAddress(io.prestosql.spi.HostAddress) TableName(org.apache.hadoop.hbase.TableName) List(java.util.List)
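
The method above relies on HBase's client-side snapshot scanning. The sketch below is not taken from hetu-core; it shows the underlying HBase client API (TableSnapshotScanner) reading a snapshot's files straight from HDFS, with a placeholder restore directory and snapshot name.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableSnapshotScanner;

import java.io.IOException;

public final class ClientSideScanSketch
{
    private ClientSideScanSketch() {}

    public static long scanSnapshot(String snapshotName) throws IOException
    {
        Configuration conf = HBaseConfiguration.create();
        // Temporary directory where the snapshot is restored for reading; placeholder path.
        Path restoreDir = new Path("/tmp/hbase-snapshot-restore");
        long rows = 0;
        try (TableSnapshotScanner scanner = new TableSnapshotScanner(conf, restoreDir, snapshotName, new Scan())) {
            // Rows come from the snapshot's HFiles in HDFS; no RegionServer is contacted.
            for (Result result : scanner) {
                rows++;
            }
        }
        return rows;
    }
}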

Example 5 with HostAddress

Use of io.prestosql.spi.HostAddress in project hetu-core by openlookeng.

The class HBaseSplitManager, method getSplitsForBatchGet.

/**
 * If the SQL predicate contains "rowKey='xxx'" or "rowKey in ('xxx','xxx')",
 * the specific rowkey values can be assigned to each split, which gives good performance.
 *
 * @param tupleDomain tupleDomain
 * @param tableHandle tableHandle
 * @return splits
 */
private List<HBaseSplit> getSplitsForBatchGet(TupleDomain<ColumnHandle> tupleDomain, HBaseTableHandle tableHandle) {
    List<HBaseSplit> splits = new ArrayList<>();
    Domain rowIdDomain = null;
    Map<ColumnHandle, Domain> domains = tupleDomain.getDomains().get();
    for (Map.Entry<ColumnHandle, Domain> entry : domains.entrySet()) {
        ColumnHandle handle = entry.getKey();
        if (handle instanceof HBaseColumnHandle) {
            HBaseColumnHandle columnHandle = (HBaseColumnHandle) handle;
            if (columnHandle.getOrdinal() == tableHandle.getRowIdOrdinal()) {
                rowIdDomain = entry.getValue();
            }
        }
    }
    List<Range> rowIds = rowIdDomain != null ? rowIdDomain.getValues().getRanges().getOrderedRanges() : new ArrayList<>();
    int maxSplitSize;
    // Each split gets at least BATCHGET_SPLIT_RECORD_COUNT row keys, and the number of splits is capped at roughly BATCHGET_SPLIT_MAX_COUNT.
    if (rowIds.size() / Constants.BATCHGET_SPLIT_RECORD_COUNT > Constants.BATCHGET_SPLIT_MAX_COUNT) {
        maxSplitSize = rowIds.size() / Constants.BATCHGET_SPLIT_MAX_COUNT;
    } else {
        maxSplitSize = Constants.BATCHGET_SPLIT_RECORD_COUNT;
    }
    List<HostAddress> hostAddresses = new ArrayList<>();
    int rangeSize = rowIds.size();
    int currentIndex = 0;
    while (currentIndex < rangeSize) {
        int endIndex = rangeSize - currentIndex > maxSplitSize ? (currentIndex + maxSplitSize) : rangeSize;
        Map<Integer, List<Range>> splitRange = new HashMap<>();
        splitRange.put(tableHandle.getRowIdOrdinal(), rowIds.subList(currentIndex, endIndex));
        splits.add(new HBaseSplit(tableHandle.getRowId(), tableHandle, hostAddresses, null, null, splitRange, -1, false, null));
        currentIndex = endIndex;
    }
    printSplits("Batch Get", splits);
    return splits;
}
Also used : HBaseColumnHandle(io.hetu.core.plugin.hbase.connector.HBaseColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Range(io.prestosql.spi.predicate.Range) HostAddress(io.prestosql.spi.HostAddress) List(java.util.List) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Domain(io.prestosql.spi.predicate.Domain) Map(java.util.Map)
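
The split-size arithmetic above is easiest to see with concrete numbers. The sketch below is illustrative only: it assumes BATCHGET_SPLIT_RECORD_COUNT is 20 and BATCHGET_SPLIT_MAX_COUNT is 30 (the values the comment implies, not verified against hetu-core's Constants class) and reproduces the partitioning for 1000 row-key ranges.

import java.util.ArrayList;
import java.util.List;

public final class BatchGetSplitSketch
{
    // Assumed values; stand-ins for Constants.BATCHGET_SPLIT_RECORD_COUNT and BATCHGET_SPLIT_MAX_COUNT.
    private static final int SPLIT_RECORD_COUNT = 20;
    private static final int SPLIT_MAX_COUNT = 30;

    private BatchGetSplitSketch() {}

    public static List<List<Integer>> partition(List<Integer> rowIds)
    {
        // Same rule as getSplitsForBatchGet: grow the split size once 30 splits would be exceeded.
        int maxSplitSize = rowIds.size() / SPLIT_RECORD_COUNT > SPLIT_MAX_COUNT
                ? rowIds.size() / SPLIT_MAX_COUNT
                : SPLIT_RECORD_COUNT;
        List<List<Integer>> splits = new ArrayList<>();
        int currentIndex = 0;
        while (currentIndex < rowIds.size()) {
            int endIndex = Math.min(currentIndex + maxSplitSize, rowIds.size());
            splits.add(rowIds.subList(currentIndex, endIndex));
            currentIndex = endIndex;
        }
        return splits;
    }

    public static void main(String[] args)
    {
        // 1000 row keys: 1000 / 20 = 50 > 30, so maxSplitSize = 1000 / 30 = 33,
        // giving 31 splits (30 splits of 33 keys plus one split with the remaining 10).
        List<Integer> rowIds = new ArrayList<>();
        for (int i = 0; i < 1000; i++) {
            rowIds.add(i);
        }
        System.out.println(partition(rowIds).size());
    }
}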

Aggregations

HostAddress (io.prestosql.spi.HostAddress): 28
ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit): 16
Test (org.testng.annotations.Test): 15
ArrayList (java.util.ArrayList): 14
CounterStat (io.airlift.stats.CounterStat): 9
DataSize (io.airlift.units.DataSize): 9
ImmutableList (com.google.common.collect.ImmutableList): 5
InternalNode (io.prestosql.metadata.InternalNode): 5
FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource): 5
HashMap (java.util.HashMap): 5
List (java.util.List): 5
HBaseSplit (io.hetu.core.plugin.hbase.split.HBaseSplit): 3
PrestoException (io.prestosql.spi.PrestoException): 3
ImmutableSet (com.google.common.collect.ImmutableSet): 2
HBaseColumnHandle (io.hetu.core.plugin.hbase.connector.HBaseColumnHandle): 2
HBaseTableHandle (io.hetu.core.plugin.hbase.connector.HBaseTableHandle): 2
HBaseRecordSet (io.hetu.core.plugin.hbase.query.HBaseRecordSet): 2
Split (io.prestosql.metadata.Split): 2
Node (io.prestosql.spi.Node): 2
ColumnHandle (io.prestosql.spi.connector.ColumnHandle): 2