Search in sources :

Example 16 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class BigtableIOTest method tesReduceSplitsWithAdjacentRanges.

/**
 * Checks that {@code reduceSplits} collapses ten single-range splits, whose ranges are all
 * adjacent to one another, down to the requested maximum of three splits.
 */
@Test
public void tesReduceSplitsWithAdjacentRanges() throws Exception {
    final String tableId = "TEST-MANY-ROWS-SPLITS-TABLE";
    final int rowCount = 10;
    final int sampleCount = 10;
    final long rowSizeBytes = 100L;
    final int maxSplitCount = 3;
    // Populate the test table and register sample row keys used for size estimation and splitting.
    makeTableData(tableId, rowCount);
    service.setupSampleRowKeys(tableId, sampleCount, rowSizeBytes);
    // The source under test is configured with ALL_KEY_RANGE.
    BigtableSource source = new BigtableSource(config.withTableId(StaticValueProvider.of(tableId)), BigtableReadOptions.builder().setKeyRanges(ALL_KEY_RANGE).build(), null);
    // Input ranges: [..1][1..2][2..3][3..4][4..5][5..6][6..7][7..8][8..9][9..]
    List<ByteKeyRange> adjacentRanges = new ArrayList<>();
    adjacentRanges.add(ByteKeyRange.of(ByteKey.EMPTY, createByteKey(1)));
    for (int start = 1; start < 9; start++) {
        adjacentRanges.add(ByteKeyRange.of(createByteKey(start), createByteKey(start + 1)));
    }
    adjacentRanges.add(ByteKeyRange.of(createByteKey(9), ByteKey.EMPTY));
    // Wrap every input range in its own single-range source.
    List<BigtableSource> singleRangeSplits = new ArrayList<>();
    for (ByteKeyRange adjacentRange : adjacentRanges) {
        singleRangeSplits.add(source.withSingleRange(adjacentRange));
    }
    // Expected ranges once reduced to at most three splits: [..4][4..8][8..]
    ByteKeyRange expectedFirst = ByteKeyRange.of(ByteKey.EMPTY, createByteKey(4));
    ByteKeyRange expectedMiddle = ByteKeyRange.of(createByteKey(4), createByteKey(8));
    ByteKeyRange expectedLast = ByteKeyRange.of(createByteKey(8), ByteKey.EMPTY);
    List<ByteKeyRange> expectedRanges = Arrays.asList(expectedFirst, expectedMiddle, expectedLast);
    List<BigtableSource> reducedSplits = source.reduceSplits(singleRangeSplits, null, maxSplitCount);
    // Flatten the ranges of all reduced sources into one list for comparison.
    List<ByteKeyRange> reducedRanges = new ArrayList<>();
    for (BigtableSource reduced : reducedSplits) {
        reducedRanges.addAll(reduced.getRanges());
    }
    assertThat(reducedRanges, IsIterableContainingInAnyOrder.containsInAnyOrder(expectedRanges.toArray()));
    assertAllSourcesHaveSingleAdjacentRanges(reducedSplits);
    assertSourcesEqualReferenceSource(source, reducedSplits, null);
}
Also used : ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) BigtableSource(org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource) Test(org.junit.Test)

Example 17 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class BigtableIOTest method testReadingWithSplitsWithSeveralKeyRanges.

/**
 * Splits a source configured with three contiguous key ranges and checks both the number of
 * resulting splits and that together they read the same rows as a source over the whole table
 * range.
 */
@Test
public void testReadingWithSplitsWithSeveralKeyRanges() throws Exception {
    final String tableId = "TEST-MANY-ROWS-SPLITS-TABLE-MULTIPLE-RANGES";
    final int rowCount = 1500;
    final int sampleCount = 10;
    final long rowSizeBytes = 100L;
    // The range boundaries at keys 500 and 1000 produce two additional splits,
    // e.g. the split [450, 600) becomes [450, 500) and [500, 600).
    final int expectedSplitCount = 12;
    // Populate the test table and register sample row keys used for size estimation and splitting.
    makeTableData(tableId, rowCount);
    service.setupSampleRowKeys(tableId, sampleCount, rowSizeBytes);
    // Cut the table range into three pieces at keys 500 and 1000.
    ByteKey firstBoundary = ByteKey.copyFrom("key000000500".getBytes(StandardCharsets.UTF_8));
    ByteKey secondBoundary = ByteKey.copyFrom("key000001000".getBytes(StandardCharsets.UTF_8));
    ByteKeyRange wholeTable = service.getTableRange(tableId);
    ByteKeyRange headRange = wholeTable.withEndKey(firstBoundary);
    ByteKeyRange middleRange = wholeTable.withStartKey(firstBoundary).withEndKey(secondBoundary);
    ByteKeyRange tailRange = wholeTable.withStartKey(secondBoundary);
    List<ByteKeyRange> pieces = Arrays.asList(headRange, middleRange, tailRange);
    // Source under test: reads the three pieces.
    BigtableSource source = new BigtableSource(config.withTableId(StaticValueProvider.of(tableId)), BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(pieces)).build(), null);
    // Reference source: reads the table range as a single range.
    BigtableSource referenceSource = new BigtableSource(config.withTableId(StaticValueProvider.of(tableId)), BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(Collections.singletonList(service.getTableRange(tableId)))).build(), null);
    // Desired bundle size: 1500 rows * 100 bytes / 10 samples = 15,000 bytes.
    long desiredBundleSizeBytes = rowCount * rowSizeBytes / sampleCount;
    List<BigtableSource> splits = source.split(desiredBundleSizeBytes, null);
    // Verify the split count and that the splits cover the same data as the reference source.
    assertThat(splits, hasSize(expectedSplitCount));
    assertSourcesEqualReferenceSource(referenceSource, splits, null);
}
Also used : ByteKey(org.apache.beam.sdk.io.range.ByteKey) ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) ByteString(com.google.protobuf.ByteString) BigtableSource(org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource) Test(org.junit.Test)

Example 18 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class BigtableIOTest method testReadingWithKeyRanges.

/**
 * Reads three disjoint key ranges in a single read and checks that exactly the rows falling
 * inside those ranges come back.
 */
@Test
public void testReadingWithKeyRanges() throws Exception {
    final String tableId = "TEST-KEY-RANGE-TABLE";
    final int rowCount = 11;
    List<Row> allRows = makeTableData(tableId, rowCount);
    // Register sample row keys; the per-row size passed is the length of one row key string.
    service.setupSampleRowKeys(tableId, rowCount / 10, "key000000001".length());
    // Three ranges: [1, 3), [4, 7) and [8, 9), expressed as row keys.
    final ByteKeyRange firstRange = ByteKeyRange.of(ByteKey.copyFrom("key000000001".getBytes(StandardCharsets.UTF_8)), ByteKey.copyFrom("key000000003".getBytes(StandardCharsets.UTF_8)));
    final ByteKeyRange secondRange = ByteKeyRange.of(ByteKey.copyFrom("key000000004".getBytes(StandardCharsets.UTF_8)), ByteKey.copyFrom("key000000007".getBytes(StandardCharsets.UTF_8)));
    final ByteKeyRange thirdRange = ByteKeyRange.of(ByteKey.copyFrom("key000000008".getBytes(StandardCharsets.UTF_8)), ByteKey.copyFrom("key000000009".getBytes(StandardCharsets.UTF_8)));
    List<ByteKeyRange> requestedRanges = ImmutableList.of(firstRange, secondRange, thirdRange);
    // Rows the read is expected to return: the test rows restricted to the requested ranges.
    List<Row> expectedRows = filterToRanges(allRows, requestedRanges);
    runReadTest(defaultRead.withTableId(tableId).withKeyRanges(requestedRanges), expectedRows);
    // Sanity check: the expected rows are neither empty nor the full table.
    assertThat(expectedRows, allOf(hasSize(lessThan(rowCount)), hasSize(greaterThan(0))));
}
Also used : ByteKey(org.apache.beam.sdk.io.range.ByteKey) ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) ByteString(com.google.protobuf.ByteString) Row(com.google.bigtable.v2.Row) Test(org.junit.Test)

Example 19 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class BigtableIOTest method testReduceSplitsWithSomeNonAdjacentRanges.

/**
 * Checks {@code reduceSplits} on an input where only some of the ranges are adjacent to each
 * other, comparing the resulting ranges against the expected merged ranges.
 */
@Test
public void testReduceSplitsWithSomeNonAdjacentRanges() throws Exception {
    final String tableId = "TEST-MANY-ROWS-SPLITS-TABLE";
    final int rowCount = 10;
    final int sampleCount = 10;
    final long rowSizeBytes = 100L;
    final int maxSplitCount = 3;
    // Populate the test table and register sample row keys used for size estimation and splitting.
    makeTableData(tableId, rowCount);
    service.setupSampleRowKeys(tableId, sampleCount, rowSizeBytes);
    // Input ranges with gaps between some of them: [..1][1..2][3..4][4..5][6..7][8..9]
    List<ByteKeyRange> inputRanges = Arrays.asList(
        ByteKeyRange.of(ByteKey.EMPTY, createByteKey(1)),
        ByteKeyRange.of(createByteKey(1), createByteKey(2)),
        ByteKeyRange.of(createByteKey(3), createByteKey(4)),
        ByteKeyRange.of(createByteKey(4), createByteKey(5)),
        ByteKeyRange.of(createByteKey(6), createByteKey(7)),
        ByteKeyRange.of(createByteKey(8), createByteKey(9)));
    // Expected ranges after splitting and reducing by maxSplitCount: [..2][3..5][6..7][8..9]
    List<ByteKeyRange> expectedRanges = Arrays.asList(
        ByteKeyRange.of(ByteKey.EMPTY, createByteKey(2)),
        ByteKeyRange.of(createByteKey(3), createByteKey(5)),
        ByteKeyRange.of(createByteKey(6), createByteKey(7)),
        ByteKeyRange.of(createByteKey(8), createByteKey(9)));
    // Build the source over the input ranges, then wrap each range in its own single-range source.
    BigtableSource source = new BigtableSource(config.withTableId(StaticValueProvider.of(tableId)), BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(inputRanges)).build(), null);
    List<BigtableSource> singleRangeSplits = new ArrayList<>();
    for (ByteKeyRange inputRange : inputRanges) {
        singleRangeSplits.add(source.withSingleRange(inputRange));
    }
    List<BigtableSource> reducedSplits = source.reduceSplits(singleRangeSplits, null, maxSplitCount);
    // Flatten the ranges of all reduced sources into one list for comparison.
    List<ByteKeyRange> reducedRanges = new ArrayList<>();
    for (BigtableSource reduced : reducedSplits) {
        reducedRanges.addAll(reduced.getRanges());
    }
    assertAllSourcesHaveSingleRanges(reducedSplits);
    assertThat(reducedRanges, IsIterableContainingInAnyOrder.containsInAnyOrder(expectedRanges.toArray()));
}
Also used : ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) BigtableSource(org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource) Test(org.junit.Test)

Example 20 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class HBaseReadSplittableDoFn method splitRestriction.

/**
 * Splits the given restriction into sub-ranges and emits each of them to {@code receiver}.
 *
 * <p>The sub-ranges are the ones {@code HBaseUtils.getRanges} returns for the region locations
 * that {@code HBaseUtils.getRegionLocations} finds for the table and restriction of this read.
 *
 * @param read the read element supplying the HBase configuration and table id
 * @param range the restriction to split
 * @param receiver receives one {@link ByteKeyRange} per computed sub-range
 * @throws Exception if the connection cannot be created or closed, or a lookup fails
 */
@SplitRestriction
public void splitRestriction(@Element Read read, @Restriction ByteKeyRange range, OutputReceiver<ByteKeyRange> receiver) throws Exception {
    // The connection is created for this call only, so close it when done (also on failure);
    // otherwise every split request leaks a connection.
    try (Connection connection = ConnectionFactory.createConnection(read.getConfiguration())) {
        List<HRegionLocation> regionLocations = HBaseUtils.getRegionLocations(connection, read.getTableId(), range);
        List<ByteKeyRange> splitRanges = HBaseUtils.getRanges(regionLocations, read.getTableId(), range);
        for (ByteKeyRange splitRange : splitRanges) {
            receiver.output(ByteKeyRange.of(splitRange.getStartKey(), splitRange.getEndKey()));
        }
    }
}
Also used : HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) Connection(org.apache.hadoop.hbase.client.Connection)

Aggregations

ByteKeyRange (org.apache.beam.sdk.io.range.ByteKeyRange)22 Test (org.junit.Test)17 ByteKey (org.apache.beam.sdk.io.range.ByteKey)10 ByteString (com.google.protobuf.ByteString)8 BigtableSource (org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource)5 ArrayList (java.util.ArrayList)4 Row (com.google.bigtable.v2.Row)3 HRegionLocation (org.apache.hadoop.hbase.HRegionLocation)2 Connection (org.apache.hadoop.hbase.client.Connection)2 RowFilter (com.google.bigtable.v2.RowFilter)1 DisplayData (org.apache.beam.sdk.transforms.display.DisplayData)1 TableName (org.apache.hadoop.hbase.TableName)1 Result (org.apache.hadoop.hbase.client.Result)1 ResultScanner (org.apache.hadoop.hbase.client.ResultScanner)1 Table (org.apache.hadoop.hbase.client.Table)1