Search in sources :

Example 11 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class BigtableIOTest method testReadingWithSubSplitsWithSeveralKeyRanges.

/**
 * Verifies that a source restricted to several key ranges over a sub-split table is split into
 * the expected number of sources, and that those splits agree with a reference source covering
 * the whole table range.
 */
@Test
public void testReadingWithSubSplitsWithSeveralKeyRanges() throws Exception {
    final String table = "TEST-MANY-ROWS-SPLITS-TABLE-MULTIPLE-RANGES";
    final int numRows = 1000;
    final int numSamples = 10;
    final int numSplits = 20;
    // 24 splits are expected rather than 20 because the source holds several ranges. Take the
    // boundary key 330: [300, 330) is cut in two (numSplits is twice numSamples), giving
    // [300, 315) and [315, 330); [330, 400) is likewise cut into [330, 365) and [365, 400).
    // With a single range the same span would yield only [300, 350) and [350, 400), so every
    // additional range contributes two more splits.
    final int expectedNumSplits = 24;
    final long bytesPerRow = 100L;

    // Populate the table and register sample row keys used for size estimation and splitting.
    makeTableData(table, numRows);
    service.setupSampleRowKeys(table, numSamples, bytesPerRow);

    // Cut the full table range into three adjacent ranges at the two boundary keys.
    ByteKey firstBoundary = ByteKey.copyFrom("key000000330".getBytes(StandardCharsets.UTF_8));
    ByteKey secondBoundary = ByteKey.copyFrom("key000000730".getBytes(StandardCharsets.UTF_8));
    ByteKeyRange fullRange = service.getTableRange(table);
    List<ByteKeyRange> keyRanges =
        Arrays.asList(
            fullRange.withEndKey(firstBoundary),
            fullRange.withStartKey(firstBoundary).withEndKey(secondBoundary),
            fullRange.withStartKey(secondBoundary));

    // Source under test: reads the three ranges.
    BigtableSource source =
        new BigtableSource(
            config.withTableId(StaticValueProvider.of(table)),
            BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(keyRanges)).build(),
            null);
    // Reference source: reads the whole table as one range.
    BigtableSource referenceSource =
        new BigtableSource(
            config.withTableId(StaticValueProvider.of(table)),
            BigtableReadOptions.builder()
                .setKeyRanges(StaticValueProvider.of(ImmutableList.of(service.getTableRange(table))))
                .build(),
            null);

    final long desiredBundleSizeBytes = numRows * bytesPerRow / numSplits;
    List<BigtableSource> splits = source.split(desiredBundleSizeBytes, null);

    // Check both the number of splits and their equivalence to the reference source.
    assertThat(splits, hasSize(expectedNumSplits));
    assertSourcesEqualReferenceSource(referenceSource, splits, null);
}
Also used : ByteKey(org.apache.beam.sdk.io.range.ByteKey) ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) ByteString(com.google.protobuf.ByteString) BigtableSource(org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource) Test(org.junit.Test)

Example 12 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class HBaseIOTest method testReadingWithKeyRange.

/**
 * Tests reading rows restricted by key ranges: a prefix range [beginning, startKey), a suffix
 * range [startKey, end), and a restricted range [startRow, stopRow) passed as raw byte arrays.
 *
 * <p>The prefix and suffix ranges share the boundary {@code startKey}, so their expected row
 * counts (126 and 875) add up to the 1001 rows written.
 */
@Test
public void testReadingWithKeyRange() throws Exception {
    final String table = "TEST-KEY-RANGE-TABLE";
    final int numRows = 1001;
    // Encode with an explicit charset so the key bytes never depend on the platform default.
    final byte[] startRow = "2".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    final byte[] stopRow = "9".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    final ByteKey startKey = ByteKey.copyFrom(startRow);
    createTable(table);
    writeData(table, numRows);
    // Test prefix: [beginning, startKey).
    final ByteKeyRange prefixRange = ByteKeyRange.ALL_KEYS.withEndKey(startKey);
    runReadTestLength(
        HBaseIO.read().withConfiguration(conf).withTableId(table).withKeyRange(prefixRange), 126);
    // Test suffix: [startKey, end).
    final ByteKeyRange suffixRange = ByteKeyRange.ALL_KEYS.withStartKey(startKey);
    runReadTestLength(
        HBaseIO.read().withConfiguration(conf).withTableId(table).withKeyRange(suffixRange), 875);
    // Test restricted range: [startRow, stopRow).
    // This one exercises the second signature of .withKeyRange, which takes byte arrays.
    runReadTestLength(
        HBaseIO.read().withConfiguration(conf).withTableId(table).withKeyRange(startRow, stopRow),
        441);
}
Also used : ByteKey(org.apache.beam.sdk.io.range.ByteKey) ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) Test(org.junit.Test)

Example 13 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class BigtableIOTest method testReadingDisplayData.

/**
 * Checks that a read configured with a table id, a row filter and a key range reports those
 * settings, along with the Bigtable options, in its display data.
 */
@Test
public void testReadingDisplayData() {
    RowFilter fooFilter =
        RowFilter.newBuilder()
            .setRowKeyRegexFilter(ByteString.copyFromUtf8("foo.*"))
            .build();
    ByteKey endKey = ByteKey.of(0xab, 0xcd);
    ByteKeyRange upToEndKey = ByteKeyRange.ALL_KEYS.withEndKey(endKey);

    BigtableIO.Read configuredRead =
        BigtableIO.read()
            .withBigtableOptions(BIGTABLE_OPTIONS)
            .withTableId("fooTable")
            .withRowFilter(fooFilter)
            .withKeyRange(upToEndKey);

    DisplayData shown = DisplayData.from(configuredRead);

    assertThat(
        shown,
        hasDisplayItem(
            allOf(hasKey("tableId"), hasLabel("Bigtable Table Id"), hasValue("fooTable"))));
    assertThat(shown, hasDisplayItem("rowFilter", fooFilter.toString()));
    assertThat(
        shown, hasDisplayItem("keyRanges", "[ByteKeyRange{startKey=[], endKey=[abcd]}]"));
    // BigtableIO adds a user-agent to the options, so only the key's presence is asserted.
    assertThat(shown, hasDisplayItem("bigtableOptions"));
}
Also used : RowFilter(com.google.bigtable.v2.RowFilter) ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) Test(org.junit.Test)

Example 14 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class BigtableIOTest method testReduceSplitsWithAllNonAdjacentRange.

/**
 * Verifies that reducing splits whose ranges are all non-adjacent leaves the set of ranges
 * unchanged.
 */
@Test
public void testReduceSplitsWithAllNonAdjacentRange() throws Exception {
    final String table = "TEST-MANY-ROWS-SPLITS-TABLE";
    final int numRows = 10;
    final int numSamples = 10;
    final long bytesPerRow = 100L;
    final int maxSplit = 3;

    // Populate the table and register sample row keys used for size estimation and splitting.
    makeTableData(table, numRows);
    service.setupSampleRowKeys(table, numSamples, bytesPerRow);

    // Five ranges with a gap between each neighbour: [..1][2..3][4..5][6..7][8..9]
    ByteKeyRange upToOne = ByteKeyRange.of(ByteKey.EMPTY, createByteKey(1));
    ByteKeyRange twoToThree = ByteKeyRange.of(createByteKey(2), createByteKey(3));
    ByteKeyRange fourToFive = ByteKeyRange.of(createByteKey(4), createByteKey(5));
    ByteKeyRange sixToSeven = ByteKeyRange.of(createByteKey(6), createByteKey(7));
    ByteKeyRange eightToNine = ByteKeyRange.of(createByteKey(8), createByteKey(9));
    List<ByteKeyRange> keyRanges =
        Arrays.asList(upToOne, twoToThree, fourToFive, sixToSeven, eightToNine);

    // Build the multi-range source, then derive one single-range source per key range.
    BigtableSource source =
        new BigtableSource(
            config.withTableId(StaticValueProvider.of(table)),
            BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(keyRanges)).build(),
            null);
    List<BigtableSource> perRangeSources = new ArrayList<>();
    for (ByteKeyRange keyRange : keyRanges) {
        perRangeSources.add(source.withSingleRange(keyRange));
    }

    List<BigtableSource> reduced = source.reduceSplits(perRangeSources, null, maxSplit);

    // Gather every range carried by the reduced sources.
    List<ByteKeyRange> rangesAfterReduce = new ArrayList<>();
    for (BigtableSource reducedSource : reduced) {
        rangesAfterReduce.addAll(reducedSource.getRanges());
    }

    assertAllSourcesHaveSingleRanges(reduced);
    // The resulting ranges must be exactly the original ones, in any order.
    assertThat(
        rangesAfterReduce,
        IsIterableContainingInAnyOrder.containsInAnyOrder(keyRanges.toArray()));
}
Also used : ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) BigtableSource(org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource) Test(org.junit.Test)

Example 15 with ByteKeyRange

use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.

the class BigtableIOTest method testReadingWithRuntimeParameterizedKeyRange.

/**
 * Verifies that a read whose key ranges are supplied through a ValueProvider returns exactly the
 * rows that fall inside the supplied range.
 */
@Test
public void testReadingWithRuntimeParameterizedKeyRange() throws Exception {
    final String table = "TEST-KEY-RANGE-TABLE";
    final int numRows = 1001;
    final String lowerKeyText = "key000000100";
    final String upperKeyText = "key000000300";

    List<Row> allRows = makeTableData(table, numRows);
    ByteKey lowerKey = ByteKey.copyFrom(lowerKeyText.getBytes(StandardCharsets.UTF_8));
    ByteKey upperKey = ByteKey.copyFrom(upperKeyText.getBytes(StandardCharsets.UTF_8));
    // The length of one row key is passed as the per-row size for the sample row keys.
    service.setupSampleRowKeys(table, numRows / 10, lowerKeyText.length());

    final ByteKeyRange middleRange = ByteKeyRange.of(lowerKey, upperKey);
    List<Row> expectedRows = filterToRange(allRows, middleRange);
    List<ByteKeyRange> onlyMiddleRange = Collections.singletonList(middleRange);

    runReadTest(
        defaultRead.withTableId(table).withKeyRanges(StaticValueProvider.of(onlyMiddleRange)),
        expectedRows);

    // Guard against a vacuous test: the range must select some rows, but not all of them.
    assertThat(expectedRows, allOf(hasSize(lessThan(numRows)), hasSize(greaterThan(0))));
}
Also used : ByteKey(org.apache.beam.sdk.io.range.ByteKey) ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) ByteString(com.google.protobuf.ByteString) Row(com.google.bigtable.v2.Row) Test(org.junit.Test)

Aggregations

ByteKeyRange (org.apache.beam.sdk.io.range.ByteKeyRange): 22, Test (org.junit.Test): 17, ByteKey (org.apache.beam.sdk.io.range.ByteKey): 10, ByteString (com.google.protobuf.ByteString): 8, BigtableSource (org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource): 5, ArrayList (java.util.ArrayList): 4, Row (com.google.bigtable.v2.Row): 3, HRegionLocation (org.apache.hadoop.hbase.HRegionLocation): 2, Connection (org.apache.hadoop.hbase.client.Connection): 2, RowFilter (com.google.bigtable.v2.RowFilter): 1, DisplayData (org.apache.beam.sdk.transforms.display.DisplayData): 1, TableName (org.apache.hadoop.hbase.TableName): 1, Result (org.apache.hadoop.hbase.client.Result): 1, ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 1, Table (org.apache.hadoop.hbase.client.Table): 1