use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class BigtableIOTest method testReadingWithSubSplitsWithSeveralKeyRanges.
/**
 * Splits a source built from three adjacent key ranges that together cover the whole table and
 * checks both the number of resulting splits and that the splits read the same data as a
 * single-range reference source.
 */
@Test
public void testReadingWithSubSplitsWithSeveralKeyRanges() throws Exception {
  final String tableId = "TEST-MANY-ROWS-SPLITS-TABLE-MULTIPLE-RANGES";
  final int rowCount = 1000;
  final int sampleCount = 10;
  final int requestedSplits = 20;
  final long rowSizeBytes = 100L;
  // 24 splits are expected rather than 20 because of the extra range boundaries. Take the
  // boundary key 330: [300, 330) is sub-split in two (requestedSplits is twice sampleCount),
  // giving [300, 315) and [315, 330); then [330, 400) is also halved into [330, 365) and
  // [365, 400). With a single range the same span would only yield [300, 350) and [350, 400),
  // so every additional range contributes two more splits.
  final int expectedSplitCount = 24;

  // Populate the table and register sample row keys used for size estimation and splitting.
  makeTableData(tableId, rowCount);
  service.setupSampleRowKeys(tableId, sampleCount, rowSizeBytes);

  // Cut the full table range into three consecutive pieces at keys 330 and 730.
  ByteKey firstBoundary = ByteKey.copyFrom("key000000330".getBytes(StandardCharsets.UTF_8));
  ByteKey secondBoundary = ByteKey.copyFrom("key000000730".getBytes(StandardCharsets.UTF_8));
  ByteKeyRange fullRange = service.getTableRange(tableId);
  ByteKeyRange headRange = fullRange.withEndKey(firstBoundary);
  ByteKeyRange middleRange = fullRange.withStartKey(firstBoundary).withEndKey(secondBoundary);
  ByteKeyRange tailRange = fullRange.withStartKey(secondBoundary);
  List<ByteKeyRange> pieces = Arrays.asList(headRange, middleRange, tailRange);

  // Source under test: reads the three pieces.
  BigtableReadOptions multiRangeOptions =
      BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(pieces)).build();
  BigtableSource multiRangeSource =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(tableId)), multiRangeOptions, null);

  // Reference source: reads the whole table as one range.
  BigtableReadOptions wholeTableOptions =
      BigtableReadOptions.builder()
          .setKeyRanges(StaticValueProvider.of(ImmutableList.of(service.getTableRange(tableId))))
          .build();
  BigtableSource wholeTableSource =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(tableId)), wholeTableOptions, null);

  final long desiredBundleSizeBytes = rowCount * rowSizeBytes / requestedSplits;
  List<BigtableSource> subSources = multiRangeSource.split(desiredBundleSizeBytes, null);

  // Check the split count and that the splits are equivalent to the reference source.
  assertThat(subSources, hasSize(expectedSplitCount));
  assertSourcesEqualReferenceSource(wholeTableSource, subSources, null);
}
use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class HBaseIOTest method testReadingWithKeyRange.
/**
 * Tests reading rows restricted by key ranges: a prefix {@code [beginning, startKey)}, a suffix
 * {@code [startKey, end)}, and a restricted range {@code [startKey, stopKey)} given as raw byte
 * arrays. Each read is passed to {@code runReadTestLength} together with its expected row count.
 */
@Test
public void testReadingWithKeyRange() throws Exception {
  final String table = "TEST-KEY-RANGE-TABLE";
  final int numRows = 1001;
  // Encode the boundary keys explicitly as UTF-8 so the test does not depend on the platform
  // default charset used by the no-argument String.getBytes().
  final byte[] startRow = "2".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  final byte[] stopRow = "9".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  final ByteKey startKey = ByteKey.copyFrom(startRow);
  createTable(table);
  writeData(table, numRows);

  // Test prefix: [beginning, startKey).
  final ByteKeyRange prefixRange = ByteKeyRange.ALL_KEYS.withEndKey(startKey);
  runReadTestLength(
      HBaseIO.read().withConfiguration(conf).withTableId(table).withKeyRange(prefixRange), 126);

  // Test suffix: [startKey, end). Together with the prefix this accounts for every row
  // (126 + 875 = numRows).
  final ByteKeyRange suffixRange = ByteKeyRange.ALL_KEYS.withStartKey(startKey);
  runReadTestLength(
      HBaseIO.read().withConfiguration(conf).withTableId(table).withKeyRange(suffixRange), 875);

  // Test restricted range: [startKey, endKey).
  // This one exercises the second signature of withKeyRange, which takes byte arrays.
  runReadTestLength(
      HBaseIO.read().withConfiguration(conf).withTableId(table).withKeyRange(startRow, stopRow),
      441);
}
use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class BigtableIOTest method testReadingDisplayData.
/**
 * Builds a read with a table id, a row filter and a key range, then checks the display data
 * items reported for it.
 */
@Test
public void testReadingDisplayData() {
  ByteString keyRegex = ByteString.copyFromUtf8("foo.*");
  RowFilter filter = RowFilter.newBuilder().setRowKeyRegexFilter(keyRegex).build();
  ByteKey upperBound = ByteKey.of(0xab, 0xcd);
  ByteKeyRange boundedRange = ByteKeyRange.ALL_KEYS.withEndKey(upperBound);

  BigtableIO.Read configuredRead =
      BigtableIO.read()
          .withBigtableOptions(BIGTABLE_OPTIONS)
          .withTableId("fooTable")
          .withRowFilter(filter)
          .withKeyRange(boundedRange);
  DisplayData shown = DisplayData.from(configuredRead);

  // Table id: key, label and value are all checked.
  assertThat(
      shown,
      hasDisplayItem(allOf(hasKey("tableId"), hasLabel("Bigtable Table Id"), hasValue("fooTable"))));
  // Row filter is reported via its string form.
  assertThat(shown, hasDisplayItem("rowFilter", filter.toString()));
  // Key ranges are reported as the string form of the range list.
  assertThat(
      shown, hasDisplayItem("keyRanges", "[ByteKeyRange{startKey=[], endKey=[abcd]}]"));
  // BigtableIO adds a user-agent to the options, so only the key's presence is asserted.
  assertThat(shown, hasDisplayItem("bigtableOptions"));
}
use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class BigtableIOTest method testReduceSplitsWithAllNonAdjacentRange.
/**
 * Checks that reducing a list of single-range sources whose ranges are all non-adjacent yields
 * sources covering exactly the original ranges.
 */
@Test
public void testReduceSplitsWithAllNonAdjacentRange() throws Exception {
  final String tableId = "TEST-MANY-ROWS-SPLITS-TABLE";
  final int rowCount = 10;
  final int sampleCount = 10;
  final long rowSizeBytes = 100L;
  final int splitLimit = 3;

  // Populate the table and register sample row keys used for size estimation and splitting.
  makeTableData(tableId, rowCount);
  service.setupSampleRowKeys(tableId, sampleCount, rowSizeBytes);

  // Non-contiguous key ranges: [..1][2..3][4..5][6..7][8..9]
  List<ByteKeyRange> disjointRanges =
      Arrays.asList(
          ByteKeyRange.of(ByteKey.EMPTY, createByteKey(1)),
          ByteKeyRange.of(createByteKey(2), createByteKey(3)),
          ByteKeyRange.of(createByteKey(4), createByteKey(5)),
          ByteKeyRange.of(createByteKey(6), createByteKey(7)),
          ByteKeyRange.of(createByteKey(8), createByteKey(9)));

  // Build the multi-range source, then derive one single-range source per range.
  BigtableReadOptions readOptions =
      BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(disjointRanges)).build();
  BigtableSource multiRangeSource =
      new BigtableSource(config.withTableId(StaticValueProvider.of(tableId)), readOptions, null);
  List<BigtableSource> perRangeSources = new ArrayList<>();
  for (int i = 0; i < disjointRanges.size(); i++) {
    perRangeSources.add(multiRangeSource.withSingleRange(disjointRanges.get(i)));
  }

  List<BigtableSource> reduced = multiRangeSource.reduceSplits(perRangeSources, null, splitLimit);

  // Gather every range held by the reduced sources.
  List<ByteKeyRange> rangesAfterReduce = new ArrayList<>();
  for (BigtableSource reducedSource : reduced) {
    rangesAfterReduce.addAll(reducedSource.getRanges());
  }

  assertAllSourcesHaveSingleRanges(reduced);
  // The ranges after reduction must be exactly the original ones, in any order.
  Object[] expectedRanges = disjointRanges.toArray();
  assertThat(
      rangesAfterReduce, IsIterableContainingInAnyOrder.containsInAnyOrder(expectedRanges));
}
use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class BigtableIOTest method testReadingWithRuntimeParameterizedKeyRange.
/**
 * Reads a single key range that is supplied through a {@code ValueProvider} and compares the
 * result with the table rows filtered to that range.
 */
@Test
public void testReadingWithRuntimeParameterizedKeyRange() throws Exception {
  final String tableId = "TEST-KEY-RANGE-TABLE";
  final int rowCount = 1001;
  final String lowerKeyText = "key000000100";
  final String upperKeyText = "key000000300";

  List<Row> allRows = makeTableData(tableId, rowCount);
  ByteKey lowerKey = ByteKey.copyFrom(lowerKeyText.getBytes(StandardCharsets.UTF_8));
  ByteKey upperKey = ByteKey.copyFrom(upperKeyText.getBytes(StandardCharsets.UTF_8));
  // The length of a row key is passed as the third argument when registering the samples.
  service.setupSampleRowKeys(tableId, rowCount / 10, lowerKeyText.length());

  final ByteKeyRange selectedRange = ByteKeyRange.of(lowerKey, upperKey);
  List<Row> expectedRows = filterToRange(allRows, selectedRange);
  List<ByteKeyRange> rangeList = Collections.singletonList(selectedRange);

  runReadTest(
      defaultRead.withTableId(tableId).withKeyRanges(StaticValueProvider.of(rangeList)),
      expectedRows);

  // The filtered rows must be a non-empty, strict subset of the table.
  assertThat(expectedRows, allOf(hasSize(lessThan(rowCount)), hasSize(greaterThan(0))));
}
Aggregations