use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class BigtableIOTest method tesReduceSplitsWithAdjacentRanges.
/**
 * Checks that {@code reduceSplits} merges ten adjacent single-range splits down to the
 * requested maximum of three splits, each still covering one contiguous range.
 */
@Test
public void tesReduceSplitsWithAdjacentRanges() throws Exception {
  final String table = "TEST-MANY-ROWS-SPLITS-TABLE";
  final int numRows = 10;
  final int numSamples = 10;
  final long bytesPerRow = 100L;
  final int maxSplit = 3;

  // Populate the fake table and register sample row keys used for size estimation/splitting.
  makeTableData(table, numRows);
  service.setupSampleRowKeys(table, numSamples, bytesPerRow);

  // Source spanning the whole key space; the individual splits are derived from it below.
  BigtableSource source =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder().setKeyRanges(ALL_KEY_RANGE).build(),
          null);

  // Adjacent input ranges: [..1][1..2][2..3][3..4][4..5][5..6][6..7][7..8][8..9][9..]
  List<ByteKeyRange> adjacentRanges = new ArrayList<>();
  adjacentRanges.add(ByteKeyRange.of(ByteKey.EMPTY, createByteKey(1)));
  for (int i = 1; i < 9; i++) {
    adjacentRanges.add(ByteKeyRange.of(createByteKey(i), createByteKey(i + 1)));
  }
  adjacentRanges.add(ByteKeyRange.of(createByteKey(9), ByteKey.EMPTY));

  // One single-range source per input range.
  List<BigtableSource> singleRangeSplits = new ArrayList<>();
  for (ByteKeyRange adjacentRange : adjacentRanges) {
    singleRangeSplits.add(source.withSingleRange(adjacentRange));
  }

  // After reduction the expected ranges are [..4][4..8][8..]
  List<ByteKeyRange> expectedRanges =
      Arrays.asList(
          ByteKeyRange.of(ByteKey.EMPTY, createByteKey(4)),
          ByteKeyRange.of(createByteKey(4), createByteKey(8)),
          ByteKeyRange.of(createByteKey(8), ByteKey.EMPTY));

  List<BigtableSource> reducedSplits = source.reduceSplits(singleRangeSplits, null, maxSplit);

  // Flatten the ranges of every reduced source so they can be compared as one collection.
  List<ByteKeyRange> actualRanges = new ArrayList<>();
  for (BigtableSource reduced : reducedSplits) {
    actualRanges.addAll(reduced.getRanges());
  }

  assertThat(
      actualRanges,
      IsIterableContainingInAnyOrder.containsInAnyOrder(expectedRanges.toArray()));
  assertAllSourcesHaveSingleAdjacentRanges(reducedSplits);
  assertSourcesEqualReferenceSource(source, reducedSplits, null);
}
use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class BigtableIOTest method testReadingWithSplitsWithSeveralKeyRanges.
/**
 * Reads every row of a table through a source configured with three consecutive key ranges,
 * splits that source, and checks both the split count and that the splits together read the
 * same data as a single-range reference source.
 */
@Test
public void testReadingWithSplitsWithSeveralKeyRanges() throws Exception {
  final String table = "TEST-MANY-ROWS-SPLITS-TABLE-MULTIPLE-RANGES";
  final int numRows = 1500;
  final int numSamples = 10;
  // Two more splits are generated because of the split keys at 500 and 1000.
  // E.g. the split [450, 600) becomes [450, 500) and [500, 600).
  final int numSplits = 12;
  final long bytesPerRow = 100L;

  // Populate the fake table and register sample row keys used for size estimation/splitting.
  makeTableData(table, numRows);
  service.setupSampleRowKeys(table, numSamples, bytesPerRow);

  // Cut the table range into three pieces at row keys 500 and 1000.
  ByteKey firstCut = ByteKey.copyFrom("key000000500".getBytes(StandardCharsets.UTF_8));
  ByteKey secondCut = ByteKey.copyFrom("key000001000".getBytes(StandardCharsets.UTF_8));
  ByteKeyRange wholeTable = service.getTableRange(table);
  List<ByteKeyRange> threeRanges =
      Arrays.asList(
          wholeTable.withEndKey(firstCut),
          wholeTable.withStartKey(firstCut).withEndKey(secondCut),
          wholeTable.withStartKey(secondCut));

  // Source under test: reads the three ranges above.
  BigtableSource source =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(threeRanges)).build(),
          null);

  // Reference source: reads the whole table as one range.
  BigtableSource referenceSource =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder()
              .setKeyRanges(
                  StaticValueProvider.of(Collections.singletonList(service.getTableRange(table))))
              .build(),
          null);

  // 1500 rows * 100 bytes / 10 samples = 15,000 bytes per desired bundle.
  final long desiredBundleSizeBytes = numRows * bytesPerRow / numSamples;
  List<BigtableSource> splits = source.split(desiredBundleSizeBytes, null);

  // Verify the number of splits and that they are equivalent to the reference source.
  assertThat(splits, hasSize(numSplits));
  assertSourcesEqualReferenceSource(referenceSource, splits, null);
}
use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class BigtableIOTest method testReadingWithKeyRanges.
/**
 * Runs a single read restricted to three key ranges and checks that it returns exactly the
 * rows falling inside those ranges.
 */
@Test
public void testReadingWithKeyRanges() throws Exception {
  final String table = "TEST-KEY-RANGE-TABLE";
  final int numRows = 11;

  List<Row> allRows = makeTableData(table, numRows);
  service.setupSampleRowKeys(table, numRows / 10, "key000000001".length());

  // Three disjoint ranges: [1, 3), [4, 7) and [8, 9) in terms of the generated row keys.
  List<ByteKeyRange> ranges =
      ImmutableList.of(
          ByteKeyRange.of(
              ByteKey.copyFrom("key000000001".getBytes(StandardCharsets.UTF_8)),
              ByteKey.copyFrom("key000000003".getBytes(StandardCharsets.UTF_8))),
          ByteKeyRange.of(
              ByteKey.copyFrom("key000000004".getBytes(StandardCharsets.UTF_8)),
              ByteKey.copyFrom("key000000007".getBytes(StandardCharsets.UTF_8))),
          ByteKeyRange.of(
              ByteKey.copyFrom("key000000008".getBytes(StandardCharsets.UTF_8)),
              ByteKey.copyFrom("key000000009".getBytes(StandardCharsets.UTF_8))));

  // Rows the read is expected to produce.
  List<Row> expectedRows = filterToRanges(allRows, ranges);
  runReadTest(defaultRead.withTableId(table).withKeyRanges(ranges), expectedRows);

  // Guard against a vacuous test: the expected rows must be neither empty nor the full table.
  assertThat(expectedRows, allOf(hasSize(lessThan(numRows)), hasSize(greaterThan(0))));
}
use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class BigtableIOTest method testReduceSplitsWithSomeNonAdjacentRanges.
/**
 * Checks {@code reduceSplits} when only some of the input ranges are adjacent: adjacent
 * neighbours are merged, while ranges separated by a gap stay separate.
 */
@Test
public void testReduceSplitsWithSomeNonAdjacentRanges() throws Exception {
  final String table = "TEST-MANY-ROWS-SPLITS-TABLE";
  final int numRows = 10;
  final int numSamples = 10;
  final long bytesPerRow = 100L;
  final int maxSplit = 3;

  // Populate the fake table and register sample row keys used for size estimation/splitting.
  makeTableData(table, numRows);
  service.setupSampleRowKeys(table, numSamples, bytesPerRow);

  // Input ranges with gaps: [..1][1..2][3..4][4..5][6..7][8..9]
  List<ByteKeyRange> inputRanges =
      Arrays.asList(
          ByteKeyRange.of(ByteKey.EMPTY, createByteKey(1)),
          ByteKeyRange.of(createByteKey(1), createByteKey(2)),
          ByteKeyRange.of(createByteKey(3), createByteKey(4)),
          ByteKeyRange.of(createByteKey(4), createByteKey(5)),
          ByteKeyRange.of(createByteKey(6), createByteKey(7)),
          ByteKeyRange.of(createByteKey(8), createByteKey(9)));

  // Ranges expected after reduction: [..2][3..5][6..7][8..9]
  List<ByteKeyRange> expectedRanges =
      Arrays.asList(
          ByteKeyRange.of(ByteKey.EMPTY, createByteKey(2)),
          ByteKeyRange.of(createByteKey(3), createByteKey(5)),
          ByteKeyRange.of(createByteKey(6), createByteKey(7)),
          ByteKeyRange.of(createByteKey(8), createByteKey(9)));

  // Source configured with all input ranges; one single-range split is derived per range.
  BigtableSource source =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(inputRanges)).build(),
          null);
  List<BigtableSource> singleRangeSplits = new ArrayList<>();
  for (ByteKeyRange inputRange : inputRanges) {
    singleRangeSplits.add(source.withSingleRange(inputRange));
  }

  List<BigtableSource> reducedSplits = source.reduceSplits(singleRangeSplits, null, maxSplit);

  // Flatten the ranges of every reduced source so they can be compared as one collection.
  List<ByteKeyRange> actualRanges = new ArrayList<>();
  for (BigtableSource reduced : reducedSplits) {
    actualRanges.addAll(reduced.getRanges());
  }

  assertAllSourcesHaveSingleRanges(reducedSplits);
  assertThat(
      actualRanges,
      IsIterableContainingInAnyOrder.containsInAnyOrder(expectedRanges.toArray()));
}
use of org.apache.beam.sdk.io.range.ByteKeyRange in project beam by apache.
the class HBaseReadSplittableDoFn method splitRestriction.
/**
 * Splits the given key range into sub-ranges and emits each of them as a new restriction.
 *
 * <p>The sub-ranges are obtained by looking up the region locations for the table and range
 * via {@code HBaseUtils.getRegionLocations} and converting them with
 * {@code HBaseUtils.getRanges}.
 *
 * @param read the read specification supplying the HBase configuration and table id
 * @param range the restriction (key range) to split
 * @param receiver receives one {@link ByteKeyRange} per computed sub-range
 * @throws Exception if creating or closing the connection, or looking up regions, fails
 */
@SplitRestriction
public void splitRestriction(@Element Read read, @Restriction ByteKeyRange range, OutputReceiver<ByteKeyRange> receiver) throws Exception {
  // The connection is only needed for the region lookup; try-with-resources guarantees it is
  // closed even when the lookup or the output throws, so connections are not leaked per split.
  try (Connection connection = ConnectionFactory.createConnection(read.getConfiguration())) {
    List<HRegionLocation> regionLocations =
        HBaseUtils.getRegionLocations(connection, read.getTableId(), range);
    List<ByteKeyRange> splitRanges =
        HBaseUtils.getRanges(regionLocations, read.getTableId(), range);
    for (ByteKeyRange splitRange : splitRanges) {
      receiver.output(ByteKeyRange.of(splitRange.getStartKey(), splitRange.getEndKey()));
    }
  }
}
Aggregations