Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in project beam by apache.
The class BigtableIOTest, method testGetSplitPointsConsumed.
@Test
public void testGetSplitPointsConsumed() throws Exception {
  final String table = "TEST-TABLE";
  final int numRows = 100;
  int splitPointsConsumed = 0;
  makeTableData(table, numRows);

  BigtableSource source =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder().setKeyRanges(ALL_KEY_RANGE).build(),
          null);
  BoundedReader<Row> reader = source.createReader(TestPipeline.testingPipelineOptions());
  reader.start();

  // Started, 0 split points consumed.
  assertEquals("splitPointsConsumed starting", splitPointsConsumed, reader.getSplitPointsConsumed());

  // Split points consumed increases for each row read.
  while (reader.advance()) {
    assertEquals(
        "splitPointsConsumed advancing", ++splitPointsConsumed, reader.getSplitPointsConsumed());
  }

  // Reader marked as done, 100 split points consumed.
  assertEquals("splitPointsConsumed done", numRows, reader.getSplitPointsConsumed());
  reader.close();
}
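For context, getSplitPointsConsumed() belongs to the generic BoundedSource.BoundedReader contract that Beam runners use for progress reporting and dynamic work rebalancing; in BigtableSource every row is a split point, which is why the counter tracks the number of rows read above. The following is a minimal sketch, not part of BigtableIOTest, of a hypothetical helper that drains any bounded reader and reports the split points it consumed:

import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.options.PipelineOptions;

// Hypothetical helper: reads a BoundedSource to exhaustion and returns the number of
// split points consumed. For BigtableSource this should equal the number of rows in
// the scanned key ranges.
static <T> long drainAndGetSplitPointsConsumed(BoundedSource<T> source, PipelineOptions options)
    throws Exception {
  try (BoundedSource.BoundedReader<T> reader = source.createReader(options)) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      // Intentionally empty: advancing the reader is the work. For sources like
      // BigtableSource, where every record is a split point, each advance consumes one.
    }
    return reader.getSplitPointsConsumed();
  }
}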
Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in project beam by apache.
The class BigtableIOTest, method testReadingWithSubSplits.
/**
 * Tests reading all rows from a sub-split table.
 */
@Test
public void testReadingWithSubSplits() throws Exception {
  final String table = "TEST-MANY-ROWS-SPLITS-TABLE";
  final int numRows = 1000;
  final int numSamples = 10;
  final int numSplits = 20;
  final long bytesPerRow = 100L;

  // Set up test table data and sample row keys for size estimation and splitting.
  makeTableData(table, numRows);
  service.setupSampleRowKeys(table, numSamples, bytesPerRow);

  // Generate source and split it.
  BigtableSource source =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder().setKeyRanges(ALL_KEY_RANGE).build(),
          null);
  List<BigtableSource> splits = source.split(numRows * bytesPerRow / numSplits, null);

  // Test num splits and split equality.
  assertThat(splits, hasSize(numSplits));
  assertSourcesEqualReferenceSource(source, splits, null);
}
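The desired bundle size passed to split() above is numRows * bytesPerRow / numSplits = 5,000 bytes, while the fake service reports 10 sample row keys covering roughly 10,000 bytes each, so each sampled region is expected to be divided in two, giving the 20 splits the test asserts. A hypothetical helper (a sketch of that arithmetic, not Beam code) could look like this:

// Sketch of the sizing arithmetic the assertion above relies on; the name and
// signature are illustrative and not part of BigtableIOTest.
static long expectedSubSplits(long numRows, long bytesPerRow, long numSamples, long desiredBundleSizeBytes) {
  long bytesPerSampledRegion = numRows * bytesPerRow / numSamples;                        // 1000 * 100 / 10 = 10_000
  long subSplitsPerRegion = Math.max(1, bytesPerSampledRegion / desiredBundleSizeBytes);  // 10_000 / 5_000 = 2
  return numSamples * subSplitsPerRegion;                                                 // 10 * 2 = 20
}

With the values above, expectedSubSplits(1000, 100, 10, 5_000) returns 20, matching hasSize(numSplits).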
Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in project beam by apache.
The class BigtableIOTest, method testReadingWithSplits.
/**
 * Tests reading all rows from a split table.
 */
@Test
public void testReadingWithSplits() throws Exception {
  final String table = "TEST-MANY-ROWS-SPLITS-TABLE";
  final int numRows = 1500;
  final int numSamples = 10;
  final long bytesPerRow = 100L;

  // Set up test table data and sample row keys for size estimation and splitting.
  makeTableData(table, numRows);
  service.setupSampleRowKeys(table, numSamples, bytesPerRow);

  // Generate source and split it.
  BigtableSource source =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder().setKeyRanges(ALL_KEY_RANGE).build(),
          null);
  List<BigtableSource> splits = source.split(numRows * bytesPerRow / numSamples, null);

  // Test num splits and split equality.
  assertThat(splits, hasSize(numSamples));
  assertSourcesEqualReferenceSource(source, splits, null);
}
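Here the desired bundle size (numRows * bytesPerRow / numSamples = 15,000 bytes) matches the size of each sampled region, so no sub-splitting occurs and the test expects exactly numSamples splits. The split-equality assertions in these tests come from Beam's SourceTestUtils, which BigtableIOTest imports statically. As a rough sketch of the same pattern applied to any bounded source (the helper name is hypothetical), one could write:

import java.util.List;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.SourceTestUtils;

// Hypothetical helper: splits a source and verifies that reading all of the splits
// yields exactly the same records as reading the original source.
static <T> void checkSplitsCoverSource(BoundedSource<T> source, long desiredBundleSizeBytes)
    throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<? extends BoundedSource<T>> splits = source.split(desiredBundleSizeBytes, options);
  SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
}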
Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in project beam by apache.
The class BigtableIOTest, method testReadingWithFilterAndSubSplits.
/**
 * Tests reading all rows, with a row filter, from a sub-split table.
 */
@Test
public void testReadingWithFilterAndSubSplits() throws Exception {
  final String table = "TEST-FILTER-SUB-SPLITS";
  final int numRows = 1700;
  final int numSamples = 10;
  final int numSplits = 20;
  final long bytesPerRow = 100L;

  // Set up test table data and sample row keys for size estimation and splitting.
  makeTableData(table, numRows);
  service.setupSampleRowKeys(table, numSamples, bytesPerRow);

  // Generate source with a row-key regex filter and split it.
  RowFilter filter =
      RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8(".*17.*")).build();
  BigtableSource source =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder()
              .setRowFilter(StaticValueProvider.of(filter))
              .setKeyRanges(ALL_KEY_RANGE)
              .build(),
          null);
  List<BigtableSource> splits = source.split(numRows * bytesPerRow / numSplits, null);

  // Test num splits and split equality.
  assertThat(splits, hasSize(numSplits));
  assertSourcesEqualReferenceSource(source, splits, null);
}
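Outside of the test, the same row-key regex filter would normally be attached through the public BigtableIO.read() transform rather than by constructing BigtableSource directly. A minimal sketch, with placeholder project and instance IDs:

import com.google.bigtable.v2.RowFilter;
import com.google.protobuf.ByteString;
import org.apache.beam.sdk.io.gcp.bigtable.BigtableIO;

// Hypothetical configuration method; project and instance IDs are placeholders.
static BigtableIO.Read filteredRead() {
  RowFilter filter =
      RowFilter.newBuilder()
          .setRowKeyRegexFilter(ByteString.copyFromUtf8(".*17.*"))
          .build();
  return BigtableIO.read()
      .withProjectId("my-project")    // placeholder
      .withInstanceId("my-instance")  // placeholder
      .withTableId("TEST-FILTER-SUB-SPLITS")
      .withRowFilter(filter);         // the same row-key regex used by the test
}

Note that the filter does not change how the source splits (the test still expects numSplits sub-splits); it only restricts which rows each split returns.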
Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in project beam by apache.
The class BigtableIOTest, method testReadingWithSubSplitsWithSeveralKeyRanges.
/**
 * Tests reading all rows from a sub-split table with several key ranges.
 */
@Test
public void testReadingWithSubSplitsWithSeveralKeyRanges() throws Exception {
  final String table = "TEST-MANY-ROWS-SPLITS-TABLE-MULTIPLE-RANGES";
  final int numRows = 1000;
  final int numSamples = 10;
  final int numSplits = 20;
  // We expect 24 splits instead of 20 due to the multiple ranges. For a key of 330 separating
  // the multiple ranges, first the [300, 330) range is subsplit into two (since numSplits is
  // twice numSamples), so we get [300, 315) and [315, 330). Then, the [330, 400) range is also
  // split into two, resulting in [330, 365) and [365, 400). These ranges would instead be
  // [300, 350) and [350, 400) if this source was one range. Thus, each extra range adds two
  // resulting splits.
  final int expectedNumSplits = 24;
  final long bytesPerRow = 100L;

  // Set up test table data and sample row keys for size estimation and splitting.
  makeTableData(table, numRows);
  service.setupSampleRowKeys(table, numSamples, bytesPerRow);

  ByteKey splitKey1 = ByteKey.copyFrom("key000000330".getBytes(StandardCharsets.UTF_8));
  ByteKey splitKey2 = ByteKey.copyFrom("key000000730".getBytes(StandardCharsets.UTF_8));
  ByteKeyRange tableRange = service.getTableRange(table);
  List<ByteKeyRange> keyRanges =
      Arrays.asList(
          tableRange.withEndKey(splitKey1),
          tableRange.withStartKey(splitKey1).withEndKey(splitKey2),
          tableRange.withStartKey(splitKey2));

  // Generate source and split it.
  BigtableSource source =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(keyRanges)).build(),
          null);
  BigtableSource referenceSource =
      new BigtableSource(
          config.withTableId(StaticValueProvider.of(table)),
          BigtableReadOptions.builder()
              .setKeyRanges(StaticValueProvider.of(ImmutableList.of(service.getTableRange(table))))
              .build(),
          null);
  List<BigtableSource> splits = source.split(numRows * bytesPerRow / numSplits, null);

  // Test num splits and split equality.
  assertThat(splits, hasSize(expectedNumSplits));
  assertSourcesEqualReferenceSource(referenceSource, splits, null);
}
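The same multi-range restriction is available on the public transform. A minimal sketch, assuming BigtableIO.Read exposes a withKeyRanges overload taking a List<ByteKeyRange>, using ByteKeyRange.ALL_KEYS in place of the fake service's table range, and with placeholder project and instance IDs:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import org.apache.beam.sdk.io.gcp.bigtable.BigtableIO;
import org.apache.beam.sdk.io.range.ByteKey;
import org.apache.beam.sdk.io.range.ByteKeyRange;

// Hypothetical configuration method mirroring the three key ranges built in the test above.
static BigtableIO.Read multiRangeRead() {
  ByteKey splitKey1 = ByteKey.copyFrom("key000000330".getBytes(StandardCharsets.UTF_8));
  ByteKey splitKey2 = ByteKey.copyFrom("key000000730".getBytes(StandardCharsets.UTF_8));
  ByteKeyRange tableRange = ByteKeyRange.ALL_KEYS; // stands in for service.getTableRange(table)
  List<ByteKeyRange> keyRanges =
      Arrays.asList(
          tableRange.withEndKey(splitKey1),                         // [start, splitKey1)
          tableRange.withStartKey(splitKey1).withEndKey(splitKey2), // [splitKey1, splitKey2)
          tableRange.withStartKey(splitKey2));                      // [splitKey2, end)
  return BigtableIO.read()
      .withProjectId("my-project")    // placeholder
      .withInstanceId("my-instance")  // placeholder
      .withTableId("TEST-MANY-ROWS-SPLITS-TABLE-MULTIPLE-RANGES")
      .withKeyRanges(keyRanges);      // assumed overload taking a List<ByteKeyRange>
}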