Search in sources:

Example 1 with BigtableSource

Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in the Apache Beam project.

From class BigtableIOTest, method testGetSplitPointsConsumed.

@Test
public void testGetSplitPointsConsumed() throws Exception {
    final String table = "TEST-TABLE";
    final int numRows = 100;
    int splitPointsConsumed = 0;
    makeTableData(table, numRows);
    BigtableSource source =
        new BigtableSource(
            config.withTableId(StaticValueProvider.of(table)),
            BigtableReadOptions.builder().setKeyRanges(ALL_KEY_RANGE).build(),
            null);
    BoundedReader<Row> reader = source.createReader(TestPipeline.testingPipelineOptions());
    reader.start();
    // Started, 0 split points consumed
    assertEquals("splitPointsConsumed starting", splitPointsConsumed, reader.getSplitPointsConsumed());
    // Split points consumed increases for each row read
    while (reader.advance()) {
        assertEquals("splitPointsConsumed advancing", ++splitPointsConsumed, reader.getSplitPointsConsumed());
    }
    // Reader marked as done, 100 split points consumed
    assertEquals("splitPointsConsumed done", numRows, reader.getSplitPointsConsumed());
    reader.close();
}
Also used: ByteString (com.google.protobuf.ByteString), Row (com.google.bigtable.v2.Row), BigtableSource (org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource), Test (org.junit.Test)
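
For orientation: the tests above construct the package-private BigtableSource directly. In application code the same read is normally expressed through the public BigtableIO.read() transform. The following is a minimal sketch, not part of the test class; the project, instance, and table IDs are placeholder values.

import com.google.bigtable.v2.Row;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigtable.BigtableIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class BigtableReadSketch {
    public static void main(String[] args) {
        Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
        // The runner breaks this read into bundles via BigtableSource.split(...),
        // and getSplitPointsConsumed() (exercised in the test above) feeds progress reporting.
        PCollection<Row> rows = p.apply(
            BigtableIO.read()
                .withProjectId("my-project") // placeholder
                .withInstanceId("my-instance") // placeholder
                .withTableId("TEST-TABLE"));
        p.run().waitUntilFinish();
    }
}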

Example 2 with BigtableSource

Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in the Apache Beam project.

From class BigtableIOTest, method testReadingWithSubSplits.

/**
 * Tests reading all rows from a sub-split table.
 */
@Test
public void testReadingWithSubSplits() throws Exception {
    final String table = "TEST-MANY-ROWS-SPLITS-TABLE";
    final int numRows = 1000;
    final int numSamples = 10;
    final int numSplits = 20;
    final long bytesPerRow = 100L;
    // Set up test table data and sample row keys for size estimation and splitting.
    makeTableData(table, numRows);
    service.setupSampleRowKeys(table, numSamples, bytesPerRow);
    // Generate source and split it.
    BigtableSource source =
        new BigtableSource(
            config.withTableId(StaticValueProvider.of(table)),
            BigtableReadOptions.builder().setKeyRanges(ALL_KEY_RANGE).build(),
            null);
    List<BigtableSource> splits = source.split(numRows * bytesPerRow / numSplits, null);
    // Test num splits and split equality.
    assertThat(splits, hasSize(numSplits));
    assertSourcesEqualReferenceSource(source, splits, null);
}
Also used: ByteString (com.google.protobuf.ByteString), BigtableSource (org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource), Test (org.junit.Test)
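
A note on the arithmetic, as we read it: the desired bundle size passed to split() is numRows * bytesPerRow / numSplits = 1000 * 100 / 20 = 5,000 bytes. The fake service reports numSamples = 10 sample ranges of roughly 10,000 bytes each, so each sampled range should be sub-split in two, which is why exactly numSplits = 20 splits are asserted.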

Example 3 with BigtableSource

Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in the Apache Beam project.

From class BigtableIOTest, method testReadingWithSplits.

/**
 * Tests reading all rows from a split table.
 */
@Test
public void testReadingWithSplits() throws Exception {
    final String table = "TEST-MANY-ROWS-SPLITS-TABLE";
    final int numRows = 1500;
    final int numSamples = 10;
    final long bytesPerRow = 100L;
    // Set up test table data and sample row keys for size estimation and splitting.
    makeTableData(table, numRows);
    service.setupSampleRowKeys(table, numSamples, bytesPerRow);
    // Generate source and split it.
    BigtableSource source =
        new BigtableSource(
            config.withTableId(StaticValueProvider.of(table)),
            BigtableReadOptions.builder().setKeyRanges(ALL_KEY_RANGE).build(),
            null);
    List<BigtableSource> splits = source.split(numRows * bytesPerRow / numSamples, null);
    // Test num splits and split equality.
    assertThat(splits, hasSize(numSamples));
    assertSourcesEqualReferenceSource(source, splits, null);
}
Also used: ByteString (com.google.protobuf.ByteString), BigtableSource (org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource), Test (org.junit.Test)
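
By the same arithmetic, the desired bundle size here is numRows * bytesPerRow / numSamples = 1500 * 100 / 10 = 15,000 bytes, which matches the approximate size of each of the 10 sampled ranges, so no sub-splitting is expected and the split count equals numSamples.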

Example 4 with BigtableSource

Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in the Apache Beam project.

From class BigtableIOTest, method testReadingWithFilterAndSubSplits.

/**
 * Tests reading all rows from a sub-split table with a row filter applied.
 */
@Test
public void testReadingWithFilterAndSubSplits() throws Exception {
    final String table = "TEST-FILTER-SUB-SPLITS";
    final int numRows = 1700;
    final int numSamples = 10;
    final int numSplits = 20;
    final long bytesPerRow = 100L;
    // Set up test table data and sample row keys for size estimation and splitting.
    makeTableData(table, numRows);
    service.setupSampleRowKeys(table, numSamples, bytesPerRow);
    // Generate source and split it.
    RowFilter filter = RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8(".*17.*")).build();
    BigtableSource source =
        new BigtableSource(
            config.withTableId(StaticValueProvider.of(table)),
            BigtableReadOptions.builder()
                .setRowFilter(StaticValueProvider.of(filter))
                .setKeyRanges(ALL_KEY_RANGE)
                .build(),
            null);
    List<BigtableSource> splits = source.split(numRows * bytesPerRow / numSplits, null);
    // Test num splits and split equality.
    assertThat(splits, hasSize(numSplits));
    assertSourcesEqualReferenceSource(source, splits, null);
}
Also used: RowFilter (com.google.bigtable.v2.RowFilter), ByteString (com.google.protobuf.ByteString), BigtableSource (org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource), Test (org.junit.Test)
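
The RowFilter restricts the returned rows to keys matching the regex .*17.*, while splitting is still driven by the sampled row keys rather than by the filter, so the filter does not change the expected split count. For completeness, a hedged sketch of attaching the same filter through the public transform (project and instance IDs are placeholders, and the class and helper names are hypothetical):

import com.google.bigtable.v2.RowFilter;
import com.google.protobuf.ByteString;
import org.apache.beam.sdk.io.gcp.bigtable.BigtableIO;

public class FilteredBigtableReadSketch {
    static BigtableIO.Read filteredRead() {
        // Same row-key regex as the test above.
        RowFilter filter = RowFilter.newBuilder()
                .setRowKeyRegexFilter(ByteString.copyFromUtf8(".*17.*"))
                .build();
        return BigtableIO.read()
                .withProjectId("my-project") // placeholder
                .withInstanceId("my-instance") // placeholder
                .withTableId("TEST-FILTER-SUB-SPLITS")
                .withRowFilter(filter);
    }
}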

Example 5 with BigtableSource

Use of org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource in the Apache Beam project.

From class BigtableIOTest, method testReadingWithSubSplitsWithSeveralKeyRanges.

/**
 * Tests reading all rows from a sub-split table with several key ranges.
 */
@Test
public void testReadingWithSubSplitsWithSeveralKeyRanges() throws Exception {
    final String table = "TEST-MANY-ROWS-SPLITS-TABLE-MULTIPLE-RANGES";
    final int numRows = 1000;
    final int numSamples = 10;
    final int numSplits = 20;
    // We expect 24 splits instead of 20 due to the multiple ranges. For a key of 330 separating
    // the multiple ranges, first the [300, 330) range is subsplit into two (since numSplits is
    // twice numSamples), so we get [300, 315) and [315, 330). Then, the [330, 400) range is also
    // split into two, resulting in [330, 365) and [365, 400). These ranges would instead be
    // [300, 350) and [350, 400) if this source was one range. Thus, each extra range adds two
    // resulting splits.
    final int expectedNumSplits = 24;
    final long bytesPerRow = 100L;
    // Set up test table data and sample row keys for size estimation and splitting.
    makeTableData(table, numRows);
    service.setupSampleRowKeys(table, numSamples, bytesPerRow);
    ByteKey splitKey1 = ByteKey.copyFrom("key000000330".getBytes(StandardCharsets.UTF_8));
    ByteKey splitKey2 = ByteKey.copyFrom("key000000730".getBytes(StandardCharsets.UTF_8));
    ByteKeyRange tableRange = service.getTableRange(table);
    List<ByteKeyRange> keyRanges =
        Arrays.asList(
            tableRange.withEndKey(splitKey1),
            tableRange.withStartKey(splitKey1).withEndKey(splitKey2),
            tableRange.withStartKey(splitKey2));
    // Generate source and split it.
    BigtableSource source =
        new BigtableSource(
            config.withTableId(StaticValueProvider.of(table)),
            BigtableReadOptions.builder().setKeyRanges(StaticValueProvider.of(keyRanges)).build(),
            null);
    BigtableSource referenceSource =
        new BigtableSource(
            config.withTableId(StaticValueProvider.of(table)),
            BigtableReadOptions.builder()
                .setKeyRanges(StaticValueProvider.of(ImmutableList.of(service.getTableRange(table))))
                .build(),
            null);
    List<BigtableSource> splits = source.split(numRows * bytesPerRow / numSplits, null);
    // Test num splits and split equality.
    assertThat(splits, hasSize(expectedNumSplits));
    assertSourcesEqualReferenceSource(referenceSource, splits, null);
}
Also used: ByteKey (org.apache.beam.sdk.io.range.ByteKey), ByteKeyRange (org.apache.beam.sdk.io.range.ByteKeyRange), ByteString (com.google.protobuf.ByteString), BigtableSource (org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource), Test (org.junit.Test)
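
To spell out the expected count: a single-range source with these parameters would yield 20 splits (10 sampled ranges, each sub-split in two, as in Example 2), and, per the in-code comment, each of the two interior boundary keys (key000000330 and key000000730) adds two more splits, giving 20 + 2 * 2 = 24.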

Aggregations

ByteString (com.google.protobuf.ByteString): 11 usages
BigtableSource (org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource): 11 usages
Test (org.junit.Test): 11 usages
ByteKeyRange (org.apache.beam.sdk.io.range.ByteKeyRange): 5 usages
ArrayList (java.util.ArrayList): 3 usages
ByteKey (org.apache.beam.sdk.io.range.ByteKey): 2 usages
Row (com.google.bigtable.v2.Row): 1 usage
RowFilter (com.google.bigtable.v2.RowFilter): 1 usage