Search in sources :

Example 1 with RowFilter

use of com.google.bigtable.v2.RowFilter in project beam by apache.

the class BigtableIOTest method testReadingPrimitiveDisplayData.

@Test
public void testReadingPrimitiveDisplayData() throws IOException, InterruptedException {
    final String table = "fooTable";
    service.createTable(table);
    RowFilter rowFilter = RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8("foo.*")).build();
    DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
    BigtableIO.Read read = BigtableIO.read().withBigtableOptions(BIGTABLE_OPTIONS).withTableId(table).withRowFilter(rowFilter).withBigtableService(service);
    Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(read);
    assertThat("BigtableIO.Read should include the table id in its primitive display data", displayData, Matchers.hasItem(hasDisplayItem("tableId")));
    assertThat("BigtableIO.Read should include the row filter, if it exists, in its primitive " + "display data", displayData, Matchers.hasItem(hasDisplayItem("rowFilter")));
}
Also used : RowFilter(com.google.bigtable.v2.RowFilter) ByteString(com.google.protobuf.ByteString) DisplayDataEvaluator(org.apache.beam.sdk.transforms.display.DisplayDataEvaluator) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) Test(org.junit.Test)

Example 2 with RowFilter

use of com.google.bigtable.v2.RowFilter in project YCSB by brianfrankcooper.

the class GoogleBigtableClient method read.

@Override
public Status read(String table, String key, Set<String> fields, HashMap<String, ByteIterator> result) {
    if (debug) {
        System.out.println("Doing read from Bigtable columnfamily " + new String(columnFamilyBytes));
        System.out.println("Doing read for key: " + key);
    }
    setTable(table);
    RowFilter filter = RowFilter.newBuilder().setFamilyNameRegexFilterBytes(ByteStringer.wrap(columnFamilyBytes)).build();
    if (fields != null && fields.size() > 0) {
        Builder filterChain = RowFilter.Chain.newBuilder();
        filterChain.addFilters(filter);
        filterChain.addFilters(RowFilter.newBuilder().setCellsPerColumnLimitFilter(1).build());
        int count = 0;
        // usually "field#" so pre-alloc
        final StringBuilder regex = new StringBuilder(fields.size() * 6);
        for (final String field : fields) {
            if (count++ > 0) {
                regex.append("|");
            }
            regex.append(field);
        }
        filterChain.addFilters(RowFilter.newBuilder().setColumnQualifierRegexFilter(ByteStringer.wrap(regex.toString().getBytes()))).build();
        filter = RowFilter.newBuilder().setChain(filterChain.build()).build();
    }
    final ReadRowsRequest.Builder rrr = ReadRowsRequest.newBuilder().setTableNameBytes(ByteStringer.wrap(lastTableBytes)).setFilter(filter).setRowKey(ByteStringer.wrap(key.getBytes()));
    List<Row> rows;
    try {
        rows = client.readRowsAsync(rrr.build()).get();
        if (rows == null || rows.isEmpty()) {
            return Status.NOT_FOUND;
        }
        for (final Row row : rows) {
            for (final Family family : row.getFamiliesList()) {
                if (Arrays.equals(family.getNameBytes().toByteArray(), columnFamilyBytes)) {
                    for (final Column column : family.getColumnsList()) {
                        // we should only have a single cell per column
                        result.put(column.getQualifier().toString(UTF8_CHARSET), new ByteArrayByteIterator(column.getCells(0).getValue().toByteArray()));
                        if (debug) {
                            System.out.println("Result for field: " + column.getQualifier().toString(UTF8_CHARSET) + " is: " + column.getCells(0).getValue().toString(UTF8_CHARSET));
                        }
                    }
                }
            }
        }
        return Status.OK;
    } catch (InterruptedException e) {
        System.err.println("Interrupted during get: " + e);
        Thread.currentThread().interrupt();
        return Status.ERROR;
    } catch (ExecutionException e) {
        System.err.println("Exception during get: " + e);
        return Status.ERROR;
    }
}
Also used : Builder(com.google.bigtable.v1.RowFilter.Chain.Builder) ReadRowsRequest(com.google.bigtable.v1.ReadRowsRequest) ByteString(com.google.bigtable.repackaged.com.google.protobuf.ByteString) ByteArrayByteIterator(com.yahoo.ycsb.ByteArrayByteIterator) RowFilter(com.google.bigtable.v1.RowFilter) Column(com.google.bigtable.v1.Column) Family(com.google.bigtable.v1.Family) DeleteFromRow(com.google.bigtable.v1.Mutation.DeleteFromRow) Row(com.google.bigtable.v1.Row) ExecutionException(java.util.concurrent.ExecutionException)

Example 3 with RowFilter

use of com.google.bigtable.v2.RowFilter in project YCSB by brianfrankcooper.

the class GoogleBigtableClient method scan.

@Override
public Status scan(String table, String startkey, int recordcount, Set<String> fields, Vector<HashMap<String, ByteIterator>> result) {
    setTable(table);
    RowFilter filter = RowFilter.newBuilder().setFamilyNameRegexFilterBytes(ByteStringer.wrap(columnFamilyBytes)).build();
    if (fields != null && fields.size() > 0) {
        Builder filterChain = RowFilter.Chain.newBuilder();
        filterChain.addFilters(filter);
        filterChain.addFilters(RowFilter.newBuilder().setCellsPerColumnLimitFilter(1).build());
        int count = 0;
        // usually "field#" so pre-alloc
        final StringBuilder regex = new StringBuilder(fields.size() * 6);
        for (final String field : fields) {
            if (count++ > 0) {
                regex.append("|");
            }
            regex.append(field);
        }
        filterChain.addFilters(RowFilter.newBuilder().setColumnQualifierRegexFilter(ByteStringer.wrap(regex.toString().getBytes()))).build();
        filter = RowFilter.newBuilder().setChain(filterChain.build()).build();
    }
    final RowRange range = RowRange.newBuilder().setStartKey(ByteStringer.wrap(startkey.getBytes())).build();
    final ReadRowsRequest.Builder rrr = ReadRowsRequest.newBuilder().setTableNameBytes(ByteStringer.wrap(lastTableBytes)).setFilter(filter).setRowRange(range);
    List<Row> rows;
    try {
        rows = client.readRowsAsync(rrr.build()).get();
        if (rows == null || rows.isEmpty()) {
            return Status.NOT_FOUND;
        }
        int numResults = 0;
        for (final Row row : rows) {
            final HashMap<String, ByteIterator> rowResult = new HashMap<String, ByteIterator>(fields != null ? fields.size() : 10);
            for (final Family family : row.getFamiliesList()) {
                if (Arrays.equals(family.getNameBytes().toByteArray(), columnFamilyBytes)) {
                    for (final Column column : family.getColumnsList()) {
                        // we should only have a single cell per column
                        rowResult.put(column.getQualifier().toString(UTF8_CHARSET), new ByteArrayByteIterator(column.getCells(0).getValue().toByteArray()));
                        if (debug) {
                            System.out.println("Result for field: " + column.getQualifier().toString(UTF8_CHARSET) + " is: " + column.getCells(0).getValue().toString(UTF8_CHARSET));
                        }
                    }
                }
            }
            result.add(rowResult);
            numResults++;
            if (numResults >= recordcount) {
                // if hit recordcount, bail out
                break;
            }
        }
        return Status.OK;
    } catch (InterruptedException e) {
        System.err.println("Interrupted during scan: " + e);
        Thread.currentThread().interrupt();
        return Status.ERROR;
    } catch (ExecutionException e) {
        System.err.println("Exception during scan: " + e);
        return Status.ERROR;
    }
}
Also used : HashMap(java.util.HashMap) Builder(com.google.bigtable.v1.RowFilter.Chain.Builder) ReadRowsRequest(com.google.bigtable.v1.ReadRowsRequest) ByteString(com.google.bigtable.repackaged.com.google.protobuf.ByteString) ByteArrayByteIterator(com.yahoo.ycsb.ByteArrayByteIterator) RowFilter(com.google.bigtable.v1.RowFilter) ByteArrayByteIterator(com.yahoo.ycsb.ByteArrayByteIterator) ByteIterator(com.yahoo.ycsb.ByteIterator) RowRange(com.google.bigtable.v1.RowRange) Column(com.google.bigtable.v1.Column) Family(com.google.bigtable.v1.Family) DeleteFromRow(com.google.bigtable.v1.Mutation.DeleteFromRow) Row(com.google.bigtable.v1.Row) ExecutionException(java.util.concurrent.ExecutionException)

Example 4 with RowFilter

use of com.google.bigtable.v2.RowFilter in project beam by apache.

the class BigtableIOTest method testReadingWithRuntimeParameterizedFilter.

/**
 * Tests reading rows using a filter provided through ValueProvider.
 */
@Test
public void testReadingWithRuntimeParameterizedFilter() throws Exception {
    final String table = "TEST-FILTER-TABLE";
    final int numRows = 1001;
    List<Row> testRows = makeTableData(table, numRows);
    String regex = ".*17.*";
    final KeyMatchesRegex keyPredicate = new KeyMatchesRegex(regex);
    Iterable<Row> filteredRows = testRows.stream().filter(input -> {
        verifyNotNull(input, "input");
        return keyPredicate.apply(input.getKey());
    }).collect(Collectors.toList());
    RowFilter filter = RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8(regex)).build();
    service.setupSampleRowKeys(table, 5, 10L);
    runReadTest(defaultRead.withTableId(table).withRowFilter(StaticValueProvider.of(filter)), Lists.newArrayList(filteredRows));
}
Also used : ExpectedLogs(org.apache.beam.sdk.testing.ExpectedLogs) Mutation(com.google.bigtable.v2.Mutation) Arrays(java.util.Arrays) PipelineExecutionException(org.apache.beam.sdk.Pipeline.PipelineExecutionException) Matchers.hasItems(org.hamcrest.Matchers.hasItems) BoundedReader(org.apache.beam.sdk.io.BoundedSource.BoundedReader) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) Window(org.apache.beam.sdk.transforms.windowing.Window) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Wait(org.apache.beam.sdk.transforms.Wait) Predicates(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Predicates) ValueProvider(org.apache.beam.sdk.options.ValueProvider) KvCoder(org.apache.beam.sdk.coders.KvCoder) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) Matchers.allOf(org.hamcrest.Matchers.allOf) Family(com.google.bigtable.v2.Family) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) CompletionStage(java.util.concurrent.CompletionStage) Verify.verifyNotNull(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Verify.verifyNotNull) MutateRowResponse(com.google.bigtable.v2.MutateRowResponse) TestStream(org.apache.beam.sdk.testing.TestStream) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent(org.apache.beam.sdk.testing.SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent) DisplayDataMatchers.hasDisplayItem(org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem) DisplayDataMatchers.hasLabel(org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasLabel) Preconditions.checkNotNull(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) ArrayList(java.util.ArrayList) DisplayDataMatchers.hasValue(org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasValue) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) Matchers.lessThan(org.hamcrest.Matchers.lessThan) SetCell(com.google.bigtable.v2.Mutation.SetCell) Matchers.hasSize(org.hamcrest.Matchers.hasSize) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Before(org.junit.Before) DoFn(org.apache.beam.sdk.transforms.DoFn) PAssert(org.apache.beam.sdk.testing.PAssert) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) TreeMap(java.util.TreeMap) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SourceTestUtils.assertSplitAtFractionExhaustive(org.apache.beam.sdk.testing.SourceTestUtils.assertSplitAtFractionExhaustive) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Assert.assertEquals(org.junit.Assert.assertEquals) ByteKeyRange(org.apache.beam.sdk.io.range.ByteKeyRange) BulkOptions(com.google.cloud.bigtable.config.BulkOptions) SampleRowKeysResponse(com.google.bigtable.v2.SampleRowKeysResponse) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Column(com.google.bigtable.v2.Column) Description(org.apache.beam.sdk.options.Description) RowFilter(com.google.bigtable.v2.RowFilter) PipelineRunMissingException(org.apache.beam.sdk.testing.TestPipeline.PipelineRunMissingException) SourceTestUtils.assertSourcesEqualReferenceSource(org.apache.beam.sdk.testing.SourceTestUtils.assertSourcesEqualReferenceSource) Cell(com.google.bigtable.v2.Cell) Row(com.google.bigtable.v2.Row) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) Collectors(java.util.stream.Collectors) ByteString(com.google.protobuf.ByteString) BigtableSource(org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource) List(java.util.List) ParDo(org.apache.beam.sdk.transforms.ParDo) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) SortedMap(java.util.SortedMap) RetryOptions(com.google.cloud.bigtable.config.RetryOptions) ByteKey(org.apache.beam.sdk.io.range.ByteKey) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) DisplayDataMatchers.hasKey(org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasKey) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) ByteStringCoder(org.apache.beam.sdk.extensions.protobuf.ByteStringCoder) SourceTestUtils.assertSplitAtFractionFails(org.apache.beam.sdk.testing.SourceTestUtils.assertSplitAtFractionFails) ProtoCoder(org.apache.beam.sdk.extensions.protobuf.ProtoCoder) CredentialType(com.google.cloud.bigtable.config.CredentialOptions.CredentialType) NoSuchElementException(java.util.NoSuchElementException) ExpectedException(org.junit.rules.ExpectedException) Predicate(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Predicate) Nullable(org.checkerframework.checker.nullness.qual.Nullable) BigtableOptions(com.google.cloud.bigtable.config.BigtableOptions) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) Iterator(java.util.Iterator) Assert.assertNotNull(org.junit.Assert.assertNotNull) CredentialOptions(com.google.cloud.bigtable.config.CredentialOptions) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) Matchers(org.hamcrest.Matchers) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) TestCredential(org.apache.beam.sdk.extensions.gcp.auth.TestCredential) Rule(org.junit.Rule) Instant(org.joda.time.Instant) StaticValueProvider(org.apache.beam.sdk.options.ValueProvider.StaticValueProvider) Comparator(java.util.Comparator) Collections(java.util.Collections) IsIterableContainingInAnyOrder(org.hamcrest.collection.IsIterableContainingInAnyOrder) RowFilter(com.google.bigtable.v2.RowFilter) ByteString(com.google.protobuf.ByteString) Row(com.google.bigtable.v2.Row) Test(org.junit.Test)

Example 5 with RowFilter

use of com.google.bigtable.v2.RowFilter in project beam by apache.

the class BigtableIOTest method testReadingWithFilterAndSubSplits.

/**
 * Tests reading all rows from a sub-split table.
 */
@Test
public void testReadingWithFilterAndSubSplits() throws Exception {
    final String table = "TEST-FILTER-SUB-SPLITS";
    final int numRows = 1700;
    final int numSamples = 10;
    final int numSplits = 20;
    final long bytesPerRow = 100L;
    // Set up test table data and sample row keys for size estimation and splitting.
    makeTableData(table, numRows);
    service.setupSampleRowKeys(table, numSamples, bytesPerRow);
    // Generate source and split it.
    RowFilter filter = RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8(".*17.*")).build();
    BigtableSource source = new BigtableSource(config.withTableId(StaticValueProvider.of(table)), BigtableReadOptions.builder().setRowFilter(StaticValueProvider.of(filter)).setKeyRanges(ALL_KEY_RANGE).build(), null);
    List<BigtableSource> splits = source.split(numRows * bytesPerRow / numSplits, null);
    // Test num splits and split equality.
    assertThat(splits, hasSize(numSplits));
    assertSourcesEqualReferenceSource(source, splits, null);
}
Also used : RowFilter(com.google.bigtable.v2.RowFilter) ByteString(com.google.protobuf.ByteString) BigtableSource(org.apache.beam.sdk.io.gcp.bigtable.BigtableIO.BigtableSource) Test(org.junit.Test)

Aggregations

RowFilter (com.google.bigtable.v2.RowFilter)7 ByteString (com.google.protobuf.ByteString)6 Test (org.junit.Test)5 Column (com.google.bigtable.v2.Column)4 Family (com.google.bigtable.v2.Family)4 Row (com.google.bigtable.v2.Row)4 HashMap (java.util.HashMap)4 ExecutionException (java.util.concurrent.ExecutionException)4 DisplayData (org.apache.beam.sdk.transforms.display.DisplayData)4 ByteString (com.google.bigtable.repackaged.com.google.protobuf.ByteString)2 Column (com.google.bigtable.v1.Column)2 Family (com.google.bigtable.v1.Family)2 DeleteFromRow (com.google.bigtable.v1.Mutation.DeleteFromRow)2 ReadRowsRequest (com.google.bigtable.v1.ReadRowsRequest)2 Row (com.google.bigtable.v1.Row)2 RowFilter (com.google.bigtable.v1.RowFilter)2 Builder (com.google.bigtable.v1.RowFilter.Chain.Builder)2 Cell (com.google.bigtable.v2.Cell)2 MutateRowResponse (com.google.bigtable.v2.MutateRowResponse)2 Mutation (com.google.bigtable.v2.Mutation)2