Search in sources :

Example 1 with CloudBigtableTableConfiguration

use of com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration in project java-docs-samples by GoogleCloudPlatform.

From the class LoadData, method main:

/**
 * Builds and runs a Beam pipeline that writes generated rows into a Bigtable table.
 *
 * <p>The number of rows is derived from the requested gigabytes written divided by the
 * per-row size. Every sequence number becomes a zero-padded, reversed row key whose single
 * cell (qualifier "C" in COLUMN_FAMILY) holds random bytes. The call blocks until the
 * pipeline has finished.
 *
 * @param args command-line arguments parsed into {@code WriteDataOptions}
 */
public static void main(String[] args) {
    WriteDataOptions options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(WriteDataOptions.class);
    Pipeline pipeline = Pipeline.create(options);

    CloudBigtableTableConfiguration tableConfig =
            new CloudBigtableTableConfiguration.Builder()
                    .withProjectId(options.getBigtableProjectId())
                    .withInstanceId(options.getBigtableInstanceId())
                    .withTableId(options.getBigtableTableId())
                    .build();

    long bytesPerRow = options.getMegabytesPerRow() * ONE_MB;
    final long rowCount = (Math.round((options.getGigabytesWritten() * ONE_GB)) / bytesPerRow);

    // Pad every key with leading zeros up to the width of the row count itself.
    int keyWidth = String.valueOf(rowCount).length();
    String keyFormat = "%0" + keyWidth + "d";

    // Turns one sequence number into a Put carrying a random payload.
    DoFn<Long, Mutation> toRandomPut = new DoFn<Long, Mutation>() {

        @ProcessElement
        public void processElement(@Element Long rowkey, OutputReceiver<Mutation> out) {
            // The padded key is reversed before use (original note: for more efficient writing).
            String reversedKey =
                    new StringBuilder(String.format(keyFormat, rowkey)).reverse().toString();
            Put put = new Put(Bytes.toBytes(reversedKey));

            // Fill the cell value with random bytes of the configured row size.
            byte[] payload = new byte[(int) bytesPerRow];
            new Random().nextBytes(payload);

            put.addColumn(
                    Bytes.toBytes(COLUMN_FAMILY),
                    Bytes.toBytes("C"),
                    System.currentTimeMillis(),
                    payload);
            out.output(put);
        }
    };

    pipeline
            .apply(GenerateSequence.from(0).to(rowCount))
            .apply(ParDo.of(toRandomPut))
            .apply(CloudBigtableIO.writeToTable(tableConfig));
    pipeline.run().waitUntilFinish();
}
Also used : Put(org.apache.hadoop.hbase.client.Put) Pipeline(org.apache.beam.sdk.Pipeline) CloudBigtableTableConfiguration(com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration) Random(java.util.Random) Mutation(org.apache.hadoop.hbase.client.Mutation)

Example 2 with CloudBigtableTableConfiguration

use of com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration in project java-docs-samples by GoogleCloudPlatform.

From the class KeyVizArtTest, method testWriteAndRead:

/**
 * Loads data with {@code LoadData.main}, checks by a full table scan that 10 rows were
 * written, then runs {@code ReadFromTableFn} once for each of two grid files and checks
 * the captured output for the expected row counts.
 */
@Test
public void testWriteAndRead() {
    LoadData.main(new String[] { "--bigtableProjectId=" + projectId, "--bigtableInstanceId=" + instanceId, "--bigtableTableId=" + TABLE_ID, "--gigabytesWritten=" + GIGABYTES_WRITTEN, "--megabytesPerRow=" + MEGABYTES_PER_ROW });
    long count = 0;
    // Table and ResultScanner are Closeable as well as the Connection; manage all three
    // here so that none of them leaks when the scan finishes or fails.
    try (Connection connection = BigtableConfiguration.connect(projectId, instanceId);
        Table table = connection.getTable(TableName.valueOf(TABLE_ID));
        ResultScanner rows = table.getScanner(new Scan())) {
        // Only the number of rows matters; the row contents are not inspected.
        for (Result ignored : rows) {
            count++;
        }
    } catch (IOException e) {
        // NOTE(review): the error is only printed; a failed scan then surfaces through the
        // count assertion below rather than as this exception.
        System.out.println("Unable to initialize service client, as a network error occurred: \n" + e.toString());
    }
    assertEquals(10, count);
    ReadDataOptions options = PipelineOptionsFactory.fromArgs("--bigtableProjectId=" + projectId, "--bigtableInstanceId=" + instanceId, "--bigtableTableId=" + TABLE_ID, "--gigabytesWritten=" + GIGABYTES_WRITTEN, "--megabytesPerRow=" + MEGABYTES_PER_ROW, "--filePath=gs://keyviz-art/maxgrid.txt").withValidation().as(ReadDataOptions.class);
    Pipeline p = Pipeline.create(options);
    CloudBigtableTableConfiguration bigtableTableConfig = new CloudBigtableTableConfiguration.Builder().withProjectId(options.getBigtableProjectId()).withInstanceId(options.getBigtableInstanceId()).withTableId(options.getBigtableTableId()).build();
    // Feed a single element (1L) through ReadFromTableFn instead of a rate-limited sequence.
    p.apply(Create.of(1L)).apply(ParDo.of(new ReadFromTableFn(bigtableTableConfig, options)));
    p.run().waitUntilFinish();
    String output = bout.toString();
    assertThat(output).contains("got 10 rows");
    // Repeat the read with the half-grid file.
    options = PipelineOptionsFactory.fromArgs("--bigtableProjectId=" + projectId, "--bigtableInstanceId=" + instanceId, "--bigtableTableId=" + TABLE_ID, "--gigabytesWritten=" + GIGABYTES_WRITTEN, "--megabytesPerRow=" + MEGABYTES_PER_ROW, "--filePath=gs://keyviz-art/halfgrid.txt").withValidation().as(ReadDataOptions.class);
    p = Pipeline.create(options);
    // Again a single element drives one invocation of ReadFromTableFn.
    p.apply(Create.of(1L)).apply(ParDo.of(new ReadFromTableFn(bigtableTableConfig, options)));
    p.run().waitUntilFinish();
    output = bout.toString();
    assertThat(output).contains("got 5 rows");
}
Also used : Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) ReadDataOptions(keyviz.ReadData.ReadDataOptions) Connection(org.apache.hadoop.hbase.client.Connection) IOException(java.io.IOException) Result(org.apache.hadoop.hbase.client.Result) Pipeline(org.apache.beam.sdk.Pipeline) CloudBigtableTableConfiguration(com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration) ReadFromTableFn(keyviz.ReadData.ReadFromTableFn) Scan(org.apache.hadoop.hbase.client.Scan) Test(org.junit.Test)

Example 3 with CloudBigtableTableConfiguration

use of com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration in project java-docs-samples by GoogleCloudPlatform.

From the class ReadData, method main:

/**
 * Builds and starts a Beam pipeline that passes a rate-limited sequence of numbers to
 * {@code ReadFromTableFn}, configured for the Bigtable table named in the options.
 *
 * <p>The pipeline is started with {@code run()}; this method does not wait for it.
 *
 * @param args command-line arguments parsed into {@code ReadDataOptions}
 */
public static void main(String[] args) {
    ReadDataOptions options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(ReadDataOptions.class);
    Pipeline pipeline = Pipeline.create(options);

    CloudBigtableTableConfiguration tableConfig =
            new CloudBigtableTableConfiguration.Builder()
                    .withProjectId(options.getBigtableProjectId())
                    .withInstanceId(options.getBigtableInstanceId())
                    .withTableId(options.getBigtableTableId())
                    .build();

    // Sequence is generated at a rate of 1 element per 1000 ms period.
    Duration tickPeriod = new Duration(1000);
    pipeline
            .apply(GenerateSequence.from(0).withRate(1, tickPeriod))
            .apply(ParDo.of(new ReadFromTableFn(tableConfig, options)));

    pipeline.run();
}
Also used : CloudBigtableTableConfiguration(com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration) Duration(org.joda.time.Duration) Pipeline(org.apache.beam.sdk.Pipeline)

Example 4 with CloudBigtableTableConfiguration

use of com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration in project java-bigtable-hbase by googleapis.

From the class ComputeAndValidateHashFromBigtableDoFnTest, method setUp:

/**
 * Prepares the fixture for each test: an admin client and an HBase connection pointed at
 * the Bigtable emulator, the fake hash wrapper and the DoFn under test, a freshly created
 * test table with two column families, and the test data written by
 * {@code writeDataToTable()}.
 *
 * @throws IOException if a client or connection cannot be created
 */
@Before
public void setUp() throws IOException {
    hashes = new ArrayList<>();

    // Admin client talking to the emulator.
    BigtableTableAdminSettings adminSettings =
            BigtableTableAdminSettings.newBuilderForEmulator(bigtableEmulator.getPort())
                    .setProjectId("fake-project")
                    .setInstanceId("fake-instance")
                    .build();
    tableAdminClient = BigtableTableAdminClient.create(adminSettings);

    // Beam/HBase configuration aimed at the same emulator host.
    CloudBigtableTableConfiguration emulatorConfig =
            new CloudBigtableTableConfiguration.Builder()
                    .withProjectId("fake-project")
                    .withInstanceId("fake-instance")
                    .withTableId(FAKE_TABLE)
                    .withConfiguration(
                            BigtableOptionsFactory.BIGTABLE_EMULATOR_HOST_KEY,
                            "localhost:" + bigtableEmulator.getPort())
                    .build();
    Connection hbaseConnection = BigtableConfiguration.connect(emulatorConfig.toHBaseConfig());
    table = hbaseConnection.getTable(TableName.valueOf(FAKE_TABLE));

    fakeTableHashWrapper = new FakeTableHashWrapper();
    // Ask for every cell version; per the original note, an HBase scan otherwise
    // fetches 1 cell/column by default.
    fakeTableHashWrapper.scan = new Scan().setMaxVersions();
    FakeTableHashWrapperFactory wrapperFactory =
            new FakeTableHashWrapperFactory(fakeTableHashWrapper);

    doFn = new ComputeAndValidateHashFromBigtableDoFn(
            emulatorConfig,
            StaticValueProvider.of(FAKE_TABLE),
            StaticValueProvider.of("proj"),
            StaticValueProvider.of("hash"),
            wrapperFactory);

    // Table shared by the tests, with both families keeping up to 100 versions.
    CreateTableRequest createRequest =
            CreateTableRequest.of(FAKE_TABLE)
                    .addFamily(new String(CF), GCRULES.maxVersions(100))
                    .addFamily(new String(CF2), GCRULES.maxVersions(100));
    tableAdminClient.createTable(createRequest);

    p.getCoderRegistry().registerCoderForClass(RangeHash.class, new RangeHashCoder());

    // Populate the table with test data.
    writeDataToTable();
}
Also used : CloudBigtableTableConfiguration(com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration) Connection(org.apache.hadoop.hbase.client.Connection) Scan(org.apache.hadoop.hbase.client.Scan) Before(org.junit.Before)

Example 5 with CloudBigtableTableConfiguration

use of com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration in project DataflowTemplates by GoogleCloudPlatform.

From the class BigQueryToBigtable, method main:

/**
 * Entry point: builds and starts a pipeline that reads the result of a BigQuery query,
 * converts it with {@code AvroToMutation}, and writes it to a Bigtable table.
 *
 * <p>The pipeline is started with {@code run()}; this method does not wait for it.
 *
 * @param args arguments to the pipeline
 */
public static void main(String[] args) {
    BigQueryToBigtableOptions options =
            PipelineOptionsFactory.fromArgs(args)
                    .withValidation()
                    .as(BigQueryToBigtableOptions.class);

    // Destination table, including the app profile used for the writes.
    CloudBigtableTableConfiguration destination =
            new CloudBigtableTableConfiguration.Builder()
                    .withProjectId(options.getBigtableWriteProjectId())
                    .withInstanceId(options.getBigtableWriteInstanceId())
                    .withAppProfileId(options.getBigtableWriteAppProfile())
                    .withTableId(options.getBigtableWriteTableId())
                    .build();

    Pipeline bqToBigtable = Pipeline.create(options);

    bqToBigtable
            .apply(
                    "AvroToMutation",
                    BigQueryIO.read(
                                    AvroToMutation.newBuilder()
                                            .setColumnFamily(options.getBigtableWriteColumnFamily())
                                            .setRowkey(options.getReadIdColumn())
                                            .build())
                            .fromQuery(options.getReadQuery())
                            .withoutValidation()
                            .withTemplateCompatibility()
                            .usingStandardSql())
            .apply("WriteToTable", CloudBigtableIO.writeToTable(destination));

    bqToBigtable.run();
}
Also used : CloudBigtableTableConfiguration(com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration) Pipeline(org.apache.beam.sdk.Pipeline)

Aggregations

CloudBigtableTableConfiguration (com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration)9 Pipeline (org.apache.beam.sdk.Pipeline)7 Scan (org.apache.hadoop.hbase.client.Scan)3 Test (org.junit.Test)3 IOException (java.io.IOException)2 PipelineResult (org.apache.beam.sdk.PipelineResult)2 Connection (org.apache.hadoop.hbase.client.Connection)2 Mutation (org.apache.hadoop.hbase.client.Mutation)2 Put (org.apache.hadoop.hbase.client.Put)2 ResultScanner (org.apache.hadoop.hbase.client.ResultScanner)2 Duration (org.joda.time.Duration)2 BigtableWorkloadOptions (bigtable.WorkloadGenerator.BigtableWorkloadOptions)1 ReadFromTableFn (bigtable.WorkloadGenerator.ReadFromTableFn)1 ArrayList (java.util.ArrayList)1 Random (java.util.Random)1 ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor)1 ReadDataOptions (keyviz.ReadData.ReadDataOptions)1 ReadFromTableFn (keyviz.ReadData.ReadFromTableFn)1 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)1 Result (org.apache.hadoop.hbase.client.Result)1