Example 1 with BigQueryHelpers.toJsonString

Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString in project beam by apache.

From the class BigQueryIOTest, method testWriteWithDynamicTables.

public void testWriteWithDynamicTables(boolean streaming) throws Exception {
    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    bqOptions.setProject("defaultproject");
    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
    FakeDatasetService datasetService = new FakeDatasetService();
    datasetService.createDataset("project-id", "dataset-id", "", "");
    FakeBigQueryServices fakeBqServices =
        new FakeBigQueryServices()
            .withDatasetService(datasetService)
            .withJobService(new FakeJobService());
    List<Integer> inserts = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        inserts.add(i);
    }
    // Create a windowing strategy that puts the input into five different windows depending on
    // record value.
    WindowFn<Integer, PartitionedGlobalWindow> windowFn = new PartitionedGlobalWindows(new SerializableFunction<Integer, String>() {

        @Override
        public String apply(Integer i) {
            return Integer.toString(i % 5);
        }
    });
    final Map<Integer, TableDestination> targetTables = Maps.newHashMap();
    Map<String, String> schemas = Maps.newHashMap();
    for (int i = 0; i < 5; i++) {
        TableDestination destination = new TableDestination("project-id:dataset-id.table-id-" + i, "");
        targetTables.put(i, destination);
        // Make sure each target table has its own custom schema.
        schemas.put(
            destination.getTableSpec(),
            BigQueryHelpers.toJsonString(
                new TableSchema()
                    .setFields(
                        ImmutableList.of(
                            new TableFieldSchema().setName("name").setType("STRING"),
                            new TableFieldSchema().setName("number").setType("INTEGER"),
                            new TableFieldSchema().setName("custom_" + i).setType("STRING")))));
    }
    SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFunction = new SerializableFunction<ValueInSingleWindow<Integer>, TableDestination>() {

        @Override
        public TableDestination apply(ValueInSingleWindow<Integer> input) {
            PartitionedGlobalWindow window = (PartitionedGlobalWindow) input.getWindow();
            // Check that we can access the element as well here and that it matches the window.
            checkArgument(window.value.equals(Integer.toString(input.getValue() % 5)), "Incorrect element");
            return targetTables.get(input.getValue() % 5);
        }
    };
    Pipeline p = TestPipeline.create(bqOptions);
    PCollection<Integer> input = p.apply("CreateSource", Create.of(inserts));
    if (streaming) {
        input = input.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
    }
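    // The schema map travels as a side input; withSchemaFromView below looks up
    // the JSON-serialized schema for each destination table at write time.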
    PCollectionView<Map<String, String>> schemasView = p.apply("CreateSchemaMap", Create.of(schemas)).apply("ViewSchemaAsMap", View.<String, String>asMap());
    input
        .apply(Window.<Integer>into(windowFn))
        .apply(
            BigQueryIO.<Integer>write()
                .to(tableFunction)
                .withFormatFunction(
                    new SerializableFunction<Integer, TableRow>() {

                        @Override
                        public TableRow apply(Integer i) {
                            return new TableRow().set("name", "number" + i).set("number", i);
                        }
                    })
                .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                .withSchemaFromView(schemasView)
                .withTestServices(fakeBqServices)
                .withoutValidation());
    p.run();
    for (int i = 0; i < 5; ++i) {
        String tableId = String.format("table-id-%d", i);
        String tableSpec = String.format("project-id:dataset-id.%s", tableId);
        // Verify that table was created with the correct schema.
        assertThat(
            BigQueryHelpers.toJsonString(
                datasetService
                    .getTable(
                        new TableReference()
                            .setProjectId("project-id")
                            .setDatasetId("dataset-id")
                            .setTableId(tableId))
                    .getSchema()),
            equalTo(schemas.get(tableSpec)));
        // Verify that the table has the expected contents.
        assertThat(
            datasetService.getAllRows("project-id", "dataset-id", tableId),
            containsInAnyOrder(
                new TableRow().set("name", String.format("number%d", i)).set("number", i),
                new TableRow().set("name", String.format("number%d", i + 5)).set("number", i + 5)));
    }
}
Also used: TableSchema (com.google.api.services.bigquery.model.TableSchema), JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema), SerializableFunction (org.apache.beam.sdk.transforms.SerializableFunction), ArrayList (java.util.ArrayList), BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString), TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema), BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference), TableReference (com.google.api.services.bigquery.model.TableReference), TestPipeline (org.apache.beam.sdk.testing.TestPipeline), Pipeline (org.apache.beam.sdk.Pipeline), TableRow (com.google.api.services.bigquery.model.TableRow), ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap)
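
Worth noting about toJsonString in the example above: TableSchema, like the other com.google.api.services.bigquery.model types, is not Serializable, so the test ships each schema through the side input as a JSON string and compares schemas as strings afterwards. Below is a minimal round-trip sketch, not taken from the test itself; it assumes it runs somewhere BigQueryHelpers is accessible (the test lives in the same package, org.apache.beam.sdk.io.gcp.bigquery):

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.collect.ImmutableList;

// Serialize a schema to JSON so it can travel through a coder-friendly
// Map<String, String> side input, then recover it on the other side.
TableSchema schema = new TableSchema().setFields(ImmutableList.of(
        new TableFieldSchema().setName("name").setType("STRING"),
        new TableFieldSchema().setName("number").setType("INTEGER")));
String json = BigQueryHelpers.toJsonString(schema);
TableSchema roundTripped = BigQueryHelpers.fromJsonString(json, TableSchema.class);
// The Google API model classes compare by value, so the round trip is lossless:
assert schema.equals(roundTripped);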

Example 2 with BigQueryHelpers.toJsonString

Use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString in project beam by apache.

From the class BigQueryIOTest, method testWriteRename.

@Test
public void testWriteRename() throws Exception {
    p.enableAbandonedNodeEnforcement(false);
    FakeDatasetService datasetService = new FakeDatasetService();
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(new FakeJobService()).withDatasetService(datasetService);
    datasetService.createDataset("project-id", "dataset-id", "", "");
    final int numFinalTables = 3;
    final int numTempTablesPerFinalTable = 3;
    final int numRecordsPerTempTable = 10;
    Map<TableDestination, List<TableRow>> expectedRowsPerTable = Maps.newHashMap();
    String jobIdToken = "jobIdToken";
    Map<TableDestination, Iterable<String>> tempTables = Maps.newHashMap();
    for (int i = 0; i < numFinalTables; ++i) {
        String tableName = "project-id:dataset-id.table_" + i;
        TableDestination tableDestination = new TableDestination(tableName, "table_" + i + "_desc");
        List<String> tables = Lists.newArrayList();
        tempTables.put(tableDestination, tables);
        List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
        if (expectedRows == null) {
            expectedRows = Lists.newArrayList();
            expectedRowsPerTable.put(tableDestination, expectedRows);
        }
        for (int j = 0; j < numTempTablesPerFinalTable; ++j) {
            TableReference tempTable =
                new TableReference()
                    .setProjectId("project-id")
                    .setDatasetId("dataset-id")
                    .setTableId(String.format("%s_%05d_%05d", jobIdToken, i, j));
            datasetService.createTable(new Table().setTableReference(tempTable));
            List<TableRow> rows = Lists.newArrayList();
            for (int k = 0; k < numRecordsPerTempTable; ++k) {
                rows.add(new TableRow().set("number", j * numTempTablesPerFinalTable + k));
            }
            datasetService.insertAll(tempTable, rows, null);
            expectedRows.addAll(rows);
            tables.add(BigQueryHelpers.toJsonString(tempTable));
        }
    }
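    // Flatten the (destination -> temp tables) map into individual pairs so it
    // can be exposed to WriteRename as a multimap side input.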
    PCollection<KV<TableDestination, String>> tempTablesPCollection =
        p.apply(
                Create.of(tempTables)
                    .withCoder(
                        KvCoder.of(
                            TableDestinationCoder.of(), IterableCoder.of(StringUtf8Coder.of()))))
            .apply(
                ParDo.of(
                    new DoFn<KV<TableDestination, Iterable<String>>, KV<TableDestination, String>>() {

                        @ProcessElement
                        public void processElement(ProcessContext c) {
                            TableDestination tableDestination = c.element().getKey();
                            for (String tempTable : c.element().getValue()) {
                                c.output(KV.of(tableDestination, tempTable));
                            }
                        }
                    }));
    PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView =
        PCollectionViews.multimapView(
            tempTablesPCollection,
            WindowingStrategy.globalDefault(),
            KvCoder.of(TableDestinationCoder.of(), StringUtf8Coder.of()));
    PCollectionView<String> jobIdTokenView = p.apply("CreateJobId", Create.of("jobId")).apply(View.<String>asSingleton());
    WriteRename writeRename = new WriteRename(fakeBqServices, jobIdTokenView, WriteDisposition.WRITE_EMPTY, CreateDisposition.CREATE_IF_NEEDED, tempTablesView);
    DoFnTester<Void, Void> tester = DoFnTester.of(writeRename);
    tester.setSideInput(tempTablesView, GlobalWindow.INSTANCE, tempTables);
    tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
    tester.processElement(null);
    for (Map.Entry<TableDestination, Iterable<String>> entry : tempTables.entrySet()) {
        TableDestination tableDestination = entry.getKey();
        TableReference tableReference = tableDestination.getTableReference();
        Table table = checkNotNull(datasetService.getTable(tableReference));
        assertEquals(tableReference.getTableId() + "_desc", tableDestination.getTableDescription());
        List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
        assertThat(
            datasetService.getAllRows(
                tableReference.getProjectId(),
                tableReference.getDatasetId(),
                tableReference.getTableId()),
            containsInAnyOrder(Iterables.toArray(expectedRows, TableRow.class)));
        // Temp tables should be deleted.
        for (String tempTableJson : entry.getValue()) {
            TableReference tempTable = BigQueryHelpers.fromJsonString(tempTableJson, TableReference.class);
            assertEquals(null, datasetService.getTable(tempTable));
        }
    }
}
Also used: BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString), BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference), TableReference (com.google.api.services.bigquery.model.TableReference), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), HashBasedTable (com.google.common.collect.HashBasedTable), Table (com.google.api.services.bigquery.model.Table), KV (org.apache.beam.sdk.values.KV), DoFn (org.apache.beam.sdk.transforms.DoFn), TableRow (com.google.api.services.bigquery.model.TableRow), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), Test (org.junit.Test)
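
The same helper pair does double duty in this example: temp-table references are shipped between stages as JSON strings via toJsonString, then reconstructed with fromJsonString when verifying that the temp tables were deleted. A minimal sketch of that round trip, not taken from the test itself (the table ID below is illustrative, echoing the jobIdToken naming used above):

import com.google.api.services.bigquery.model.TableReference;

// Serialize a TableReference for transport as a plain string, then parse it back.
TableReference ref = new TableReference()
        .setProjectId("project-id")
        .setDatasetId("dataset-id")
        .setTableId("jobIdToken_00000_00000");
String json = BigQueryHelpers.toJsonString(ref);
TableReference parsed = BigQueryHelpers.fromJsonString(json, TableReference.class);
// GenericJson-based model classes compare by value, so parsed.equals(ref) holds.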

Aggregations

TableReference (com.google.api.services.bigquery.model.TableReference): 2 uses
TableRow (com.google.api.services.bigquery.model.TableRow): 2 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 2 uses
ArrayList (java.util.ArrayList): 2 uses
Map (java.util.Map): 2 uses
BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference): 2 uses
BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString): 2 uses
Table (com.google.api.services.bigquery.model.Table): 1 use
TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 1 use
TableSchema (com.google.api.services.bigquery.model.TableSchema): 1 use
HashBasedTable (com.google.common.collect.HashBasedTable): 1 use
ImmutableList (com.google.common.collect.ImmutableList): 1 use
List (java.util.List): 1 use
Pipeline (org.apache.beam.sdk.Pipeline): 1 use
JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema): 1 use
TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 1 use
DoFn (org.apache.beam.sdk.transforms.DoFn): 1 use
SerializableFunction (org.apache.beam.sdk.transforms.SerializableFunction): 1 use
KV (org.apache.beam.sdk.values.KV): 1 use
ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow): 1 use