Search in sources :

Example 46 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryIOTest method testWriteRename.

/**
 * Verifies that {@code WriteRename} copies the rows of every temporary table into its final
 * destination table and that the temporary tables no longer exist afterwards.
 *
 * <p>The fixture builds {@code numFinalTables} destinations, each backed by
 * {@code numTempTablesPerFinalTable} temporary tables holding {@code numRecordsPerTempTable}
 * rows apiece.
 */
@Test
public void testWriteRename() throws Exception {
    p.enableAbandonedNodeEnforcement(false);
    FakeDatasetService datasetService = new FakeDatasetService();
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
        .withJobService(new FakeJobService())
        .withDatasetService(datasetService);
    datasetService.createDataset("project-id", "dataset-id", "", "");
    final int numFinalTables = 3;
    final int numTempTablesPerFinalTable = 3;
    final int numRecordsPerTempTable = 10;
    Map<TableDestination, List<TableRow>> expectedRowsPerTable = Maps.newHashMap();
    String jobIdToken = "jobIdToken";
    Map<TableDestination, Iterable<String>> tempTables = Maps.newHashMap();
    for (int i = 0; i < numFinalTables; ++i) {
        String tableName = "project-id:dataset-id.table_" + i;
        TableDestination tableDestination = new TableDestination(tableName, "table_" + i + "_desc");
        List<String> tables = Lists.newArrayList();
        tempTables.put(tableDestination, tables);
        List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
        if (expectedRows == null) {
            expectedRows = Lists.newArrayList();
            expectedRowsPerTable.put(tableDestination, expectedRows);
        }
        // The inner loop must advance its own index j. The previous version tested and
        // incremented the outer index i, so only a single destination was ever populated.
        for (int j = 0; j < numTempTablesPerFinalTable; ++j) {
            TableReference tempTable = new TableReference()
                .setProjectId("project-id")
                .setDatasetId("dataset-id")
                .setTableId(String.format("%s_%05d_%05d", jobIdToken, i, j));
            datasetService.createTable(new Table().setTableReference(tempTable));
            List<TableRow> rows = Lists.newArrayList();
            for (int k = 0; k < numRecordsPerTempTable; ++k) {
                rows.add(new TableRow().set("number", j * numTempTablesPerFinalTable + k));
            }
            datasetService.insertAll(tempTable, rows, null);
            expectedRows.addAll(rows);
            tables.add(BigQueryHelpers.toJsonString(tempTable));
        }
    }
    // Flatten the (destination -> temp tables) map into one KV per temp table so that it can be
    // turned into a multimap side-input view.
    PCollection<KV<TableDestination, String>> tempTablesPCollection = p
        .apply(Create.of(tempTables)
            .withCoder(KvCoder.of(TableDestinationCoder.of(), IterableCoder.of(StringUtf8Coder.of()))))
        .apply(ParDo.of(new DoFn<KV<TableDestination, Iterable<String>>, KV<TableDestination, String>>() {

            @ProcessElement
            public void processElement(ProcessContext c) {
                TableDestination tableDestination = c.element().getKey();
                for (String tempTable : c.element().getValue()) {
                    c.output(KV.of(tableDestination, tempTable));
                }
            }
        }));
    PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView = PCollectionViews.multimapView(
        tempTablesPCollection,
        WindowingStrategy.globalDefault(),
        KvCoder.of(TableDestinationCoder.of(), StringUtf8Coder.of()));
    PCollectionView<String> jobIdTokenView = p
        .apply("CreateJobId", Create.of("jobId"))
        .apply(View.<String>asSingleton());
    WriteRename writeRename = new WriteRename(
        fakeBqServices,
        jobIdTokenView,
        WriteDisposition.WRITE_EMPTY,
        CreateDisposition.CREATE_IF_NEEDED,
        tempTablesView);
    // Drive the DoFn directly, supplying both side inputs by hand.
    DoFnTester<Void, Void> tester = DoFnTester.of(writeRename);
    tester.setSideInput(tempTablesView, GlobalWindow.INSTANCE, tempTables);
    tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
    tester.processElement(null);
    for (Map.Entry<TableDestination, Iterable<String>> entry : tempTables.entrySet()) {
        TableDestination tableDestination = entry.getKey();
        TableReference tableReference = tableDestination.getTableReference();
        // The final table must exist after the rename.
        checkNotNull(datasetService.getTable(tableReference));
        assertEquals(tableReference.getTableId() + "_desc", tableDestination.getTableDescription());
        List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
        assertThat(
            datasetService.getAllRows(
                tableReference.getProjectId(), tableReference.getDatasetId(), tableReference.getTableId()),
            containsInAnyOrder(Iterables.toArray(expectedRows, TableRow.class)));
        // Temp tables should be deleted.
        for (String tempTableJson : entry.getValue()) {
            TableReference tempTable = BigQueryHelpers.fromJsonString(tempTableJson, TableReference.class);
            assertEquals(null, datasetService.getTable(tempTable));
        }
    }
}
Also used : BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) KV(org.apache.beam.sdk.values.KV) DoFn(org.apache.beam.sdk.transforms.DoFn) TableRow(com.google.api.services.bigquery.model.TableRow) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 47 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryIOTest method testBuildWriteWithWriteWithTableDescription.

/**
 * Builds a table-row write with an explicit table description and checks the resulting
 * transform via {@code checkWriteObject}.
 */
@Test
public void testBuildWriteWithWriteWithTableDescription() {
    final String description = "foo bar table";
    BigQueryIO.Write<TableRow> writeTransform =
        BigQueryIO.writeTableRows()
            .to("foo.com:project:somedataset.sometable")
            .withTableDescription(description);
    checkWriteObject(
        writeTransform,
        "foo.com:project",
        "somedataset",
        "sometable",
        null,
        CreateDisposition.CREATE_IF_NEEDED,
        WriteDisposition.WRITE_EMPTY,
        description);
}
Also used : TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Test(org.junit.Test)

Example 48 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryIOTest method testStreamingWrite.

/**
 * Writes four rows from a collection marked as unbounded through
 * {@code BigQueryIO.writeTableRows()} against a fake dataset service, then checks that exactly
 * those rows are present in the target table.
 */
@Test
public void testStreamingWrite() throws Exception {
    BigQueryOptions options = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    options.setProject("defaultproject");
    options.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

    FakeDatasetService fakeDatasets = new FakeDatasetService();
    fakeDatasets.createDataset("project-id", "dataset-id", "", "");
    FakeBigQueryServices services = new FakeBigQueryServices().withDatasetService(fakeDatasets);

    Pipeline pipeline = TestPipeline.create(options);

    // Source rows, flagged as unbounded on the resulting collection.
    PCollection<TableRow> input =
        pipeline
            .apply(
                Create.of(
                        new TableRow().set("name", "a").set("number", 1),
                        new TableRow().set("name", "b").set("number", 2),
                        new TableRow().set("name", "c").set("number", 3),
                        new TableRow().set("name", "d").set("number", 4))
                    .withCoder(TableRowJsonCoder.of()))
            .setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);

    TableSchema schema =
        new TableSchema()
            .setFields(
                ImmutableList.of(
                    new TableFieldSchema().setName("name").setType("STRING"),
                    new TableFieldSchema().setName("number").setType("INTEGER")));

    BigQueryIO.Write<TableRow> sink =
        BigQueryIO.writeTableRows()
            .to("project-id:dataset-id.table-id")
            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
            .withSchema(schema)
            .withTestServices(services)
            .withoutValidation();

    input.apply(sink);
    pipeline.run();

    assertThat(
        fakeDatasets.getAllRows("project-id", "dataset-id", "table-id"),
        containsInAnyOrder(
            new TableRow().set("name", "a").set("number", 1),
            new TableRow().set("name", "b").set("number", 2),
            new TableRow().set("name", "c").set("number", 3),
            new TableRow().set("name", "d").set("number", 4)));
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 49 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryIOTest method testBigQueryTableSourceThroughJsonAPI.

/**
 * Reads a fake BigQuery table through {@code BigQueryTableSource} and checks that the rows read
 * equal the rows inserted, then runs the split-at-fraction consistency check on the source.
 */
@Test
public void testBigQueryTableSourceThroughJsonAPI() throws Exception {
    FakeDatasetService fakeDatasets = new FakeDatasetService();
    FakeBigQueryServices services =
        new FakeBigQueryServices()
            .withJobService(new FakeJobService())
            .withDatasetService(fakeDatasets);

    List<TableRow> insertedRows =
        ImmutableList.of(
            new TableRow().set("name", "a").set("number", "1"),
            new TableRow().set("name", "b").set("number", "2"),
            new TableRow().set("name", "c").set("number", "3"),
            new TableRow().set("name", "d").set("number", "4"),
            new TableRow().set("name", "e").set("number", "5"),
            new TableRow().set("name", "f").set("number", "6"));

    // Create the dataset and table, then load the rows the source is expected to return.
    TableReference tableRef = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
    fakeDatasets.createDataset(tableRef.getProjectId(), tableRef.getDatasetId(), "", "");
    fakeDatasets.createTable(new Table().setTableReference(tableRef));
    fakeDatasets.insertAll(tableRef, insertedRows, null);

    Path tempDir = Files.createTempDirectory(tempFolder, "testBigQueryTableSourceThroughJsonAPI");
    String stepId = "testStepUuid";
    BoundedSource<TableRow> source =
        BigQueryTableSource.create(stepId, StaticValueProvider.of(tableRef), services);

    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    pipelineOptions.setTempLocation(tempDir.toString());

    List<TableRow> rowsRead = SourceTestUtils.readFromSource(source, pipelineOptions);
    Assert.assertThat(rowsRead, CoreMatchers.is(insertedRows));
    SourceTestUtils.assertSplitAtFractionBehavior(
        source, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, pipelineOptions);
}
Also used : Path(java.nio.file.Path) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Test(org.junit.Test)

Example 50 with TableRow

use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

the class BigQueryServicesImplTest method testInsertRetryPolicy.

/**
 * Tests that {@link DatasetServiceImpl#insertAll} uses the supplied {@link InsertRetryPolicy},
 * and returns the list of rows not retried.
 *
 * <p>The mocked HTTP response yields three payloads in sequence: both rows failing (one
 * "timeout", one "invalid"), then the single remaining row failing with "timeout", and finally
 * an error-free response. The test expects one row to end up in the failed-inserts list.
 */
@Test
public void testInsertRetryPolicy() throws InterruptedException, IOException {
    TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
    List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(wrapTableRow(new TableRow()), wrapTableRow(new TableRow()));
    // First time row0 fails with a retryable error, and row1 fails with a persistent error.
    final TableDataInsertAllResponse firstFailure = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(
        new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout"))),
        new InsertErrors().setIndex(1L).setErrors(ImmutableList.of(new ErrorProto().setReason("invalid")))));
    // Second time there is only one row, which fails with a retryable error.
    final TableDataInsertAllResponse secondFailure = new TableDataInsertAllResponse().setInsertErrors(ImmutableList.of(
        new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")))));
    // On the final attempt, no failures are returned.
    final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();
    // Every response is JSON with status 200; a single stubbing applies to all calls.
    when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
    when(response.getStatusCode()).thenReturn(200);
    // Response bodies, in order: two failures, one failure, then success.
    when(response.getContent())
        .thenReturn(toStream(firstFailure))
        .thenReturn(toStream(secondFailure))
        .thenReturn(toStream(allRowsSucceeded));
    DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
    List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
    dataService.insertAll(
        ref,
        rows,
        null,
        BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()),
        new MockSleeper(),
        InsertRetryPolicy.retryTransientErrors(),
        failedInserts);
    assertEquals(1, failedInserts.size());
    expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ErrorProto(com.google.api.services.bigquery.model.ErrorProto) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) TableRow(com.google.api.services.bigquery.model.TableRow) TableDataInsertAllResponse(com.google.api.services.bigquery.model.TableDataInsertAllResponse) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) InsertErrors(com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors) MockSleeper(com.google.api.client.testing.util.MockSleeper) Test(org.junit.Test)

Aggregations

TableRow (com.google.api.services.bigquery.model.TableRow)73 Test (org.junit.Test)43 TableReference (com.google.api.services.bigquery.model.TableReference)24 TableSchema (com.google.api.services.bigquery.model.TableSchema)18 Pipeline (org.apache.beam.sdk.Pipeline)16 KV (org.apache.beam.sdk.values.KV)15 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)14 JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema)14 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)13 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)12 BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference)11 Table (com.google.api.services.bigquery.model.Table)10 HashBasedTable (com.google.common.collect.HashBasedTable)10 JobStatus (com.google.api.services.bigquery.model.JobStatus)9 TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse)8 ArrayList (java.util.ArrayList)8 List (java.util.List)8 Map (java.util.Map)8 ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow)7 JobStatistics (com.google.api.services.bigquery.model.JobStatistics)6