Search in sources :

Example 66 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class BigQueryIOTest method testReadFromTable.

@Test
public void testReadFromTable() throws IOException, InterruptedException {
    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    bqOptions.setProject("defaultproject");
    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
    Job job = new Job();
    JobStatus status = new JobStatus();
    job.setStatus(status);
    JobStatistics jobStats = new JobStatistics();
    job.setStatistics(jobStats);
    JobStatistics4 extract = new JobStatistics4();
    jobStats.setExtract(extract);
    extract.setDestinationUriFileCounts(ImmutableList.of(1L));
    Table sometable = new Table();
    sometable.setSchema(new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("number").setType("INTEGER"))));
    sometable.setTableReference(new TableReference().setProjectId("non-executing-project").setDatasetId("somedataset").setTableId("sometable"));
    sometable.setNumBytes(1024L * 1024L);
    FakeDatasetService fakeDatasetService = new FakeDatasetService();
    fakeDatasetService.createDataset("non-executing-project", "somedataset", "", "");
    fakeDatasetService.createTable(sometable);
    List<TableRow> records = Lists.newArrayList(new TableRow().set("name", "a").set("number", 1L), new TableRow().set("name", "b").set("number", 2L), new TableRow().set("name", "c").set("number", 3L));
    fakeDatasetService.insertAll(sometable.getTableReference(), records, null);
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(new FakeJobService()).withDatasetService(fakeDatasetService);
    Pipeline p = TestPipeline.create(bqOptions);
    PCollection<KV<String, Long>> output = p.apply(BigQueryIO.read().from("non-executing-project:somedataset.sometable").withTestServices(fakeBqServices).withoutValidation()).apply(ParDo.of(new DoFn<TableRow, KV<String, Long>>() {

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            c.output(KV.of((String) c.element().get("name"), Long.valueOf((String) c.element().get("number"))));
        }
    }));
    PAssert.that(output).containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L)));
    p.run();
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) JobStatistics4(com.google.api.services.bigquery.model.JobStatistics4) TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) KV(org.apache.beam.sdk.values.KV) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) JobStatus(com.google.api.services.bigquery.model.JobStatus) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) DoFn(org.apache.beam.sdk.transforms.DoFn) TableRow(com.google.api.services.bigquery.model.TableRow) Job(com.google.api.services.bigquery.model.Job) Test(org.junit.Test)

Example 67 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class BigQueryIOTest method testRemoveTemporaryTables.

@Test
public void testRemoveTemporaryTables() throws Exception {
    FakeDatasetService datasetService = new FakeDatasetService();
    String projectId = "project";
    String datasetId = "dataset";
    datasetService.createDataset(projectId, datasetId, "", "");
    List<TableReference> tableRefs = Lists.newArrayList(BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table1")), BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table2")), BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table3")));
    for (TableReference tableRef : tableRefs) {
        datasetService.createTable(new Table().setTableReference(tableRef));
    }
    // Add one more table to delete that does not actually exist.
    tableRefs.add(BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table4")));
    WriteRename.removeTemporaryTables(datasetService, tableRefs);
    for (TableReference ref : tableRefs) {
        loggedWriteRename.verifyDebug("Deleting table " + toJsonString(ref));
        checkState(datasetService.getTable(ref) == null, "Table " + ref + " was not deleted!");
    }
}
Also used : BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Test(org.junit.Test)

Example 68 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class BigQueryIOTest method testCreateNeverWithStreaming.

@Test
public void testCreateNeverWithStreaming() throws Exception {
    BigQueryOptions options = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    options.setProject("project");
    options.setStreaming(true);
    Pipeline p = TestPipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setDatasetId("dataset");
    tableRef.setTableId("sometable");
    PCollection<TableRow> tableRows = p.apply(GenerateSequence.from(0)).apply(MapElements.via(new SimpleFunction<Long, TableRow>() {

        @Override
        public TableRow apply(Long input) {
            return null;
        }
    })).setCoder(TableRowJsonCoder.of());
    tableRows.apply(BigQueryIO.writeTableRows().to(tableRef).withCreateDisposition(CreateDisposition.CREATE_NEVER).withoutValidation());
}
Also used : BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) TableRow(com.google.api.services.bigquery.model.TableRow) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 69 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class BigQueryIOTest method testWriteRename.

@Test
public void testWriteRename() throws Exception {
    p.enableAbandonedNodeEnforcement(false);
    FakeDatasetService datasetService = new FakeDatasetService();
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(new FakeJobService()).withDatasetService(datasetService);
    datasetService.createDataset("project-id", "dataset-id", "", "");
    final int numFinalTables = 3;
    final int numTempTablesPerFinalTable = 3;
    final int numRecordsPerTempTable = 10;
    Map<TableDestination, List<TableRow>> expectedRowsPerTable = Maps.newHashMap();
    String jobIdToken = "jobIdToken";
    Map<TableDestination, Iterable<String>> tempTables = Maps.newHashMap();
    for (int i = 0; i < numFinalTables; ++i) {
        String tableName = "project-id:dataset-id.table_" + i;
        TableDestination tableDestination = new TableDestination(tableName, "table_" + i + "_desc");
        List<String> tables = Lists.newArrayList();
        tempTables.put(tableDestination, tables);
        List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
        if (expectedRows == null) {
            expectedRows = Lists.newArrayList();
            expectedRowsPerTable.put(tableDestination, expectedRows);
        }
        for (int j = 0; i < numTempTablesPerFinalTable; ++i) {
            TableReference tempTable = new TableReference().setProjectId("project-id").setDatasetId("dataset-id").setTableId(String.format("%s_%05d_%05d", jobIdToken, i, j));
            datasetService.createTable(new Table().setTableReference(tempTable));
            List<TableRow> rows = Lists.newArrayList();
            for (int k = 0; k < numRecordsPerTempTable; ++k) {
                rows.add(new TableRow().set("number", j * numTempTablesPerFinalTable + k));
            }
            datasetService.insertAll(tempTable, rows, null);
            expectedRows.addAll(rows);
            tables.add(BigQueryHelpers.toJsonString(tempTable));
        }
    }
    PCollection<KV<TableDestination, String>> tempTablesPCollection = p.apply(Create.of(tempTables).withCoder(KvCoder.of(TableDestinationCoder.of(), IterableCoder.of(StringUtf8Coder.of())))).apply(ParDo.of(new DoFn<KV<TableDestination, Iterable<String>>, KV<TableDestination, String>>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            TableDestination tableDestination = c.element().getKey();
            for (String tempTable : c.element().getValue()) {
                c.output(KV.of(tableDestination, tempTable));
            }
        }
    }));
    PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView = PCollectionViews.multimapView(tempTablesPCollection, WindowingStrategy.globalDefault(), KvCoder.of(TableDestinationCoder.of(), StringUtf8Coder.of()));
    PCollectionView<String> jobIdTokenView = p.apply("CreateJobId", Create.of("jobId")).apply(View.<String>asSingleton());
    WriteRename writeRename = new WriteRename(fakeBqServices, jobIdTokenView, WriteDisposition.WRITE_EMPTY, CreateDisposition.CREATE_IF_NEEDED, tempTablesView);
    DoFnTester<Void, Void> tester = DoFnTester.of(writeRename);
    tester.setSideInput(tempTablesView, GlobalWindow.INSTANCE, tempTables);
    tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
    tester.processElement(null);
    for (Map.Entry<TableDestination, Iterable<String>> entry : tempTables.entrySet()) {
        TableDestination tableDestination = entry.getKey();
        TableReference tableReference = tableDestination.getTableReference();
        Table table = checkNotNull(datasetService.getTable(tableReference));
        assertEquals(tableReference.getTableId() + "_desc", tableDestination.getTableDescription());
        List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
        assertThat(datasetService.getAllRows(tableReference.getProjectId(), tableReference.getDatasetId(), tableReference.getTableId()), containsInAnyOrder(Iterables.toArray(expectedRows, TableRow.class)));
        // Temp tables should be deleted.
        for (String tempTableJson : entry.getValue()) {
            TableReference tempTable = BigQueryHelpers.fromJsonString(tempTableJson, TableReference.class);
            assertEquals(null, datasetService.getTable(tempTable));
        }
    }
}
Also used : BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) KV(org.apache.beam.sdk.values.KV) DoFn(org.apache.beam.sdk.transforms.DoFn) TableRow(com.google.api.services.bigquery.model.TableRow) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 70 with TableReference

use of com.google.api.services.bigquery.model.TableReference in project beam by apache.

the class BigQueryIOTest method testBigQueryTableSourceThroughJsonAPI.

@Test
public void testBigQueryTableSourceThroughJsonAPI() throws Exception {
    FakeDatasetService datasetService = new FakeDatasetService();
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(new FakeJobService()).withDatasetService(datasetService);
    List<TableRow> expected = ImmutableList.of(new TableRow().set("name", "a").set("number", "1"), new TableRow().set("name", "b").set("number", "2"), new TableRow().set("name", "c").set("number", "3"), new TableRow().set("name", "d").set("number", "4"), new TableRow().set("name", "e").set("number", "5"), new TableRow().set("name", "f").set("number", "6"));
    TableReference table = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
    datasetService.createDataset(table.getProjectId(), table.getDatasetId(), "", "");
    datasetService.createTable(new Table().setTableReference(table));
    datasetService.insertAll(table, expected, null);
    Path baseDir = Files.createTempDirectory(tempFolder, "testBigQueryTableSourceThroughJsonAPI");
    String stepUuid = "testStepUuid";
    BoundedSource<TableRow> bqSource = BigQueryTableSource.create(stepUuid, StaticValueProvider.of(table), fakeBqServices);
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setTempLocation(baseDir.toString());
    Assert.assertThat(SourceTestUtils.readFromSource(bqSource, options), CoreMatchers.is(expected));
    SourceTestUtils.assertSplitAtFractionBehavior(bqSource, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
}
Also used : Path(java.nio.file.Path) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Test(org.junit.Test)

Aggregations

TableReference (com.google.api.services.bigquery.model.TableReference)139 Test (org.junit.Test)75 TableRow (com.google.api.services.bigquery.model.TableRow)68 Table (com.google.api.services.bigquery.model.Table)61 TableSchema (com.google.api.services.bigquery.model.TableSchema)36 DatasetServiceImpl (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl)29 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)22 TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse)19 FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)19 MockSleeper (com.google.api.client.testing.util.MockSleeper)17 BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference)17 FailsafeValueInSingleWindow (org.apache.beam.sdk.values.FailsafeValueInSingleWindow)16 JobStatus (com.google.api.services.bigquery.model.JobStatus)15 ReadSession (com.google.cloud.bigquery.storage.v1.ReadSession)15 BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference)15 ErrorProto (com.google.api.services.bigquery.model.ErrorProto)14 JobStatistics (com.google.api.services.bigquery.model.JobStatistics)14 CreateReadSessionRequest (com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest)14 ByteString (com.google.protobuf.ByteString)14 IOException (java.io.IOException)14