Search in sources :

Example 1 with FakeDatasetService

Use of org.apache.beam.sdk.io.gcp.testing.FakeDatasetService in the Apache Beam project.

Source: class BigQueryIOReadTest, method testReadTableWithSchema.

@Test
public void testReadTableWithSchema() throws IOException, InterruptedException {
    // Set up a fake table with a two-column schema: name (STRING) and number (INTEGER).
    TableSchema tableSchema =
        new TableSchema()
            .setFields(
                ImmutableList.of(
                    new TableFieldSchema().setName("name").setType("STRING"),
                    new TableFieldSchema().setName("number").setType("INTEGER")));
    TableReference tableRef =
        new TableReference()
            .setProjectId("non-executing-project")
            .setDatasetId("schema_dataset")
            .setTableId("schema_table");
    Table table = new Table();
    table.setSchema(tableSchema);
    table.setTableReference(tableRef);
    table.setNumBytes(1024L * 1024L);

    // Register the dataset/table and seed it with three rows via the fake services.
    FakeDatasetService datasetService = new FakeDatasetService();
    datasetService.createDataset("non-executing-project", "schema_dataset", "", "", null);
    datasetService.createTable(table);
    List<TableRow> rows =
        Lists.newArrayList(
            new TableRow().set("name", "a").set("number", 1L),
            new TableRow().set("name", "b").set("number", 2L),
            new TableRow().set("name", "c").set("number", 3L));
    datasetService.insertAll(tableRef, rows, null);
    FakeBigQueryServices bqServices =
        new FakeBigQueryServices()
            .withJobService(new FakeJobService())
            .withDatasetService(datasetService);

    // Read with schema inference and verify both the Beam schema and the row contents.
    BigQueryIO.TypedRead<TableRow> read =
        BigQueryIO.readTableRowsWithSchema()
            .from("non-executing-project:schema_dataset.schema_table")
            .withTestServices(bqServices)
            .withoutValidation();
    PCollection<TableRow> bqRows = p.apply(read);
    Schema expectedSchema =
        Schema.of(
            Schema.Field.of("name", Schema.FieldType.STRING).withNullable(true),
            Schema.Field.of("number", Schema.FieldType.INT64).withNullable(true));
    assertEquals(expectedSchema, bqRows.getSchema());
    PCollection<Row> output = bqRows.apply(Select.fieldNames("name", "number"));
    PAssert.that(output)
        .containsInAnyOrder(
            ImmutableList.of(
                Row.withSchema(expectedSchema).addValues("a", 1L).build(),
                Row.withSchema(expectedSchema).addValues("b", 2L).build(),
                Row.withSchema(expectedSchema).addValues("c", 3L).build()));
    p.run();
}
Also used : Table(com.google.api.services.bigquery.model.Table) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.beam.sdk.schemas.Schema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) TableReference(com.google.api.services.bigquery.model.TableReference) BigQueryResourceNaming.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference) FakeDatasetService(org.apache.beam.sdk.io.gcp.testing.FakeDatasetService) FakeJobService(org.apache.beam.sdk.io.gcp.testing.FakeJobService) TableRow(com.google.api.services.bigquery.model.TableRow) FakeBigQueryServices(org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices) TableRow(com.google.api.services.bigquery.model.TableRow) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 2 with FakeDatasetService

Use of org.apache.beam.sdk.io.gcp.testing.FakeDatasetService in the Apache Beam project.

Source: class BigQueryIOReadTest, method testReadFromTable.

private void testReadFromTable(boolean useTemplateCompatibility, boolean useReadTableRows)
        throws IOException, InterruptedException {
    // Build a fake table with columns name (STRING) and number (INTEGER).
    TableReference tableRef =
        new TableReference()
            .setProjectId("non-executing-project")
            .setDatasetId("somedataset")
            .setTableId("sometable");
    Table table = new Table();
    table.setSchema(
        new TableSchema()
            .setFields(
                ImmutableList.of(
                    new TableFieldSchema().setName("name").setType("STRING"),
                    new TableFieldSchema().setName("number").setType("INTEGER"))));
    table.setTableReference(tableRef);
    table.setNumBytes(1024L * 1024L);

    // Seed the fake services with three rows.
    FakeDatasetService datasetService = new FakeDatasetService();
    datasetService.createDataset("non-executing-project", "somedataset", "", "", null);
    datasetService.createTable(table);
    List<TableRow> rows =
        Lists.newArrayList(
            new TableRow().set("name", "a").set("number", 1L),
            new TableRow().set("name", "b").set("number", 2L),
            new TableRow().set("name", "c").set("number", 3L));
    datasetService.insertAll(tableRef, rows, null);
    FakeBigQueryServices bqServices =
        new FakeBigQueryServices()
            .withJobService(new FakeJobService())
            .withDatasetService(datasetService);

    // Exercise the requested combination of read API and template compatibility.
    PTransform<PBegin, PCollection<TableRow>> readTransform;
    if (useReadTableRows) {
        BigQueryIO.Read read =
            BigQueryIO.read()
                .from("non-executing-project:somedataset.sometable")
                .withTestServices(bqServices)
                .withoutValidation();
        readTransform = useTemplateCompatibility ? read.withTemplateCompatibility() : read;
    } else {
        BigQueryIO.TypedRead<TableRow> read =
            BigQueryIO.readTableRows()
                .from("non-executing-project:somedataset.sometable")
                .withTestServices(bqServices)
                .withoutValidation();
        readTransform = useTemplateCompatibility ? read.withTemplateCompatibility() : read;
    }

    // Row values come back as Strings after the fake job service's JSON round-trip,
    // so the number column is parsed back to a long here.
    PCollection<KV<String, Long>> output =
        p.apply(readTransform)
            .apply(
                ParDo.of(
                    new DoFn<TableRow, KV<String, Long>>() {

                        @ProcessElement
                        public void processElement(ProcessContext c) throws Exception {
                            String name = (String) c.element().get("name");
                            String number = (String) c.element().get("number");
                            c.output(KV.of(name, Long.valueOf(number)));
                        }
                    }));
    PAssert.that(output)
        .containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L)));
    p.run();
}
Also used : Table(com.google.api.services.bigquery.model.Table) TableSchema(com.google.api.services.bigquery.model.TableSchema) KV(org.apache.beam.sdk.values.KV) ByteString(com.google.protobuf.ByteString) PBegin(org.apache.beam.sdk.values.PBegin) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) PCollection(org.apache.beam.sdk.values.PCollection) TableReference(com.google.api.services.bigquery.model.TableReference) BigQueryResourceNaming.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference) FakeDatasetService(org.apache.beam.sdk.io.gcp.testing.FakeDatasetService) DoFn(org.apache.beam.sdk.transforms.DoFn) FakeJobService(org.apache.beam.sdk.io.gcp.testing.FakeJobService) TableRow(com.google.api.services.bigquery.model.TableRow) FakeBigQueryServices(org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)

Example 3 with FakeDatasetService

Use of org.apache.beam.sdk.io.gcp.testing.FakeDatasetService in the Apache Beam project.

Source: class BigQueryIOWriteTest, method testRemoveTemporaryTables.

@Test
public void testRemoveTemporaryTables() throws Exception {
    // Purpose: verify that WriteRename.removeTemporaryTables deletes every listed table,
    // logs each deletion, and tolerates a table that does not exist.
    FakeDatasetService datasetService = new FakeDatasetService();
    String projectId = "project";
    String datasetId = "dataset";
    datasetService.createDataset(projectId, datasetId, "", "", null);
    List<TableReference> tableRefs =
        Lists.newArrayList(
            BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table1")),
            BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table2")),
            BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table3")));
    for (TableReference tableRef : tableRefs) {
        datasetService.createTable(new Table().setTableReference(tableRef));
    }
    // Add one more table to delete that does not actually exist.
    tableRefs.add(BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table4")));
    WriteRename.removeTemporaryTables(datasetService, tableRefs);
    for (TableReference ref : tableRefs) {
        loggedWriteRename.verifyDebug("Deleting table " + toJsonString(ref));
        // Use checkState's %s template so the message is only formatted on failure,
        // instead of concatenating the string eagerly on every iteration.
        checkState(datasetService.getTable(ref) == null, "Table %s was not deleted!", ref);
    }
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) FakeDatasetService(org.apache.beam.sdk.io.gcp.testing.FakeDatasetService) Table(com.google.api.services.bigquery.model.Table) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Test(org.junit.Test)

Aggregations

Table (com.google.api.services.bigquery.model.Table)3 TableReference (com.google.api.services.bigquery.model.TableReference)3 FakeDatasetService (org.apache.beam.sdk.io.gcp.testing.FakeDatasetService)3 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)2 TableRow (com.google.api.services.bigquery.model.TableRow)2 TableSchema (com.google.api.services.bigquery.model.TableSchema)2 BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference)2 FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)2 FakeJobService (org.apache.beam.sdk.io.gcp.testing.FakeJobService)2 Test (org.junit.Test)2 ByteString (com.google.protobuf.ByteString)1 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)1 Schema (org.apache.beam.sdk.schemas.Schema)1 DoFn (org.apache.beam.sdk.transforms.DoFn)1 KV (org.apache.beam.sdk.values.KV)1 PBegin (org.apache.beam.sdk.values.PBegin)1 PCollection (org.apache.beam.sdk.values.PCollection)1 Row (org.apache.beam.sdk.values.Row)1