Search in sources:

Example 16 with TableSchema

Use of com.google.api.services.bigquery.model.TableSchema in the Apache Beam project.

From the class FilterExamples, method buildWeatherSchemaProjection.

/**
 * Builds the {@link TableSchema} used for the output table.
 *
 * <p>The schema lists four columns in this order: {@code year}, {@code month} and
 * {@code day} (each typed {@code INTEGER}), followed by {@code mean_temp} (typed
 * {@code FLOAT}).
 *
 * @return a newly created schema wrapping a freshly allocated field list
 */
private static TableSchema buildWeatherSchemaProjection() {
    List<TableFieldSchema> columns = new ArrayList<>();
    // The three date components share the same column type, so add them in one pass.
    for (String dateColumn : new String[] {"year", "month", "day"}) {
        columns.add(new TableFieldSchema().setName(dateColumn).setType("INTEGER"));
    }
    columns.add(new TableFieldSchema().setName("mean_temp").setType("FLOAT"));
    return new TableSchema().setFields(columns);
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) ArrayList(java.util.ArrayList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Example 17 with TableSchema

Use of com.google.api.services.bigquery.model.TableSchema in the Apache Beam project.

From the class BigQueryIOTest, method testReadFromTable.

/**
 * Reads three rows from a fake BigQuery table through {@code BigQueryIO.read()} and
 * checks that they come out of the pipeline as the expected name/number pairs.
 *
 * <p>The table lives in {@code non-executing-project}, while the pipeline options use
 * {@code defaultproject}.
 */
@Test
public void testReadFromTable() throws IOException, InterruptedException {
    BigQueryOptions options = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    options.setProject("defaultproject");
    options.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

    // Job scaffolding reporting a single extracted file. Nothing below in this method
    // references these objects again; they are kept so the method does exactly what it did.
    Job extractJob = new Job();
    JobStatus extractJobStatus = new JobStatus();
    extractJob.setStatus(extractJobStatus);
    JobStatistics statistics = new JobStatistics();
    extractJob.setStatistics(statistics);
    JobStatistics4 extractStatistics = new JobStatistics4();
    statistics.setExtract(extractStatistics);
    extractStatistics.setDestinationUriFileCounts(ImmutableList.of(1L));

    // Describe the source table: two columns, a fixed reference and a 1 MiB size.
    List<TableFieldSchema> columns = ImmutableList.of(
            new TableFieldSchema().setName("name").setType("STRING"),
            new TableFieldSchema().setName("number").setType("INTEGER"));
    Table sourceTable = new Table();
    sourceTable.setSchema(new TableSchema().setFields(columns));
    sourceTable.setTableReference(
            new TableReference()
                    .setProjectId("non-executing-project")
                    .setDatasetId("somedataset")
                    .setTableId("sometable"));
    sourceTable.setNumBytes(1024L * 1024L);

    // Register the dataset and table with the fake service, then load the rows.
    FakeDatasetService datasets = new FakeDatasetService();
    datasets.createDataset("non-executing-project", "somedataset", "", "");
    datasets.createTable(sourceTable);
    List<TableRow> rows = Lists.newArrayList(
            new TableRow().set("name", "a").set("number", 1L),
            new TableRow().set("name", "b").set("number", 2L),
            new TableRow().set("name", "c").set("number", 3L));
    datasets.insertAll(sourceTable.getTableReference(), rows, null);

    FakeBigQueryServices services = new FakeBigQueryServices()
            .withJobService(new FakeJobService())
            .withDatasetService(datasets);

    Pipeline pipeline = TestPipeline.create(options);
    PCollection<TableRow> readRows = pipeline.apply(
            BigQueryIO.read()
                    .from("non-executing-project:somedataset.sometable")
                    .withTestServices(services)
                    .withoutValidation());
    PCollection<KV<String, Long>> output = readRows.apply(ParDo.of(new DoFn<TableRow, KV<String, Long>>() {

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            // The "number" value is cast to String here, so it is parsed back into a Long.
            String name = (String) c.element().get("name");
            Long number = Long.valueOf((String) c.element().get("number"));
            c.output(KV.of(name, number));
        }
    }));

    PAssert.that(output).containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L)));
    pipeline.run();
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) JobStatistics4(com.google.api.services.bigquery.model.JobStatistics4) TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) KV(org.apache.beam.sdk.values.KV) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) JobStatus(com.google.api.services.bigquery.model.JobStatus) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) DoFn(org.apache.beam.sdk.transforms.DoFn) TableRow(com.google.api.services.bigquery.model.TableRow) Job(com.google.api.services.bigquery.model.Job) Test(org.junit.Test)

Example 18 with TableSchema

Use of com.google.api.services.bigquery.model.TableSchema in the Apache Beam project.

From the class BigQueryIOTest, method writeDynamicDestinations.

/**
 * Writes generated user names to BigQuery through a dynamic-destinations implementation
 * that routes each element to a table named after the numeric suffix of the user name,
 * then verifies the rows stored in every destination table.
 *
 * <p>Each element has the form {@code <letters><digits>}. The letters become the
 * {@code name} column and the digits become both the {@code id} column and the
 * destination key.
 *
 * @param streaming when {@code true}, the input collection is marked as unbounded before
 *     the write is applied
 * @throws Exception if pipeline setup, execution, or verification fails
 */
public void writeDynamicDestinations(boolean streaming) throws Exception {
    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    bqOptions.setProject("project-id");
    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
    FakeDatasetService datasetService = new FakeDatasetService();
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(new FakeJobService()).withDatasetService(datasetService);
    datasetService.createDataset("project-id", "dataset-id", "", "");
    // Group 1 captures the lowercase name, group 2 the numeric id.
    final Pattern userPattern = Pattern.compile("([a-z]+)([0-9]+)");
    Pipeline p = TestPipeline.create(bqOptions);
    // Two side inputs (a list and a map) that the destinations object checks in verifySideInputs().
    final PCollectionView<List<String>> sideInput1 = p.apply("Create SideInput 1", Create.of("a", "b", "c").withCoder(StringUtf8Coder.of())).apply("asList", View.<String>asList());
    final PCollectionView<Map<String, String>> sideInput2 = p.apply("Create SideInput2", Create.of(KV.of("a", "a"), KV.of("b", "b"), KV.of("c", "c"))).apply("AsMap", View.<String, String>asMap());
    final List<String> allUsernames = ImmutableList.of("bill", "bob", "randolph");
    List<String> userList = Lists.newArrayList();
    // Generate DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE * 10 users. The original comment here was
    // truncated to "WriteGroupedRecordsToFiles."; presumably the count is chosen to exercise
    // that code path - TODO confirm.
    for (int i = 0; i < BatchLoads.DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE * 10; ++i) {
        // NOTE(review): the original comment said "Every user has 10 nicknames", but this loop
        // runs exactly once, so each index i gets a single randomly chosen name.
        for (int j = 0; j < 1; ++j) {
            String nickname = allUsernames.get(ThreadLocalRandom.current().nextInt(allUsernames.size()));
            userList.add(nickname + i);
        }
    }
    // Window the users with PartitionedGlobalWindows, using the element itself as the
    // value returned by the supplied function.
    PCollection<String> users = p.apply("CreateUsers", Create.of(userList)).apply(Window.into(new PartitionedGlobalWindows<>(new SerializableFunction<String, String>() {

        @Override
        public String apply(String arg) {
            return arg;
        }
    })));
    if (streaming) {
        users = users.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
    }
    users.apply("WriteBigQuery", BigQueryIO.<String>write().withTestServices(fakeBqServices).withMaxFilesPerBundle(5).withMaxFileSize(10).withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED).withFormatFunction(new SerializableFunction<String, TableRow>() {

        // Converts "<name><id>" into a row with a "name" string and an "id" integer.
        @Override
        public TableRow apply(String user) {
            Matcher matcher = userPattern.matcher(user);
            if (matcher.matches()) {
                return new TableRow().set("name", matcher.group(1)).set("id", Integer.valueOf(matcher.group(2)));
            }
            throw new RuntimeException("Unmatching element " + user);
        }
    }).to(new StringIntegerDestinations() {

        @Override
        public Integer getDestination(ValueInSingleWindow<String> element) {
            assertThat(element.getWindow(), Matchers.instanceOf(PartitionedGlobalWindow.class));
            Matcher matcher = userPattern.matcher(element.getValue());
            if (matcher.matches()) {
                // The numeric suffix is the destination key; getTable() maps each id to its own table.
                return Integer.valueOf(matcher.group(2));
            }
            throw new RuntimeException("Unmatching destination " + element.getValue());
        }

        @Override
        public TableDestination getTable(Integer userId) {
            verifySideInputs();
            // Each user id gets its own table.
            return new TableDestination("dataset-id.userid-" + userId, "table for userid " + userId);
        }

        @Override
        public TableSchema getSchema(Integer userId) {
            verifySideInputs();
            // Every destination table shares the same two-column schema.
            return new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("id").setType("INTEGER")));
        }

        @Override
        public List<PCollectionView<?>> getSideInputs() {
            return ImmutableList.of(sideInput1, sideInput2);
        }

        // Asserts that both side inputs are readable here and hold the values created above.
        private void verifySideInputs() {
            assertThat(sideInput(sideInput1), containsInAnyOrder("a", "b", "c"));
            Map<String, String> mapSideInput = sideInput(sideInput2);
            assertEquals(3, mapSideInput.size());
            assertThat(mapSideInput, allOf(hasEntry("a", "a"), hasEntry("b", "b"), hasEntry("c", "c")));
        }
    }).withoutValidation());
    p.run();
    File tempDir = new File(bqOptions.getTempLocation());
    // Presumably asserts that no files remain in the temp location - verify against testNumFiles.
    testNumFiles(tempDir, 0);
    // Rebuild the expected rows per user id from the generated user list.
    Map<Integer, List<TableRow>> expectedTableRows = Maps.newHashMap();
    for (int i = 0; i < userList.size(); ++i) {
        Matcher matcher = userPattern.matcher(userList.get(i));
        checkState(matcher.matches());
        String nickname = matcher.group(1);
        int userid = Integer.valueOf(matcher.group(2));
        List<TableRow> expected = expectedTableRows.get(userid);
        if (expected == null) {
            expected = Lists.newArrayList();
            expectedTableRows.put(userid, expected);
        }
        expected.add(new TableRow().set("name", nickname).set("id", userid));
    }
    // Each "userid-<id>" table must contain exactly the rows expected for that id, in any order.
    for (Map.Entry<Integer, List<TableRow>> entry : expectedTableRows.entrySet()) {
        assertThat(datasetService.getAllRows("project-id", "dataset-id", "userid-" + entry.getKey()), containsInAnyOrder(Iterables.toArray(entry.getValue(), TableRow.class)));
    }
}
Also used : SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) Matcher(java.util.regex.Matcher) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Pattern(java.util.regex.Pattern) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) TableRow(com.google.api.services.bigquery.model.TableRow) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) File(java.io.File)

Example 19 with TableSchema

Use of com.google.api.services.bigquery.model.TableSchema in the Apache Beam project.

From the class BigQueryIOTest, method testBuildWriteDisplayData.

/**
 * Checks that a fully configured {@code BigQueryIO.Write} exposes its table, schema,
 * create/write dispositions, table description and validation flag as display data.
 */
@Test
public void testBuildWriteDisplayData() {
    final String description = "foo bar table";
    String destination = "project:dataset.table";
    TableSchema tableSchema = new TableSchema().set("col1", "type1").set("col2", "type2");

    BigQueryIO.Write<TableRow> writeTransform =
            BigQueryIO.writeTableRows()
                    .to(destination)
                    .withSchema(tableSchema)
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND)
                    .withTableDescription(description)
                    .withoutValidation();

    DisplayData data = DisplayData.from(writeTransform);

    // Items that only need to be present.
    assertThat(data, hasDisplayItem("table"));
    assertThat(data, hasDisplayItem("schema"));
    // Items whose value must match the configuration above.
    assertThat(data, hasDisplayItem("createDisposition", CreateDisposition.CREATE_IF_NEEDED.toString()));
    assertThat(data, hasDisplayItem("writeDisposition", WriteDisposition.WRITE_APPEND.toString()));
    assertThat(data, hasDisplayItem("tableDescription", description));
    assertThat(data, hasDisplayItem("validation", false));
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) Test(org.junit.Test)

Example 20 with TableSchema

Use of com.google.api.services.bigquery.model.TableSchema in the Apache Beam project.

From the class BigQueryIOTest, method testStreamingWrite.

/**
 * Writes four rows from a collection marked as unbounded to a fake BigQuery dataset
 * and checks that all of them end up in {@code project-id:dataset-id.table-id}.
 */
@Test
public void testStreamingWrite() throws Exception {
    BigQueryOptions options = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    options.setProject("defaultproject");
    options.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

    FakeDatasetService datasets = new FakeDatasetService();
    datasets.createDataset("project-id", "dataset-id", "", "");
    FakeBigQueryServices services = new FakeBigQueryServices().withDatasetService(datasets);

    Pipeline pipeline = TestPipeline.create(options);

    // Mark the created input as unbounded before applying the write.
    PCollection<TableRow> input = pipeline
            .apply(Create.of(
                    new TableRow().set("name", "a").set("number", 1),
                    new TableRow().set("name", "b").set("number", 2),
                    new TableRow().set("name", "c").set("number", 3),
                    new TableRow().set("name", "d").set("number", 4))
                    .withCoder(TableRowJsonCoder.of()))
            .setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);

    TableSchema schema = new TableSchema().setFields(ImmutableList.of(
            new TableFieldSchema().setName("name").setType("STRING"),
            new TableFieldSchema().setName("number").setType("INTEGER")));

    input.apply(
            BigQueryIO.writeTableRows()
                    .to("project-id:dataset-id.table-id")
                    .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                    .withSchema(schema)
                    .withTestServices(services)
                    .withoutValidation());
    pipeline.run();

    // The destination table must hold exactly the four input rows, in any order.
    assertThat(
            datasets.getAllRows("project-id", "dataset-id", "table-id"),
            containsInAnyOrder(
                    new TableRow().set("name", "a").set("number", 1),
                    new TableRow().set("name", "b").set("number", 2),
                    new TableRow().set("name", "c").set("number", 3),
                    new TableRow().set("name", "d").set("number", 4)));
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

TableSchema (com.google.api.services.bigquery.model.TableSchema)31 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)20 TableRow (com.google.api.services.bigquery.model.TableRow)18 JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema)13 Test (org.junit.Test)13 TableReference (com.google.api.services.bigquery.model.TableReference)12 Pipeline (org.apache.beam.sdk.Pipeline)12 ArrayList (java.util.ArrayList)10 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)9 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)8 Table (com.google.api.services.bigquery.model.Table)7 BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference)7 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)7 HashBasedTable (com.google.common.collect.HashBasedTable)6 JobStatus (com.google.api.services.bigquery.model.JobStatus)5 JobStatistics (com.google.api.services.bigquery.model.JobStatistics)4 JobStatistics4 (com.google.api.services.bigquery.model.JobStatistics4)4 Path (java.nio.file.Path)4 Map (java.util.Map)4 Job (com.google.api.services.bigquery.model.Job)3