Search in sources :

Example 21 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class FakeJobService method runExtractJob.

/**
 * Fakes an extract job by writing the source table's rows to every destination URI.
 *
 * <p>The rows and schema of the source table are fetched from {@code datasetService}; the value
 * returned by {@code writeRows} for each destination is recorded, in order, as the job's
 * destination URI file counts.
 *
 * @param job the job whose statistics are replaced with the extract statistics
 * @param extract the extract configuration naming the source table and destination URIs
 * @return a job status whose state is {@code "DONE"}
 */
private JobStatus runExtractJob(Job job, JobConfigurationExtract extract) throws InterruptedException, IOException {
    final TableReference source = extract.getSourceTable();
    final List<TableRow> tableRows =
        datasetService.getAllRows(source.getProjectId(), source.getDatasetId(), source.getTableId());
    final TableSchema tableSchema = datasetService.getTable(source).getSchema();

    // One entry per destination URI, in the order the URIs are listed.
    final List<Long> fileCounts = Lists.newArrayList();
    for (String uri : extract.getDestinationUris()) {
        fileCounts.add(writeRows(source.getTableId(), tableRows, tableSchema, uri));
    }

    final JobStatistics4 extractStatistics = new JobStatistics4().setDestinationUriFileCounts(fileCounts);
    final JobStatistics statistics = new JobStatistics().setExtract(extractStatistics);
    job.setStatistics(statistics);

    return new JobStatus().setState("DONE");
}
Also used : JobStatus(com.google.api.services.bigquery.model.JobStatus) JobStatistics(com.google.api.services.bigquery.model.JobStatistics) TableReference(com.google.api.services.bigquery.model.TableReference) TableSchema(com.google.api.services.bigquery.model.TableSchema) JobStatistics4(com.google.api.services.bigquery.model.JobStatistics4) TableRow(com.google.api.services.bigquery.model.TableRow)

Example 22 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class CombinePerKeyExamples method main.

/**
 * Builds and runs the pipeline: reads from the configured input table, applies
 * {@code PlaysForWord}, and writes the result to the configured output table.
 *
 * <p>The output table uses a two-column STRING schema ({@code word}, {@code all_plays}); it is
 * created if needed and truncated on write. The method blocks until the pipeline finishes.
 *
 * @param args command-line arguments parsed (with validation) into {@code Options}
 */
public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    // Schema of the output table.
    TableFieldSchema wordField = new TableFieldSchema().setName("word").setType("STRING");
    TableFieldSchema allPlaysField = new TableFieldSchema().setName("all_plays").setType("STRING");
    List<TableFieldSchema> outputFields = new ArrayList<>();
    outputFields.add(wordField);
    outputFields.add(allPlaysField);
    TableSchema outputSchema = new TableSchema().setFields(outputFields);

    pipeline
        .apply(BigQueryIO.read().from(options.getInput()))
        .apply(new PlaysForWord())
        .apply(
            BigQueryIO.writeTableRows()
                .to(options.getOutput())
                .withSchema(outputSchema)
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

    pipeline.run().waitUntilFinish();
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) TableSchema(com.google.api.services.bigquery.model.TableSchema) ArrayList(java.util.ArrayList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Pipeline(org.apache.beam.sdk.Pipeline)

Example 23 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class FilterExamples method buildWeatherSchemaProjection.

/**
 * Builds the schema of the output table.
 *
 * @return a schema with INTEGER columns {@code year}, {@code month}, {@code day} followed by a
 *     FLOAT column {@code mean_temp}
 */
private static TableSchema buildWeatherSchemaProjection() {
    List<TableFieldSchema> columns = new ArrayList<>();
    // The three date components share the INTEGER type.
    for (String dateColumn : new String[] {"year", "month", "day"}) {
        columns.add(new TableFieldSchema().setName(dateColumn).setType("INTEGER"));
    }
    columns.add(new TableFieldSchema().setName("mean_temp").setType("FLOAT"));
    return new TableSchema().setFields(columns);
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) ArrayList(java.util.ArrayList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Example 24 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class BigQueryIOTest method testReadFromTable.

/**
 * Verifies that {@code BigQueryIO.read()} returns the rows stored in a table of the fake dataset
 * service.
 *
 * <p>The test seeds {@code non-executing-project:somedataset.sometable} with three rows, reads
 * the table through the fake services, and asserts on the (name, number) pairs that come out.
 */
@Test
public void testReadFromTable() throws IOException, InterruptedException {
    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    bqOptions.setProject("defaultproject");
    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

    // Table definition: a STRING "name" column and an INTEGER "number" column.
    Table sometable = new Table();
    sometable.setSchema(
        new TableSchema()
            .setFields(
                ImmutableList.of(
                    new TableFieldSchema().setName("name").setType("STRING"),
                    new TableFieldSchema().setName("number").setType("INTEGER"))));
    sometable.setTableReference(
        new TableReference()
            .setProjectId("non-executing-project")
            .setDatasetId("somedataset")
            .setTableId("sometable"));
    sometable.setNumBytes(1024L * 1024L);

    // Register the dataset and table with the fake service, then populate the table.
    FakeDatasetService fakeDatasetService = new FakeDatasetService();
    fakeDatasetService.createDataset("non-executing-project", "somedataset", "", "");
    fakeDatasetService.createTable(sometable);
    List<TableRow> records =
        Lists.newArrayList(
            new TableRow().set("name", "a").set("number", 1L),
            new TableRow().set("name", "b").set("number", 2L),
            new TableRow().set("name", "c").set("number", 3L));
    fakeDatasetService.insertAll(sometable.getTableReference(), records, null);

    FakeBigQueryServices fakeBqServices =
        new FakeBigQueryServices()
            .withJobService(new FakeJobService())
            .withDatasetService(fakeDatasetService);

    Pipeline p = TestPipeline.create(bqOptions);
    PCollection<KV<String, Long>> output =
        p.apply(
                BigQueryIO.read()
                    .from("non-executing-project:somedataset.sometable")
                    .withTestServices(fakeBqServices)
                    .withoutValidation())
            .apply(
                ParDo.of(
                    new DoFn<TableRow, KV<String, Long>>() {

                        @ProcessElement
                        public void processElement(ProcessContext c) throws Exception {
                            // "number" is cast to String before parsing: the read path is
                            // expected to hand the INTEGER column back as text.
                            c.output(
                                KV.of(
                                    (String) c.element().get("name"),
                                    Long.valueOf((String) c.element().get("number"))));
                        }
                    }));

    PAssert.that(output)
        .containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L)));
    p.run();
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) JobStatistics4(com.google.api.services.bigquery.model.JobStatistics4) TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) KV(org.apache.beam.sdk.values.KV) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) JobStatus(com.google.api.services.bigquery.model.JobStatus) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) DoFn(org.apache.beam.sdk.transforms.DoFn) TableRow(com.google.api.services.bigquery.model.TableRow) Job(com.google.api.services.bigquery.model.Job) Test(org.junit.Test)

Example 25 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class BigQueryIOTest method writeDynamicDestinations.

/**
 * Writes generated user strings through {@code BigQueryIO.write()} using dynamic destinations
 * and checks that every destination table in the fake dataset service holds the expected rows.
 *
 * <p>Each element has the form {@code <name><id>}; the numeric id selects the destination table
 * ({@code dataset-id.userid-<id>}). The destinations object also declares two side inputs and
 * verifies their contents whenever a table or schema is requested.
 *
 * @param streaming when true, the input collection is marked as unbounded before the write
 */
public void writeDynamicDestinations(boolean streaming) throws Exception {
    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    bqOptions.setProject("project-id");
    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
    FakeDatasetService datasetService = new FakeDatasetService();
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(new FakeJobService()).withDatasetService(datasetService);
    datasetService.createDataset("project-id", "dataset-id", "", "");
    // Splits an element into its alphabetic name (group 1) and numeric id (group 2).
    final Pattern userPattern = Pattern.compile("([a-z]+)([0-9]+)");
    Pipeline p = TestPipeline.create(bqOptions);
    // Side inputs exposed by the destinations object below and checked in verifySideInputs().
    final PCollectionView<List<String>> sideInput1 = p.apply("Create SideInput 1", Create.of("a", "b", "c").withCoder(StringUtf8Coder.of())).apply("asList", View.<String>asList());
    final PCollectionView<Map<String, String>> sideInput2 = p.apply("Create SideInput2", Create.of(KV.of("a", "a"), KV.of("b", "b"), KV.of("c", "c"))).apply("AsMap", View.<String, String>asMap());
    final List<String> allUsernames = ImmutableList.of("bill", "bob", "randolph");
    List<String> userList = Lists.newArrayList();
    // Generate DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE * 10 distinct ids; presumably this is enough
    // destinations to exercise WriteGroupedRecordsToFiles - TODO confirm.
    for (int i = 0; i < BatchLoads.DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE * 10; ++i) {
        // NOTE(review): this loop runs exactly once, so each id gets a single randomly chosen
        // name (an earlier comment here claimed 10 nicknames per user).
        for (int j = 0; j < 1; ++j) {
            String nickname = allUsernames.get(ThreadLocalRandom.current().nextInt(allUsernames.size()));
            userList.add(nickname + i);
        }
    }
    // Window the users with PartitionedGlobalWindows, using the element itself as the key.
    PCollection<String> users = p.apply("CreateUsers", Create.of(userList)).apply(Window.into(new PartitionedGlobalWindows<>(new SerializableFunction<String, String>() {

        @Override
        public String apply(String arg) {
            return arg;
        }
    })));
    if (streaming) {
        users = users.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
    }
    users.apply("WriteBigQuery", BigQueryIO.<String>write().withTestServices(fakeBqServices).withMaxFilesPerBundle(5).withMaxFileSize(10).withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED).withFormatFunction(new SerializableFunction<String, TableRow>() {

        // Converts "<name><id>" into a row with a "name" string and an "id" integer.
        @Override
        public TableRow apply(String user) {
            Matcher matcher = userPattern.matcher(user);
            if (matcher.matches()) {
                return new TableRow().set("name", matcher.group(1)).set("id", Integer.valueOf(matcher.group(2)));
            }
            throw new RuntimeException("Unmatching element " + user);
        }
    }).to(new StringIntegerDestinations() {

        @Override
        public Integer getDestination(ValueInSingleWindow<String> element) {
            assertThat(element.getWindow(), Matchers.instanceOf(PartitionedGlobalWindow.class));
            Matcher matcher = userPattern.matcher(element.getValue());
            if (matcher.matches()) {
                // The numeric id is the destination key; getTable maps each key to its own table.
                return Integer.valueOf(matcher.group(2));
            }
            throw new RuntimeException("Unmatching destination " + element.getValue());
        }

        @Override
        public TableDestination getTable(Integer userId) {
            verifySideInputs();
            // Each user in its own table.
            return new TableDestination("dataset-id.userid-" + userId, "table for userid " + userId);
        }

        @Override
        public TableSchema getSchema(Integer userId) {
            verifySideInputs();
            return new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("id").setType("INTEGER")));
        }

        @Override
        public List<PCollectionView<?>> getSideInputs() {
            return ImmutableList.of(sideInput1, sideInput2);
        }

        // Asserts that both side inputs are readable here and hold the values created above.
        private void verifySideInputs() {
            assertThat(sideInput(sideInput1), containsInAnyOrder("a", "b", "c"));
            Map<String, String> mapSideInput = sideInput(sideInput2);
            assertEquals(3, mapSideInput.size());
            assertThat(mapSideInput, allOf(hasEntry("a", "a"), hasEntry("b", "b"), hasEntry("c", "c")));
        }
    }).withoutValidation());
    p.run();
    File tempDir = new File(bqOptions.getTempLocation());
    // testNumFiles is defined elsewhere; presumably it asserts that no temporary files remain
    // in the temp location after the run - TODO confirm.
    testNumFiles(tempDir, 0);
    // Rebuild the expected rows per user id from the generated user list.
    Map<Integer, List<TableRow>> expectedTableRows = Maps.newHashMap();
    for (int i = 0; i < userList.size(); ++i) {
        Matcher matcher = userPattern.matcher(userList.get(i));
        checkState(matcher.matches());
        String nickname = matcher.group(1);
        int userid = Integer.valueOf(matcher.group(2));
        List<TableRow> expected = expectedTableRows.get(userid);
        if (expected == null) {
            expected = Lists.newArrayList();
            expectedTableRows.put(userid, expected);
        }
        expected.add(new TableRow().set("name", nickname).set("id", userid));
    }
    // Every per-user table must contain exactly the expected rows, in any order.
    for (Map.Entry<Integer, List<TableRow>> entry : expectedTableRows.entrySet()) {
        assertThat(datasetService.getAllRows("project-id", "dataset-id", "userid-" + entry.getKey()), containsInAnyOrder(Iterables.toArray(entry.getValue(), TableRow.class)));
    }
}
Also used : SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) Matcher(java.util.regex.Matcher) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Pattern(java.util.regex.Pattern) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) TableRow(com.google.api.services.bigquery.model.TableRow) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) File(java.io.File)

Aggregations

TableSchema (com.google.api.services.bigquery.model.TableSchema) 31, TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema) 20, TableRow (com.google.api.services.bigquery.model.TableRow) 18, JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) 13, Test (org.junit.Test) 13, TableReference (com.google.api.services.bigquery.model.TableReference) 12, Pipeline (org.apache.beam.sdk.Pipeline) 12, ArrayList (java.util.ArrayList) 10, BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) 9, TestPipeline (org.apache.beam.sdk.testing.TestPipeline) 8, Table (com.google.api.services.bigquery.model.Table) 7, BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) 7, PipelineOptions (org.apache.beam.sdk.options.PipelineOptions) 7, HashBasedTable (com.google.common.collect.HashBasedTable) 6, JobStatus (com.google.api.services.bigquery.model.JobStatus) 5, JobStatistics (com.google.api.services.bigquery.model.JobStatistics) 4, JobStatistics4 (com.google.api.services.bigquery.model.JobStatistics4) 4, Path (java.nio.file.Path) 4, Map (java.util.Map) 4, Job (com.google.api.services.bigquery.model.Job) 3