Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class BigQueryIOTest, method testReadFromTable:
@Test
public void testReadFromTable() throws IOException, InterruptedException {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("defaultproject");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

  // Fake extract-job metadata: the read path expects exactly one exported file.
  Job job = new Job();
  JobStatus status = new JobStatus();
  job.setStatus(status);
  JobStatistics jobStats = new JobStatistics();
  job.setStatistics(jobStats);
  JobStatistics4 extract = new JobStatistics4();
  jobStats.setExtract(extract);
  extract.setDestinationUriFileCounts(ImmutableList.of(1L));

  // A fake table with a two-column schema, identified by an explicit TableReference.
  Table sometable = new Table();
  sometable.setSchema(
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("name").setType("STRING"),
                  new TableFieldSchema().setName("number").setType("INTEGER"))));
  sometable.setTableReference(
      new TableReference()
          .setProjectId("non-executing-project")
          .setDatasetId("somedataset")
          .setTableId("sometable"));
  sometable.setNumBytes(1024L * 1024L);

  FakeDatasetService fakeDatasetService = new FakeDatasetService();
  fakeDatasetService.createDataset("non-executing-project", "somedataset", "", "");
  fakeDatasetService.createTable(sometable);

  List<TableRow> records =
      Lists.newArrayList(
          new TableRow().set("name", "a").set("number", 1L),
          new TableRow().set("name", "b").set("number", 2L),
          new TableRow().set("name", "c").set("number", 3L));
  fakeDatasetService.insertAll(sometable.getTableReference(), records, null);

  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(fakeDatasetService);

  // Read through the fake services and project each row to a (name, number) pair.
  Pipeline p = TestPipeline.create(bqOptions);
  PCollection<KV<String, Long>> output =
      p.apply(
              BigQueryIO.read()
                  .from("non-executing-project:somedataset.sometable")
                  .withTestServices(fakeBqServices)
                  .withoutValidation())
          .apply(
              ParDo.of(
                  new DoFn<TableRow, KV<String, Long>>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      c.output(
                          KV.of(
                              (String) c.element().get("name"),
                              Long.valueOf((String) c.element().get("number"))));
                    }
                  }));

  PAssert.that(output)
      .containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L)));
  p.run();
}
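For orientation, the "non-executing-project:somedataset.sometable" spec string passed to from(...) above is just the flattened form of a TableReference. A minimal standalone sketch of the round trip through BigQueryHelpers (the class name and printed values are illustrative, not part of the test):

import com.google.api.services.bigquery.model.TableReference;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers;

public class TableSpecRoundTrip {
  public static void main(String[] args) {
    // Parse a "project:dataset.table" spec into its three components.
    TableReference ref =
        BigQueryHelpers.parseTableSpec("non-executing-project:somedataset.sometable");
    System.out.println(ref.getProjectId()); // non-executing-project
    System.out.println(ref.getDatasetId()); // somedataset
    System.out.println(ref.getTableId());   // sometable
    // Serialize back to a spec string; BigQueryIO.read().from(...) accepts
    // either the string or the TableReference directly.
    System.out.println(BigQueryHelpers.toTableSpec(ref));
  }
}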
Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class BigQueryIOTest, method testRemoveTemporaryTables:
@Test
public void testRemoveTemporaryTables() throws Exception {
  FakeDatasetService datasetService = new FakeDatasetService();
  String projectId = "project";
  String datasetId = "dataset";
  datasetService.createDataset(projectId, datasetId, "", "");

  List<TableReference> tableRefs =
      Lists.newArrayList(
          BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table1")),
          BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table2")),
          BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table3")));
  for (TableReference tableRef : tableRefs) {
    datasetService.createTable(new Table().setTableReference(tableRef));
  }

  // Add one more table to delete that does not actually exist.
  tableRefs.add(
      BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table4")));

  WriteRename.removeTemporaryTables(datasetService, tableRefs);

  // Every reference, including the nonexistent one, should have a delete logged,
  // and none of the tables should remain in the dataset.
  for (TableReference ref : tableRefs) {
    loggedWriteRename.verifyDebug("Deleting table " + toJsonString(ref));
    checkState(datasetService.getTable(ref) == null, "Table " + ref + " was not deleted!");
  }
}
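For readers unfamiliar with the helper under test: removeTemporaryTables drives deletions through the DatasetService interface. A plausible minimal sketch of that loop follows; this is an illustration of the pattern, not Beam's actual implementation, and RemoveTablesSketch is a made-up name:

import com.google.api.services.bigquery.model.TableReference;
import java.io.IOException;
import java.util.List;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;

public class RemoveTablesSketch {
  // Delete each temp table through the service. The test above expects that a
  // reference to a nonexistent table (table4) does not make the cleanup fail.
  static void removeTables(DatasetService datasetService, List<TableReference> tableRefs)
      throws IOException, InterruptedException {
    for (TableReference ref : tableRefs) {
      datasetService.deleteTable(ref);
    }
  }
}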
Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class BigQueryIOTest, method testCreateNeverWithStreaming:
@Test
public void testCreateNeverWithStreaming() throws Exception {
  BigQueryOptions options = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  options.setProject("project");
  options.setStreaming(true);
  Pipeline p = TestPipeline.create(options);

  // No project ID set on the reference: BigQueryIO falls back to the project
  // from the pipeline options.
  TableReference tableRef = new TableReference();
  tableRef.setDatasetId("dataset");
  tableRef.setTableId("sometable");

  PCollection<TableRow> tableRows =
      p.apply(GenerateSequence.from(0))
          .apply(
              MapElements.via(
                  new SimpleFunction<Long, TableRow>() {
                    @Override
                    public TableRow apply(Long input) {
                      return null;
                    }
                  }))
          .setCoder(TableRowJsonCoder.of());

  tableRows.apply(
      BigQueryIO.writeTableRows()
          .to(tableRef)
          .withCreateDisposition(CreateDisposition.CREATE_NEVER)
          .withoutValidation());
}
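CREATE_NEVER places the burden of table creation on the user; the inverse configuration needs a schema so the sink can create the table itself. A short sketch of that counterpart, reusing tableRows and tableRef from the test above (the single-column schema here is illustrative):

// With CREATE_IF_NEEDED the sink may create the table, so a schema is required;
// CREATE_NEVER (above) instead assumes the table already exists.
tableRows.apply(
    BigQueryIO.writeTableRows()
        .to(tableRef)
        .withSchema(
            new TableSchema()
                .setFields(
                    ImmutableList.of(
                        new TableFieldSchema().setName("number").setType("INTEGER"))))
        .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED));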
Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class BigQueryIOTest, method testWriteRename:
@Test
public void testWriteRename() throws Exception {
  p.enableAbandonedNodeEnforcement(false);

  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(datasetService);
  datasetService.createDataset("project-id", "dataset-id", "", "");

  final int numFinalTables = 3;
  final int numTempTablesPerFinalTable = 3;
  final int numRecordsPerTempTable = 10;

  Map<TableDestination, List<TableRow>> expectedRowsPerTable = Maps.newHashMap();
  String jobIdToken = "jobIdToken";
  Map<TableDestination, Iterable<String>> tempTables = Maps.newHashMap();

  // For each final table, create several temp tables and record the rows we
  // expect to land in the final table after the rename.
  for (int i = 0; i < numFinalTables; ++i) {
    String tableName = "project-id:dataset-id.table_" + i;
    TableDestination tableDestination = new TableDestination(tableName, "table_" + i + "_desc");
    List<String> tables = Lists.newArrayList();
    tempTables.put(tableDestination, tables);
    List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
    if (expectedRows == null) {
      expectedRows = Lists.newArrayList();
      expectedRowsPerTable.put(tableDestination, expectedRows);
    }
    // Note the inner loop runs on j; the original snippet mistakenly tested and
    // incremented i here, which would have broken the outer loop.
    for (int j = 0; j < numTempTablesPerFinalTable; ++j) {
      TableReference tempTable =
          new TableReference()
              .setProjectId("project-id")
              .setDatasetId("dataset-id")
              .setTableId(String.format("%s_%05d_%05d", jobIdToken, i, j));
      datasetService.createTable(new Table().setTableReference(tempTable));
      List<TableRow> rows = Lists.newArrayList();
      for (int k = 0; k < numRecordsPerTempTable; ++k) {
        rows.add(new TableRow().set("number", j * numTempTablesPerFinalTable + k));
      }
      datasetService.insertAll(tempTable, rows, null);
      expectedRows.addAll(rows);
      tables.add(BigQueryHelpers.toJsonString(tempTable));
    }
  }

  // Flatten the multimap into (destination, temp-table JSON) pairs.
  PCollection<KV<TableDestination, String>> tempTablesPCollection =
      p.apply(
              Create.of(tempTables)
                  .withCoder(
                      KvCoder.of(
                          TableDestinationCoder.of(), IterableCoder.of(StringUtf8Coder.of()))))
          .apply(
              ParDo.of(
                  new DoFn<KV<TableDestination, Iterable<String>>, KV<TableDestination, String>>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      TableDestination tableDestination = c.element().getKey();
                      for (String tempTable : c.element().getValue()) {
                        c.output(KV.of(tableDestination, tempTable));
                      }
                    }
                  }));

  PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView =
      PCollectionViews.multimapView(
          tempTablesPCollection,
          WindowingStrategy.globalDefault(),
          KvCoder.of(TableDestinationCoder.of(), StringUtf8Coder.of()));
  PCollectionView<String> jobIdTokenView =
      p.apply("CreateJobId", Create.of("jobId")).apply(View.<String>asSingleton());

  WriteRename writeRename =
      new WriteRename(
          fakeBqServices,
          jobIdTokenView,
          WriteDisposition.WRITE_EMPTY,
          CreateDisposition.CREATE_IF_NEEDED,
          tempTablesView);

  DoFnTester<Void, Void> tester = DoFnTester.of(writeRename);
  tester.setSideInput(tempTablesView, GlobalWindow.INSTANCE, tempTables);
  tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
  tester.processElement(null);

  // Verify each final table received all its rows and carries its description.
  for (Map.Entry<TableDestination, Iterable<String>> entry : tempTables.entrySet()) {
    TableDestination tableDestination = entry.getKey();
    TableReference tableReference = tableDestination.getTableReference();
    Table table = checkNotNull(datasetService.getTable(tableReference));
    assertEquals(tableReference.getTableId() + "_desc", tableDestination.getTableDescription());

    List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
    assertThat(
        datasetService.getAllRows(
            tableReference.getProjectId(),
            tableReference.getDatasetId(),
            tableReference.getTableId()),
        containsInAnyOrder(Iterables.toArray(expectedRows, TableRow.class)));

    // Temp tables should be deleted.
    for (String tempTableJson : entry.getValue()) {
      TableReference tempTable =
          BigQueryHelpers.fromJsonString(tempTableJson, TableReference.class);
      assertEquals(null, datasetService.getTable(tempTable));
    }
  }
}
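The temp-table references above travel through the pipeline as JSON strings and are parsed back before verification. A standalone sketch of that round trip (the class name and printed output are illustrative):

import com.google.api.services.bigquery.model.TableReference;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers;

public class TableReferenceJsonRoundTrip {
  public static void main(String[] args) {
    TableReference tempTable =
        new TableReference()
            .setProjectId("project-id")
            .setDatasetId("dataset-id")
            .setTableId(String.format("%s_%05d_%05d", "jobIdToken", 0, 0));
    // Serialize so the reference can ride through a PCollection of strings...
    String json = BigQueryHelpers.toJsonString(tempTable);
    // ...and parse it back on the consuming side, as the verification loop above does.
    TableReference parsed = BigQueryHelpers.fromJsonString(json, TableReference.class);
    System.out.println(parsed.getTableId()); // jobIdToken_00000_00000
  }
}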
Use of com.google.api.services.bigquery.model.TableReference in project beam by apache.
In class BigQueryIOTest, method testBigQueryTableSourceThroughJsonAPI:
@Test
public void testBigQueryTableSourceThroughJsonAPI() throws Exception {
  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(datasetService);

  List<TableRow> expected =
      ImmutableList.of(
          new TableRow().set("name", "a").set("number", "1"),
          new TableRow().set("name", "b").set("number", "2"),
          new TableRow().set("name", "c").set("number", "3"),
          new TableRow().set("name", "d").set("number", "4"),
          new TableRow().set("name", "e").set("number", "5"),
          new TableRow().set("name", "f").set("number", "6"));

  // parseTableSpec turns "project:dataset.table" into a TableReference, whose
  // accessors then drive the fake dataset and table setup.
  TableReference table = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
  datasetService.createDataset(table.getProjectId(), table.getDatasetId(), "", "");
  datasetService.createTable(new Table().setTableReference(table));
  datasetService.insertAll(table, expected, null);

  Path baseDir = Files.createTempDirectory(tempFolder, "testBigQueryTableSourceThroughJsonAPI");
  String stepUuid = "testStepUuid";
  BoundedSource<TableRow> bqSource =
      BigQueryTableSource.create(stepUuid, StaticValueProvider.of(table), fakeBqServices);

  PipelineOptions options = PipelineOptionsFactory.create();
  options.setTempLocation(baseDir.toString());
  Assert.assertThat(SourceTestUtils.readFromSource(bqSource, options), CoreMatchers.is(expected));
  SourceTestUtils.assertSplitAtFractionBehavior(
      bqSource, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
}
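BigQueryTableSource takes the table as a ValueProvider rather than a raw TableReference, which lets the table be bound either when the pipeline is constructed (as here, via StaticValueProvider) or at run time. A standalone sketch (the class name is illustrative):

import com.google.api.services.bigquery.model.TableReference;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;

public class ValueProviderSketch {
  public static void main(String[] args) {
    TableReference table = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
    // A static provider wraps a value already known at pipeline-construction time.
    ValueProvider<TableReference> provider = StaticValueProvider.of(table);
    System.out.println(provider.isAccessible()); // true: the value is available now
    System.out.println(BigQueryHelpers.toTableSpec(provider.get()));
  }
}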