Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
In the class BigQueryIOTest, the method testWriteTables verifies that WriteTables loads each partition of staged files into its own temporary table.
@Test
public void testWriteTables() throws Exception {
  p.enableAbandonedNodeEnforcement(false);

  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(datasetService);
  datasetService.createDataset("project-id", "dataset-id", "", "");

  long numTables = 3;
  long numPartitions = 3;
  long numFilesPerPartition = 10;
  String jobIdToken = "jobIdToken";
  String stepUuid = "stepUuid";
  Map<TableDestination, List<String>> expectedTempTables = Maps.newHashMap();
  Path baseDir = Files.createTempDirectory(tempFolder, "testWriteTables");

  List<KV<ShardedKey<String>, List<String>>> partitions = Lists.newArrayList();
  for (int i = 0; i < numTables; ++i) {
    String tableName = String.format("project-id:dataset-id.table%05d", i);
    TableDestination tableDestination = new TableDestination(tableName, tableName);
    for (int j = 0; j < numPartitions; ++j) {
      String tempTableId = BigQueryHelpers.createJobId(jobIdToken, tableDestination, j);
      List<String> filesPerPartition = Lists.newArrayList();
      for (int k = 0; k < numFilesPerPartition; ++k) {
        String filename =
            Paths.get(
                    baseDir.toString(),
                    String.format("files0x%08x_%05d", tempTableId.hashCode(), k))
                .toString();
        ResourceId fileResource = FileSystems.matchNewResource(filename, false);
        try (WritableByteChannel channel = FileSystems.create(fileResource, MimeTypes.TEXT)) {
          try (OutputStream output = Channels.newOutputStream(channel)) {
            TableRow tableRow = new TableRow().set("name", tableName);
            TableRowJsonCoder.of().encode(tableRow, output, Context.OUTER);
            output.write("\n".getBytes(StandardCharsets.UTF_8));
          }
        }
        filesPerPartition.add(filename);
      }
      partitions.add(KV.of(ShardedKey.of(tableDestination.getTableSpec(), j), filesPerPartition));

      List<String> expectedTables = expectedTempTables.get(tableDestination);
      if (expectedTables == null) {
        expectedTables = Lists.newArrayList();
        expectedTempTables.put(tableDestination, expectedTables);
      }
      String json =
          String.format(
              "{\"datasetId\":\"dataset-id\",\"projectId\":\"project-id\",\"tableId\":\"%s\"}",
              tempTableId);
      expectedTables.add(json);
    }
  }

  PCollectionView<String> jobIdTokenView =
      p.apply("CreateJobId", Create.of("jobId")).apply(View.<String>asSingleton());
  PCollectionView<Map<String, String>> schemaMapView =
      p.apply(
              "CreateEmptySchema",
              Create.empty(new TypeDescriptor<KV<String, String>>() {}))
          .apply(View.<String, String>asMap());

  WriteTables<String> writeTables =
      new WriteTables<>(
          false,
          fakeBqServices,
          jobIdTokenView,
          schemaMapView,
          WriteDisposition.WRITE_EMPTY,
          CreateDisposition.CREATE_IF_NEEDED,
          new IdentityDynamicTables());

  DoFnTester<KV<ShardedKey<String>, List<String>>, KV<TableDestination, String>> tester =
      DoFnTester.of(writeTables);
  tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
  tester.setSideInput(schemaMapView, GlobalWindow.INSTANCE, ImmutableMap.<String, String>of());
  tester.getPipelineOptions().setTempLocation("tempLocation");
  for (KV<ShardedKey<String>, List<String>> partition : partitions) {
    tester.processElement(partition);
  }

  Map<TableDestination, List<String>> tempTablesResult = Maps.newHashMap();
  for (KV<TableDestination, String> element : tester.takeOutputElements()) {
    List<String> tables = tempTablesResult.get(element.getKey());
    if (tables == null) {
      tables = Lists.newArrayList();
      tempTablesResult.put(element.getKey(), tables);
    }
    tables.add(element.getValue());
  }
  assertEquals(expectedTempTables, tempTablesResult);
}
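For context, the files staged above contain newline-delimited JSON, one TableRow per line, which is the format BigQuery load jobs consume. Below is a minimal standalone sketch of that encoding step; the class name EncodeRows and the path /tmp/rows.json are illustrative only, not part of the test.

import com.google.api.services.bigquery.model.TableRow;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.beam.sdk.coders.Coder.Context;
import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder;

public class EncodeRows {
  public static void main(String[] args) throws Exception {
    // Write one JSON object per line, the shape BigQuery load jobs expect.
    try (OutputStream out = new FileOutputStream("/tmp/rows.json")) {
      for (String name : new String[] {"a", "b", "c"}) {
        TableRow row = new TableRow().set("name", name);
        // Context.OUTER emits the raw JSON bytes with no length prefix.
        TableRowJsonCoder.of().encode(row, out, Context.OUTER);
        out.write("\n".getBytes(StandardCharsets.UTF_8));
      }
    }
  }
}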
Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
In the class BigQueryIOTest, the method testWriteFailedJobs verifies that a load job that exhausts its retries fails the pipeline with a descriptive RuntimeException and leaves no temporary files behind.
@Test
public void testWriteFailedJobs() throws Exception {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("defaultproject");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(datasetService);

  Pipeline p = TestPipeline.create(bqOptions);
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create load job with id prefix");
  thrown.expectMessage("reached max retries");
  thrown.expectMessage("last failed load job");
  try {
    p.run();
  } finally {
    File tempDir = new File(bqOptions.getTempLocation());
    testNumFiles(tempDir, 0);
  }
}
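The thrown and testFolder fields used above are assumed to be JUnit 4 rules declared on the test class; a minimal sketch of those declarations (the class name BigQueryIOTestRules is illustrative):

import org.junit.Rule;
import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;

public class BigQueryIOTestRules {
  // JUnit applies both rules around each test method: thrown asserts the
  // expected exception, and testFolder creates and deletes scratch directories.
  @Rule public transient ExpectedException thrown = ExpectedException.none();
  @Rule public transient TemporaryFolder testFolder = new TemporaryFolder();
}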
Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
In the class BigQueryIOTest, the helper method convertBigDecimaslToLong normalizes BigDecimal values in query results back to longs so rows can be compared in assertions.
List<TableRow> convertBigDecimaslToLong(List<TableRow> toConvert) {
  // The numbers come back as BigDecimal objects after JSON serialization. Change them back to
  // longs so that we can assert the output.
  List<TableRow> converted = Lists.newArrayList();
  for (TableRow entry : toConvert) {
    TableRow convertedEntry = entry.clone();
    Object num = convertedEntry.get("number");
    if (num instanceof BigDecimal) {
      convertedEntry.set("number", ((BigDecimal) num).longValue());
    }
    converted.add(convertedEntry);
  }
  return converted;
}
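The helper exists because a numeric JSON value such as 1 can deserialize as a BigDecimal rather than a Long, making otherwise identical TableRows compare unequal. A small standalone illustration of the mismatch it repairs (the class name BigDecimalDemo is illustrative):

import com.google.api.services.bigquery.model.TableRow;
import java.math.BigDecimal;

public class BigDecimalDemo {
  public static void main(String[] args) {
    TableRow expected = new TableRow().set("name", "a").set("number", 1L);
    // Simulates what a JSON round trip may hand back: the same value as BigDecimal.
    TableRow actual = new TableRow().set("name", "a").set("number", new BigDecimal(1));
    System.out.println(expected.equals(actual)); // false: Long(1) != BigDecimal(1)
    // Normalizing the BigDecimal back to a long makes the rows comparable.
    actual.set("number", ((BigDecimal) actual.get("number")).longValue());
    System.out.println(expected.equals(actual)); // true
  }
}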
Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
In the class BigQueryIOTest, the method testRetryPolicy verifies that transient insert errors are retried while a row that ultimately fails with a non-transient error is routed to the failed-inserts collection.
@Test
public void testRetryPolicy() throws Exception {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("project-id");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(datasetService);
  datasetService.createDataset("project-id", "dataset-id", "", "");

  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  TableDataInsertAllResponse.InsertErrors persistentError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("invalidQuery")));
  datasetService.failOnInsert(
      ImmutableMap.<TableRow, List<TableDataInsertAllResponse.InsertErrors>>of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError, persistentError)));

  Pipeline p = TestPipeline.create(bqOptions);
  PCollection<TableRow> failedRows =
      p.apply(Create.of(row1, row2, row3))
          .setIsBoundedInternal(IsBounded.UNBOUNDED)
          .apply(
              BigQueryIO.writeTableRows()
                  .to("project-id:dataset-id.table-id")
                  .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
                  .withSchema(
                      new TableSchema()
                          .setFields(
                              ImmutableList.of(
                                  new TableFieldSchema().setName("name").setType("STRING"),
                                  new TableFieldSchema().setName("number").setType("INTEGER"))))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withTestServices(fakeBqServices)
                  .withoutValidation())
          .getFailedInserts();

  // row2 finally fails with a non-retryable error, so we expect to see it in the collection of
  // failed rows.
  PAssert.that(failedRows).containsInAnyOrder(row2);
  p.run();

  // Only row1 and row3 were successfully inserted.
  assertThat(
      datasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row3));
}
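InsertRetryPolicy.retryTransientErrors() retries reasons such as "timeout" while letting reasons such as "invalidQuery" fail immediately. A sketch of a custom policy with the same shape, assuming the shouldRetry(Context) hook of this Beam version (the class and method names are illustrative):

import org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy;

public class RetryPolicies {
  // A hypothetical policy that retries every failed insert; the built-in
  // alwaysRetry(), neverRetry(), and retryTransientErrors() factories
  // return policies of this same shape.
  static InsertRetryPolicy retryEverything() {
    return new InsertRetryPolicy() {
      @Override
      public boolean shouldRetry(Context context) {
        return true;
      }
    };
  }
}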
Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
In the class BigQueryIOTest, the method testBigQueryNoTableQuerySourceInitSplit reads a query-backed BigQueryQuerySource and checks its initial split behavior.
@Test
public void testBigQueryNoTableQuerySourceInitSplit() throws Exception {
  TableReference dryRunTable = new TableReference();

  Job queryJob = new Job();
  JobStatistics queryJobStats = new JobStatistics();
  JobStatistics2 queryStats = new JobStatistics2();
  queryStats.setReferencedTables(ImmutableList.of(dryRunTable));
  queryJobStats.setQuery(queryStats);
  queryJob.setStatus(new JobStatus()).setStatistics(queryJobStats);

  Job extractJob = new Job();
  JobStatistics extractJobStats = new JobStatistics();
  JobStatistics4 extractStats = new JobStatistics4();
  extractStats.setDestinationUriFileCounts(ImmutableList.of(1L));
  extractJobStats.setExtract(extractStats);
  extractJob.setStatus(new JobStatus()).setStatistics(extractJobStats);

  FakeDatasetService datasetService = new FakeDatasetService();
  FakeJobService jobService = new FakeJobService();
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices().withJobService(jobService).withDatasetService(datasetService);

  PipelineOptions options = PipelineOptionsFactory.create();
  BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
  bqOptions.setProject("project");
  String stepUuid = "testStepUuid";
  TableReference tempTableReference =
      createTempTableReference(
          bqOptions.getProject(), createJobIdToken(bqOptions.getJobName(), stepUuid));

  List<TableRow> expected =
      ImmutableList.of(
          new TableRow().set("name", "a").set("number", 1L),
          new TableRow().set("name", "b").set("number", 2L),
          new TableRow().set("name", "c").set("number", 3L),
          new TableRow().set("name", "d").set("number", 4L),
          new TableRow().set("name", "e").set("number", 5L),
          new TableRow().set("name", "f").set("number", 6L));
  datasetService.createDataset(
      tempTableReference.getProjectId(), tempTableReference.getDatasetId(), "", "");
  Table table =
      new Table()
          .setTableReference(tempTableReference)
          .setSchema(
              new TableSchema()
                  .setFields(
                      ImmutableList.of(
                          new TableFieldSchema().setName("name").setType("STRING"),
                          new TableFieldSchema().setName("number").setType("INTEGER"))));
  datasetService.createTable(table);

  String query = FakeBigQueryServices.encodeQuery(expected);
  jobService.expectDryRunQuery(
      "project",
      query,
      new JobStatistics()
          .setQuery(
              new JobStatistics2()
                  .setTotalBytesProcessed(100L)
                  .setReferencedTables(ImmutableList.of(table.getTableReference()))));

  Path baseDir = Files.createTempDirectory(tempFolder, "testBigQueryNoTableQuerySourceInitSplit");
  BoundedSource<TableRow> bqSource =
      BigQueryQuerySource.create(
          stepUuid,
          StaticValueProvider.of(query),
          true /* flattenResults */,
          true /* useLegacySql */,
          fakeBqServices);
  options.setTempLocation(baseDir.toString());

  List<TableRow> read =
      convertBigDecimaslToLong(SourceTestUtils.readFromSource(bqSource, options));
  assertThat(read, containsInAnyOrder(Iterables.toArray(expected, TableRow.class)));

  SourceTestUtils.assertSplitAtFractionBehavior(
      bqSource, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
  List<? extends BoundedSource<TableRow>> sources = bqSource.split(100, options);
  assertEquals(2, sources.size());
  BoundedSource<TableRow> actual = sources.get(0);
  assertThat(actual, CoreMatchers.instanceOf(TransformingSource.class));
}
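SourceTestUtils, used above for readFromSource and assertSplitAtFractionBehavior, is Beam's standard harness for exercising a BoundedSource outside a pipeline. A minimal hedged sketch of that pattern against any bounded source (the class and method names here are illustrative):

import java.util.List;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.SourceTestUtils;

public class SourceHarnessDemo {
  // Reads all records from a bounded source, then verifies that splitting the
  // source into bundles of about desiredBundleSizeBytes yields the same records.
  static <T> List<T> readAndCheckSplits(BoundedSource<T> source, long desiredBundleSizeBytes)
      throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    List<T> records = SourceTestUtils.readFromSource(source, options);
    SourceTestUtils.assertSourcesEqualReferenceSource(
        source, source.split(desiredBundleSizeBytes, options), options);
    return records;
  }
}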