Use of com.google.api.services.bigquery.model.Clustering in project beam by apache.
In the class BigQueryIOWriteTest, method testTimePartitioningClustering.
void testTimePartitioningClustering(
    BigQueryIO.Write.Method insertMethod, boolean enablePartitioning, boolean enableClustering)
    throws Exception {
  TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1");
  TableRow row2 = new TableRow().set("date", "2018-01-02").set("number", "2");
  TimePartitioning timePartitioning = new TimePartitioning().setType("DAY").setField("date");
  Clustering clustering = new Clustering().setFields(ImmutableList.of("date"));
  TableSchema schema = new TableSchema().setFields(ImmutableList.of(
      new TableFieldSchema().setName("date").setType("DATE"),
      new TableFieldSchema().setName("number").setType("INTEGER")));
  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withoutValidation();
  if (enablePartitioning) {
    writeTransform = writeTransform.withTimePartitioning(timePartitioning);
  }
  if (enableClustering) {
    writeTransform = writeTransform.withClustering(clustering);
  }
  p.apply(Create.of(row1, row2)).apply(writeTransform);
  p.run();
  Table table = fakeDatasetService.getTable(
      BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id"));
  assertEquals(schema, table.getSchema());
  if (enablePartitioning) {
    assertEquals(timePartitioning, table.getTimePartitioning());
  }
  if (enableClustering) {
    assertEquals(clustering, table.getClustering());
  }
}
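The helper above is parameterized but never invoked in this excerpt. A minimal sketch of how it might be driven from concrete test cases; the @Test method names and argument combinations are illustrative assumptions, not code from the Beam repository:

// Hypothetical callers for the parameterized helper above; the method names
// and argument combinations are illustrative, not part of the excerpt.
@Test
public void testTimePartitioningWithoutClustering() throws Exception {
  testTimePartitioningClustering(BigQueryIO.Write.Method.FILE_LOADS, true, false);
}

@Test
public void testTimePartitioningWithClustering() throws Exception {
  testTimePartitioningClustering(BigQueryIO.Write.Method.FILE_LOADS, true, true);
}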
Use of com.google.api.services.bigquery.model.Clustering in project beam by apache.
In the class BigQueryIOWriteTest, method testClusteringTableFunction.
@Test
public void testClusteringTableFunction() throws Exception {
  TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1");
  TableRow row2 = new TableRow().set("date", "2018-01-02").set("number", "2");
  TimePartitioning timePartitioning = new TimePartitioning().setType("DAY").setField("date");
  Clustering clustering = new Clustering().setFields(ImmutableList.of("date"));
  TableSchema schema = new TableSchema().setFields(ImmutableList.of(
      new TableFieldSchema().setName("date").setType("DATE"),
      new TableFieldSchema().setName("number").setType("INTEGER")));
  // withMethod overrides the pipeline option, so we need to explicitly request
  // STORAGE_API_WRITES.
  BigQueryIO.Write.Method method =
      useStorageApi
          ? (useStorageApiApproximate ? Method.STORAGE_API_AT_LEAST_ONCE : Method.STORAGE_WRITE_API)
          : Method.FILE_LOADS;
  p.apply(Create.of(row1, row2))
      .apply(BigQueryIO.writeTableRows()
          .to((ValueInSingleWindow<TableRow> vsw) -> {
            String tableSpec = "project-id:dataset-id.table-" + vsw.getValue().get("number");
            return new TableDestination(
                tableSpec,
                null,
                new TimePartitioning().setType("DAY").setField("date"),
                new Clustering().setFields(ImmutableList.of("date")));
          })
          .withTestServices(fakeBqServices)
          .withMethod(method)
          .withSchema(schema)
          .withClustering()
          .withoutValidation());
  p.run();
  Table table = fakeDatasetService.getTable(
      BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-1"));
  assertEquals(schema, table.getSchema());
  assertEquals(timePartitioning, table.getTimePartitioning());
  assertEquals(clustering, table.getClustering());
}
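Note the no-argument withClustering() in the pipeline above: when the destination function returns a TableDestination carrying its own Clustering, that call is what signals the sink to honor per-destination clustering. The test only inspects table-1 even though the table function routes row2 to table-2; a natural follow-up sketch, assuming the same fakeDatasetService state (these assertions are not in the original test):

// Hypothetical follow-up assertions for the second dynamic destination;
// not part of the original test body.
Table table2 = fakeDatasetService.getTable(
    BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-2"));
assertEquals(schema, table2.getSchema());
assertEquals(timePartitioning, table2.getTimePartitioning());
assertEquals(clustering, table2.getClustering());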
Use of com.google.api.services.bigquery.model.Clustering in project beam by apache.
In the class FakeJobService, method runCopyJob.
private JobStatus runCopyJob(JobConfigurationTableCopy copy)
    throws InterruptedException, IOException {
  List<TableReference> sources = copy.getSourceTables();
  TableReference destination = copy.getDestinationTable();
  WriteDisposition writeDisposition = WriteDisposition.valueOf(copy.getWriteDisposition());
  CreateDisposition createDisposition = CreateDisposition.valueOf(copy.getCreateDisposition());
  Table existingTable = datasetService.getTable(destination);
  if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
    return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
  }
  TimePartitioning partitioning = null;
  Clustering clustering = null;
  TableSchema schema = null;
  boolean first = true;
  List<TableRow> allRows = Lists.newArrayList();
  for (TableReference source : sources) {
    Table table = checkNotNull(datasetService.getTable(source));
    // All source tables must agree on partitioning, clustering, and schema;
    // any mismatch fails the copy job.
    if (!first) {
      if (!Objects.equals(partitioning, table.getTimePartitioning())) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
      }
      if (!Objects.equals(clustering, table.getClustering())) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
      }
      if (!Objects.equals(schema, table.getSchema())) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
      }
    }
    partitioning = table.getTimePartitioning();
    clustering = table.getClustering();
    schema = table.getSchema();
    first = false;
    allRows.addAll(
        datasetService.getAllRows(
            source.getProjectId(), source.getDatasetId(), source.getTableId()));
  }
  // The destination inherits the (common) partitioning, clustering, and schema
  // of the source tables.
  datasetService.createTable(
      new Table()
          .setTableReference(destination)
          .setSchema(schema)
          .setTimePartitioning(partitioning)
          .setClustering(clustering)
          .setEncryptionConfiguration(copy.getDestinationEncryptionConfiguration()));
  datasetService.insertAll(destination, allRows, null);
  return new JobStatus().setState("DONE");
}
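For context, this is roughly the shape of the copy configuration runCopyJob consumes. A minimal sketch using only google-api-services-bigquery model setters; the project, dataset, and table names are placeholders:

// Minimal sketch of a copy-job configuration as consumed by runCopyJob;
// project/dataset/table names are placeholders, not values from the excerpt.
JobConfigurationTableCopy copy =
    new JobConfigurationTableCopy()
        .setSourceTables(ImmutableList.of(
            new TableReference()
                .setProjectId("project-id").setDatasetId("dataset-id").setTableId("table-1"),
            new TableReference()
                .setProjectId("project-id").setDatasetId("dataset-id").setTableId("table-2")))
        .setDestinationTable(
            new TableReference()
                .setProjectId("project-id").setDatasetId("dataset-id").setTableId("table-merged"))
        .setWriteDisposition("WRITE_EMPTY")
        .setCreateDisposition("CREATE_IF_NEEDED");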