Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
From the class WriteToBigQuery, method getSchema.
/** Build the output table schema. */
protected TableSchema getSchema() {
  List<TableFieldSchema> fields = new ArrayList<>();
  for (Map.Entry<String, FieldInfo<InputT>> entry : fieldInfo.entrySet()) {
    String key = entry.getKey();
    FieldInfo<InputT> fcnInfo = entry.getValue();
    String bqType = fcnInfo.getFieldType();
    fields.add(new TableFieldSchema().setName(key).setType(bqType));
  }
  return new TableSchema().setFields(fields);
}
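
The method above derives one TableFieldSchema per entry of a field-info map. A self-contained sketch of the same pattern, using a hypothetical name-to-type map in place of fieldInfo (the column names and types are illustrative):

  import com.google.api.services.bigquery.model.TableFieldSchema;
  import com.google.api.services.bigquery.model.TableSchema;
  import java.util.ArrayList;
  import java.util.LinkedHashMap;
  import java.util.List;
  import java.util.Map;

  public class SchemaFromMap {
    public static void main(String[] args) {
      // Hypothetical column-name-to-BigQuery-type map standing in for fieldInfo.
      Map<String, String> columns = new LinkedHashMap<>();
      columns.put("word", "STRING");
      columns.put("count", "INTEGER");

      List<TableFieldSchema> fields = new ArrayList<>();
      for (Map.Entry<String, String> entry : columns.entrySet()) {
        fields.add(new TableFieldSchema().setName(entry.getKey()).setType(entry.getValue()));
      }
      TableSchema schema = new TableSchema().setFields(fields);
      // Prints the field list, e.g. {fields=[{name=word, type=STRING}, ...]}
      System.out.println(schema);
    }
  }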
Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
From the class MaxPerKeyExamples, method main.
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);
  // Build the table schema for the output table.
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT"));
  TableSchema schema = new TableSchema().setFields(fields);
  p.apply(BigQueryIO.read().from(options.getInput()))
      .apply(new MaxMeanTemp())
      .apply(
          BigQueryIO.writeTableRows()
              .to(options.getOutput())
              .withSchema(schema)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
  p.run().waitUntilFinish();
}
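
The Options interface referenced above is not part of the excerpt. A minimal sketch consistent with the getInput() and getOutput() calls, assuming a plain Beam PipelineOptions subinterface (the annotations are standard Beam; the default input table shown is the public weather sample used across the Beam examples and should be treated as illustrative):

  import org.apache.beam.sdk.options.Default;
  import org.apache.beam.sdk.options.Description;
  import org.apache.beam.sdk.options.PipelineOptions;
  import org.apache.beam.sdk.options.Validation;

  public interface Options extends PipelineOptions {
    @Description("Table to read from, specified as <project_id>:<dataset_id>.<table_id>")
    @Default.String("clouddataflow-readonly:samples.weather_stations")
    String getInput();
    void setInput(String value);

    @Description("Table to write to, specified as <project_id>:<dataset_id>.<table_id>")
    @Validation.Required
    String getOutput();
    void setOutput(String value);
  }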
Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
From the class BigQuerySourceBase, method split.
@Override
public List<BoundedSource<TableRow>> split(long desiredBundleSizeBytes, PipelineOptions options)
    throws Exception {
  // Cache the split result so that repeated split() calls do not initiate
  // another BigQuery extract job.
  if (cachedSplitResult == null) {
    BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
    TableReference tableToExtract = getTableToExtract(bqOptions);
    JobService jobService = bqServices.getJobService(bqOptions);
    final String extractDestinationDir =
        resolveTempLocation(bqOptions.getTempLocation(), "BigQueryExtractTemp", stepUuid);
    String extractJobId = getExtractJobId(createJobIdToken(options.getJobName(), stepUuid));
    List<ResourceId> tempFiles =
        executeExtract(
            extractJobId, tableToExtract, jobService, bqOptions.getProject(), extractDestinationDir);
    TableSchema tableSchema =
        bqServices.getDatasetService(bqOptions).getTable(tableToExtract).getSchema();
    cleanupTempResource(bqOptions);
    cachedSplitResult = checkNotNull(createSources(tempFiles, tableSchema));
  }
  return cachedSplitResult;
}
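
The null check on cachedSplitResult is ordinary memoization: the expensive extract job runs once, and later split() calls reuse the cached sources. A stripped-down sketch of the idiom, with a hypothetical expensiveCompute() standing in for the extract-and-read work:

  import java.util.Arrays;
  import java.util.List;

  public class CachedSplitter {
    private transient List<String> cachedSplitResult;

    // Hypothetical stand-in for the extract-job-plus-read work done in split().
    private List<String> expensiveCompute() {
      return Arrays.asList("shard-0", "shard-1");
    }

    public List<String> split() {
      if (cachedSplitResult == null) {
        // First call pays the cost; subsequent calls return the cached list.
        cachedSplitResult = expensiveCompute();
      }
      return cachedSplitResult;
    }
  }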
Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
From the class BigQuerySourceBase, method createSources.
private List<BoundedSource<TableRow>> createSources(List<ResourceId> files, TableSchema tableSchema)
    throws IOException, InterruptedException {
  final String jsonSchema = BigQueryIO.JSON_FACTORY.toString(tableSchema);
  SerializableFunction<GenericRecord, TableRow> function =
      new SerializableFunction<GenericRecord, TableRow>() {

        @Override
        public TableRow apply(GenericRecord input) {
          return BigQueryAvroUtils.convertGenericRecordToTableRow(
              input, BigQueryHelpers.fromJsonString(jsonSchema, TableSchema.class));
        }
      };
  List<BoundedSource<TableRow>> avroSources = Lists.newArrayList();
  for (ResourceId file : files) {
    avroSources.add(
        new TransformingSource<>(AvroSource.from(file.toString()), function, getDefaultOutputCoder()));
  }
  return ImmutableList.copyOf(avroSources);
}
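
Note why the schema travels as a String here: TableSchema is not Serializable, so the anonymous SerializableFunction captures jsonSchema (a plain String) and re-parses it when converting records. The round trip can be reproduced in isolation; this sketch uses the public google-http-client JSON factory, which is what BigQueryIO.JSON_FACTORY wraps, with an illustrative one-field schema:

  import com.google.api.client.json.JsonFactory;
  import com.google.api.client.json.jackson2.JacksonFactory;
  import com.google.api.services.bigquery.model.TableFieldSchema;
  import com.google.api.services.bigquery.model.TableSchema;
  import java.io.IOException;
  import java.util.Collections;

  public class SchemaRoundTrip {
    public static void main(String[] args) throws IOException {
      TableSchema schema =
          new TableSchema()
              .setFields(
                  Collections.singletonList(
                      new TableFieldSchema().setName("month").setType("INTEGER")));
      JsonFactory jsonFactory = JacksonFactory.getDefaultInstance();
      // Serialize to a String, which a SerializableFunction can safely capture...
      String json = jsonFactory.toString(schema);
      // ...and parse back into a TableSchema on the worker side.
      TableSchema parsed = jsonFactory.fromString(json, TableSchema.class);
      System.out.println(parsed.getFields().get(0).getName());  // month
    }
  }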
Use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.
From the class WriteTables, method processElement.
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  dynamicDestinations.setSideInputAccessorFromProcessContext(c);
  DestinationT destination = c.element().getKey().getKey();
  TableSchema tableSchema =
      BigQueryHelpers.fromJsonString(c.sideInput(schemasView).get(destination), TableSchema.class);
  TableDestination tableDestination = dynamicDestinations.getTable(destination);
  TableReference tableReference = tableDestination.getTableReference();
  if (Strings.isNullOrEmpty(tableReference.getProjectId())) {
    tableReference.setProjectId(c.getPipelineOptions().as(BigQueryOptions.class).getProject());
    tableDestination =
        new TableDestination(tableReference, tableDestination.getTableDescription());
  }
  Integer partition = c.element().getKey().getShardNumber();
  List<String> partitionFiles = Lists.newArrayList(c.element().getValue());
  String jobIdPrefix =
      BigQueryHelpers.createJobId(c.sideInput(jobIdToken), tableDestination, partition);
  if (!singlePartition) {
    tableReference.setTableId(jobIdPrefix);
  }
  load(
      bqServices.getJobService(c.getPipelineOptions().as(BigQueryOptions.class)),
      bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)),
      jobIdPrefix,
      tableReference,
      tableSchema,
      partitionFiles,
      writeDisposition,
      createDisposition,
      tableDestination.getTableDescription());
  c.output(KV.of(tableDestination, BigQueryHelpers.toJsonString(tableReference)));
  removeTemporaryFiles(partitionFiles);
}
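
The project-defaulting branch above handles table specs of the form "dataset.table", which parse into a TableReference with no project ID; the pipeline's own project is filled in before the load job is issued. A minimal illustration of that branch alone (the project ID is made up):

  import com.google.api.services.bigquery.model.TableReference;
  import com.google.common.base.Strings;

  public class DefaultProject {
    public static void main(String[] args) {
      // A reference as it might arrive from a "dataset.table" spec: no project set.
      TableReference ref = new TableReference().setDatasetId("logs").setTableId("events");
      // Hypothetical value; in WriteTables it comes from BigQueryOptions.getProject().
      String pipelineProject = "my-gcp-project";
      if (Strings.isNullOrEmpty(ref.getProjectId())) {
        ref.setProjectId(pipelineProject);
      }
      // -> my-gcp-project:logs.events
      System.out.println(ref.getProjectId() + ":" + ref.getDatasetId() + "." + ref.getTableId());
    }
  }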