Use of com.google.api.services.bigquery.model.TableRow in project zeppelin by apache.
From the class BigQueryInterpreter, method printRows.
// Generates and returns the schema and the rows as a string.
public static String printRows(final GetQueryResultsResponse response) {
  StringBuilder msg = new StringBuilder();
  try {
    // Header line: one column name per field in the result schema.
    for (TableFieldSchema schem : response.getSchema().getFields()) {
      msg.append(schem.getName());
      msg.append(TAB);
    }
    msg.append(NEWLINE);
    // Data lines: each TableRow holds its cells in getF().
    for (TableRow row : response.getRows()) {
      for (TableCell field : row.getF()) {
        msg.append(field.getV().toString());
        msg.append(TAB);
      }
      msg.append(NEWLINE);
    }
    return msg.toString();
  } catch (NullPointerException ex) {
    // A missing schema or row list means the query itself failed.
    throw new NullPointerException("SQL Execution returned an error!");
  }
}
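The helper above only needs a populated GetQueryResultsResponse, so it can be exercised without running a query. The following standalone sketch builds a response with the generated model setters and formats it with a local copy of the same logic; the class name PrintRowsDemo, the TAB and NEWLINE constants, and the hand-built two-row response are illustrative, not part of the Zeppelin source.

import com.google.api.services.bigquery.model.GetQueryResultsResponse;
import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.util.Arrays;

public class PrintRowsDemo {

  private static final String TAB = "\t";
  private static final String NEWLINE = "\n";

  public static void main(String[] args) {
    // Build a response by hand, the same shape the interpreter receives
    // from jobs().getQueryResults().execute().
    GetQueryResultsResponse response = new GetQueryResultsResponse()
        .setSchema(new TableSchema().setFields(Arrays.asList(
            new TableFieldSchema().setName("name").setType("STRING"),
            new TableFieldSchema().setName("count").setType("INTEGER"))))
        .setRows(Arrays.asList(
            new TableRow().setF(Arrays.asList(
                new TableCell().setV("alpha"), new TableCell().setV("3"))),
            new TableRow().setF(Arrays.asList(
                new TableCell().setV("beta"), new TableCell().setV("7")))));

    System.out.print(printRows(response));
  }

  // Local copy of the Zeppelin helper so the sketch is self-contained.
  static String printRows(GetQueryResultsResponse response) {
    StringBuilder msg = new StringBuilder();
    for (TableFieldSchema field : response.getSchema().getFields()) {
      msg.append(field.getName()).append(TAB);
    }
    msg.append(NEWLINE);
    for (TableRow row : response.getRows()) {
      for (TableCell cell : row.getF()) {
        msg.append(cell.getV()).append(TAB);
      }
      msg.append(NEWLINE);
    }
    return msg.toString();
  }
}

Running it prints a tab-separated header line followed by one line per row, which is what the interpreter displays as the query result.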
Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
From the class StreamingWriteFn, method finishBundle.
/** Writes the accumulated rows into BigQuery with the streaming API. */
@FinishBundle
public void finishBundle(FinishBundleContext context) throws Exception {
  List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
  BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
  // Flush every buffered table: each entry maps a table spec to the rows
  // (and their matching unique ids) accumulated during this bundle.
  for (Map.Entry<String, List<ValueInSingleWindow<TableRow>>> entry : tableRows.entrySet()) {
    TableReference tableReference = BigQueryHelpers.parseTableSpec(entry.getKey());
    flushRows(
        tableReference,
        entry.getValue(),
        uniqueIdsForTableRows.get(entry.getKey()),
        options,
        failedInserts);
  }
  tableRows.clear();
  uniqueIdsForTableRows.clear();
  // Re-emit rows that could not be inserted, preserving their original
  // timestamp and window, on the failed-inserts output.
  for (ValueInSingleWindow<TableRow> row : failedInserts) {
    context.output(failedOutputTag, row.getValue(), row.getTimestamp(), row.getWindow());
  }
}
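finishBundle is an internal step of BigQueryIO's streaming write path; from a pipeline author's point of view, the rows it re-emits on the failed-inserts tag are consumed through WriteResult.getFailedInserts(). Below is a minimal sketch of that usage, assuming streaming inserts with a transient-error retry policy; the class name StreamingInsertFailuresSketch and the table spec are placeholders, and actually running it requires BigQuery credentials and an existing table.

import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy;
import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder;
import org.apache.beam.sdk.io.gcp.bigquery.WriteResult;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.values.PCollection;

public class StreamingInsertFailuresSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    PCollection<TableRow> rows =
        p.apply(
            Create.of(
                    new TableRow().set("name", "alpha"),
                    new TableRow().set("name", "beta"))
                .withCoder(TableRowJsonCoder.of()));

    WriteResult result =
        rows.apply(
            BigQueryIO.writeTableRows()
                .to("my-project:my_dataset.my_table") // hypothetical table spec
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
                .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
                .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors()));

    // Rows the streaming write could not insert are re-emitted by
    // StreamingWriteFn#finishBundle and surface here instead of failing the job.
    result.getFailedInserts()
        .apply(
            "FormatFailures",
            MapElements.via(
                new SimpleFunction<TableRow, String>() {
                  @Override
                  public String apply(TableRow row) {
                    return "failed insert: " + row;
                  }
                }));

    p.run().waitUntilFinish();
  }
}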
Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
From the class StreamingWriteTables, method expand.
@Override
public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
  // A naive implementation would be to simply stream data directly to BigQuery.
  // However, this could occasionally lead to duplicated data, e.g., when
  // a VM that runs this code is restarted and the code is re-run.
  // The above risk is mitigated in this implementation by relying on
  // BigQuery's built-in best-effort de-dup mechanism.
  // To use this mechanism, each input TableRow is tagged with a generated
  // unique id, which is then passed to BigQuery and used to ignore duplicates.
  // We create 50 keys per BigQuery table to generate output on. This is few enough that we
  // get good batching into BigQuery's insert calls, and enough that we can max out the
  // streaming insert quota.
  PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged =
      input
          .apply("ShardTableWrites", ParDo.of(new GenerateShardedTable(50)))
          .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowJsonCoder.of()))
          .apply("TagWithUniqueIds", ParDo.of(new TagWithUniqueIds()));
  // To prevent having the same TableRow processed more than once with regenerated
  // different unique ids, this implementation relies on "checkpointing", which is
  // achieved as a side effect of having StreamingWriteFn immediately follow a GBK,
  // performed by Reshuffle.
  TupleTag<Void> mainOutputTag = new TupleTag<>("mainOutput");
  TupleTag<TableRow> failedInsertsTag = new TupleTag<>("failedInserts");
  PCollectionTuple tuple =
      tagged
          .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
          .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
          .apply(
              "GlobalWindow",
              Window.<KV<ShardedKey<String>, TableRowInfo>>into(new GlobalWindows())
                  .triggering(DefaultTrigger.of())
                  .discardingFiredPanes())
          .apply(
              "StreamingWrite",
              ParDo.of(new StreamingWriteFn(bigQueryServices, retryPolicy, failedInsertsTag))
                  .withOutputTags(mainOutputTag, TupleTagList.of(failedInsertsTag)));
  PCollection<TableRow> failedInserts = tuple.get(failedInsertsTag);
  failedInserts.setCoder(TableRowJsonCoder.of());
  return WriteResult.in(input.getPipeline(), failedInsertsTag, failedInserts);
}
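The unique ids the comments refer to end up as the insertId on each entry of BigQuery's tabledata.insertAll request, which is what lets the service drop retried duplicates. The transform's real insert path lives inside Beam's BigQueryServices implementation; the sketch below (hypothetical class InsertIdSketch and method toInsertRequest) only illustrates how a per-row insertId is attached using the same com.google.api.services.bigquery.model classes.

import com.google.api.services.bigquery.model.TableDataInsertAllRequest;
import com.google.api.services.bigquery.model.TableRow;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

public class InsertIdSketch {

  /**
   * Wraps each TableRow in an insertAll entry carrying a caller-generated
   * insertId. BigQuery uses that id for best-effort de-duplication, so a
   * retried request with the same ids does not create duplicate rows.
   */
  static TableDataInsertAllRequest toInsertRequest(List<TableRow> rows, List<String> uniqueIds) {
    List<TableDataInsertAllRequest.Rows> entries = new ArrayList<>();
    for (int i = 0; i < rows.size(); i++) {
      TableDataInsertAllRequest.Rows entry = new TableDataInsertAllRequest.Rows();
      entry.setInsertId(uniqueIds.get(i)); // id generated once, upstream of any retry
      entry.setJson(rows.get(i));          // TableRow is itself a Map<String, Object>
      entries.add(entry);
    }
    return new TableDataInsertAllRequest().setRows(entries);
  }

  public static void main(String[] args) {
    List<TableRow> rows = new ArrayList<>();
    rows.add(new TableRow().set("name", "alpha"));
    List<String> ids = new ArrayList<>();
    ids.add(UUID.randomUUID().toString());
    System.out.println(toInsertRequest(rows, ids));
  }
}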
Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
From the class WriteGroupedRecordsToFiles, method processElement.
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  String tempFilePrefix = c.sideInput(this.tempFilePrefix);
  TableRowWriter writer = new TableRowWriter(tempFilePrefix);
  try (TableRowWriter ignored = writer) {
    for (TableRow tableRow : c.element().getValue()) {
      // Roll over to a new file once the current one exceeds maxFileSize,
      // emitting the finished file's resource id, size, and destination key.
      if (writer.getByteSize() > maxFileSize) {
        writer.close();
        TableRowWriter.Result result = writer.getResult();
        c.output(new WriteBundlesToFiles.Result<>(
            result.resourceId.toString(), result.byteSize, c.element().getKey().getKey()));
        writer = new TableRowWriter(tempFilePrefix);
      }
      writer.write(tableRow);
    }
  }
  // Emit the last (possibly partially filled) file for this group.
  TableRowWriter.Result result = writer.getResult();
  c.output(new WriteBundlesToFiles.Result<>(
      result.resourceId.toString(), result.byteSize, c.element().getKey().getKey()));
}
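TableRowWriter and WriteBundlesToFiles.Result are Beam-internal helpers, so the rollover logic is easier to see in isolation. The sketch below reproduces the same size-based rollover pattern with only the JDK; RollingFileWriterSketch, writeWithRollover, and the temp-file naming are illustrative stand-ins, not Beam code.

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

/** Rolls output over to a new temp file whenever the current one exceeds maxBytes. */
public class RollingFileWriterSketch {

  static List<Path> writeWithRollover(Iterable<String> jsonRows, long maxBytes) throws IOException {
    List<Path> finishedFiles = new ArrayList<>();
    Path current = Files.createTempFile("rows-", ".json");
    OutputStream out = new BufferedOutputStream(Files.newOutputStream(current));
    long written = 0;
    for (String row : jsonRows) {
      if (written > maxBytes) {
        // Same shape as WriteGroupedRecordsToFiles: close the full file,
        // record it, and start a fresh one before writing the next row.
        out.close();
        finishedFiles.add(current);
        current = Files.createTempFile("rows-", ".json");
        out = new BufferedOutputStream(Files.newOutputStream(current));
        written = 0;
      }
      byte[] line = (row + "\n").getBytes(StandardCharsets.UTF_8);
      out.write(line);
      written += line.length;
    }
    // The last, possibly partial, file is still emitted.
    out.close();
    finishedFiles.add(current);
    return finishedFiles;
  }
}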
Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.
From the class BigQuerySourceBase, method createSources.
private List<BoundedSource<TableRow>> createSources(List<ResourceId> files, TableSchema tableSchema)
    throws IOException, InterruptedException {
  final String jsonSchema = BigQueryIO.JSON_FACTORY.toString(tableSchema);
  // Converts each Avro GenericRecord read from the export files back into a
  // TableRow, using the table schema serialized to JSON above.
  SerializableFunction<GenericRecord, TableRow> function =
      new SerializableFunction<GenericRecord, TableRow>() {
        @Override
        public TableRow apply(GenericRecord input) {
          return BigQueryAvroUtils.convertGenericRecordToTableRow(
              input, BigQueryHelpers.fromJsonString(jsonSchema, TableSchema.class));
        }
      };
  List<BoundedSource<TableRow>> avroSources = Lists.newArrayList();
  for (ResourceId file : files) {
    avroSources.add(
        new TransformingSource<>(AvroSource.from(file.toString()), function, getDefaultOutputCoder()));
  }
  return ImmutableList.copyOf(avroSources);
}
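BigQueryAvroUtils.convertGenericRecordToTableRow is a Beam-internal utility that also handles nested and repeated fields and BigQuery's type mappings. As a rough idea of what such a conversion does, the sketch below (hypothetical class RecordToRowFn) handles only flat records with scalar fields.

import com.google.api.services.bigquery.model.TableRow;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.transforms.SerializableFunction;

/**
 * Simplified GenericRecord-to-TableRow conversion for flat records with
 * scalar fields; a stand-in sketch, not the Beam implementation.
 */
public class RecordToRowFn implements SerializableFunction<GenericRecord, TableRow> {
  @Override
  public TableRow apply(GenericRecord record) {
    TableRow row = new TableRow();
    for (Schema.Field field : record.getSchema().getFields()) {
      Object value = record.get(field.name());
      // Avro strings arrive as org.apache.avro.util.Utf8; normalize them to
      // String and leave other scalar values (Long, Double, Boolean) as-is.
      row.set(field.name(), value instanceof CharSequence ? value.toString() : value);
    }
    return row;
  }
}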