Example 41 with TableRow

Use of com.google.api.services.bigquery.model.TableRow in project zeppelin by apache.

The class BigQueryInterpreter, method printRows.

// Generates and returns the schema and the rows as a tab-separated string.
public static String printRows(final GetQueryResultsResponse response) {
    StringBuilder msg = new StringBuilder();
    try {
        for (TableFieldSchema schem : response.getSchema().getFields()) {
            msg.append(schem.getName());
            msg.append(TAB);
        }
        msg.append(NEWLINE);
        for (TableRow row : response.getRows()) {
            for (TableCell field : row.getF()) {
                msg.append(field.getV().toString());
                msg.append(TAB);
            }
            msg.append(NEWLINE);
        }
        return msg.toString();
    } catch (NullPointerException ex) {
        throw new NullPointerException("SQL Execution returned an error!");
    }
}
Also used: TableCell (com.google.api.services.bigquery.model.TableCell), TableRow (com.google.api.services.bigquery.model.TableRow), TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)
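
For context, a minimal sketch of how printRows might be exercised, assuming zeppelin's BigQueryInterpreter (with its TAB and NEWLINE constants) is on the classpath; the response here is hand-assembled rather than fetched from the BigQuery API:

import com.google.api.services.bigquery.model.GetQueryResultsResponse;
import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.util.Arrays;
// Assumed package for the zeppelin interpreter class shown above.
import org.apache.zeppelin.bigquery.BigQueryInterpreter;

public class PrintRowsDemo {
    public static void main(String[] args) {
        // Hand-assemble a response: a two-column schema and one data row.
        TableSchema schema = new TableSchema().setFields(Arrays.asList(
                new TableFieldSchema().setName("name"),
                new TableFieldSchema().setName("age")));
        TableRow row = new TableRow().setF(Arrays.asList(
                new TableCell().setV("alice"),
                new TableCell().setV("30")));
        GetQueryResultsResponse response = new GetQueryResultsResponse()
                .setSchema(schema)
                .setRows(Arrays.asList(row));
        // Prints the header row ("name", "age") and then "alice", "30",
        // tab-separated, one line each.
        System.out.print(BigQueryInterpreter.printRows(response));
    }
}

A response whose schema or rows are absent (e.g., a failed query) makes the getters return null, which is exactly the case the NullPointerException catch in printRows traps.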

Example 42 with TableRow

Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

The class StreamingWriteFn, method finishBundle.

/** Writes the accumulated rows into BigQuery with the streaming API. */
@FinishBundle
public void finishBundle(FinishBundleContext context) throws Exception {
    List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
    BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
    for (Map.Entry<String, List<ValueInSingleWindow<TableRow>>> entry : tableRows.entrySet()) {
        TableReference tableReference = BigQueryHelpers.parseTableSpec(entry.getKey());
        flushRows(tableReference, entry.getValue(), uniqueIdsForTableRows.get(entry.getKey()), options, failedInserts);
    }
    tableRows.clear();
    uniqueIdsForTableRows.clear();
    for (ValueInSingleWindow<TableRow> row : failedInserts) {
        context.output(failedOutputTag, row.getValue(), row.getTimestamp(), row.getWindow());
    }
}
Also used: TableReference (com.google.api.services.bigquery.model.TableReference), TableRow (com.google.api.services.bigquery.model.TableRow), ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow), List (java.util.List), HashMap (java.util.HashMap), Map (java.util.Map)
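
The failedOutputTag that finishBundle writes to ultimately surfaces to user code through WriteResult. A hedged sketch of consuming it (the table spec is hypothetical, and the exact builder methods available vary by Beam version):

import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy;
import org.apache.beam.sdk.io.gcp.bigquery.WriteResult;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;

public class FailedInsertsExample {
    // rows: an unbounded PCollection of TableRows, so streaming inserts are used.
    public static void writeWithFailureHandling(PCollection<TableRow> rows) {
        WriteResult result = rows.apply(
                BigQueryIO.writeTableRows()
                        .to("my-project:my_dataset.my_table") // hypothetical table spec
                        // CREATE_NEVER avoids having to supply a schema here.
                        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
                        .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors()));
        // Rows that BigQuery permanently rejected come back as an ordinary PCollection.
        result.getFailedInserts().apply("LogFailedInserts", ParDo.of(new DoFn<TableRow, Void>() {
            @ProcessElement
            public void processElement(ProcessContext c) {
                System.err.println("Failed insert: " + c.element());
            }
        }));
    }
}

Note that failed inserts are only produced on the streaming-insert path, i.e., the StreamingWriteTables expansion shown in the next example.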

Example 43 with TableRow

Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

The class StreamingWriteTables, method expand.

@Override
public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
    // A naive implementation would be to simply stream data directly to BigQuery.
    // However, this could occasionally lead to duplicated data, e.g., when
    // a VM that runs this code is restarted and the code is re-run.
    // The above risk is mitigated in this implementation by relying on
    // BigQuery built-in best effort de-dup mechanism.
    // To use this mechanism, each input TableRow is tagged with a generated
    // unique id, which is then passed to BigQuery and used to ignore duplicates.
    // We create 50 keys per BigQuery table to generate output on. This is few enough that we
    // get good batching into BigQuery's insert calls, and enough that we can max out the
    // streaming insert quota.
    PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged = input
            .apply("ShardTableWrites", ParDo.of(new GenerateShardedTable(50)))
            .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowJsonCoder.of()))
            .apply("TagWithUniqueIds", ParDo.of(new TagWithUniqueIds()));
    // To prevent having the same TableRow processed more than once with regenerated
    // different unique ids, this implementation relies on "checkpointing", which is
    // achieved as a side effect of having StreamingWriteFn immediately follow a GBK,
    // performed by Reshuffle.
    TupleTag<Void> mainOutputTag = new TupleTag<>("mainOutput");
    TupleTag<TableRow> failedInsertsTag = new TupleTag<>("failedInserts");
    PCollectionTuple tuple = tagged
            .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
            .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
            .apply("GlobalWindow",
                    Window.<KV<ShardedKey<String>, TableRowInfo>>into(new GlobalWindows())
                            .triggering(DefaultTrigger.of())
                            .discardingFiredPanes())
            .apply("StreamingWrite",
                    ParDo.of(new StreamingWriteFn(bigQueryServices, retryPolicy, failedInsertsTag))
                            .withOutputTags(mainOutputTag, TupleTagList.of(failedInsertsTag)));
    PCollection<TableRow> failedInserts = tuple.get(failedInsertsTag);
    failedInserts.setCoder(TableRowJsonCoder.of());
    return WriteResult.in(input.getPipeline(), failedInsertsTag, failedInserts);
}
Also used: GlobalWindows (org.apache.beam.sdk.transforms.windowing.GlobalWindows), TupleTag (org.apache.beam.sdk.values.TupleTag), KV (org.apache.beam.sdk.values.KV), TableRow (com.google.api.services.bigquery.model.TableRow), PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)
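
GenerateShardedTable and TagWithUniqueIds are package-private inside Beam, but the idea is small enough to sketch. The stand-in below (all names illustrative; state and id generation are simplified relative to the real transforms) spreads each destination table across a fixed number of shard keys and attaches a random insert id for BigQuery's best-effort de-duplication:

import java.util.UUID;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;

// Input: table spec -> row JSON. Output: (table spec + shard) -> (unique id, row JSON).
class ShardAndTagFn extends DoFn<KV<String, String>, KV<String, KV<String, String>>> {
    private final int numShards;

    ShardAndTagFn(int numShards) {
        this.numShards = numShards;
    }

    @ProcessElement
    public void processElement(ProcessContext c) {
        // A random shard bounds the number of keys per table (50 in the transform above).
        int shard = ThreadLocalRandom.current().nextInt(numShards);
        String shardedKey = c.element().getKey() + "-" + shard;
        // The unique id is what BigQuery's streaming API uses to drop duplicates.
        String uniqueId = UUID.randomUUID().toString();
        c.output(KV.of(shardedKey, KV.of(uniqueId, c.element().getValue())));
    }
}

The Reshuffle that follows in expand is what freezes these random choices: after the shuffle the ids are checkpointed, so a retry replays the same ids instead of generating new ones.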

Example 44 with TableRow

Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

The class WriteGroupedRecordsToFiles, method processElement.

@ProcessElement
public void processElement(ProcessContext c) throws Exception {
    String tempFilePrefix = c.sideInput(this.tempFilePrefix);
    TableRowWriter writer = new TableRowWriter(tempFilePrefix);
    try (TableRowWriter ignored = writer) {
        for (TableRow tableRow : c.element().getValue()) {
            // Roll over once the current file exceeds maxFileSize: close it,
            // emit its Result downstream, and continue with a fresh writer.
            if (writer.getByteSize() > maxFileSize) {
                writer.close();
                TableRowWriter.Result result = writer.getResult();
                c.output(new WriteBundlesToFiles.Result<>(result.resourceId.toString(), result.byteSize, c.element().getKey().getKey()));
                writer = new TableRowWriter(tempFilePrefix);
            }
            writer.write(tableRow);
        }
    }
    // Emit the final, possibly partially filled file for this key.
    TableRowWriter.Result result = writer.getResult();
    c.output(new WriteBundlesToFiles.Result<>(result.resourceId.toString(), result.byteSize, c.element().getKey().getKey()));
}
Also used: TableRow (com.google.api.services.bigquery.model.TableRow)
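
TableRowWriter is likewise internal to Beam, but the size-based rollover pattern is easy to reproduce with plain java.nio. A minimal sketch, assuming records are plain strings written one per line (Beam's writer additionally JSON-encodes each TableRow and reports a per-file Result with resource id and byte count):

import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

class RollingFileWriter {
    private final long maxBytes;
    private final List<Path> finished = new ArrayList<>();
    private Path current;
    private Writer out;
    private long written;

    RollingFileWriter(long maxBytes) {
        this.maxBytes = maxBytes;
    }

    void write(String record) throws IOException {
        // Mirror the DoFn above: once the current file is too large,
        // close it and continue in a fresh temp file.
        if (out == null || written > maxBytes) {
            roll();
        }
        String line = record + "\n";
        out.write(line);
        written += line.getBytes(StandardCharsets.UTF_8).length;
    }

    private void roll() throws IOException {
        close();
        current = Files.createTempFile("rows", ".json");
        out = Files.newBufferedWriter(current, StandardCharsets.UTF_8);
        written = 0;
    }

    void close() throws IOException {
        if (out != null) {
            out.close();
            finished.add(current);
            out = null;
        }
    }

    List<Path> finishedFiles() {
        return finished;
    }
}

A cap like new RollingFileWriter(64L << 20) bounds files at 64 MiB, analogous to the maxFileSize check in the DoFn.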

Example 45 with TableRow

Use of com.google.api.services.bigquery.model.TableRow in project beam by apache.

The class BigQuerySourceBase, method createSources.

private List<BoundedSource<TableRow>> createSources(List<ResourceId> files, TableSchema tableSchema) throws IOException, InterruptedException {
    final String jsonSchema = BigQueryIO.JSON_FACTORY.toString(tableSchema);
    SerializableFunction<GenericRecord, TableRow> function = new SerializableFunction<GenericRecord, TableRow>() {

        @Override
        public TableRow apply(GenericRecord input) {
            return BigQueryAvroUtils.convertGenericRecordToTableRow(input, BigQueryHelpers.fromJsonString(jsonSchema, TableSchema.class));
        }
    };
    List<BoundedSource<TableRow>> avroSources = Lists.newArrayList();
    for (ResourceId file : files) {
        avroSources.add(new TransformingSource<>(AvroSource.from(file.toString()), function, getDefaultOutputCoder()));
    }
    return ImmutableList.copyOf(avroSources);
}
Also used: BoundedSource (org.apache.beam.sdk.io.BoundedSource), SerializableFunction (org.apache.beam.sdk.transforms.SerializableFunction), TableSchema (com.google.api.services.bigquery.model.TableSchema), ResourceId (org.apache.beam.sdk.io.fs.ResourceId), TableRow (com.google.api.services.bigquery.model.TableRow), GenericRecord (org.apache.avro.generic.GenericRecord)
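
BigQueryAvroUtils.convertGenericRecordToTableRow handles nested and repeated fields plus BigQuery type coercions; for flat schemas the core idea reduces to a few lines. A simplified, hand-rolled sketch (deliberately lossy: every value is stringified):

import com.google.api.services.bigquery.model.TableRow;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;

public class FlatRecordToRow {
    // Flat-schema conversion only; nested/repeated fields and proper type
    // mapping are what the real BigQueryAvroUtils adds on top of this.
    public static TableRow recordToRow(GenericRecord record) {
        TableRow row = new TableRow();
        for (Schema.Field field : record.getSchema().getFields()) {
            Object value = record.get(field.name());
            // Avro strings arrive as org.apache.avro.util.Utf8; stringify for JSON.
            row.set(field.name(), value == null ? null : value.toString());
        }
        return row;
    }
}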

Aggregations

TableRow (com.google.api.services.bigquery.model.TableRow) 73
Test (org.junit.Test) 43
TableReference (com.google.api.services.bigquery.model.TableReference) 24
TableSchema (com.google.api.services.bigquery.model.TableSchema) 18
Pipeline (org.apache.beam.sdk.Pipeline) 16
KV (org.apache.beam.sdk.values.KV) 15
TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema) 14
JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) 14
BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) 13
TestPipeline (org.apache.beam.sdk.testing.TestPipeline) 12
BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) 11
Table (com.google.api.services.bigquery.model.Table) 10
HashBasedTable (com.google.common.collect.HashBasedTable) 10
JobStatus (com.google.api.services.bigquery.model.JobStatus) 9
TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse) 8
ArrayList (java.util.ArrayList) 8
List (java.util.List) 8
Map (java.util.Map) 8
ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow) 7
JobStatistics (com.google.api.services.bigquery.model.JobStatistics) 6