Search in sources:

Example 1 with BigQueryInputProperties

use of org.talend.components.bigquery.input.BigQueryInputProperties in project components by Talend.

In the class BigQueryBeamRuntimeTestIT, the method testAllTypesInputOutput:

private void testAllTypesInputOutput(Pipeline pipeline) throws UnsupportedEncodingException {
    String tableName = "testalltypes";
    BigQueryOutputProperties outputProperties = createOutput(createDatasetFromTable(datastore, datasetName, tableName));
    outputProperties.tableOperation.setValue(BigQueryOutputProperties.TableOperation.DROP_IF_EXISTS_AND_CREATE);
    Schema schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"BigQuerySchema\",\"fields\":[{\"name\":\"strCol\",\"type\":[\"string\",\"null\"]},{\"name\":\"bytesCol\",\"type\":[\"bytes\",\"null\"]},{\"name\":\"intCol\",\"type\":[\"long\",\"null\"]},{\"name\":\"floatCol\",\"type\":[\"double\",\"null\"]},{\"name\":\"boolCol\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"timestampCol\",\"type\":[{\"type\":\"string\",\"talend.field.dbType\":\"TIMESTAMP\"},\"null\"]},{\"name\":\"dateCol\",\"type\":[{\"type\":\"string\",\"talend.field.dbType\":\"DATE\"},\"null\"]},{\"name\":\"timeCol\",\"type\":[{\"type\":\"string\",\"talend.field.dbType\":\"TIME\"},\"null\"]},{\"name\":\"datetimeCol\",\"type\":[{\"type\":\"string\",\"talend.field.dbType\":\"DATETIME\"},\"null\"]},{\"name\":\"strListCol\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"person\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"person\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"age\",\"type\":\"long\"},{\"name\":\"phones\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"addresses\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"addresses\",\"fields\":[{\"name\":\"city\",\"type\":\"string\"},{\"name\":\"address\",\"type\":\"string\"}]}}}]}}}]}");
    outputProperties.getDatasetProperties().main.schema.setValue(schema);
    BigQueryOutputRuntime outputRuntime = new BigQueryOutputRuntime();
    outputRuntime.initialize(runtimeContainer, outputProperties);
    List<TableRow> rows = new ArrayList<>();
    for (int i = 1; i <= 5; i++) {
        rows.add(// 
        new TableRow().set("strCol", // 
        "strValue" + i).set("bytesCol", // 
        ("bytesCol" + i).getBytes("utf8")).set("intCol", // 
        i).set("floatCol", // 
        i * 9.9).set("boolCol", // 
        i % 3 > 1).set("timestampCol", // 
        "2016-10-17 15:21:23.135792 UTC").set("dateCol", // 
        "2016-10-17").set("timeCol", // 
        "15:21:23.123456").set("datetimeCol", // 
        "2016-10-17T15:21:23.654321").set("strListCol", // 
        Arrays.asList("a" + i, "b" + i, "c" + i)).set(// 
        "person", // 
        Arrays.asList(// 
        new TableRow().set("name", // 
        "n1_" + i).set("age", // 
        i).set("phones", // 
        Arrays.asList("111" + i, "222" + i, "333" + i)).set(// 
        "addresses", // 
        Arrays.asList(// 
        new TableRow().set("city", // 
        "Beijing").set("address", // 
        "who care" + i), // 
        new TableRow().set("city", // 
        "Tianjin").set("address", // 
        "don't know"))), // 
        // 
        new TableRow().set("name", // 
        "n2_" + i).set("age", // 
        i).set("phones", // 
        Arrays.asList("111" + i, "222" + i, "333" + i)).set(// 
        "addresses", // 
        Arrays.asList(// 
        new TableRow().set("city", // 
        "Beijing").set("address", // 
        "I care" + i), // 
        new TableRow().set("city", // 
        "Tianjin").set("address", // 
        "I know"))))));
    }
    pipeline.apply(Create.of(rows).withCoder(TableRowJsonCoder.of())).apply(ParDo.of(new BigQueryInputRuntime.TableRowToIndexedRecordFn(schema))).setCoder(LazyAvroCoder.of()).apply(outputRuntime);
    pipeline.run().waitUntilFinish();
    // finish output
    // start input
    BigQueryInputProperties inputProperties = createInput(createDatasetFromTable(datastore, datasetName, tableName));
    BigQueryInputRuntime inputRuntime = new BigQueryInputRuntime();
    inputRuntime.initialize(runtimeContainer, inputProperties);
    PCollection<TableRow> tableRowPCollection = pipeline.apply(inputRuntime).apply(ParDo.of(new BigQueryOutputRuntime.IndexedRecordToTableRowFn())).setCoder(TableRowJsonCoder.of());
    PAssert.that(tableRowPCollection).containsInAnyOrder(rows);
    pipeline.run().waitUntilFinish();
}
Also used : Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) BigQueryOutputProperties(org.talend.components.bigquery.output.BigQueryOutputProperties) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryInputProperties(org.talend.components.bigquery.input.BigQueryInputProperties)

Example 2 with BigQueryInputProperties

use of org.talend.components.bigquery.input.BigQueryInputProperties in project components by Talend.

In the class BigQueryTestConstants, the method createInput:

public static BigQueryInputProperties createInput(BigQueryDatasetProperties dataset) {
    BigQueryInputProperties input = new BigQueryInputProperties("input");
    input.init();
    input.setDatasetProperties(dataset);
    return input;
}
Also used : BigQueryInputProperties(org.talend.components.bigquery.input.BigQueryInputProperties)

Example 3 with BigQueryInputProperties

use of org.talend.components.bigquery.input.BigQueryInputProperties in project components by Talend.

In the class BigQueryDatasetRuntime, the method getSample:

@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Create a pipeline using the input component to get records.
    DirectOptions options = BeamLocalRunnerOption.getOptions();
    final Pipeline p = Pipeline.create(options);
    // Create an input runtime based on the properties.
    BigQueryInputRuntime inputRuntime = new BigQueryInputRuntime();
    BigQueryInputProperties inputProperties = new BigQueryInputProperties(null);
    inputProperties.init();
    inputProperties.setDatasetProperties(properties);
    inputRuntime.initialize(new BeamJobRuntimeContainer(options), inputProperties);
    try (DirectConsumerCollector<IndexedRecord> collector = DirectConsumerCollector.of(consumer)) {
        // Collect a sample of the input records.
        // 
        p.apply(inputRuntime).apply(Sample.<IndexedRecord>any(limit)).apply(collector);
        PipelineResult pr = p.run();
        pr.waitUntilFinish();
    }
}
Also used : BeamJobRuntimeContainer(org.talend.components.adapter.beam.BeamJobRuntimeContainer) IndexedRecord(org.apache.avro.generic.IndexedRecord) PipelineResult(org.apache.beam.sdk.PipelineResult) BigQueryInputProperties(org.talend.components.bigquery.input.BigQueryInputProperties) DirectOptions(org.apache.beam.runners.direct.DirectOptions) Pipeline(org.apache.beam.sdk.Pipeline)

Aggregations

BigQueryInputProperties (org.talend.components.bigquery.input.BigQueryInputProperties)3 TableRow (com.google.api.services.bigquery.model.TableRow)1 ArrayList (java.util.ArrayList)1 Schema (org.apache.avro.Schema)1 IndexedRecord (org.apache.avro.generic.IndexedRecord)1 DirectOptions (org.apache.beam.runners.direct.DirectOptions)1 Pipeline (org.apache.beam.sdk.Pipeline)1 PipelineResult (org.apache.beam.sdk.PipelineResult)1 BeamJobRuntimeContainer (org.talend.components.adapter.beam.BeamJobRuntimeContainer)1 BigQueryOutputProperties (org.talend.components.bigquery.output.BigQueryOutputProperties)1