Example usage of org.talend.components.bigquery.input.BigQueryInputProperties in the Talend "components" project.
From the method testAllTypesInputOutput of class BigQueryBeamRuntimeTestIT.
/**
 * Round-trip integration test covering every supported BigQuery column type (string, bytes,
 * int, float, bool, timestamp, date, time, datetime, string array, nested repeated record):
 * writes generated rows through the output runtime, then reads them back through the input
 * runtime and asserts the read rows match what was written.
 *
 * @param pipeline the Beam pipeline used for both the write run and the read run
 * @throws UnsupportedEncodingException retained for signature compatibility; no longer actually
 *         thrown since the byte conversion now uses the {@code StandardCharsets.UTF_8} constant
 */
private void testAllTypesInputOutput(Pipeline pipeline) throws UnsupportedEncodingException {
    String tableName = "testalltypes";

    // Output side: recreate the target table on every run so the assertion below is exact.
    BigQueryOutputProperties outputProperties =
            createOutput(createDatasetFromTable(datastore, datasetName, tableName));
    outputProperties.tableOperation.setValue(BigQueryOutputProperties.TableOperation.DROP_IF_EXISTS_AND_CREATE);
    Schema schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"BigQuerySchema\",\"fields\":[{\"name\":\"strCol\",\"type\":[\"string\",\"null\"]},{\"name\":\"bytesCol\",\"type\":[\"bytes\",\"null\"]},{\"name\":\"intCol\",\"type\":[\"long\",\"null\"]},{\"name\":\"floatCol\",\"type\":[\"double\",\"null\"]},{\"name\":\"boolCol\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"timestampCol\",\"type\":[{\"type\":\"string\",\"talend.field.dbType\":\"TIMESTAMP\"},\"null\"]},{\"name\":\"dateCol\",\"type\":[{\"type\":\"string\",\"talend.field.dbType\":\"DATE\"},\"null\"]},{\"name\":\"timeCol\",\"type\":[{\"type\":\"string\",\"talend.field.dbType\":\"TIME\"},\"null\"]},{\"name\":\"datetimeCol\",\"type\":[{\"type\":\"string\",\"talend.field.dbType\":\"DATETIME\"},\"null\"]},{\"name\":\"strListCol\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"person\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"person\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"age\",\"type\":\"long\"},{\"name\":\"phones\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"addresses\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"addresses\",\"fields\":[{\"name\":\"city\",\"type\":\"string\"},{\"name\":\"address\",\"type\":\"string\"}]}}}]}}}]}");
    outputProperties.getDatasetProperties().main.schema.setValue(schema);
    BigQueryOutputRuntime outputRuntime = new BigQueryOutputRuntime();
    outputRuntime.initialize(runtimeContainer, outputProperties);

    // Five rows, one per loop index, exercising every column type at once.
    List<TableRow> rows = new ArrayList<>();
    for (int i = 1; i <= 5; i++) {
        rows.add(new TableRow()
                .set("strCol", "strValue" + i)
                // FIX: use the charset constant instead of the "utf8" name lookup — the
                // constant cannot throw and avoids charset-name resolution entirely.
                .set("bytesCol", ("bytesCol" + i).getBytes(java.nio.charset.StandardCharsets.UTF_8))
                .set("intCol", i)
                .set("floatCol", i * 9.9)
                .set("boolCol", i % 3 > 1)
                .set("timestampCol", "2016-10-17 15:21:23.135792 UTC")
                .set("dateCol", "2016-10-17")
                .set("timeCol", "15:21:23.123456")
                .set("datetimeCol", "2016-10-17T15:21:23.654321")
                .set("strListCol", Arrays.asList("a" + i, "b" + i, "c" + i))
                .set("person", Arrays.asList(
                        personRow("n1_" + i, i, "who care" + i, "don't know"),
                        personRow("n2_" + i, i, "I care" + i, "I know"))));
    }

    // Write: TableRow -> IndexedRecord against the schema, then through the output runtime.
    pipeline.apply(Create.of(rows).withCoder(TableRowJsonCoder.of()))
            .apply(ParDo.of(new BigQueryInputRuntime.TableRowToIndexedRecordFn(schema)))
            .setCoder(LazyAvroCoder.of())
            .apply(outputRuntime);
    pipeline.run().waitUntilFinish();
    // finish output
    // start input
    BigQueryInputProperties inputProperties =
            createInput(createDatasetFromTable(datastore, datasetName, tableName));
    BigQueryInputRuntime inputRuntime = new BigQueryInputRuntime();
    inputRuntime.initialize(runtimeContainer, inputProperties);
    // Read the table back, convert to TableRow, and compare with what was written.
    PCollection<TableRow> tableRowPCollection = pipeline.apply(inputRuntime)
            .apply(ParDo.of(new BigQueryOutputRuntime.IndexedRecordToTableRowFn()))
            .setCoder(TableRowJsonCoder.of());
    PAssert.that(tableRowPCollection).containsInAnyOrder(rows);
    pipeline.run().waitUntilFinish();
}

/**
 * Builds one nested "person" record for the test data: fixed phone list derived from {@code i}
 * and two addresses (Beijing with the given address text, Tianjin with the given address text).
 */
private static TableRow personRow(String name, int i, String beijingAddress, String tianjinAddress) {
    return new TableRow()
            .set("name", name)
            .set("age", i)
            .set("phones", Arrays.asList("111" + i, "222" + i, "333" + i))
            .set("addresses", Arrays.asList(
                    new TableRow().set("city", "Beijing").set("address", beijingAddress),
                    new TableRow().set("city", "Tianjin").set("address", tianjinAddress)));
}
Example usage of org.talend.components.bigquery.input.BigQueryInputProperties in the Talend "components" project.
From the method createInput of class BigQueryTestConstants.
/**
 * Creates ready-to-use input properties bound to the given BigQuery dataset.
 *
 * @param dataset the dataset the input component should read from
 * @return initialized {@link BigQueryInputProperties} wired to {@code dataset}
 */
public static BigQueryInputProperties createInput(BigQueryDatasetProperties dataset) {
    final BigQueryInputProperties inputProperties = new BigQueryInputProperties("input");
    inputProperties.init();
    inputProperties.setDatasetProperties(dataset);
    return inputProperties;
}
Example usage of org.talend.components.bigquery.input.BigQueryInputProperties in the Talend "components" project.
From the method getSample of class BigQueryDatasetRuntime.
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Build a local direct-runner pipeline used only to pull a bounded sample of records.
    DirectOptions options = BeamLocalRunnerOption.getOptions();
    final Pipeline samplePipeline = Pipeline.create(options);

    // Wire an input runtime to this dataset's properties.
    BigQueryInputProperties inputProperties = new BigQueryInputProperties(null);
    inputProperties.init();
    inputProperties.setDatasetProperties(properties);
    BigQueryInputRuntime inputRuntime = new BigQueryInputRuntime();
    inputRuntime.initialize(new BeamJobRuntimeContainer(options), inputProperties);

    // Stream at most `limit` records into the caller-supplied consumer, then block
    // until the pipeline finishes so the sample is complete when we return.
    try (DirectConsumerCollector<IndexedRecord> collector = DirectConsumerCollector.of(consumer)) {
        samplePipeline.apply(inputRuntime).apply(Sample.<IndexedRecord>any(limit)).apply(collector);
        samplePipeline.run().waitUntilFinish();
    }
}
Aggregations