Use of org.apache.beam.runners.direct.DirectOptions in the project "components" by Talend.
From the class S3DatasetRuntime, method getSample.
/**
 * Runs a local Beam pipeline over this dataset and hands a sample of its
 * records to the given consumer.
 *
 * @param limit    the sample size passed both to the input properties and to
 *                 {@code Sample.any}
 * @param consumer receives the sampled records through a
 *                 {@code DirectConsumerCollector}
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Build an S3 input component that points at this dataset.
    S3InputRuntime source = new S3InputRuntime();
    S3InputProperties sourceProps = new S3InputProperties(null);
    // NOTE(review): limit is assigned before init(); confirm init() does not reset it.
    sourceProps.limit.setValue(limit);
    sourceProps.init();
    sourceProps.setDatasetProperties(properties);
    // The first argument (the runtime container slot) is deliberately left null here.
    source.initialize(null, sourceProps);

    // Assemble the pipeline with the options supplied by BeamLocalRunnerOption.
    DirectOptions runnerOptions = BeamLocalRunnerOption.getOptions();
    final Pipeline pipeline = Pipeline.create(runnerOptions);

    try (DirectConsumerCollector<IndexedRecord> sink = DirectConsumerCollector.of(consumer)) {
        // input -> sample of `limit` records -> consumer
        pipeline
                .apply(source)
                .apply(Sample.<IndexedRecord>any(limit))
                .apply(sink);
        // Block until the pipeline has finished before the collector is closed.
        pipeline.run().waitUntilFinish();
    }
}
Use of org.apache.beam.runners.direct.DirectOptions in the project "components" by Talend.
From the class KinesisDatasetRuntime, method getSample.
/**
 * Runs a local Beam pipeline over this Kinesis dataset and hands a sample of
 * its records to the given consumer.
 *
 * @param limit    used both as the maximum number of records to read and as
 *                 the size given to {@code Sample.any}
 * @param consumer receives the sampled records through a
 *                 {@code DirectConsumerCollector}
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Build a Kinesis input component that points at this dataset.
    KinesisInputRuntime source = new KinesisInputRuntime();
    KinesisInputProperties sourceProps = new KinesisInputProperties(null);
    sourceProps.init();
    sourceProps.setDatasetProperties(properties);

    // Cap the read by record count and by read time.
    sourceProps.useMaxNumRecords.setValue(true);
    sourceProps.maxNumRecords.setValue(limit);
    sourceProps.useMaxReadTime.setValue(true);
    // Unit is not visible from here -- presumably milliseconds; TODO confirm.
    sourceProps.maxReadTime.setValue(10000L);

    // Read from the EARLIEST offset type.
    sourceProps.position.setValue(KinesisInputProperties.OffsetType.EARLIEST);
    // The first argument (the runtime container slot) is deliberately left null here.
    source.initialize(null, sourceProps);

    // Assemble the pipeline with the options supplied by BeamLocalRunnerOption.
    DirectOptions runnerOptions = BeamLocalRunnerOption.getOptions();
    final Pipeline pipeline = Pipeline.create(runnerOptions);

    try (DirectConsumerCollector<IndexedRecord> sink = DirectConsumerCollector.of(consumer)) {
        // input -> sample of `limit` records -> consumer
        pipeline
                .apply(source)
                .apply(Sample.<IndexedRecord>any(limit))
                .apply(sink);
        // Block until the pipeline has finished before the collector is closed.
        pipeline.run().waitUntilFinish();
    }
}
Use of org.apache.beam.runners.direct.DirectOptions in the project "components" by Talend.
From the class PubSubDatasetRuntime, method getSample.
/**
 * Runs a local Beam pipeline over this Pub/Sub dataset and hands a sample of
 * its records to the given consumer.
 *
 * @param limit    used both as the maximum number of records to read and as
 *                 the size given to {@code Sample.any}
 * @param consumer receives the sampled records through a
 *                 {@code DirectConsumerCollector}
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Pub/Sub has no offsets and a message is deleted once it has been read,
    // so sampling has to use a dummy reader that does not ack what it reads.
    PubSubInputRuntime source = new PubSubInputRuntime();
    PubSubInputProperties sourceProps = new PubSubInputProperties(null);
    sourceProps.init();
    sourceProps.setDatasetProperties(properties);

    // Cap the read by record count and by read time.
    sourceProps.useMaxNumRecords.setValue(true);
    sourceProps.maxNumRecords.setValue(limit);
    sourceProps.useMaxReadTime.setValue(true);
    // 10s; for small datasets this value would ideally depend on the ack deadline.
    sourceProps.maxReadTime.setValue(10000L);

    // Do not acknowledge messages while sampling (see the note at the top).
    sourceProps.noACK.setValue(true);
    // The first argument (the runtime container slot) is deliberately left null here.
    source.initialize(null, sourceProps);

    // Assemble the pipeline with the options supplied by BeamLocalRunnerOption.
    DirectOptions runnerOptions = BeamLocalRunnerOption.getOptions();
    final Pipeline pipeline = Pipeline.create(runnerOptions);

    try (DirectConsumerCollector<IndexedRecord> sink = DirectConsumerCollector.of(consumer)) {
        // input -> sample of `limit` records -> consumer
        pipeline
                .apply(source)
                .apply(Sample.<IndexedRecord>any(limit))
                .apply(sink);
        // Block until the pipeline has finished before the collector is closed.
        pipeline.run().waitUntilFinish();
    }
}
Use of org.apache.beam.runners.direct.DirectOptions in the project "components" by Talend.
From the class ElasticsearchDatasetRuntime, method getSample.
/**
 * Runs a local Beam pipeline over this Elasticsearch dataset and hands a
 * sample of its records to the given consumer.
 *
 * @param limit    the size given to {@code Sample.any}
 * @param consumer receives the sampled records through a
 *                 {@code DirectConsumerCollector}
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // Build an Elasticsearch input component that points at this dataset.
    ElasticsearchInputRuntime source = new ElasticsearchInputRuntime();
    ElasticsearchInputProperties sourceProps = new ElasticsearchInputProperties(null);
    sourceProps.init();
    sourceProps.setDatasetProperties(properties);
    // The first argument (the runtime container slot) is deliberately left null here.
    source.initialize(null, sourceProps);

    // Assemble the pipeline with the options supplied by BeamLocalRunnerOption.
    DirectOptions runnerOptions = BeamLocalRunnerOption.getOptions();
    final Pipeline pipeline = Pipeline.create(runnerOptions);

    try (DirectConsumerCollector<IndexedRecord> sink = DirectConsumerCollector.of(consumer)) {
        // input -> sample of `limit` records -> consumer
        pipeline
                .apply(source)
                .apply(Sample.<IndexedRecord>any(limit))
                .apply(sink);
        // Block until the pipeline has finished before the collector is closed.
        pipeline.run().waitUntilFinish();
    }
}
Use of org.apache.beam.runners.direct.DirectOptions in the project "components" by Talend.
From the class BigQueryDatasetRuntime, method getSample.
/**
 * Runs a local Beam pipeline over this BigQuery dataset and hands a sample of
 * its records to the given consumer.
 *
 * @param limit    the size given to {@code Sample.any}
 * @param consumer receives the sampled records through a
 *                 {@code DirectConsumerCollector}
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // The options are obtained first because the input runtime's container needs them too.
    DirectOptions runnerOptions = BeamLocalRunnerOption.getOptions();
    final Pipeline pipeline = Pipeline.create(runnerOptions);

    // Build a BigQuery input component that points at this dataset.
    BigQueryInputRuntime source = new BigQueryInputRuntime();
    BigQueryInputProperties sourceProps = new BigQueryInputProperties(null);
    sourceProps.init();
    sourceProps.setDatasetProperties(properties);
    // This runtime is initialized with a container wrapping the same options as the pipeline.
    source.initialize(new BeamJobRuntimeContainer(runnerOptions), sourceProps);

    try (DirectConsumerCollector<IndexedRecord> sink = DirectConsumerCollector.of(consumer)) {
        // input -> sample of `limit` records -> consumer
        pipeline
                .apply(source)
                .apply(Sample.<IndexedRecord>any(limit))
                .apply(sink);
        // Block until the pipeline has finished before the collector is closed.
        pipeline.run().waitUntilFinish();
    }
}
Aggregations