Use of org.apache.beam.sdk.io.synthetic.SyntheticSourceOptions in the Apache Beam project.
Class SyntheticDataPublisher, method main.
/**
 * Builds and runs a pipeline that reads synthetic key/value records and hands them to each
 * configured sink.
 *
 * <p>The pipeline options are parsed from {@code args} (with validation) and stored in the
 * {@code options} field. The source is a {@link SyntheticBoundedSource} configured from the JSON
 * string returned by {@code options.getSourceOptions()}. The resulting collection is passed to:
 *
 * <ul>
 *   <li>{@code writeToKafka} when both the Kafka bootstrap server address and the Kafka topic
 *       are non-null,
 *   <li>{@code writeToPubSub} when the Pub/Sub topic is non-null,
 *   <li>{@code writeToKinesis} when {@code allKinesisOptionsConfigured()} returns true.
 * </ul>
 *
 * <p>The sink checks are independent, so zero, one, or several of them may be applied. The
 * method blocks until the pipeline run finishes.
 *
 * @param args command-line arguments forwarded to {@code PipelineOptionsFactory.fromArgs}
 * @throws IOException declared by the signature; presumably raised while parsing the JSON
 *     source options (confirm against {@code SyntheticOptions.fromJsonString})
 */
public static void main(String[] args) throws IOException {
  options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  SyntheticSourceOptions syntheticSourceOptions =
      SyntheticOptions.fromJsonString(options.getSourceOptions(), SyntheticSourceOptions.class);

  Pipeline publisherPipeline = Pipeline.create(options);

  SyntheticBoundedSource boundedSource = new SyntheticBoundedSource(syntheticSourceOptions);
  PCollection<KV<byte[], byte[]>> records =
      publisherPipeline.apply("Read synthetic data", Read.from(boundedSource));

  // Kafka needs both the bootstrap server address and the topic to be set.
  boolean kafkaConfigured =
      options.getKafkaBootstrapServerAddress() != null && options.getKafkaTopic() != null;
  if (kafkaConfigured) {
    writeToKafka(records);
  }

  if (options.getPubSubTopic() != null) {
    writeToPubSub(records);
  }

  if (allKinesisOptionsConfigured()) {
    writeToKinesis(records);
  }

  publisherPipeline.run().waitUntilFinish();
}
Use of org.apache.beam.sdk.io.synthetic.SyntheticSourceOptions in the Apache Beam project.
Class CoGroupByKeyLoadTest, method loadTest.
/**
 * Assembles the CoGroupByKey load-test pipeline from two synthetic inputs.
 *
 * <p>Both branches follow the same shape: read from a synthetic source, record start-time
 * metrics with {@code runtimeMonitor}, apply windowing, then apply the optional synthetic step.
 * The main input is read with {@code sourceOptions} and windowed via the single-argument
 * {@code applyWindowing}; the co-input is read with options parsed from
 * {@code options.getCoSourceOptions()} and windowed using
 * {@code options.getCoInputWindowDurationSec()}. The same optional step, built from
 * {@code options.getStepOptions()}, is used for both branches.
 *
 * <p>The two branches are then joined with {@code CoGroupByKey}, passed through
 * {@code UngroupAndReiterate} (configured with {@code options.getIterations()}), a
 * {@code ByteMonitor} reporting under {@code "totalBytes.count"}, and finally
 * {@code runtimeMonitor} again to record end-time metrics.
 *
 * @throws IOException declared by the signature; presumably raised while parsing the JSON
 *     option strings (confirm against {@code fromJsonString} / {@code createStep})
 */
@Override
void loadTest() throws IOException {
  SyntheticSourceOptions coInputSourceOptions =
      fromJsonString(options.getCoSourceOptions(), SyntheticSourceOptions.class);
  Optional<SyntheticStep> optionalStep = createStep(options.getStepOptions());

  // Main input branch.
  PCollection<KV<byte[], byte[]>> rawInput =
      pipeline.apply("Read input", readFromSource(sourceOptions));
  PCollection<KV<byte[], byte[]>> timedInput =
      rawInput.apply("Collect start time metrics (input)", ParDo.of(runtimeMonitor));
  PCollection<KV<byte[], byte[]>> windowedInput = applyWindowing(timedInput);
  PCollection<KV<byte[], byte[]>> preparedInput =
      applyStepIfPresent(windowedInput, "Synthetic step for input", optionalStep);

  // Co-input branch; uses its own source options and window duration.
  PCollection<KV<byte[], byte[]>> rawCoInput =
      pipeline.apply("Read co-input", readFromSource(coInputSourceOptions));
  PCollection<KV<byte[], byte[]>> timedCoInput =
      rawCoInput.apply("Collect start time metrics (co-input)", ParDo.of(runtimeMonitor));
  PCollection<KV<byte[], byte[]>> windowedCoInput =
      applyWindowing(timedCoInput, options.getCoInputWindowDurationSec());
  PCollection<KV<byte[], byte[]>> preparedCoInput =
      applyStepIfPresent(windowedCoInput, "Synthetic step for co-input", optionalStep);

  // Join both branches by key, then measure bytes and end time.
  KeyedPCollectionTuple.of(INPUT_TAG, preparedInput)
      .and(CO_INPUT_TAG, preparedCoInput)
      .apply("CoGroupByKey", CoGroupByKey.create())
      .apply("Ungroup and reiterate", ParDo.of(new UngroupAndReiterate(options.getIterations())))
      .apply("Collect total bytes", ParDo.of(new ByteMonitor(METRICS_NAMESPACE, "totalBytes.count")))
      .apply("Collect end time metrics", ParDo.of(runtimeMonitor));
}
Aggregations