Use of org.apache.beam.runners.twister2.Twister2PipelineOptions in project twister2 by DSC-SPIDAL.
From the class WordCount, method execute.
/**
 * Worker entry point: initializes a batch TSet environment from the given worker environment,
 * reads the {@code "input"} and {@code "output"} configuration values, builds the Twister2
 * pipeline options and hands everything to {@code runWordCount}.
 *
 * @param workerEnv the worker environment used to initialize the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  final BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

  // Both values are looked up in the environment's configuration by fixed keys.
  final Config jobConfig = batchEnv.getConfig();
  final String inputValue = jobConfig.getStringValue("input");
  final String outputValue = jobConfig.getStringValue("output");

  System.out.println("Rank " + batchEnv.getWorkerID());

  // Pipeline options carry the TSet environment and select the legacy Twister2 runner.
  final Twister2PipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(Twister2PipelineOptions.class);
  pipelineOptions.setTSetEnvironment(batchEnv);
  // NOTE(review): pipelineOptions is already declared as Twister2PipelineOptions, so the
  // as(...) view looks redundant -- confirm before simplifying.
  pipelineOptions.as(Twister2PipelineOptions.class).setRunner(Twister2LegacyRunner.class);

  runWordCount(pipelineOptions, inputValue, outputValue);
}
Use of org.apache.beam.runners.twister2.Twister2PipelineOptions in project twister2 by DSC-SPIDAL.
From the class MinimalWordCount, method execute.
/**
 * Worker entry point: builds a word-count pipeline on the legacy Twister2 runner and blocks
 * until the pipeline run has finished.
 *
 * <p>The pipeline reads text from {@code gs://apache-beam-samples/shakespeare/*}, counts the
 * occurrences of each word and writes {@code "word: count"} lines to the {@code wordcounts}
 * output prefix.
 *
 * @param workerEnv the worker environment used to initialize the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  final BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  System.out.println("Rank " + batchEnv.getWorkerID());

  final Twister2PipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(Twister2PipelineOptions.class);
  pipelineOptions.setTSetEnvironment(batchEnv);
  // NOTE(review): pipelineOptions is already declared as Twister2PipelineOptions, so the
  // as(...) view looks redundant -- confirm before simplifying.
  pipelineOptions.as(Twister2PipelineOptions.class).setRunner(Twister2LegacyRunner.class);

  // Create the Pipeline object with the options defined above.
  final Pipeline pipeline = Pipeline.create(pipelineOptions);

  pipeline
      // Root transform: read the input text files (a public data set consisting of the
      // complete works of Shakespeare).
      .apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
      // Split each element on runs of non-letter characters to obtain individual words.
      .apply(
          FlatMapElements.into(TypeDescriptors.strings())
              .via((String line) -> Arrays.asList(line.split("[^\\p{L}]+"))))
      // Drop the empty strings the split can produce.
      .apply(Filter.by((String token) -> !token.isEmpty()))
      // Count the occurrences of each distinct word.
      .apply(Count.perElement())
      // Render every (word, count) pair as a printable "word: count" string.
      .apply(
          MapElements.into(TypeDescriptors.strings())
              .via((KV<String, Long> entry) -> entry.getKey() + ": " + entry.getValue()))
      // Write the formatted strings using the "wordcounts" output prefix.
      .apply(TextIO.write().to("wordcounts"));

  pipeline.run().waitUntilFinish();
}
Use of org.apache.beam.runners.twister2.Twister2PipelineOptions in project twister2 by DSC-SPIDAL.
From the class ReadSourceTest, method execute.
/**
 * Worker entry point: runs a small pipeline on the legacy Twister2 runner that generates the
 * sequence 0 (inclusive) to 10, converts every element to its string form and writes the
 * strings under the {@code /tmp/testdir/part} output prefix.
 *
 * <p>The output prefix is resolved before the pipeline is built. A malformed result path now
 * fails immediately with the original exception as the cause, instead of being logged and
 * leaving the pipeline to run without its write step.
 *
 * @param workerEnv the worker environment used to initialize the batch TSet environment
 * @throws IllegalArgumentException if the result path cannot be parsed as a URI
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  System.out.println("Rank " + env.getWorkerID());

  Twister2PipelineOptions options = PipelineOptionsFactory.as(Twister2PipelineOptions.class);
  options.setTSetEnvironment(env);
  options.as(Twister2PipelineOptions.class).setRunner(Twister2LegacyRunner.class);

  String resultPath = "/tmp/testdir";
  // Resolve the output location first so a bad path fails fast, before any pipeline is built.
  final String outputPrefix;
  try {
    outputPrefix = new URI(resultPath).getPath() + "/part";
  } catch (URISyntaxException e) {
    // Keep the cause: silently continuing would run the pipeline with no sink attached.
    throw new IllegalArgumentException("Invalid result path: " + resultPath, e);
  }

  Pipeline p = Pipeline.create(options);
  PCollection<String> result =
      p.apply(GenerateSequence.from(0).to(10))
          .apply(
              ParDo.of(
                  new DoFn<Long, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      // Emit the string form of each generated number.
                      c.output(c.element().toString());
                    }
                  }));
  result.apply(TextIO.write().to(outputPrefix));

  p.run();
  System.out.println("Result " + result.toString());
}
Aggregations