use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class TSetCachingExample method execute.
/**
 * Demonstrates attaching a cached TSet to another computation as a named input.
 *
 * <p>A first source yields the integers 0 through 9; every value is multiplied by 5 and then
 * increased by 2, and the outcome is cached. A second source yielding the same integers is
 * then paired, element by element, with the cached values (registered under the input name
 * {@code "x"}) and each sum is logged.
 *
 * @param workerEnv worker environment from which the batch environment is initialised
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  LOG.info(String.format("Hello from worker %d", env.getWorkerID()));

  // x: yields 0, 1, ..., 9
  SourceTSet<Integer> xSource = env.createSource(new SourceFunc<Integer>() {
    private int emitted = 0;

    @Override
    public boolean hasNext() {
      return emitted < 10;
    }

    @Override
    public Integer next() {
      return emitted++;
    }
  }, 4);

  // first stage: x * 5, second stage: + 2 (the second stage sees Object elements, hence the cast)
  ComputeTSet<Object> scaledAndShifted = xSource.direct().compute((values, collector) -> {
    while (values.hasNext()) {
      collector.collect(values.next() * 5);
    }
  }).direct().compute((values, collector) -> {
    while (values.hasNext()) {
      collector.collect((int) values.next() + 2);
    }
  });

  // Calling cache() makes twister2 run everything up to this point and keep the result in
  // memory. Cached TSets can be added as inputs for other TSets and operations.
  CachedTSet<Object> cachedX = scaledAndShifted.cache();

  // z: yields 0, 1, ..., 9
  SourceTSet<Integer> zSource = env.createSource(new SourceFunc<Integer>() {
    private int emitted = 0;

    @Override
    public boolean hasNext() {
      return emitted < 10;
    }

    @Override
    public Integer next() {
      return emitted++;
    }
  }, 4);

  ComputeTSet<Integer> sums = zSource.direct().compute(
      new ComputeCollectorFunc<Iterator<Integer>, Integer>() {
        private DataPartitionConsumer<Integer> cachedValues;

        @Override
        public void prepare(TSetContext context) {
          // fetch the consumer of the input registered below under the name "x"
          this.cachedValues =
              (DataPartitionConsumer<Integer>) context.getInput("x").getConsumer();
        }

        @Override
        public void compute(Iterator<Integer> zValues, RecordCollector<Integer> output) {
          // one cached value is consumed for every z value; the cached one is read first
          while (zValues.hasNext()) {
            int x = cachedValues.next();
            int z = zValues.next();
            output.collect(x + z);
          }
        }
      });
  sums.addInput("x", cachedX);

  sums.direct().forEach(sum -> {
    LOG.info("(x * 5) + 2 + z =" + sum);
  });
}
use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class TSetCommunicationExample method execute.
/**
 * Builds a small pipeline that transforms a stream of integers and logs their reduced sum.
 *
 * <p>The source yields the integers 0 through 9. Each value is multiplied by 5, then
 * increased by 2, and the results are combined pairwise by addition; every value delivered
 * by the reduce link is logged with the prefix {@code "SUM="}.
 *
 * @param workerEnv worker environment from which the batch environment is initialised
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  LOG.info(String.format("Hello from worker %d", env.getWorkerID()));

  // yields 0, 1, ..., 9
  SourceTSet<Integer> numbers = env.createSource(new SourceFunc<Integer>() {
    private int emitted = 0;

    @Override
    public boolean hasNext() {
      return emitted < 10;
    }

    @Override
    public Integer next() {
      return emitted++;
    }
  }, 4);

  numbers.direct()
      .compute((values, out) -> {
        // stage 1: multiply by 5
        values.forEachRemaining(v -> out.collect(v * 5));
      })
      .direct()
      .compute((values, out) -> {
        // stage 2: add 2 (elements arrive as Object here, hence the cast)
        values.forEachRemaining(v -> out.collect((int) v + 2));
      })
      .reduce((left, right) -> (int) left + (int) right)
      .forEach(total -> {
        LOG.info("SUM=" + total);
      });
}
use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class TSetGatherExample method execute.
/**
 * Runs a gather over a test source and checks the gathered output.
 *
 * <p>The sink walks every gathered tuple and keeps the value of the last one. That value is
 * stored in {@code experimentData}, logged, and then checked via
 * {@code verify(OperationNames.GATHER)}. A {@code VerificationException} is logged rather than
 * propagated, and the sink function always returns {@code true}.
 *
 * @param workerEnv worker environment passed to the superclass and used to initialise the
 *                  batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  super.execute(workerEnv);
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);

  // task stage 0 is handed to createSource below; stage 1 is read but not used in this method
  int sourceParallelism = jobParameters.getTaskStages().get(0);
  int sinkParallelism = jobParameters.getTaskStages().get(1);

  SourceTSet<int[]> testSource =
      env.createSource(new TestBaseSource(), sourceParallelism).setName("Source");
  GatherTLink<int[]> gathered = testSource.gather();

  SinkTSet<Iterator<Tuple<Integer, int[]>>> gatherSink = gathered.sink(
      (SinkFunc<Iterator<Tuple<Integer, int[]>>>) tuples -> {
        // drain the iterator; only the final tuple's value is retained
        int[] lastValue = null;
        while (tuples.hasNext()) {
          lastValue = tuples.next().getValue();
        }

        experimentData.setOutput(lastValue);
        LOG.info("Results " + Arrays.toString(lastValue));

        try {
          verify(OperationNames.GATHER);
        } catch (VerificationException e) {
          LOG.info("Exception Message : " + e.getMessage());
        }
        return true;
      });

  env.run(gatherSink);
}
use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class WordCount method execute.
/**
 * Configures pipeline options for the Twister2 legacy runner and starts the word count.
 *
 * <p>The {@code "input"} and {@code "output"} string values are read from the batch
 * environment's config and forwarded, together with the options, to {@code runWordCount}.
 * The worker id is printed to standard output before the options are built.
 *
 * @param workerEnv worker environment from which the batch environment is initialised
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

  // job arguments taken from the environment's configuration
  Config jobConfig = batchEnv.getConfig();
  String inputArg = jobConfig.getStringValue("input");
  String outputArg = jobConfig.getStringValue("output");

  System.out.println("Rank " + batchEnv.getWorkerID());

  // options carry the TSet environment and select the runner class
  Twister2PipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(Twister2PipelineOptions.class);
  pipelineOptions.setTSetEnvironment(batchEnv);
  pipelineOptions.as(Twister2PipelineOptions.class).setRunner(Twister2LegacyRunner.class);

  runWordCount(pipelineOptions, inputArg, outputArg);
}
use of edu.iu.dsc.tws.tset.env.BatchEnvironment in project twister2 by DSC-SPIDAL.
the class MinimalWordCount method execute.
/**
 * Builds and runs a minimal word count pipeline with the Twister2 legacy runner.
 *
 * <p>The pipeline reads text from {@code gs://apache-beam-samples/shakespeare/*}, splits each
 * element on runs of non-letter characters, drops empty strings, counts the occurrences of
 * each remaining string, formats every count as {@code "key: value"} and writes the result
 * to {@code "wordcounts"}. The method waits until the pipeline run has finished.
 *
 * @param workerEnv worker environment from which the batch environment is initialised
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  System.out.println("Rank " + batchEnv.getWorkerID());

  Twister2PipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(Twister2PipelineOptions.class);
  pipelineOptions.setTSetEnvironment(batchEnv);
  pipelineOptions.as(Twister2PipelineOptions.class).setRunner(Twister2LegacyRunner.class);

  // The pipeline is created from the options assembled above.
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  pipeline
      // Concept #1: a root transform starts the pipeline. TextIO.Read yields a PCollection
      // in which each element is one line of the input text (a public data set holding the
      // complete works of Shakespeare).
      .apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
      // break every line into pieces separated by non-letter characters
      .apply(FlatMapElements.into(TypeDescriptors.strings())
          .via((String line) -> Arrays.asList(line.split("[^\\p{L}]+"))))
      // discard the empty strings produced by the split
      .apply(Filter.by((String token) -> !token.isEmpty()))
      // count how many times each distinct string occurs
      .apply(Count.perElement())
      // render each (string, count) pair as printable text
      .apply(MapElements.into(TypeDescriptors.strings())
          .via((KV<String, Long> entry) -> entry.getKey() + ": " + entry.getValue()))
      .apply(TextIO.write().to("wordcounts"));

  pipeline.run().waitUntilFinish();
}
Aggregations