Use of edu.iu.dsc.tws.tset.sets.batch.KeyedTSet in project twister2 by DSC-SPIDAL.
From the class BranchingExample, method execute.
/**
 * Builds a branching TSet graph: one source is mapped into two keyed sets, the keyed sets
 * are inner-joined, the join output is rendered as strings, and those strings are combined
 * (via union) with a "###"-prefixed copy of themselves before every element is logged.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  int parallelism = 2;

  SourceTSet<Integer> source = dummySource(batchEnv, COUNT, parallelism).setName("src0");

  // Both sides of the join come from the same source and are keyed by value modulo 2;
  // the right side carries value + 1.
  KeyedTSet<Integer, Integer> leftSide =
      source.mapToTuple(v -> new Tuple<>(v % 2, v)).setName("left");
  KeyedTSet<Integer, Integer> rightSide =
      source.mapToTuple(v -> new Tuple<>(v % 2, v + 1)).setName("right");

  JoinTLink<Integer, Integer, Integer> joined = leftSide
      .join(rightSide, CommunicationContext.JoinType.INNER, Integer::compareTo)
      .setName("join");

  // Render every joined tuple as "(key left right)".
  ComputeTSet<String> rendered = joined
      .map(jt -> "(" + jt.getKey() + " " + jt.getLeftValue() + " " + jt.getRightValue() + ")")
      .setName("map***");

  // Second branch off the rendered set: the same strings with a "###" prefix.
  ComputeTSet<String> prefixed = rendered
      .direct()
      .map(text -> "###" + text)
      .setName("map@@");

  ComputeTSet<String> combined = rendered.union(prefixed).setName("union");
  combined.direct().forEach(text -> LOG.info(text));
}
Use of edu.iu.dsc.tws.tset.sets.batch.KeyedTSet in project twister2 by DSC-SPIDAL.
From the class FileBasedWordCount, method execute.
/**
 * Word count over a file: reads lines from a {@code WordCountFileSource}, splits them into
 * words, pairs each word with a count of 1, reduces the counts per key with
 * {@code Integer::sum}, gathers the result and hands it to a {@code WordcountFileWriter}.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  int wordParallelism = (int) batchEnv.getConfig().get("PAR");

  // The file is read line by line by a source created with parallelism 1.
  SourceTSet<String> fileLines = batchEnv.createSource(new WordCountFileSource(), 1);

  // Spread the lines over the configured parallelism, then break each line into its
  // whitespace-delimited tokens.
  ComputeTSet<String> tokens = fileLines
      .partition(new HashingPartitioner<>(), wordParallelism)
      .flatmap((FlatMapFunc<String, String>) (line, out) -> {
        for (StringTokenizer tokenizer = new StringTokenizer(line);
             tokenizer.hasMoreTokens();) {
          out.collect(tokenizer.nextToken());
        }
      });

  // Pair every word with an initial count of 1.
  KeyedTSet<String, Integer> wordOnes = tokens.mapToTuple(word -> new Tuple<>(word, 1));

  // Sum the counts that share the same word.
  KeyedReduceTLink<String, Integer> countsPerWord = wordOnes.keyedReduce(Integer::sum);

  // Gather the results and write them to a file. The identity map is a dummy operation
  // that exists only to pass the values on to the edges.
  countsPerWord.map(entry -> entry).gather().forEach(new WordcountFileWriter());
}
Use of edu.iu.dsc.tws.tset.sets.batch.KeyedTSet in project twister2 by DSC-SPIDAL.
From the class JoinExample, method execute.
/**
 * Inner-joins two keyed sets built from two separate dummy sources, logging the contents
 * of each side and then the joined output, each line prefixed with this worker's id.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  int parallelism = 2;
  int workerId = batchEnv.getWorkerID();

  // Left side: values keyed by value modulo 2, logged before the join is built.
  SourceTSet<Integer> leftSource = dummySource(batchEnv, COUNT, parallelism).setName("src0");
  KeyedTSet<Integer, Integer> leftSide =
      leftSource.mapToTuple(v -> new Tuple<>(v % 2, v)).setName("left");
  leftSide.keyedDirect().forEach(entry -> LOG.info(workerId + "L " + entry.toString()));

  // Right side: built the same way from its own source.
  SourceTSet<Integer> rightSource = dummySource(batchEnv, COUNT, parallelism).setName("src1");
  KeyedTSet<Integer, Integer> rightSide =
      rightSource.mapToTuple(v -> new Tuple<>(v % 2, v)).setName("right");
  rightSide.keyedDirect().forEach(entry -> LOG.info(workerId + "R " + entry.toString()));

  JoinTLink<Integer, Integer, Integer> joined = leftSide
      .join(rightSide, CommunicationContext.JoinType.INNER, Integer::compareTo)
      .setName("join");
  joined.forEach(result -> LOG.info(workerId + "out: " + result.toString()));
}
Aggregations