Use of edu.iu.dsc.tws.tset.fn.HashingPartitioner in project twister2 by DSC-SPIDAL.
The class BTKeyedGatherExample, method buildTaskGraph.
@Override
public ComputeGraphBuilder buildTaskGraph() {
  List<Integer> taskStages = jobParameters.getTaskStages();
  int sourceParallelism = taskStages.get(0);
  int sinkParallelism = taskStages.get(1);
  MessageType keyType = MessageTypes.INTEGER;
  MessageType dataType = MessageTypes.INTEGER_ARRAY;
  String edge = "edge";
  // source emits integer-keyed, integer-array records on the "edge" edge
  BaseSource g = new SourceTask(edge, true);
  // sink receives the keyed-gathered (grouped) records
  ICompute r = new KeyedGatherGroupedSinkTask();
  computeGraphBuilder.addSource(SOURCE, g, sourceParallelism);
  computeConnection = computeGraphBuilder.addCompute(SINK, r, sinkParallelism);
  // connect source to sink with a keyed-gather, routing records by key hash
  computeConnection.keyedGather(SOURCE)
      .viaEdge(edge)
      .withKeyType(keyType)
      .withTaskPartitioner(new HashingPartitioner())
      .withDataType(dataType);
  return computeGraphBuilder;
}
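The HashingPartitioner passed to withTaskPartitioner decides which sink instance each key is routed to. As a rough, stand-alone sketch of the idea (the class and method below are hypothetical, not the twister2 API), hash-based partitioning maps a key's hash code onto the set of destination task ids, so the same key always reaches the same task:

import java.util.Arrays;
import java.util.List;

// Hypothetical sketch of hash-based task partitioning; not the twister2 implementation.
public class HashPartitionSketch {

  // Pick a destination task id deterministically from the key's hash code.
  static int partition(Object key, List<Integer> destinations) {
    int index = Math.abs(key.hashCode() % destinations.size());
    return destinations.get(index);
  }

  public static void main(String[] args) {
    List<Integer> sinkTasks = Arrays.asList(0, 1, 2);
    // The same key always lands on the same sink task, which keyed operations rely on.
    System.out.println(partition(42, sinkTasks));
    System.out.println(partition(42, sinkTasks));
    System.out.println(partition(7, sinkTasks));
  }
}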
Use of edu.iu.dsc.tws.tset.fn.HashingPartitioner in project twister2 by DSC-SPIDAL.
The class WordCount, method execute.
@Override
public void execute(WorkerEnvironment workerEnvironment) {
  StreamingEnvironment cEnv = TSetEnvironment.initStreaming(workerEnvironment);
  // create source and aggregator
  cEnv.createSource(new SourceFunc<String>() {

    // sample words
    private List<String> sampleWords = new ArrayList<>();

    // the random used to pick the words
    private Random random;

    @Override
    public void prepare(TSetContext context) {
      this.random = new Random();
      RandomString randomString = new RandomString(MAX_CHARS, random, RandomString.ALPHANUM);
      for (int i = 0; i < NO_OF_SAMPLE_WORDS; i++) {
        sampleWords.add(randomString.nextRandomSizeString());
      }
    }

    @Override
    public boolean hasNext() {
      // streaming source: keeps emitting words indefinitely
      return true;
    }

    @Override
    public String next() {
      return sampleWords.get(random.nextInt(sampleWords.size()));
    }
  }, 4).partition(new HashingPartitioner<>()).sink(new SinkFunc<String>() {

    // keep track of the counts
    private Map<String, Integer> counts = new HashMap<>();

    private TSetContext context;

    @Override
    public void prepare(TSetContext context) {
      this.context = context;
    }

    @Override
    public boolean add(String word) {
      // start at 1 for a new word, otherwise increment the existing count
      int count = 1;
      if (counts.containsKey(word)) {
        count = counts.get(word);
        count++;
      }
      counts.put(word, count);
      LOG.log(Level.INFO, String.format("%d Word %s count %s", context.getIndex(), word, count));
      return true;
    }
  });
  // start executing the streaming graph
  cEnv.run();
}
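The counting done in add can also be written more compactly with Map.merge. The snippet below is only an equivalent plain-Java sketch of the sink's bookkeeping, not part of the twister2 example:

import java.util.HashMap;
import java.util.Map;

// Stand-alone sketch: the same per-word counting as the SinkFunc above,
// using Map.merge instead of an explicit containsKey check.
public class WordCountSinkSketch {

  private final Map<String, Integer> counts = new HashMap<>();

  // Returns the updated count for the word, mirroring add(String) above.
  public int add(String word) {
    return counts.merge(word, 1, Integer::sum);
  }

  public static void main(String[] args) {
    WordCountSinkSketch sink = new WordCountSinkSketch();
    System.out.println(sink.add("tset"));   // 1
    System.out.println(sink.add("tset"));   // 2
    System.out.println(sink.add("graph"));  // 1
  }
}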
Use of edu.iu.dsc.tws.tset.fn.HashingPartitioner in project twister2 by DSC-SPIDAL.
The class FileBasedWordCount, method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  int sourcePar = (int) env.getConfig().get("PAR");
  // read the file line by line using a single worker
  SourceTSet<String> lines = env.createSource(new WordCountFileSource(), 1);
  // distribute the lines among the workers and perform a flatmap operation to extract words
  ComputeTSet<String> words =
      lines.partition(new HashingPartitioner<>(), sourcePar)
          .flatmap((FlatMapFunc<String, String>) (l, collector) -> {
            StringTokenizer itr = new StringTokenizer(l);
            while (itr.hasMoreTokens()) {
              collector.collect(itr.nextToken());
            }
          });
  // attach a count of 1 to each word
  KeyedTSet<String, Integer> groupedWords = words.mapToTuple(w -> new Tuple<>(w, 1));
  // perform reduce by key at each worker
  KeyedReduceTLink<String, Integer> keyedReduce = groupedWords.keyedReduce(Integer::sum);
  // gather the results to worker0 (there is a dummy map op here to pass the values to edges)
  // and write to a file
  keyedReduce.map(i -> i).gather().forEach(new WordcountFileWriter());
}
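To see what the batch pipeline computes end to end, the plain-Java sketch below reproduces the same dataflow (tokenize lines into words, attach a count of 1, reduce by key) without any twister2 types; it is illustrative only:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

// Illustrative sketch of the word-count dataflow above in plain Java; not twister2 code.
public class WordCountSketch {

  public static void main(String[] args) {
    List<String> lines = Arrays.asList("to be or not to be", "be fast");

    // flatmap: lines -> words
    List<String> words = new ArrayList<>();
    for (String line : lines) {
      StringTokenizer itr = new StringTokenizer(line);
      while (itr.hasMoreTokens()) {
        words.add(itr.nextToken());
      }
    }

    // mapToTuple + keyedReduce(Integer::sum): word -> total count
    Map<String, Integer> counts = new HashMap<>();
    for (String w : words) {
      counts.merge(w, 1, Integer::sum);
    }

    counts.forEach((w, c) -> System.out.println(w + " " + c));
  }
}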