use of edu.iu.dsc.tws.api.tset.TSetContext in project twister2 by DSC-SPIDAL.
the class TSetTeraSort method execute.
/**
 * Runs a TeraSort-style job: every source instance produces random 10-byte keys paired with a
 * shared 90-byte value, the tuples are routed with a range partitioner over the first two key
 * bytes and gathered by key with a byte-array comparator (disk-backed via {@code useDisk()}),
 * and a final {@code forEach} logs a warning if a key arrives that is smaller than the
 * previously seen key.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  final int parallelism = env.getConfig().getIntegerValue(PARAM_PARALLELISM);
  final int dataSize = env.getConfig().getIntegerValue(PARAM_DATA_SIZE_GB);

  KeyedSourceTSet<byte[], byte[]> keyedSource = env.createKeyedSource(
      new SourceFunc<Tuple<byte[], byte[]>>() {
        // keys generated up-front in prepare() and handed out one by one in next()
        private Queue<byte[]> keys = new LinkedList<>();
        // single 90-byte value array; the same instance is attached to every tuple
        private byte[] data = new byte[90];

        @Override
        public void prepare(TSetContext context) {
          Arrays.fill(data, (byte) 1);
          Random random = new Random();

          // Do the multiplication in floating point from the very first factor. The previous
          // expression (dataSize * 1024 * 1024 * 1024 * 1.0d) multiplied in int arithmetic
          // first and wrapped around for any dataSize >= 2 (2 * 2^30 does not fit in an int),
          // which yielded a negative or otherwise wrong tuple count.
          final double totalBytes = dataSize * 1024.0d * 1024.0d * 1024.0d;
          // each tuple is a 10-byte key plus the 90-byte value, i.e. 100 bytes
          int noOfTuples = (int) (totalBytes / parallelism / 100);

          if (context.getIndex() == 0) {
            LOG.info(noOfTuples + " tuples will be produced in each source");
          }
          for (int i = 0; i < noOfTuples; i++) {
            byte[] key = new byte[10];
            random.nextBytes(key);
            keys.add(key);
          }
        }

        @Override
        public boolean hasNext() {
          return !keys.isEmpty();
        }

        @Override
        public Tuple<byte[], byte[]> next() {
          return new Tuple<>(keys.poll(), data);
        }
      }, parallelism);

  keyedSource.keyedGather(new PartitionFunc<byte[]>() {
    // number of distinct two-byte key prefixes assigned to each destination
    protected int keysToOneTask;
    // destination ids in ascending order, indexed by key range
    protected int[] destinationsList;

    @Override
    public void prepare(Set<Integer> sources, Set<Integer> destinations) {
      // considering only most significant bytes of array
      int totalPossibilities = 256 * 256;
      this.keysToOneTask = (int) Math.ceil(totalPossibilities / (double) destinations.size());
      this.destinationsList = new int[destinations.size()];
      int index = 0;
      for (int i : destinations) {
        destinationsList[index++] = i;
      }
      // sorted so that increasing key ranges map to increasing destination ids
      Arrays.sort(this.destinationsList);
    }

    // Maps the first two key bytes (read as an unsigned 16-bit number) to a range index.
    int getIndex(byte[] array) {
      int key = ((array[0] & 0xff) << 8) + (array[1] & 0xff);
      return key / keysToOneTask;
    }

    @Override
    public int partition(int sourceIndex, byte[] val) {
      return this.destinationsList[this.getIndex(val)];
    }

    @Override
    public void commit(int source, int partition) {
    }
  }, (left, right) -> ByteArrayComparator.getInstance().compare(left, right))
      .useDisk()
      .forEach(new ApplyFunc<Tuple<byte[], Iterator<byte[]>>>() {
        // key of the previously received tuple; null until the first tuple arrives
        private byte[] previousKey;

        @Override
        public void apply(Tuple<byte[], Iterator<byte[]>> data) {
          if (previousKey != null) {
            int compare = ByteArrayComparator.getInstance().compare(previousKey, data.getKey());
            // a positive result means the previous key is greater than the current one
            if (compare > 0) {
              LOG.warning("Unsorted keys detected. TeraSort has failed. " + compare);
            }
          }
          previousKey = data.getKey();
        }
      });
}
use of edu.iu.dsc.tws.api.tset.TSetContext in project twister2 by DSC-SPIDAL.
the class SetSchemaExample method execute.
/**
 * Builds a small batch pipeline (source, all-reduce, map, map-to-tuple) and logs the input and
 * output schemas reported by each function's {@code TSetContext}, once without and once with
 * an explicitly supplied schema at every stage.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);

  // Source that yields exactly one value (1) per instance.
  BaseSourceFunc<Integer> singleValueSource = new BaseSourceFunc<Integer>() {
    private int emitted = 0;

    @Override
    public void prepare(TSetContext ctx) {
      super.prepare(ctx);
      LOG.info("schemas0: " + ctx.getInputSchema() + " -> " + ctx.getOutputSchema());
    }

    @Override
    public boolean hasNext() {
      return emitted == 0;
    }

    @Override
    public Integer next() {
      emitted += 1;
      return emitted;
    }
  };
  SourceTSet<Integer> numbers = env.createSource(singleValueSource, 2).setName("src");

  numbers.direct().forEach(value -> LOG.info("out0: " + value));
  numbers.withSchema(PrimitiveSchemas.INTEGER).direct()
      .forEach(value -> LOG.info("out1: " + value));

  // Sum the source values, then render the sum as a string.
  BaseMapFunc<Integer, String> intToString = new BaseMapFunc<Integer, String>() {
    @Override
    public void prepare(TSetContext ctx) {
      super.prepare(ctx);
      LOG.info("schemas1: " + ctx.getInputSchema() + " -> " + ctx.getOutputSchema());
    }

    @Override
    public String map(Integer input) {
      return input.toString();
    }
  };
  ComputeTSet<String> asStrings = numbers.allReduce(Integer::sum).map(intToString);

  asStrings.direct().forEach(text -> LOG.info("out2: " + text));
  asStrings.withSchema(PrimitiveSchemas.STRING).direct()
      .forEach(text -> LOG.info("out3: " + text));

  // Pair each string with its parsed integer value.
  BaseMapFunc<String, Tuple<String, Integer>> stringToPair =
      new BaseMapFunc<String, Tuple<String, Integer>>() {
        @Override
        public void prepare(TSetContext ctx) {
          super.prepare(ctx);
          LOG.info("schemas2: " + ctx.getInputSchema() + " -> " + ctx.getOutputSchema());
        }

        @Override
        public Tuple<String, Integer> map(String input) {
          return new Tuple<>(input, Integer.parseInt(input));
        }
      };
  KeyedTSet<String, Integer> pairs = asStrings.mapToTuple(stringToPair);

  pairs.keyedDirect().forEach(pair -> LOG.info("out4: " + pair));
  pairs.withSchema(new KeyedSchema(MessageTypes.STRING, MessageTypes.INTEGER)).keyedDirect()
      .forEach(pair -> LOG.info("out5: " + pair));
}
use of edu.iu.dsc.tws.api.tset.TSetContext in project twister2 by DSC-SPIDAL.
the class CheckpointingExample method execute.
/**
 * Persists the output of one dummy source, then feeds the persisted data as the named input
 * {@code "in"} into a compute step over a second dummy source. The compute step pairs up the
 * elements of both streams into a single string, which is logged; the persisted elements are
 * logged as well.
 *
 * @param workerEnvironment the worker environment used to initialise the checkpointing
 *                          TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
  BatchChkPntEnvironment env = TSetEnvironment.initCheckpointing(workerEnvironment);
  int count = 5;

  SourceTSet<Integer> firstSource = dummySource(env, count, 100 * env.getWorkerID());
  PersistedTSet<Integer> persisted = firstSource.direct().persist();

  SourceTSet<Integer> secondSource = dummySource(env, count, 100 * env.getWorkerID() + 10);

  // Pairs elements of the direct input with elements of the side input registered as "in".
  BaseComputeFunc<Iterator<Integer>, String> pairWithPersisted =
      new BaseComputeFunc<Iterator<Integer>, String>() {
        private DataPartitionConsumer<Integer> persistedValues;

        @Override
        public void prepare(TSetContext ctx) {
          super.prepare(ctx);
          persistedValues = (DataPartitionConsumer<Integer>) ctx.getInput("in").getConsumer();
        }

        @Override
        public String compute(Iterator<Integer> input) {
          StringBuilder pairs = new StringBuilder();
          // stops as soon as either side runs out of elements
          while (input.hasNext() && persistedValues.hasNext()) {
            Integer left = input.next();
            Integer right = persistedValues.next();
            pairs.append("(").append(left).append(",").append(right).append(") ");
          }
          return pairs.toString();
        }
      };

  secondSource.direct()
      .compute(pairWithPersisted)
      .addInput("in", persisted)
      .direct()
      .forEach(line -> LOG.info(line));

  persisted.direct().forEach(value -> LOG.info(value.toString()));
}
use of edu.iu.dsc.tws.api.tset.TSetContext in project twister2 by DSC-SPIDAL.
the class WordCount method execute.
/**
 * Streaming word count: an endless source emits words picked at random from a pre-generated
 * sample list, the words are partitioned with a {@code HashingPartitioner}, and a sink keeps
 * a running count per word and logs every update.
 *
 * @param workerEnvironment the worker environment used to initialise the streaming
 *                          TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
  StreamingEnvironment cEnv = TSetEnvironment.initStreaming(workerEnvironment);

  // source: never runs dry, each call returns a random sample word
  SourceFunc<String> randomWordSource = new SourceFunc<String>() {
    // pool of words to draw from, filled in prepare()
    private List<String> sampleWords = new ArrayList<>();
    // the random used to pick the words
    private Random random;

    @Override
    public void prepare(TSetContext context) {
      this.random = new Random();
      RandomString generator = new RandomString(MAX_CHARS, random, RandomString.ALPHANUM);
      for (int n = 0; n < NO_OF_SAMPLE_WORDS; n++) {
        sampleWords.add(generator.nextRandomSizeString());
      }
    }

    @Override
    public boolean hasNext() {
      return true;
    }

    @Override
    public String next() {
      int pick = random.nextInt(sampleWords.size());
      return sampleWords.get(pick);
    }
  };

  // sink: aggregates the occurrences seen so far for every word
  SinkFunc<String> countingSink = new SinkFunc<String>() {
    // keep track of the counts
    private Map<String, Integer> counts = new HashMap<>();
    private TSetContext context;

    @Override
    public void prepare(TSetContext ctx) {
      this.context = ctx;
    }

    @Override
    public boolean add(String word) {
      // starts at 1 for an unseen word, otherwise increments the stored count
      int count = counts.merge(word, 1, Integer::sum);
      LOG.log(Level.INFO, String.format("%d Word %s count %s", context.getIndex(), word, count));
      return true;
    }
  };

  cEnv.createSource(randomWordSource, 4)
      .partition(new HashingPartitioner<>())
      .sink(countingSink);

  // start executing the streaming graph
  cEnv.run();
}
Aggregations