Use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.
The class ArrowTSetSourceExample, method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  Config config = env.getConfig();
  String csvInputDirectory = config.getStringValue(DataObjectConstants.DINPUT_DIRECTORY);
  String arrowInputDirectory = config.getStringValue(DataObjectConstants.ARROW_DIRECTORY);
  String arrowFileName = config.getStringValue(DataObjectConstants.FILE_NAME);
  int workers = config.getIntegerValue(DataObjectConstants.WORKERS);
  int parallel = config.getIntegerValue(DataObjectConstants.PARALLELISM_VALUE);
  int dsize = config.getIntegerValue(DataObjectConstants.DSIZE);
  LOG.info("arrow input file:" + arrowFileName + "\t" + arrowInputDirectory + "\t"
      + csvInputDirectory + "\t" + workers + "\t" + parallel);
  Schema schema = makeSchema();
  // Write: parse the first CSV column to Integer and sink it into an Arrow file.
  SourceTSet<String[]> csvSource
      = env.createCSVSource(csvInputDirectory, dsize, parallel, "split");
  SinkTSet<Iterator<Integer>> sinkTSet = csvSource
      .direct()
      .map((MapFunc<String[], Integer>) input -> Integer.parseInt(input[0]))
      .direct()
      .sink(new ArrowBasedSinkFunction<>(arrowInputDirectory, arrowFileName, schema.toJson()));
  env.run(sinkTSet);
  // Read: source the Arrow file back and collect the integers per partition.
  env.createArrowSource(arrowInputDirectory, arrowFileName, parallel, schema.toJson())
      .direct()
      .compute((ComputeFunc<Iterator<Object>, List<Integer>>) input -> {
        List<Integer> integers = new ArrayList<>();
        input.forEachRemaining(i -> integers.add((Integer) i));
        return integers;
      })
      .direct()
      .forEach(s -> LOG.info("Integer Array Size:" + s.size() + "\tvalues:" + s));
}
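The makeSchema() helper called above is not part of this listing. A minimal sketch of what it could look like, assuming the Arrow file stores a single nullable 32-bit signed integer column to match the Integer values the sink writes (the field name "int" is hypothetical):

import java.util.Collections;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;

// Hypothetical reconstruction of makeSchema(); the real helper is not shown here.
private Schema makeSchema() {
  // One nullable 32-bit signed int field, matching the parsed CSV values above.
  Field intField = new Field("int", FieldType.nullable(new ArrowType.Int(32, true)), null);
  return new Schema(Collections.singletonList(intField));
}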
Use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.
The class KGatherUngroupedExample, method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  SourceTSet<Integer> src = dummySource(env, COUNT, PARALLELISM);
  KeyedGatherUngroupedTLink<Integer, Integer> klink = src
      .mapToTuple(i -> new Tuple<>(i % 4, i))
      .keyedGatherUngrouped();
  LOG.info("test foreach");
  klink.forEach((ApplyFunc<Tuple<Integer, Integer>>) data ->
      LOG.info(data.getKey() + " -> " + data.getValue()));
  LOG.info("test map");
  klink.map((MapFunc<Tuple<Integer, Integer>, String>) input ->
      input.getKey() + " -> " + input.getValue())
      .direct()
      .forEach(s -> LOG.info("map: " + s));
  LOG.info("test compute");
  klink.compute((ComputeFunc<Iterator<Tuple<Integer, Integer>>, String>) input -> {
    StringBuilder sb = new StringBuilder();
    while (input.hasNext()) {
      Tuple<Integer, Integer> next = input.next();
      sb.append("[").append(next.getKey()).append("->").append(next.getValue()).append("]");
    }
    return sb.toString();
  }).direct().forEach(s -> LOG.info("compute: " + s));
  LOG.info("test computec");
  klink.compute((ComputeCollectorFunc<Iterator<Tuple<Integer, Integer>>, String>)
      (input, output) -> {
        while (input.hasNext()) {
          Tuple<Integer, Integer> next = input.next();
          output.collect(next.getKey() + " -> " + next.getValue() * 2);
        }
      }).direct().forEach(s -> LOG.info("computec: " + s));
}
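The dummySource helper is defined elsewhere in the twister2 example suite. A minimal sketch inferred from its call sites here and in KGatherExample below, where a four-argument overload adds a start offset; this is an assumption about the helper, not the project's actual code:

// Hypothetical reconstruction: each source task emits the integers
// start .. start + count - 1 at the given parallelism.
static SourceTSet<Integer> dummySource(BatchEnvironment env, int start, int count,
                                       int parallelism) {
  return env.createSource(new SourceFunc<Integer>() {

    private int i = 0;

    @Override
    public boolean hasNext() {
      return i < count;
    }

    @Override
    public Integer next() {
      return start + i++;
    }
  }, parallelism);
}

// Three-argument form used above, starting from 0.
static SourceTSet<Integer> dummySource(BatchEnvironment env, int count, int parallelism) {
  return dummySource(env, 0, count, parallelism);
}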
Use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.
The class HadoopTSet, method execute.
@Override
public void execute(Config config, JobAPI.Job job, IWorkerController workerController,
                    IPersistentVolume persistentVolume, IVolatileVolume volatileVolume) {
  int workerId = workerController.getWorkerInfo().getWorkerID();
  WorkerEnvironment workerEnv
      = WorkerEnvironment.init(config, job, workerController, persistentVolume, volatileVolume);
  BatchEnvironment tSetEnv = TSetEnvironment.initBatch(workerEnv);
  Configuration configuration = new Configuration();
  configuration.addResource(new Path(HdfsDataContext.getHdfsConfigDirectory(config)));
  configuration.set(TextInputFormat.INPUT_DIR, "/input4");
  SourceTSet<String> source = tSetEnv.createHadoopSource(configuration, TextInputFormat.class, 4,
      new MapFunc<Tuple<LongWritable, Text>, String>() {

        @Override
        public String map(Tuple<LongWritable, Text> input) {
          return input.getKey().toString() + " : " + input.getValue().toString();
        }
      });
  SinkTSet<Iterator<String>> sink = source.direct().sink((SinkFunc<Iterator<String>>) value -> {
    while (value.hasNext()) {
      String next = value.next();
      LOG.info("Received value: " + next);
    }
    return true;
  });
  tSetEnv.run(sink);
}
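The anonymous class above also pins down MapFunc's shape as used in these examples: the first type parameter is the input, the second the output, with a single map method. The same source can therefore be written more compactly with a lambda; an equivalent sketch:

// Same mapping as the anonymous class above: format each (byte offset, line)
// record read by TextInputFormat as "offset : line".
SourceTSet<String> source = tSetEnv.createHadoopSource(
    configuration, TextInputFormat.class, 4,
    (MapFunc<Tuple<LongWritable, Text>, String>) input ->
        input.getKey().toString() + " : " + input.getValue().toString());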
Use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.
The class HelloTSet, method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  LOG.info("Starting Hello TSet Example");
  int para = env.getConfig().getIntegerValue("para", 4);
  SourceTSet<int[]> source = env.createSource(new SourceFunc<int[]>() {

    private int count = 0;

    @Override
    public boolean hasNext() {
      return count < para;
    }

    @Override
    public int[] next() {
      count++;
      return new int[]{1, 1, 1};
    }
  }, para).setName("source");
  PartitionTLink<int[]> partitioned = source.partition(new LoadBalancePartitioner<>());
  ComputeTSet<int[]> mappedPartition = partitioned.map(
      (MapFunc<int[], int[]>) input -> Arrays.stream(input).map(a -> a * 2).toArray());
  ReduceTLink<int[]> reduce = mappedPartition.reduce((t1, t2) -> {
    int[] ret = new int[t1.length];
    for (int i = 0; i < t1.length; i++) {
      ret[i] = t1[i] + t2[i];
    }
    return ret;
  });
  SinkTSet<int[]> sink = reduce.sink(value -> {
    LOG.info("Results " + Arrays.toString(value));
    return false;
  });
  env.run(sink);
  LOG.info("Ending Hello TSet Example");
}
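A worker class like HelloTSet is launched through Twister2's job submission API. A minimal sketch of a main method, assuming the Twister2Job builder and Twister2Submitter as used across the project's examples (the job name and resource figures here are illustrative, not from this listing):

import java.util.HashMap;
import edu.iu.dsc.tws.api.JobConfig;
import edu.iu.dsc.tws.api.Twister2Job;
import edu.iu.dsc.tws.rsched.core.ResourceAllocator;
import edu.iu.dsc.tws.rsched.job.Twister2Submitter;

public static void main(String[] args) {
  // Illustrative settings: 4 workers with 512 MB and 1 CPU each.
  JobConfig jobConfig = new JobConfig();
  jobConfig.put("para", 4);
  Twister2Job job = Twister2Job.newBuilder()
      .setJobName("hello-tset")
      .setWorkerClass(HelloTSet.class)
      .addComputeResource(1, 512, 4)
      .setConfig(jobConfig)
      .build();
  Twister2Submitter.submitJob(job, ResourceAllocator.loadConfig(new HashMap<>()));
}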
Use of edu.iu.dsc.tws.api.tset.fn.MapFunc in project twister2 by DSC-SPIDAL.
The class KGatherExample, method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
  int start = env.getWorkerID() * 100;
  SourceTSet<Integer> src = dummySource(env, start, COUNT, PARALLELISM);
  KeyedGatherTLink<Integer, Integer> klink = src
      .mapToTuple(i -> new Tuple<>(i % 10, i))
      .keyedGather();
  LOG.info("test foreach");
  klink.forEach((ApplyFunc<Tuple<Integer, Iterator<Integer>>>) data ->
      LOG.info(data.getKey() + " -> " + iterToString(data.getValue())));
  LOG.info("test map");
  klink.map((MapFunc<Tuple<Integer, Iterator<Integer>>, String>) input -> {
    int s = 0;
    while (input.getValue().hasNext()) {
      s += input.getValue().next();
    }
    return input.getKey() + " -> " + s;
  }).direct().forEach(s -> LOG.info("map: " + s));
  LOG.info("test compute");
  klink.compute((ComputeFunc<Iterator<Tuple<Integer, Iterator<Integer>>>, String>) input -> {
    StringBuilder s = new StringBuilder();
    while (input.hasNext()) {
      Tuple<Integer, Iterator<Integer>> next = input.next();
      s.append(" [").append(next.getKey()).append(" -> ")
          .append(iterToString(next.getValue())).append("] ");
    }
    return s.toString();
  }).direct().forEach(s -> LOG.info("compute: concat " + s));
  LOG.info("test computec");
  klink.compute((ComputeCollectorFunc<Iterator<Tuple<Integer, Iterator<Integer>>>, String>)
      (input, output) -> {
        while (input.hasNext()) {
          Tuple<Integer, Iterator<Integer>> next = input.next();
          output.collect(next.getKey() + " -> " + iterToString(next.getValue()));
        }
      }).direct().forEach(s -> LOG.info("computec: " + s));
  // Test byte[] key value pairs for KeyedGather
  SourceTSet<String> srcString = dummyStringSource(env, 25, PARALLELISM);
  KeyedGatherTLink<byte[], Integer> keyedGatherLink = srcString
      .mapToTuple(s -> new Tuple<>(s.getBytes(), 1))
      .keyedGather();
  LOG.info("test foreach");
  keyedGatherLink.forEach((ApplyFunc<Tuple<byte[], Iterator<Integer>>>) data ->
      LOG.info(new String(data.getKey()) + " -> " + iterToString(data.getValue())));
}
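The iterToString helper is another piece this listing omits. A minimal sketch, assuming it simply drains the iterator into a readable string for logging:

// Hypothetical reconstruction; the real helper lives elsewhere in the twister2 examples.
// Note that it consumes the iterator, which these examples only traverse once.
static <T> String iterToString(Iterator<T> iterator) {
  StringBuilder sb = new StringBuilder("[");
  while (iterator.hasNext()) {
    sb.append(iterator.next());
    if (iterator.hasNext()) {
      sb.append(", ");
    }
  }
  return sb.append("]").toString();
}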