Use of edu.iu.dsc.tws.api.tset.fn.ComputeFunc in project twister2 by DSC-SPIDAL.
From the class ArrowTSetSourceExample, method execute.
/**
 * Round-trips integers through Arrow: the first column of every CSV row is parsed as an
 * {@code int} and handed to an {@code ArrowBasedSinkFunction}; once that sink has been run,
 * the same directory/file is opened as an Arrow source and the values read back are logged.
 *
 * @param workerEnv worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  Config cfg = batchEnv.getConfig();

  // Input/output locations taken from the job configuration.
  String csvDir = cfg.getStringValue(DataObjectConstants.DINPUT_DIRECTORY);
  String arrowDir = cfg.getStringValue(DataObjectConstants.ARROW_DIRECTORY);
  String arrowFile = cfg.getStringValue(DataObjectConstants.FILE_NAME);

  // Sizing parameters taken from the job configuration.
  int workerCount = cfg.getIntegerValue(DataObjectConstants.WORKERS);
  int parallelism = cfg.getIntegerValue(DataObjectConstants.PARALLELISM_VALUE);
  int dataSize = cfg.getIntegerValue(DataObjectConstants.DSIZE);

  String summary = "arrow input file:" + arrowFile
      + "\t" + arrowDir
      + "\t" + csvDir
      + "\t" + workerCount
      + "\t" + parallelism;
  LOG.info(summary);

  Schema schema = makeSchema();

  // Stage 1: CSV rows -> first column as Integer -> Arrow sink.
  SourceTSet<String[]> csvRows = batchEnv.createCSVSource(csvDir, dataSize, parallelism, "split");
  MapFunc<String[], Integer> firstColumnAsInt = row -> Integer.parseInt(row[0]);
  SinkTSet<Iterator<Integer>> arrowSink = csvRows
      .direct()
      .map(firstColumnAsInt)
      .direct()
      .sink(new ArrowBasedSinkFunction<>(arrowDir, arrowFile, schema.toJson()));
  batchEnv.run(arrowSink);

  // Stage 2: read the Arrow data back, gather each iterator into a list and log it.
  ComputeFunc<Iterator<Object>, List<Integer>> gatherIntegers = records -> {
    List<Integer> collected = new ArrayList<>();
    while (records.hasNext()) {
      collected.add((Integer) records.next());
    }
    return collected;
  };
  batchEnv.createArrowSource(arrowDir, arrowFile, parallelism, schema.toJson())
      .direct()
      .compute(gatherIntegers)
      .direct()
      .forEach(values -> LOG.info("Integer Array Size:" + values.size() + "\tvalues:" + values));
}
Use of edu.iu.dsc.tws.api.tset.fn.ComputeFunc in project twister2 by DSC-SPIDAL.
From the class ReduceExample, method execute.
/**
 * Walks through the operations applied to a reduce link in this example: forEach, map,
 * flatmap, compute (plain and collector variants), mapToTuple and sink. The link is built by
 * summing the integers of a dummy source whose start value is {@code workerID * 100}.
 *
 * @param workerEnv worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

  SourceTSet<Integer> numbers =
      dummySource(batchEnv, batchEnv.getWorkerID() * 100, COUNT, PARALLELISM);
  ReduceTLink<Integer> summed = numbers.reduce(Integer::sum);

  LOG.info("test foreach");
  summed.forEach(total -> LOG.info("foreach: " + total));

  LOG.info("test map");
  summed.map(total -> total.toString() + "$$")
      .withSchema(PrimitiveSchemas.STRING)
      .direct()
      .forEach(text -> LOG.info("map: " + text));

  LOG.info("test flat map");
  summed.flatmap((total, collector) -> collector.collect(total.toString() + "##"))
      .withSchema(PrimitiveSchemas.STRING)
      .direct()
      .forEach(text -> LOG.info("flat:" + text));

  LOG.info("test compute");
  ComputeFunc<Integer, String> labelSum = total -> "sum=" + total;
  summed.compute(labelSum)
      .withSchema(PrimitiveSchemas.STRING)
      .direct()
      .forEach(text -> LOG.info("compute: " + text));

  LOG.info("test computec");
  ComputeCollectorFunc<Integer, String> collectLabelledSum =
      (total, collector) -> collector.collect("sum=" + total);
  summed.compute(collectLabelledSum)
      .withSchema(PrimitiveSchemas.STRING)
      .direct()
      .forEach(text -> LOG.info("computec: " + text));

  LOG.info("test map2tup");
  summed.mapToTuple(total -> new Tuple<>(total, total.toString()))
      .keyedDirect()
      .forEach(pair -> LOG.info("mapToTuple: " + pair.toString()));

  LOG.info("test sink");
  // The sink only logs the value it receives and reports success.
  SinkFunc<Integer> loggingSink = received -> {
    LOG.info("val =" + received);
    return true;
  };
  SinkTSet<Integer> sink = summed.sink(loggingSink);
  batchEnv.run(sink);
}
Use of edu.iu.dsc.tws.api.tset.fn.ComputeFunc in project twister2 by DSC-SPIDAL.
From the class KGatherUngroupedExample, method execute.
/**
 * Exercises forEach, map and both compute variants on a keyed-gather-ungrouped link. The link
 * is built by keying every integer of a dummy source with {@code value % 4}.
 *
 * @param workerEnv worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  SourceTSet<Integer> numbers = dummySource(batchEnv, COUNT, PARALLELISM);
  KeyedGatherUngroupedTLink<Integer, Integer> gathered =
      numbers.mapToTuple(value -> new Tuple<>(value % 4, value)).keyedGatherUngrouped();

  LOG.info("test foreach");
  ApplyFunc<Tuple<Integer, Integer>> logPair =
      pair -> LOG.info(pair.getKey() + " -> " + pair.getValue());
  gathered.forEach(logPair);

  LOG.info("test map");
  MapFunc<Tuple<Integer, Integer>, String> renderPair =
      pair -> pair.getKey() + " -> " + pair.getValue();
  gathered.map(renderPair)
      .direct()
      .forEach(text -> LOG.info("map: " + text));

  LOG.info("test compute");
  // Concatenates every tuple of the iterator as "[key->value]" into one string.
  ComputeFunc<Iterator<Tuple<Integer, Integer>>, String> renderAll = pairs -> {
    StringBuilder rendered = new StringBuilder();
    pairs.forEachRemaining(pair -> {
      rendered.append("[");
      rendered.append(pair.getKey());
      rendered.append("->");
      rendered.append(pair.getValue());
      rendered.append("]");
    });
    return rendered.toString();
  };
  gathered.compute(renderAll)
      .direct()
      .forEach(text -> LOG.info("compute: " + text));

  LOG.info("test computec");
  // Emits one "key -> value*2" string per tuple.
  ComputeCollectorFunc<Iterator<Tuple<Integer, Integer>>, String> emitDoubled =
      (pairs, collector) -> pairs.forEachRemaining(
          pair -> collector.collect(pair.getKey() + " -> " + pair.getValue() * 2));
  gathered.compute(emitDoubled)
      .direct()
      .forEach(text -> LOG.info("computec: " + text));
}
Use of edu.iu.dsc.tws.api.tset.fn.ComputeFunc in project twister2 by DSC-SPIDAL.
From the class KReduceExample, method execute.
/**
 * Exercises forEach, map and both compute variants on a keyed-reduce link. The link is built
 * by keying every integer of a dummy source (start value {@code workerID * 100}) with
 * {@code value % 10} and reducing the values with {@code Integer::sum}.
 *
 * @param workerEnv worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

  SourceTSet<Integer> numbers =
      dummySource(batchEnv, batchEnv.getWorkerID() * 100, COUNT, PARALLELISM);
  KeyedReduceTLink<Integer, Integer> sumsByKey =
      numbers.mapToTuple(value -> new Tuple<>(value % 10, value)).keyedReduce(Integer::sum);

  LOG.info("test foreach");
  sumsByKey.forEach(
      entry -> LOG.info("sum by key=" + entry.getKey() + ", " + entry.getValue()));

  LOG.info("test map");
  sumsByKey.map(entry -> entry.toString() + "$$")
      .direct()
      .forEach(text -> LOG.info("map: " + text));

  LOG.info("test compute");
  // Joins the string form of every tuple, each followed by a single space.
  ComputeFunc<Iterator<Tuple<Integer, Integer>>, String> concatAll = entries -> {
    StringBuilder joined = new StringBuilder();
    entries.forEachRemaining(entry -> joined.append(entry.toString()).append(" "));
    return joined.toString();
  };
  sumsByKey.compute(concatAll)
      .direct()
      .forEach(text -> LOG.info("compute: concat " + text));

  LOG.info("test computec");
  // Emits the string form of every tuple as a separate record.
  ComputeCollectorFunc<Iterator<Tuple<Integer, Integer>>, String> emitEach =
      (entries, collector) ->
          entries.forEachRemaining(entry -> collector.collect(entry.toString()));
  sumsByKey.compute(emitEach)
      .direct()
      .forEach(text -> LOG.info("computec: " + text));
}
Use of edu.iu.dsc.tws.api.tset.fn.ComputeFunc in project twister2 by DSC-SPIDAL.
From the class PartitionExample, method execute.
/**
 * Exercises forEach, map, flatmap and both compute variants on partition links. Every test
 * partitions the same dummy source (start value {@code workerID * 100}) with a freshly
 * created {@code LoadBalancePartitioner}.
 *
 * @param workerEnv worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

  SourceTSet<Integer> numbers =
      dummySource(batchEnv, batchEnv.getWorkerID() * 100, COUNT, PARALLELISM);

  LOG.info("test foreach");
  numbers.partition(new LoadBalancePartitioner<>())
      .forEach(value -> LOG.info("foreach: " + value));

  LOG.info("test map");
  numbers.partition(new LoadBalancePartitioner<>())
      .map(value -> value.toString() + "$$")
      .direct()
      .forEach(text -> LOG.info("map: " + text));

  LOG.info("test flat map");
  numbers.partition(new LoadBalancePartitioner<>())
      .flatmap((value, collector) -> collector.collect(value.toString() + "##"))
      .direct()
      .forEach(text -> LOG.info("flat:" + text));

  LOG.info("test compute");
  // Adds up every integer delivered by the iterator.
  ComputeFunc<Iterator<Integer>, Integer> sumAll = values -> {
    int total = 0;
    for (; values.hasNext();) {
      total = total + values.next();
    }
    return total;
  };
  numbers.partition(new LoadBalancePartitioner<>())
      .compute(sumAll)
      .direct()
      .forEach(total -> LOG.info("comp: " + total));

  LOG.info("test computec");
  // Same summation, but the result is emitted through the collector as "sum<total>".
  ComputeCollectorFunc<Iterator<Integer>, String> emitSum = (values, collector) -> {
    int total = 0;
    for (; values.hasNext();) {
      total = total + values.next();
    }
    collector.collect("sum" + total);
  };
  numbers.partition(new LoadBalancePartitioner<>())
      .compute(emitSum)
      .direct()
      .forEach(text -> LOG.info("computec: " + text));
}
Aggregations