Use of edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment in project twister2 by DSC-SPIDAL.
From the class KeyedPipeTLink, method persist.
/**
 * Persists the data of this link, taking checkpointing into account.
 *
 * <p>When checkpointing is disabled this simply delegates to {@code doPersist()}. When it is
 * enabled, a boolean checkpoint variable named {@code <id>-persisted} decides what happens:
 * <ul>
 *   <li>flag is {@code false}: the data is persisted via {@code doPersist()}, the flag is set
 *       to {@code true} and the checkpoint environment is committed;</li>
 *   <li>flag is {@code true}: a {@code KeyedCheckpointedTSet} backed by a
 *       {@code DiskPartitionBackedSource} is created, added to the graph, run, and returned
 *       instead of persisting again.</li>
 * </ul>
 *
 * @return the persisted (or checkpoint-restored) keyed tset
 */
@Override
public KeyedPersistedTSet<K, V> persist() {
  // Fast path: no checkpointing, so there is no flag to consult or update.
  if (!getTSetEnv().isCheckpointingEnabled()) {
    return doPersist();
  }

  final String persistedFlagName = this.getId() + "-persisted";
  final BatchChkPntEnvironment checkpointEnv = (BatchChkPntEnvironment) getTSetEnv();
  // presumably yields the snapshot value when one exists, else the supplied default (false)
  final Boolean alreadyPersisted = checkpointEnv.initVariable(persistedFlagName, false);

  if (!alreadyPersisted) {
    // First time through: persist for real, then record that fact and commit it.
    KeyedPersistedTSet<K, V> freshlyPersisted = this.doPersist();
    checkpointEnv.updateVariable(persistedFlagName, true);
    checkpointEnv.commit();
    return freshlyPersisted;
  }

  // Already persisted earlier: build a source function intended to read the data back
  // from disk, keyed by this link's id.
  DiskPartitionBackedSource<Tuple<K, V>> diskSource =
      new DiskPartitionBackedSource<>(this.getId());

  // Wrap the source function in a checkpointed tset. It is created in the same position a
  // persisted tset would have been, which keeps the order of tsets created in the
  // checkpointed environment unchanged.
  KeyedCheckpointedTSet<K, V> restored = new KeyedCheckpointedTSet<>(
      getTSetEnv(), diskSource, this.getTargetParallelism(), getSchema());

  // Register it in the graph so that ids do not shift.
  addChildToGraph(restored);

  // Run only this tset so that it populates the inputs in the executor.
  getTSetEnv().runOne(restored);
  return restored;
}
Use of edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment in project twister2 by DSC-SPIDAL.
From the class WordCount, method execute.
/**
 * Runs a word-count job on a checkpointing-enabled batch environment.
 *
 * <p>Steps, in order: generate words from a {@code WordGenerator} source (parallelism 4),
 * persist the raw words, map each word to a {@code <word, 1>} tuple, reduce per key with
 * {@code Integer::sum}, persist the reduced counts, and finally log every resulting tuple.
 *
 * @param workerEnvironment the worker environment used to initialise checkpointing
 */
@Override
@SuppressWarnings("RegexpSinglelineJava")
public void execute(WorkerEnvironment workerEnvironment) {
  BatchChkPntEnvironment chkEnv = TSetEnvironment.initCheckpointing(workerEnvironment);

  int sourceParallelism = 4;
  Config jobConfig = chkEnv.getConfig();

  // Source emitting a fixed number of random words; both limits come from the job config.
  int sampleWordCount = (int) jobConfig.get("NO_OF_SAMPLE_WORDS");
  int maxWordLength = (int) jobConfig.get("MAX_CHARS");
  WordGenerator generator = new WordGenerator(sampleWordCount, maxWordLength);
  SourceTSet<String> words = chkEnv.createSource(generator, sourceParallelism).setName("source");

  // Stage 1: persist the raw words.
  PersistedTSet<String> rawWords = words.direct().persist();
  LOG.info("worker-" + chkEnv.getWorkerID() + " persisted initial raw data");

  // Disabled fault-injection hook (kills worker 1 on its first run after stage 1):
  // if (env.getWorkerID() == 1
  //     && workerEnvironment.getWorkerController().workerRestartCount() == 0) {
  //   try {
  //     Thread.sleep(6000);
  //   } catch (InterruptedException e) {
  //   }
  //   throw new RuntimeException("intentionally killed");
  // }

  // Stage 2: pair every word with the count 1, then sum the counts per word.
  KeyedTSet<String, Integer> wordOnes = rawWords.mapToTuple(word -> new Tuple<>(word, 1));
  KeyedReduceTLink<String, Integer> summedByWord = wordOnes.keyedReduce(Integer::sum);

  // Stage 3: persist the per-word counts.
  KeyedPersistedTSet<String, Integer> wordCounts = summedByWord.persist();
  LOG.info("worker-" + chkEnv.getWorkerID() + " persisted keyedReduced data");

  // Disabled fault-injection hook (kills worker 2 on its first, non-restored run after stage 3):
  // if (env.getWorkerID() == 2
  //     && workerEnvironment.getWorkerController().workerRestartCount() == 0
  //     && !CheckpointingContext.startingFromACheckpoint(config)) {
  //   try {
  //     Thread.sleep(10000);
  //   } catch (InterruptedException e) {
  //   }
  //   throw new RuntimeException("intentionally killed");
  // }

  // Emit each <word, count> tuple to the log so the result can be inspected in tests.
  wordCounts.keyedDirect().forEach(entry -> LOG.info(entry.toString()));
}
Use of edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment in project twister2 by DSC-SPIDAL.
From the class CheckpointingExample, method execute.
/**
 * Demonstrates feeding a persisted tset into a compute function as a named input.
 *
 * <p>A first dummy source is persisted. A second dummy source is then run through a compute
 * function that receives the persisted tset under the input name {@code "in"} and pairs the
 * two sequences element by element into a string of {@code (a,b)} entries, which is logged.
 * Finally the persisted values themselves are logged.
 *
 * @param workerEnvironment the worker environment used to initialise checkpointing
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
  BatchChkPntEnvironment chkEnv = TSetEnvironment.initCheckpointing(workerEnvironment);
  int elementsPerSource = 5;

  // First source: its start argument is offset by the worker id; persisted for later reuse.
  SourceTSet<Integer> firstSource =
      dummySource(chkEnv, elementsPerSource, 100 * chkEnv.getWorkerID());
  PersistedTSet<Integer> persistedFirst = firstSource.direct().persist();

  // Second source: same shape, with an additional offset of 10.
  SourceTSet<Integer> secondSource =
      dummySource(chkEnv, elementsPerSource, 100 * chkEnv.getWorkerID() + 10);

  secondSource.direct().compute(new BaseComputeFunc<Iterator<Integer>, String>() {
    // Consumer over the tset registered under the input name "in".
    private DataPartitionConsumer<Integer> persistedValues;

    @Override
    public void prepare(TSetContext ctx) {
      super.prepare(ctx);
      persistedValues = (DataPartitionConsumer<Integer>) ctx.getInput("in").getConsumer();
    }

    @Override
    public String compute(Iterator<Integer> input) {
      StringBuilder pairs = new StringBuilder();
      // Stop as soon as either side runs out; the streamed side is checked (and read) first.
      while (input.hasNext() && persistedValues.hasNext()) {
        pairs.append("(").append(input.next());
        pairs.append(",").append(persistedValues.next());
        pairs.append(") ");
      }
      return pairs.toString();
    }
  }).addInput("in", persistedFirst).direct().forEach(line -> LOG.info(line));

  // Also log the persisted values on their own.
  persistedFirst.direct().forEach(value -> LOG.info(value.toString()));
}
Use of edu.iu.dsc.tws.tset.env.BatchChkPntEnvironment in project twister2 by DSC-SPIDAL.
From the class TSetFTExample, method execute.
/**
 * Fault-tolerance example: exercises checkpoint variables and a persisted tset.
 *
 * <p>First, a variable named {@code "test-time-var"} is initialised with the current time;
 * if the value handed back differs from the time just taken, it is reported as loaded from
 * a snapshot. After committing, a two-way parallel source producing the integers
 * {@code 0 .. 10*1024*1024 - 1} is persisted, the wall-clock time of that persist call is
 * logged, and the persisted data is iterated with a no-op consumer.
 *
 * @param workerEnvironment the worker environment used to initialise checkpointing
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
  BatchChkPntEnvironment chkEnv = TSetEnvironment.initCheckpointing(workerEnvironment);
  LOG.info("Starting worker...");

  // Variable-loading check: a value different from "now" is taken to mean it was restored.
  long now = System.currentTimeMillis();
  long storedTime = chkEnv.initVariable("test-time-var", now);
  boolean loadedFromSnapshot = storedTime != now;
  LOG.info(loadedFromSnapshot
      ? "Variable [loaded] from snapshot"
      : "Variable [not] loaded from snapshot");
  chkEnv.commit();

  SourceTSet<Integer> numbers = chkEnv.createSource(new SourceFunc<Integer>() {
    // Next integer to hand out; starts at zero.
    private int nextValue = 0;

    @Override
    public boolean hasNext() {
      return nextValue < 10 * 1024 * 1024;
    }

    @Override
    public Integer next() {
      int current = nextValue;
      nextValue++;
      return current;
    }
  }, 2);

  long persistStartedAt = System.currentTimeMillis();
  PersistedTSet<Integer> persistedNumbers = numbers.direct().persist();
  // Expected to take under a second when the data is loaded from a checkpoint.
  long persistMillis = System.currentTimeMillis() - persistStartedAt;
  LOG.info("Persist took : " + persistMillis);

  persistedNumbers.direct().forEach(value -> {
    // Intentionally a no-op; enable for debugging:
    // LOG.info("i : " + i);
  });
}
Aggregations