Use of scala.Tuple3 in project beam by apache.
The class SparkGroupAlsoByWindowViaWindowSet, method groupAlsoByWindow:
public static <K, InputT, W extends BoundedWindow>
    JavaDStream<WindowedValue<KV<K, Iterable<InputT>>>> groupAlsoByWindow(
        JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>> inputDStream,
        final Coder<K> keyCoder,
        final Coder<WindowedValue<InputT>> wvCoder,
        final WindowingStrategy<?, W> windowingStrategy,
        final SparkRuntimeContext runtimeContext,
        final List<Integer> sourceIds) {
final IterableCoder<WindowedValue<InputT>> itrWvCoder = IterableCoder.of(wvCoder);
final Coder<InputT> iCoder = ((FullWindowedValueCoder<InputT>) wvCoder).getValueCoder();
final Coder<? extends BoundedWindow> wCoder = ((FullWindowedValueCoder<InputT>) wvCoder).getWindowCoder();
final Coder<WindowedValue<KV<K, Iterable<InputT>>>> wvKvIterCoder = FullWindowedValueCoder.of(KvCoder.of(keyCoder, IterableCoder.of(iCoder)), wCoder);
final TimerInternals.TimerDataCoder timerDataCoder = TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
long checkpointDurationMillis = runtimeContext.getPipelineOptions().as(SparkPipelineOptions.class).getCheckpointDurationMillis();
// we have to switch to the Scala API to avoid Optional in the Java API, see: SPARK-4819.
// the Scala API is also broader (access to the actual key and the entire iterator).
// we use coders to convert objects in the PCollection to byte arrays, so they
// can be transferred over the network for the shuffle and be in serialized form
// for checkpointing.
// for readability, we add comments with the actual type next to byte[].
// to shorten line length, we use:
//---- WV: WindowedValue
//---- Itr: Iterable
//---- A: AccumT
//---- I: InputT
DStream<Tuple2<ByteArray, byte[]>> /*Itr<WV<I>>*/ pairDStream =
    inputDStream.transformToPair(
        new Function<JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>>,
                JavaPairRDD<ByteArray, byte[]>>() {
// we use mapPartitions with the RDD API because it's the only available API
// that preserves partitioning.
@Override
public JavaPairRDD<ByteArray, byte[]> call(JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>> rdd) throws Exception {
return rdd
    .mapPartitions(
        TranslationUtils.functionToFlatMapFunction(
            WindowingHelpers.<KV<K, Iterable<WindowedValue<InputT>>>>unwindowFunction()),
        true)
    .mapPartitionsToPair(
        TranslationUtils.<K, Iterable<WindowedValue<InputT>>>toPairFlatMapFunction(), true)
    .mapPartitionsToPair(
        TranslationUtils.pairFunctionToPairFlatMapFunction(
            CoderHelpers.toByteFunction(keyCoder, itrWvCoder)),
        true);
}
}).dstream();
PairDStreamFunctions<ByteArray, byte[]> pairDStreamFunctions =
    DStream.toPairDStreamFunctions(
        pairDStream,
        JavaSparkContext$.MODULE$.<ByteArray>fakeClassTag(),
        JavaSparkContext$.MODULE$.<byte[]>fakeClassTag(),
        null);
int defaultNumPartitions = pairDStreamFunctions.defaultPartitioner$default$1();
Partitioner partitioner = pairDStreamFunctions.defaultPartitioner(defaultNumPartitions);
// use updateStateByKey to scan through the state and update elements and timers.
DStream<Tuple2<ByteArray /*K*/, Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>> firedStream =
    pairDStreamFunctions.updateStateByKey(
        new SerializableFunction1<
            scala.collection.Iterator<Tuple3<ByteArray /*K*/, Seq<byte[]>,
                Option<Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>>>,
            scala.collection.Iterator<Tuple2<ByteArray /*K*/,
                Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>>>() {

    @Override
    public scala.collection.Iterator<Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>>> apply(
        final scala.collection.Iterator<Tuple3<ByteArray, Seq<byte[]>,
            Option<Tuple2<StateAndTimers, List<byte[]>>>>> iter) {
//--- ACTUAL STATEFUL OPERATION:
//
// Input Iterator: the partition (~bundle) of a cogrouping of the input
// and the previous state (if it exists).
//
// Output Iterator: the output key, and the updated state.
//
// possible input scenarios for (K, Seq, Option<S>):
// (1) Option<S>.isEmpty: new data with no previous state.
// (2) Seq.isEmpty: no new data, but evaluating previous state (timer-like behaviour).
// (3) Seq.nonEmpty && Option<S>.isDefined: new data with previous state.
final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn =
    SystemReduceFn.buffering(((FullWindowedValueCoder<InputT>) wvCoder).getValueCoder());
final OutputWindowedValueHolder<K, InputT> outputHolder = new OutputWindowedValueHolder<>();
// use in-memory Aggregators since Spark Accumulators are not resilient
// in stateful operators; report them once done with this partition.
final MetricsContainerImpl cellProvider = new MetricsContainerImpl("cellProvider");
final CounterCell droppedDueToClosedWindow = cellProvider.getCounter(
    MetricName.named(
        SparkGroupAlsoByWindowViaWindowSet.class,
        GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER));
final CounterCell droppedDueToLateness = cellProvider.getCounter(
    MetricName.named(
        SparkGroupAlsoByWindowViaWindowSet.class,
        GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER));
AbstractIterator<Tuple2<ByteArray /*K*/,
        Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>> outIter =
    new AbstractIterator<Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>>>() {

    @Override
    protected Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>> computeNext() {
// (possibly) previous-state and (possibly) new data.
while (iter.hasNext()) {
// for each element in the partition:
Tuple3<ByteArray, Seq<byte[]>, Option<Tuple2<StateAndTimers, List<byte[]>>>> next = iter.next();
ByteArray encodedKey = next._1();
K key = CoderHelpers.fromByteArray(encodedKey.getValue(), keyCoder);
Seq<byte[]> seq = next._2();
Option<Tuple2<StateAndTimers, List<byte[]>>> prevStateAndTimersOpt = next._3();
SparkStateInternals<K> stateInternals;
SparkTimerInternals timerInternals = SparkTimerInternals.forStreamFromSources(sourceIds, GlobalWatermarkHolder.get());
// get state(internals) per key.
if (prevStateAndTimersOpt.isEmpty()) {
// no previous state.
stateInternals = SparkStateInternals.forKey(key);
} else {
// with pre-existing state.
StateAndTimers prevStateAndTimers = prevStateAndTimersOpt.get()._1();
stateInternals = SparkStateInternals.forKeyAndState(key, prevStateAndTimers.getState());
Collection<byte[]> serTimers = prevStateAndTimers.getTimers();
timerInternals.addTimers(SparkTimerInternals.deserializeTimers(serTimers, timerDataCoder));
}
ReduceFnRunner<K, InputT, Iterable<InputT>, W> reduceFnRunner =
    new ReduceFnRunner<>(
        key,
        windowingStrategy,
        ExecutableTriggerStateMachine.create(
            TriggerStateMachines.stateMachineForTrigger(
                TriggerTranslation.toProto(windowingStrategy.getTrigger()))),
        stateInternals,
        timerInternals,
        outputHolder,
        new UnsupportedSideInputReader("GroupAlsoByWindow"),
        reduceFn,
        runtimeContext.getPipelineOptions());
// clear before potential use.
outputHolder.clear();
if (!seq.isEmpty()) {
// new input for key.
try {
Iterable<WindowedValue<InputT>> elementsIterable =
    CoderHelpers.fromByteArray(seq.head(), itrWvCoder);
Iterable<WindowedValue<InputT>> validElements =
    LateDataUtils.dropExpiredWindows(
        key, elementsIterable, timerInternals, windowingStrategy, droppedDueToLateness);
reduceFnRunner.processElements(validElements);
} catch (Exception e) {
throw new RuntimeException("Failed to process element with ReduceFnRunner", e);
}
} else if (stateInternals.getState().isEmpty()) {
// no input and no state -> GC evict now.
continue;
}
try {
// advance the watermark to HWM to fire by timers.
timerInternals.advanceWatermark();
// call on timers that are ready.
reduceFnRunner.onTimers(timerInternals.getTimersReadyToProcess());
} catch (Exception e) {
throw new RuntimeException("Failed to process ReduceFnRunner onTimer.", e);
}
// this is mostly symbolic since actual persist is done by emitting output.
reduceFnRunner.persist();
// obtain output, if fired.
List<WindowedValue<KV<K, Iterable<InputT>>>> outputs = outputHolder.get();
if (!outputs.isEmpty() || !stateInternals.getState().isEmpty()) {
StateAndTimers updated = new StateAndTimers(
    stateInternals.getState(),
    SparkTimerInternals.serializeTimers(timerInternals.getTimers(), timerDataCoder));
// persist Spark's state by outputting.
List<byte[]> serOutput = CoderHelpers.toByteArrays(outputs, wvKvIterCoder);
return new Tuple2<>(encodedKey, new Tuple2<>(updated, serOutput));
}
// an empty state with no output can be evicted completely; do nothing.
}
return endOfData();
}
};
// log if there's something to log.
long lateDropped = droppedDueToLateness.getCumulative();
if (lateDropped > 0) {
LOG.info(String.format("Dropped %d elements due to lateness.", lateDropped));
droppedDueToLateness.inc(-droppedDueToLateness.getCumulative());
}
long closedWindowDropped = droppedDueToClosedWindow.getCumulative();
if (closedWindowDropped > 0) {
LOG.info(String.format("Dropped %d elements due to closed window.", closedWindowDropped));
droppedDueToClosedWindow.inc(-droppedDueToClosedWindow.getCumulative());
}
return scala.collection.JavaConversions.asScalaIterator(outIter);
}
}, partitioner, true,
    JavaSparkContext$.MODULE$.<Tuple2<StateAndTimers, List<byte[]>>>fakeClassTag());
if (checkpointDurationMillis > 0) {
firedStream.checkpoint(new Duration(checkpointDurationMillis));
}
// go back to Java now.
JavaPairDStream<ByteArray, Tuple2<StateAndTimers, List<byte[]>>> /*WV<KV<K, Itr<I>>>*/ javaFiredStream =
    JavaPairDStream.fromPairDStream(
        firedStream,
        JavaSparkContext$.MODULE$.<ByteArray>fakeClassTag(),
        JavaSparkContext$.MODULE$.<Tuple2<StateAndTimers, List<byte[]>>>fakeClassTag());
// filter state-only output (nothing to fire) and remove the state from the output.
return javaFiredStream
    .filter(new Function<Tuple2<ByteArray /*K*/,
            Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>, Boolean>() {
        @Override
        public Boolean call(
            Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>> t2) throws Exception {
            // keep only keys that actually fired output.
            return !t2._2()._2().isEmpty();
        }
    })
    .flatMap(new FlatMapFunction<Tuple2<ByteArray /*K*/,
            Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>,
            WindowedValue<KV<K, Iterable<InputT>>>>() {
        @Override
        public Iterable<WindowedValue<KV<K, Iterable<InputT>>>> call(
            Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>> t2) throws Exception {
            // deserialize the fired output and return it.
            return CoderHelpers.fromByteArrays(t2._2()._2(), wvKvIterCoder);
        }
    });
}
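Across these examples the interop pattern is the same: scala.Tuple3 is created with its constructor and read from Java through the positional accessors _1(), _2(), and _3(). A minimal, self-contained sketch of the three input scenarios the update function above distinguishes (simplified types and hypothetical names, not Beam's actual ones):
import java.util.Collections;
import java.util.List;
import scala.Option;
import scala.Tuple2;
import scala.Tuple3;

public class Tuple3StateSketch {
    // One cogrouped record: (key, new values for this batch, previous state if any).
    static String describe(Tuple3<String, List<byte[]>, Option<Tuple2<Long, byte[]>>> record) {
        boolean hasNewData = !record._2().isEmpty();
        boolean hasPrevState = record._3().isDefined();
        if (hasNewData && !hasPrevState) return record._1() + ": new data, no previous state";
        if (!hasNewData && hasPrevState) return record._1() + ": timer-like evaluation of previous state";
        if (hasNewData) return record._1() + ": new data merged with previous state";
        return record._1() + ": nothing to do, evict";
    }

    public static void main(String[] args) {
        Option<Tuple2<Long, byte[]>> noState = Option.empty();
        List<byte[]> oneValue = Collections.singletonList(new byte[] {1});
        System.out.println(describe(new Tuple3<>("k", oneValue, noState)));
        System.out.println(describe(new Tuple3<>("k", Collections.<byte[]>emptyList(),
                Option.apply(new Tuple2<>(7L, new byte[0])))));
    }
}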
Use of scala.Tuple3 in project incubator-systemml by apache.
The class MLContextTest, method testInputTupleSeqWithMetadataDML:
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testInputTupleSeqWithMetadataDML() {
System.out.println("MLContextTest - Tuple sequence with metadata DML");
List<String> list1 = new ArrayList<String>();
list1.add("1,2");
list1.add("3,4");
JavaRDD<String> javaRDD1 = sc.parallelize(list1);
RDD<String> rdd1 = JavaRDD.toRDD(javaRDD1);
List<String> list2 = new ArrayList<String>();
list2.add("5,6");
list2.add("7,8");
JavaRDD<String> javaRDD2 = sc.parallelize(list2);
RDD<String> rdd2 = JavaRDD.toRDD(javaRDD2);
MatrixMetadata mm1 = new MatrixMetadata(2, 2);
MatrixMetadata mm2 = new MatrixMetadata(2, 2);
Tuple3 tuple1 = new Tuple3("m1", rdd1, mm1);
Tuple3 tuple2 = new Tuple3("m2", rdd2, mm2);
List tupleList = new ArrayList();
tupleList.add(tuple1);
tupleList.add(tuple2);
Seq seq = JavaConversions.asScalaBuffer(tupleList).toSeq();
Script script = dml("print('sums: ' + sum(m1) + ' ' + sum(m2));").in(seq);
setExpectedStdOut("sums: 10.0 26.0");
ml.execute(script);
}
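The interop hinge in this test is JavaConversions.asScalaBuffer(...).toSeq(), which turns the Java List of Tuple3s into the scala.collection.Seq that the Script.in(Seq) overload expects. The same conversion in isolation, with plain String/Integer placeholders standing in for the test's RDD and MatrixMetadata values (a sketch, assuming a Scala 2.x-era JavaConversions on the classpath):
import java.util.ArrayList;
import java.util.List;
import scala.Tuple3;
import scala.collection.JavaConversions;
import scala.collection.Seq;

public class SeqConversionSketch {
    public static void main(String[] args) {
        // (variable name, data, row count) -- placeholders for the test's
        // (String, RDD<String>, MatrixMetadata) triples.
        List<Tuple3<String, String, Integer>> tuples = new ArrayList<>();
        tuples.add(new Tuple3<>("m1", "1,2;3,4", 2));
        tuples.add(new Tuple3<>("m2", "5,6;7,8", 2));
        Seq<Tuple3<String, String, Integer>> seq =
                JavaConversions.asScalaBuffer(tuples).toSeq();
        System.out.println(seq.length() + " tuples converted"); // prints "2 tuples converted"
    }
}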
Use of scala.Tuple3 in project japid42 by branaway.
The class GlobalSettingsWithJapid, method getPlayRoutes:
protected List<Tuple3<String, String, String>> getPlayRoutes() {
play.api.Application realApp = _app.getWrappedApplication();
Option<Routes> routes = realApp.routes();
if (routes.isDefined()) {
Routes r = routes.get();
Seq<Tuple3<String, String, String>> docs = r.documentation();
return scala.collection.JavaConversions.seqAsJavaList(docs);
}
return null;
}
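Play documents each route as a triple that is conventionally (HTTP method, path pattern, controller call); assuming that field order, a caller can render the result of getPlayRoutes like this (an illustrative helper, not part of japid42):
import java.util.List;
import scala.Tuple3;

public class RoutePrinter {
    // Print each route as "VERB /path -> controllers.Foo.bar()".
    static void printRoutes(List<Tuple3<String, String, String>> routes) {
        if (routes == null) {
            return; // getPlayRoutes returns null when no router is defined
        }
        for (Tuple3<String, String, String> route : routes) {
            System.out.println(route._1() + " " + route._2() + " -> " + route._3());
        }
    }
}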
Use of scala.Tuple3 in project deeplearning4j by deeplearning4j.
The class StatsUtils, method getTrainingStatsTimelineChart:
private static Component[] getTrainingStatsTimelineChart(SparkTrainingStats stats, Set<String> includeSet, long maxDurationMs) {
Set<Tuple3<String, String, Long>> uniqueTuples = new HashSet<>();
Set<String> machineIDs = new HashSet<>();
Set<String> jvmIDs = new HashSet<>();
Map<String, String> machineShortNames = new HashMap<>();
Map<String, String> jvmShortNames = new HashMap<>();
long earliestStart = Long.MAX_VALUE;
long latestEnd = Long.MIN_VALUE;
for (String s : includeSet) {
List<EventStats> list = stats.getValue(s);
for (EventStats e : list) {
machineIDs.add(e.getMachineID());
jvmIDs.add(e.getJvmID());
uniqueTuples.add(new Tuple3<String, String, Long>(e.getMachineID(), e.getJvmID(), e.getThreadID()));
earliestStart = Math.min(earliestStart, e.getStartTime());
latestEnd = Math.max(latestEnd, e.getStartTime() + e.getDurationMs());
}
}
int count = 0;
for (String s : machineIDs) {
machineShortNames.put(s, "PC " + count++);
}
count = 0;
for (String s : jvmIDs) {
jvmShortNames.put(s, "JVM " + count++);
}
int nLanes = uniqueTuples.size();
List<Tuple3<String, String, Long>> outputOrder = new ArrayList<>(uniqueTuples);
Collections.sort(outputOrder, new TupleComparator());
Color[] colors = getColors(includeSet.size());
Map<String, Color> colorMap = new HashMap<>();
count = 0;
for (String s : includeSet) {
colorMap.put(s, colors[count++]);
}
//Create key for charts:
List<Component> tempList = new ArrayList<>();
for (String s : includeSet) {
String key = stats.getShortNameForKey(s) + " - " + s;
tempList.add(new ComponentDiv(
    new StyleDiv.Builder()
        .backgroundColor(colorMap.get(s))
        .width(33.3, LengthUnit.Percent)
        .height(25, LengthUnit.Px)
        .floatValue(StyleDiv.FloatValue.left)
        .build(),
    new ComponentText(key, new StyleText.Builder().fontSize(11).build())));
}
Component key = new ComponentDiv(new StyleDiv.Builder().width(100, LengthUnit.Percent).build(), tempList);
//How many charts?
int nCharts = (int) ((latestEnd - earliestStart) / maxDurationMs);
if (nCharts < 1)
nCharts = 1;
long[] chartStartTimes = new long[nCharts];
long[] chartEndTimes = new long[nCharts];
for (int i = 0; i < nCharts; i++) {
chartStartTimes[i] = earliestStart + i * maxDurationMs;
chartEndTimes[i] = earliestStart + (i + 1) * maxDurationMs;
}
List<List<List<ChartTimeline.TimelineEntry>>> entriesByLane = new ArrayList<>();
for (int c = 0; c < nCharts; c++) {
entriesByLane.add(new ArrayList<List<ChartTimeline.TimelineEntry>>());
for (int i = 0; i < nLanes; i++) {
entriesByLane.get(c).add(new ArrayList<ChartTimeline.TimelineEntry>());
}
}
for (String s : includeSet) {
List<EventStats> list = stats.getValue(s);
for (EventStats e : list) {
if (e.getDurationMs() == 0)
continue;
long start = e.getStartTime();
long end = start + e.getDurationMs();
int chartIdx = -1;
for (int j = 0; j < nCharts; j++) {
if (start >= chartStartTimes[j] && start < chartEndTimes[j]) {
chartIdx = j;
}
}
if (chartIdx == -1)
chartIdx = nCharts - 1;
Tuple3<String, String, Long> tuple = new Tuple3<>(e.getMachineID(), e.getJvmID(), e.getThreadID());
int idx = outputOrder.indexOf(tuple);
Color c = colorMap.get(s);
// ChartTimeline.TimelineEntry entry = new ChartTimeline.TimelineEntry(null, start, end, c);
ChartTimeline.TimelineEntry entry = new ChartTimeline.TimelineEntry(stats.getShortNameForKey(s), start, end, c);
entriesByLane.get(chartIdx).get(idx).add(entry);
}
}
//Sort each lane by start time:
for (int i = 0; i < nCharts; i++) {
for (List<ChartTimeline.TimelineEntry> l : entriesByLane.get(i)) {
Collections.sort(l, new Comparator<ChartTimeline.TimelineEntry>() {
@Override
public int compare(ChartTimeline.TimelineEntry o1, ChartTimeline.TimelineEntry o2) {
return Long.compare(o1.getStartTimeMs(), o2.getStartTimeMs());
}
});
}
}
StyleChart sc = new StyleChart.Builder()
    .width(1280, LengthUnit.Px)
    .height(35 * nLanes + (60 + 20 + 25), LengthUnit.Px)
    .margin(LengthUnit.Px, 60, 20, 200, 10) // top, bottom, left, right
    .build();
List<Component> list = new ArrayList<>(nCharts);
for (int j = 0; j < nCharts; j++) {
ChartTimeline.Builder b = new ChartTimeline.Builder("Timeline: Training Activities", sc);
int i = 0;
for (List<ChartTimeline.TimelineEntry> l : entriesByLane.get(j)) {
Tuple3<String, String, Long> t3 = outputOrder.get(i);
String name = machineShortNames.get(t3._1()) + ", " + jvmShortNames.get(t3._2()) + ", Thread " + t3._3();
b.addLane(name, l);
i++;
}
list.add(b.build());
}
list.add(key);
return list.toArray(new Component[list.size()]);
}
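The TupleComparator that orders the lanes is referenced above but not shown; a plausible sketch that sorts by machine ID, then JVM ID, then thread ID (an assumption based on how outputOrder is used, not the project's actual class):
import java.io.Serializable;
import java.util.Comparator;
import scala.Tuple3;

// Hypothetical lane ordering: machine ID, then JVM ID, then thread ID.
class TupleComparator implements Comparator<Tuple3<String, String, Long>>, Serializable {
    @Override
    public int compare(Tuple3<String, String, Long> a, Tuple3<String, String, Long> b) {
        int byMachine = a._1().compareTo(b._1());
        if (byMachine != 0) return byMachine;
        int byJvm = a._2().compareTo(b._2());
        if (byJvm != 0) return byJvm;
        return Long.compare(a._3(), b._3());
    }
}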
Use of scala.Tuple3 in project geode by apache.
The class JavaAPITest, method createCommonMocks:
@SuppressWarnings("unchecked")
public Tuple3<SparkContext, GeodeConnectionConf, GeodeConnection> createCommonMocks() {
SparkContext mockSparkContext = mock(SparkContext.class);
GeodeConnectionConf mockConnConf = mock(GeodeConnectionConf.class);
GeodeConnection mockConnection = mock(GeodeConnection.class);
when(mockConnConf.getConnection()).thenReturn(mockConnection);
when(mockConnConf.locators()).thenReturn(new LinkedList());
return new Tuple3<>(mockSparkContext, mockConnConf, mockConnection);
}
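Tests can then unpack the three mocks positionally; for example (an illustrative fragment inside the same test class, not from the project):
@Test
public void exampleUsingCommonMocks() { // hypothetical test
    Tuple3<SparkContext, GeodeConnectionConf, GeodeConnection> mocks = createCommonMocks();
    SparkContext mockContext = mocks._1();
    GeodeConnectionConf mockConnConf = mocks._2();
    GeodeConnection mockConnection = mocks._3();
    // exercise the code under test with the mocks...
}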