Use of org.apache.beam.runners.spark.util.SideInputBroadcast in project beam by apache.
The class SparkPCollectionView, method putPView.
// Driver only - during evaluation stage
void putPView(
    PCollectionView<?> view,
    Iterable<WindowedValue<?>> value,
    Coder<Iterable<WindowedValue<?>>> coder) {
  pviews.put(view, new Tuple2<>(CoderHelpers.toByteArray(value, coder), coder));
  // Currently unsynchronized unpersist, if needed can be changed to blocking
  if (broadcastHelperMap != null) {
    synchronized (SparkPCollectionView.class) {
      SideInputBroadcast helper = broadcastHelperMap.get(view);
      if (helper != null) {
        helper.unpersist();
        broadcastHelperMap.remove(view);
      }
    }
  }
}
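For orientation, a driver-side caller would typically build the Coder<Iterable<WindowedValue<?>>> from the view's element and window coders before handing the materialized values to putPView. The sketch below is illustrative only: the class and method names (PutPViewSketch, registerView) are invented, and since putPView is package-private the caller is assumed to live in the same package as SparkPCollectionView (so no import for it is shown).

import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.IterableCoder;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.PCollectionView;

/** Illustrative sketch of a driver-side caller of putPView; not part of the Beam source. */
class PutPViewSketch {

  @SuppressWarnings("unchecked")
  static <T> void registerView(
      SparkPCollectionView pviews,
      PCollectionView<?> view,
      Iterable<WindowedValue<?>> materializedValues,
      Coder<T> elementCoder,
      Coder<? extends BoundedWindow> windowCoder) {
    // putPView expects a Coder<Iterable<WindowedValue<?>>>; wrap the full windowed
    // value coder in an IterableCoder and cast it to that shape.
    Coder<Iterable<WindowedValue<?>>> iterableCoder =
        (Coder<Iterable<WindowedValue<?>>>)
            (Coder<?>) IterableCoder.of(WindowedValue.getFullCoder(elementCoder, windowCoder));
    pviews.putPView(view, materializedValues, iterableCoder);
  }
}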
Use of org.apache.beam.runners.spark.util.SideInputBroadcast in project beam by apache.
The class SparkPCollectionView, method createBroadcastHelper.
private SideInputBroadcast createBroadcastHelper(
    PCollectionView<?> view, JavaSparkContext context) {
  Tuple2<byte[], Coder<Iterable<WindowedValue<?>>>> tuple2 = pviews.get(view);
  SideInputBroadcast helper = SideInputBroadcast.create(tuple2._1, tuple2._2);
  String pCollectionName =
      view.getPCollection() != null ? view.getPCollection().getName() : "UNKNOWN";
  LOG.debug(
      "Broadcasting [size={}B] view {} from pCollection {}",
      helper.getBroadcastSizeEstimate(),
      view,
      pCollectionName);
  helper.broadcast(context);
  broadcastHelperMap.put(view, helper);
  return helper;
}
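createBroadcastHelper is private; getSideInputs below reaches it through getPCollectionView(view, context). A plausible shape for that getter is sketched here as a guess, not the Beam source: it lazily broadcasts a view on first use and takes the same class-level lock as the unpersist path in putPView.

// Hypothetical sketch of the lazy getter in front of createBroadcastHelper; only the
// method names getPCollectionView and createBroadcastHelper come from the snippets above.
SideInputBroadcast getPCollectionView(PCollectionView<?> view, JavaSparkContext context) {
  SideInputBroadcast helper = broadcastHelperMap.get(view);
  if (helper == null) {
    // Same lock as the unpersist in putPView, so a stale broadcast for the view
    // cannot be torn down and re-created concurrently.
    synchronized (SparkPCollectionView.class) {
      helper = broadcastHelperMap.get(view); // re-check under the lock
      if (helper == null) {
        helper = createBroadcastHelper(view, context); // serialize and broadcast once
      }
    }
  }
  return helper;
}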
Use of org.apache.beam.runners.spark.util.SideInputBroadcast in project beam by apache.
The class TranslationUtils, method getSideInputs.
/**
* Create SideInputs as Broadcast variables.
*
* @param views The {@link PCollectionView}s.
* @param context The {@link JavaSparkContext}.
* @param pviews The {@link SparkPCollectionView}.
* @return a map of tagged {@link SideInputBroadcast}s and their {@link WindowingStrategy}.
*/
public static Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> getSideInputs(
    List<PCollectionView<?>> views, JavaSparkContext context, SparkPCollectionView pviews) {
  if (views == null) {
    return ImmutableMap.of();
  } else {
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs =
        Maps.newHashMap();
    for (PCollectionView<?> view : views) {
      SideInputBroadcast helper = pviews.getPCollectionView(view, context);
      WindowingStrategy<?, ?> windowingStrategy = view.getWindowingStrategyInternal();
      sideInputs.put(
          view.getTagInternal(),
          KV.<WindowingStrategy<?, ?>, SideInputBroadcast<?>>of(windowingStrategy, helper));
    }
    return sideInputs;
  }
}
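A minimal usage sketch, assuming a driver-side translator that already has a ParDo's views at hand: getSideInputs resolves each view to its broadcast handle, and the resulting map (handles only, no data) can then be captured by Spark functions. The wrapper method and the LOG field are illustrative; getBroadcastSizeEstimate() is taken from createBroadcastHelper above.

// Illustrative driver-side usage; LOG is assumed to be an SLF4J Logger as in the
// snippets above, and broadcastAndLogSideInputs is not a Beam method.
static Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>>
    broadcastAndLogSideInputs(
        List<PCollectionView<?>> views, JavaSparkContext jsc, SparkPCollectionView pviews) {
  Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs =
      TranslationUtils.getSideInputs(views, jsc, pviews);
  for (Map.Entry<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> e :
      sideInputs.entrySet()) {
    LOG.debug(
        "Side input {} broadcast with estimated size {}B",
        e.getKey().getId(),
        e.getValue().getValue().getBroadcastSizeEstimate());
  }
  return sideInputs;
}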
Use of org.apache.beam.runners.spark.util.SideInputBroadcast in project beam by apache.
The class TranslationUtils, method getSideInputs (Iterable overload).
/**
* Create SideInputs as Broadcast variables.
*
* @param views The {@link PCollectionView}s.
* @param context The {@link JavaSparkContext}.
* @param pviews The {@link SparkPCollectionView}.
* @return a map of tagged {@link SideInputBroadcast}s and their {@link WindowingStrategy}.
*/
public static Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> getSideInputs(
    Iterable<PCollectionView<?>> views, JavaSparkContext context, SparkPCollectionView pviews) {
  if (views == null) {
    return ImmutableMap.of();
  } else {
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs =
        Maps.newHashMap();
    for (PCollectionView<?> view : views) {
      SideInputBroadcast helper = pviews.getPCollectionView(view, context);
      WindowingStrategy<?, ?> windowingStrategy = view.getWindowingStrategyInternal();
      sideInputs.put(view.getTagInternal(), KV.of(windowingStrategy, helper));
    }
    return sideInputs;
  }
}
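On the executor side, a consumer of this map looks up the broadcast handle by the view's tag and decodes its contents. The helper below is a sketch under one stated assumption: that SideInputBroadcast exposes a getValue() accessor returning the broadcast value decoded with the coder captured on the driver; that accessor does not appear in the snippets above.

// Hypothetical executor-side reader; readSideInput is illustrative and
// SideInputBroadcast.getValue() is an assumed accessor, not shown above.
static Iterable<WindowedValue<?>> readSideInput(
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    PCollectionView<?> view) {
  KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>> entry =
      sideInputs.get(view.getTagInternal());
  if (entry == null) {
    throw new IllegalArgumentException("No side input registered for view " + view);
  }
  @SuppressWarnings("unchecked")
  Iterable<WindowedValue<?>> values =
      (Iterable<WindowedValue<?>>) entry.getValue().getValue(); // decoded broadcast contents
  return values;
}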