Search in sources :

Example 1 with SideInputBroadcast

use of org.apache.beam.runners.spark.util.SideInputBroadcast in project beam by apache.

the class SparkPCollectionView method putPView.

/**
 * Registers the encoded contents of a view and drops any broadcast helper that was
 * previously recorded for that view.
 *
 * <p>Driver only - during evaluation stage.
 *
 * @param view the view whose contents are being stored
 * @param value the windowed values to encode with {@code coder}
 * @param coder the coder used for encoding; stored next to the encoded bytes
 */
void putPView(
        PCollectionView<?> view,
        Iterable<WindowedValue<?>> value,
        Coder<Iterable<WindowedValue<?>>> coder) {
    byte[] encodedValue = CoderHelpers.toByteArray(value, coder);
    pviews.put(view, new Tuple2<>(encodedValue, coder));

    // Nothing to invalidate when no helper map exists.
    if (broadcastHelperMap == null) {
        return;
    }

    // The lookup, unpersist and removal happen together under the class-wide lock.
    synchronized (SparkPCollectionView.class) {
        SideInputBroadcast<?> staleBroadcast = broadcastHelperMap.get(view);
        if (staleBroadcast != null) {
            // Original note: currently unsynchronized unpersist, if needed can be changed
            // to blocking.
            staleBroadcast.unpersist();
            broadcastHelperMap.remove(view);
        }
    }
}
Also used : SideInputBroadcast(org.apache.beam.runners.spark.util.SideInputBroadcast)

Example 2 with SideInputBroadcast

use of org.apache.beam.runners.spark.util.SideInputBroadcast in project beam by apache.

the class SparkPCollectionView method createBroadcastHelper.

/**
 * Creates a {@link SideInputBroadcast} from the bytes and coder stored for {@code view},
 * broadcasts it through {@code context}, and records it in {@code broadcastHelperMap}.
 *
 * @param view the view to broadcast; its entry is read from {@code pviews}
 * @param context the Spark context passed to {@code broadcast}
 * @return the helper that was broadcast and recorded
 */
private SideInputBroadcast createBroadcastHelper(PCollectionView<?> view, JavaSparkContext context) {
    Tuple2<byte[], Coder<Iterable<WindowedValue<?>>>> encodedView = pviews.get(view);
    SideInputBroadcast broadcast = SideInputBroadcast.create(encodedView._1, encodedView._2);

    // Fall back to a placeholder name when the view has no associated PCollection.
    String pCollectionName;
    if (view.getPCollection() == null) {
        pCollectionName = "UNKNOWN";
    } else {
        pCollectionName = view.getPCollection().getName();
    }

    LOG.debug(
            "Broadcasting [size={}B] view {} from pCollection {}",
            broadcast.getBroadcastSizeEstimate(),
            view,
            pCollectionName);

    broadcast.broadcast(context);
    broadcastHelperMap.put(view, broadcast);
    return broadcast;
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) WindowedValue(org.apache.beam.sdk.util.WindowedValue) SideInputBroadcast(org.apache.beam.runners.spark.util.SideInputBroadcast)

Example 3 with SideInputBroadcast

use of org.apache.beam.runners.spark.util.SideInputBroadcast in project beam by apache.

the class TranslationUtils method getSideInputs.

/**
 * Builds the side-input lookup for a list of views, with each view's
 * {@link SideInputBroadcast} obtained from {@code pviews}.
 *
 * @param views The {@link PCollectionView}s; may be {@code null}.
 * @param context The {@link JavaSparkContext}.
 * @param pviews The {@link SparkPCollectionView}.
 * @return a map from each view's tag to its {@link WindowingStrategy} paired with its
 *     {@link SideInputBroadcast}; an empty {@code ImmutableMap} when {@code views} is
 *     {@code null}.
 */
public static Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> getSideInputs(
        List<PCollectionView<?>> views, JavaSparkContext context, SparkPCollectionView pviews) {
    if (views == null) {
        return ImmutableMap.of();
    }

    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> broadcastsByTag =
            Maps.newHashMap();
    for (PCollectionView<?> view : views) {
        SideInputBroadcast<?> broadcast = pviews.getPCollectionView(view, context);
        WindowingStrategy<?, ?> strategy = view.getWindowingStrategyInternal();
        TupleTag<?> tag = view.getTagInternal();
        broadcastsByTag.put(
                tag, KV.<WindowingStrategy<?, ?>, SideInputBroadcast<?>>of(strategy, broadcast));
    }
    return broadcastsByTag;
}
Also used : SideInputBroadcast(org.apache.beam.runners.spark.util.SideInputBroadcast) TupleTag(org.apache.beam.sdk.values.TupleTag) KV(org.apache.beam.sdk.values.KV)

Example 4 with SideInputBroadcast

use of org.apache.beam.runners.spark.util.SideInputBroadcast in project beam by apache.

the class TranslationUtils method getSideInputs.

/**
 * Builds the side-input lookup for an iterable of views, with each view's
 * {@link SideInputBroadcast} obtained from {@code pviews}.
 *
 * @param views The {@link PCollectionView}s; may be {@code null}.
 * @param context The {@link JavaSparkContext}.
 * @param pviews The {@link SparkPCollectionView}.
 * @return a map from each view's tag to its {@link WindowingStrategy} paired with its
 *     {@link SideInputBroadcast}; an empty {@code ImmutableMap} when {@code views} is
 *     {@code null}.
 */
public static Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> getSideInputs(
        Iterable<PCollectionView<?>> views, JavaSparkContext context, SparkPCollectionView pviews) {
    if (views == null) {
        return ImmutableMap.of();
    }

    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> broadcastsByTag =
            Maps.newHashMap();
    for (PCollectionView<?> view : views) {
        SideInputBroadcast<?> broadcast = pviews.getPCollectionView(view, context);
        WindowingStrategy<?, ?> strategy = view.getWindowingStrategyInternal();
        TupleTag<?> tag = view.getTagInternal();
        broadcastsByTag.put(tag, KV.of(strategy, broadcast));
    }
    return broadcastsByTag;
}
Also used : SideInputBroadcast(org.apache.beam.runners.spark.util.SideInputBroadcast) TupleTag(org.apache.beam.sdk.values.TupleTag) KV(org.apache.beam.sdk.values.KV)

Aggregations

SideInputBroadcast (org.apache.beam.runners.spark.util.SideInputBroadcast): 4, KV (org.apache.beam.sdk.values.KV): 2, TupleTag (org.apache.beam.sdk.values.TupleTag): 2, Coder (org.apache.beam.sdk.coders.Coder): 1, WindowedValue (org.apache.beam.sdk.util.WindowedValue): 1