
Example 36 with SerializablePipelineOptions

Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.

The class BufferingDoFnRunnerTest, method createBufferingDoFnRunner.

private static BufferingDoFnRunner createBufferingDoFnRunner(
        int concurrentCheckpoints,
        List<BufferingDoFnRunner.CheckpointIdentifier> notYetAcknowledgeCheckpoints) throws Exception {
    DoFnRunner doFnRunner = Mockito.mock(DoFnRunner.class);
    OperatorStateBackend operatorStateBackend = Mockito.mock(OperatorStateBackend.class);
    // Set up the union list state holding not-yet-acknowledged checkpoints
    ListState unionListState = Mockito.mock(ListState.class);
    Mockito.when(operatorStateBackend.getUnionListState(Mockito.any())).thenReturn(unionListState);
    Mockito.when(unionListState.get()).thenReturn(notYetAcknowledgeCheckpoints);
    // Set up the buffer list state
    Mockito.when(operatorStateBackend.getListState(Mockito.any())).thenReturn(Mockito.mock(ListState.class));
    return BufferingDoFnRunner.create(
            doFnRunner,
            "stable-input",
            StringUtf8Coder.of(),
            WindowedValue.getFullCoder(VarIntCoder.of(), GlobalWindow.Coder.INSTANCE),
            operatorStateBackend,
            // no keyed state backend is needed for this test
            null,
            concurrentCheckpoints,
            new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
}
Also used: ListState(org.apache.flink.api.common.state.ListState) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) DoFnRunner(org.apache.beam.runners.core.DoFnRunner)
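
The test above wraps FlinkPipelineOptions.defaults() in a SerializablePipelineOptions before handing it to BufferingDoFnRunner.create. The wrapper exists because PipelineOptions is a dynamic proxy and is not Java-serializable on its own; SerializablePipelineOptions serializes the options internally so they can travel inside a serialized operator. A minimal round-trip sketch, assuming only the constructor used above and the get() accessor (the class name and job name here are illustrative, not part of Beam):

import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class SerializableOptionsRoundTrip {
    public static void main(String[] args) {
        PipelineOptions options = PipelineOptionsFactory.create();
        options.setJobName("demo-job");
        // Wrap: the options are serialized inside the wrapper, so the
        // wrapper itself can be shipped as part of a serialized operator.
        SerializablePipelineOptions serializable = new SerializablePipelineOptions(options);
        // Unwrap on the worker side; get() returns the deserialized options.
        PipelineOptions restored = serializable.get();
        System.out.println(restored.getJobName());
    }
}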

Example 37 with SerializablePipelineOptions

Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.

The class StreamingTransformTranslator, method combineGrouped.

private static <K, InputT, OutputT> TransformEvaluator<Combine.GroupedValues<K, InputT, OutputT>> combineGrouped() {
    return new TransformEvaluator<Combine.GroupedValues<K, InputT, OutputT>>() {

        @Override
        public void evaluate(final Combine.GroupedValues<K, InputT, OutputT> transform, EvaluationContext context) {
            // Get the applied combine function.
            PCollection<? extends KV<K, ? extends Iterable<InputT>>> input = context.getInput(transform);
            final WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
            @SuppressWarnings("unchecked")
            final CombineWithContext.CombineFnWithContext<InputT, ?, OutputT> fn =
                    (CombineWithContext.CombineFnWithContext<InputT, ?, OutputT>)
                            CombineFnUtil.toFnWithContext(transform.getFn());
            @SuppressWarnings("unchecked")
            UnboundedDataset<KV<K, Iterable<InputT>>> unboundedDataset =
                    (UnboundedDataset<KV<K, Iterable<InputT>>>) context.borrowDataset(transform);
            JavaDStream<WindowedValue<KV<K, Iterable<InputT>>>> dStream = unboundedDataset.getDStream();
            final SerializablePipelineOptions options = context.getSerializableOptions();
            final SparkPCollectionView pviews = context.getPViews();
            JavaDStream<WindowedValue<KV<K, OutputT>>> outStream = dStream.transform(rdd -> {
                SparkCombineFn<KV<K, InputT>, InputT, ?, OutputT> combineFnWithContext =
                        SparkCombineFn.keyed(
                                fn,
                                options,
                                TranslationUtils.getSideInputs(
                                        transform.getSideInputs(), new JavaSparkContext(rdd.context()), pviews),
                                windowingStrategy);
                return rdd.map(new TranslationUtils.CombineGroupedValues<>(combineFnWithContext));
            });
            context.putDataset(transform, new UnboundedDataset<>(outStream, unboundedDataset.getStreamSources()));
        }

        @Override
        public String toNativeString() {
            return "map(new <fn>())";
        }
    };
}
Also used: Combine(org.apache.beam.sdk.transforms.Combine) WindowedValue(org.apache.beam.sdk.util.WindowedValue) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) CombineWithContext(org.apache.beam.sdk.transforms.CombineWithContext) KV(org.apache.beam.sdk.values.KV) TransformEvaluator(org.apache.beam.runners.spark.translation.TransformEvaluator) TranslationUtils(org.apache.beam.runners.spark.translation.TranslationUtils) EvaluationContext(org.apache.beam.runners.spark.translation.EvaluationContext) SparkPCollectionView(org.apache.beam.runners.spark.translation.SparkPCollectionView)
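
In the translator above, context.getSerializableOptions() returns the already-wrapped options, which the dStream.transform closure captures and ships to Spark executors inside SparkCombineFn. The same pattern works for any serializable function: hold the SerializablePipelineOptions as a field and call get() only on the worker. A minimal sketch of that pattern (OptionsAwareFn and describe() are hypothetical names for illustration, not part of Beam):

import java.io.Serializable;
import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;

class OptionsAwareFn implements Serializable {
    // The wrapper is Serializable, so it can ride along in the closure.
    private final SerializablePipelineOptions options;

    OptionsAwareFn(SerializablePipelineOptions options) {
        this.options = options;
    }

    String describe() {
        // get() deserializes the wrapped PipelineOptions on the worker.
        PipelineOptions opts = options.get();
        return "job=" + opts.getJobName();
    }
}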

Aggregations

SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions): 37 usages
TupleTag (org.apache.beam.sdk.values.TupleTag): 29 usages
WindowedValue (org.apache.beam.sdk.util.WindowedValue): 28 usages
StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue): 24 usages
Test (org.junit.Test): 23 usages
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness): 20 usages
FlinkPipelineOptions (org.apache.beam.runners.flink.FlinkPipelineOptions): 18 usages
StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder): 16 usages
KV (org.apache.beam.sdk.values.KV): 16 usages
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness): 15 usages
ByteBuffer (java.nio.ByteBuffer): 13 usages
Coder (org.apache.beam.sdk.coders.Coder): 12 usages
KvCoder (org.apache.beam.sdk.coders.KvCoder): 12 usages
HashMap (java.util.HashMap): 11 usages
Instant (org.joda.time.Instant): 11 usages
CoderTypeInformation (org.apache.beam.runners.flink.translation.types.CoderTypeInformation): 10 usages
VarIntCoder (org.apache.beam.sdk.coders.VarIntCoder): 10 usages
DoFn (org.apache.beam.sdk.transforms.DoFn): 10 usages
IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 10 usages
WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy): 10 usages