Search in sources :

Example 46 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class TransformTranslator method combineGlobally.

/**
 * Builds the evaluator that translates {@code Combine.Globally} for a bounded dataset.
 *
 * <p>The input RDD is combined through {@code GroupCombineFunctions.combineGlobally}. When that
 * yields accumulators, their extracted outputs are serialized with the windowed output coder,
 * parallelized and decoded again. When it yields nothing, the result is either the combine
 * function's default value (if the transform asks for one) or an empty RDD.
 */
private static <InputT, AccumT, OutputT> TransformEvaluator<Combine.Globally<InputT, OutputT>> combineGlobally() {
    return new TransformEvaluator<Combine.Globally<InputT, OutputT>>() {

        @Override
        public void evaluate(Combine.Globally<InputT, OutputT> transform, EvaluationContext context) {
            // Coders and windowing information taken from the transform's input and output.
            final PCollection<InputT> inputCollection = context.getInput(transform);
            final Coder<InputT> inputCoder = context.getInput(transform).getCoder();
            final Coder<OutputT> outputCoder = context.getOutput(transform).getCoder();
            final WindowingStrategy<?, ?> strategy = inputCollection.getWindowingStrategy();

            @SuppressWarnings("unchecked")
            final CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT> contextualFn =
                    (CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT>) CombineFnUtil.toFnWithContext(transform.getFn());
            final WindowedValue.FullWindowedValueCoder<OutputT> windowedOutputCoder =
                    WindowedValue.FullWindowedValueCoder.of(outputCoder, strategy.getWindowFn().windowCoder());
            final SparkRuntimeContext sparkRuntime = context.getRuntimeContext();
            final boolean insertDefault = transform.isInsertDefault();
            final SparkGlobalCombineFn<InputT, AccumT, OutputT> globalCombineFn =
                    new SparkGlobalCombineFn<>(
                            contextualFn,
                            sparkRuntime,
                            TranslationUtils.getSideInputs(transform.getSideInputs(), context),
                            strategy);

            // The accumulator coder is derived from the combine function and the input coder.
            final Coder<AccumT> accumulatorCoder;
            try {
                accumulatorCoder = contextualFn.getAccumulatorCoder(sparkRuntime.getCoderRegistry(), inputCoder);
            } catch (CannotProvideCoderException e) {
                throw new IllegalStateException("Could not determine coder for accumulator", e);
            }

            @SuppressWarnings("unchecked")
            JavaRDD<WindowedValue<InputT>> sourceRdd = ((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD();
            Optional<Iterable<WindowedValue<AccumT>>> accumulated =
                    GroupCombineFunctions.combineGlobally(sourceRdd, globalCombineFn, inputCoder, accumulatorCoder, strategy);

            JavaRDD<WindowedValue<OutputT>> resultRdd;
            if (!accumulated.isPresent()) {
                // Nothing was accumulated: the input RDD was empty, and Spark does not execute
                // anything on empty RDDs, so the result has to be produced here explicitly.
                JavaSparkContext sparkContext = new JavaSparkContext(sourceRdd.context());
                if (insertDefault) {
                    OutputT fallback = contextualFn.defaultValue();
                    resultRdd = sparkContext
                            .parallelize(Lists.newArrayList(CoderHelpers.toByteArray(fallback, outputCoder)))
                            .map(CoderHelpers.fromByteFunction(outputCoder))
                            .map(WindowingHelpers.<OutputT>windowFunction());
                } else {
                    resultRdd = sparkContext.emptyRDD();
                }
            } else {
                // Round-trip the extracted outputs through their byte encoding before parallelizing.
                Iterable<WindowedValue<OutputT>> extracted = globalCombineFn.extractOutput(accumulated.get());
                resultRdd = context.getSparkContext()
                        .parallelize(CoderHelpers.toByteArrays(extracted, windowedOutputCoder))
                        .map(CoderHelpers.fromByteFunction(windowedOutputCoder));
            }
            context.putDataset(transform, new BoundedDataset<>(resultRdd));
        }

        @Override
        public String toNativeString() {
            return "aggregate(..., new <fn>(), ...)";
        }
    };
}
Also used : FluentIterable(com.google.common.collect.FluentIterable) Combine(org.apache.beam.sdk.transforms.Combine) WindowedValue(org.apache.beam.sdk.util.WindowedValue) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) CombineWithContext(org.apache.beam.sdk.transforms.CombineWithContext) CannotProvideCoderException(org.apache.beam.sdk.coders.CannotProvideCoderException)

Example 47 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class TransformTranslator method flattenPColl.

/**
 * Builds the evaluator that translates {@code Flatten.PCollections} into a union of the RDDs
 * backing each input; with no inputs at all the result is an empty RDD.
 */
private static <T> TransformEvaluator<Flatten.PCollections<T>> flattenPColl() {
    return new TransformEvaluator<Flatten.PCollections<T>>() {

        @SuppressWarnings("unchecked")
        @Override
        public void evaluate(Flatten.PCollections<T> transform, EvaluationContext context) {
            Collection<PValue> inputs = context.getInputs(transform).values();
            JavaRDD<WindowedValue<T>> flattened;
            if (!inputs.isEmpty()) {
                // Collect the RDD behind every input, rejecting anything that is not a PCollection.
                JavaRDD<WindowedValue<T>>[] parts = new JavaRDD[inputs.size()];
                int slot = 0;
                for (PValue value : inputs) {
                    checkArgument(
                            value instanceof PCollection,
                            "Flatten had non-PCollection value in input: %s of type %s",
                            value,
                            value.getClass().getSimpleName());
                    parts[slot++] = ((BoundedDataset<T>) context.borrowDataset(value)).getRDD();
                }
                flattened = context.getSparkContext().union(parts);
            } else {
                flattened = context.getSparkContext().emptyRDD();
            }
            context.putDataset(transform, new BoundedDataset<>(flattened));
        }

        @Override
        public String toNativeString() {
            return "sparkContext.union(...)";
        }
    };
}
Also used : Flatten(org.apache.beam.sdk.transforms.Flatten) PValue(org.apache.beam.sdk.values.PValue) JavaRDD(org.apache.spark.api.java.JavaRDD) PCollection(org.apache.beam.sdk.values.PCollection) WindowedValue(org.apache.beam.sdk.util.WindowedValue)

Example 48 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class ProcessBundleHandlerTest method testCreatingAndProcessingBeamFnDataReadRunner.

/**
 * Registers a data-input primitive transform with the handler, then checks that the start
 * function asks the data client for an inbound consumer, that elements handed to that consumer
 * reach the pre-existing output consumer, and that the finish function runs once the inbound
 * future has completed.
 */
@Test
public void testCreatingAndProcessingBeamFnDataReadRunner() throws Exception {
    Map<String, Message> registry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
    String bundleId = "57L";
    String primitiveTransformId = "100L";
    long outputId = 101L;

    // Downstream consumer already registered for the transform's single output.
    List<WindowedValue<String>> received = new ArrayList<>();
    BeamFnApi.Target outputTarget = BeamFnApi.Target.newBuilder()
            .setPrimitiveTransformReference(primitiveTransformId)
            .setName(Long.toString(outputId))
            .build();
    Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers =
            ImmutableMultimap.of(outputTarget, received::add);
    Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> newConsumers = HashMultimap.create();
    List<ThrowingRunnable> startFunctions = new ArrayList<>();
    List<ThrowingRunnable> finishFunctions = new ArrayList<>();

    BeamFnApi.FunctionSpec functionSpec = BeamFnApi.FunctionSpec.newBuilder()
            .setId("1L")
            .setUrn(DATA_INPUT_URN)
            .setData(Any.pack(REMOTE_PORT))
            .build();
    BeamFnApi.PCollection outputCollection = BeamFnApi.PCollection.newBuilder()
            .setCoderReference(STRING_CODER_SPEC_ID)
            .build();
    BeamFnApi.PrimitiveTransform primitiveTransform = BeamFnApi.PrimitiveTransform.newBuilder()
            .setId(primitiveTransformId)
            .setFunctionSpec(functionSpec)
            .putInputs("input", BeamFnApi.Target.List.getDefaultInstance())
            .putOutputs(Long.toString(outputId), outputCollection)
            .build();

    ProcessBundleHandler handler =
            new ProcessBundleHandler(PipelineOptionsFactory.create(), registry::get, beamFnDataClient);
    handler.createConsumersForPrimitiveTransform(
            primitiveTransform,
            Suppliers.ofInstance(bundleId)::get,
            existingConsumers::get,
            newConsumers::put,
            startFunctions::add,
            finishFunctions::add);
    // Creating the consumers alone must not touch the data client.
    verifyZeroInteractions(beamFnDataClient);

    CompletableFuture<Void> inboundDone = new CompletableFuture<>();
    when(beamFnDataClient.forInboundConsumer(any(), any(), any(), any())).thenReturn(inboundDone);
    Iterables.getOnlyElement(startFunctions).run();

    BeamFnApi.Target expectedInboundTarget = BeamFnApi.Target.newBuilder()
            .setPrimitiveTransformReference(primitiveTransformId)
            .setName("input")
            .build();
    verify(beamFnDataClient).forInboundConsumer(
            eq(REMOTE_PORT.getApiServiceDescriptor()),
            eq(KV.of(bundleId, expectedInboundTarget)),
            eq(STRING_CODER),
            consumerCaptor.capture());

    // Whatever arrives on the captured inbound consumer is forwarded to the output consumer.
    consumerCaptor.getValue().accept(valueInGlobalWindow("TestValue"));
    assertThat(received, contains(valueInGlobalWindow("TestValue")));
    received.clear();
    assertThat(newConsumers.keySet(), empty());

    inboundDone.complete(null);
    Iterables.getOnlyElement(finishFunctions).run();
    verifyNoMoreInteractions(beamFnDataClient);
}
Also used : Message(com.google.protobuf.Message) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) ThrowingRunnable(org.apache.beam.fn.harness.fn.ThrowingRunnable) CompletableFuture(java.util.concurrent.CompletableFuture) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BeamFnApi(org.apache.beam.fn.v1.BeamFnApi) CloseableThrowingConsumer(org.apache.beam.fn.harness.fn.CloseableThrowingConsumer) ThrowingConsumer(org.apache.beam.fn.harness.fn.ThrowingConsumer) Test(org.junit.Test)

Example 49 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class ProcessBundleHandlerTest method testCreatingAndProcessingSource.

/**
 * Registers a Java-source primitive transform (a serialized {@code CountingSource.upTo(3)}) with
 * the handler and checks both ways of running it: through the start function, and by passing a
 * source element to the consumer registered for the transform's input target.
 */
@Test
public void testCreatingAndProcessingSource() throws Exception {
    Map<String, Message> fnApiRegistry = ImmutableMap.of(LONG_CODER_SPEC_ID, LONG_CODER_SPEC);
    String primitiveTransformId = "100L";
    long outputId = 101L;
    BeamFnApi.Target inputTarget = BeamFnApi.Target.newBuilder().setPrimitiveTransformReference("1000L").setName("inputTarget").build();
    // NOTE(review): the element type is declared as String, yet the assertions below expect
    // Long-valued windowed values; presumably this only works through erasure - confirm and
    // consider retyping to WindowedValue<Long>.
    List<WindowedValue<String>> outputValues = new ArrayList<>();
    BeamFnApi.Target outputTarget = BeamFnApi.Target.newBuilder().setPrimitiveTransformReference(primitiveTransformId).setName(Long.toString(outputId)).build();
    Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers = ImmutableMultimap.of(outputTarget, outputValues::add);
    Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<BoundedSource<Long>>>> newConsumers = HashMultimap.create();
    List<ThrowingRunnable> startFunctions = new ArrayList<>();
    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
    BeamFnApi.FunctionSpec functionSpec = BeamFnApi.FunctionSpec.newBuilder().setId("1L").setUrn(JAVA_SOURCE_URN).setData(Any.pack(BytesValue.newBuilder().setValue(ByteString.copyFrom(SerializableUtils.serializeToByteArray(CountingSource.upTo(3)))).build())).build();
    BeamFnApi.PrimitiveTransform primitiveTransform = BeamFnApi.PrimitiveTransform.newBuilder().setId(primitiveTransformId).setFunctionSpec(functionSpec).putInputs("input", BeamFnApi.Target.List.newBuilder().addTarget(inputTarget).build()).putOutputs(Long.toString(outputId), BeamFnApi.PCollection.newBuilder().setCoderReference(LONG_CODER_SPEC_ID).build()).build();
    ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);
    handler.createConsumersForPrimitiveTransform(primitiveTransform, Suppliers.ofInstance("57L")::get, existingConsumers::get, newConsumers::put, startFunctions::add, finishFunctions::add);
    // This is testing a deprecated way of running sources and should be removed
    // once all source definitions are instead propagated along the input edge.
    Iterables.getOnlyElement(startFunctions).run();
    assertThat(outputValues, contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
    outputValues.clear();
    // Check that when passing a source along as an input, the source is processed.
    // JUnit's assertEquals takes (expected, actual); keeping that order makes a failure
    // message label the two values correctly.
    assertEquals(ImmutableSet.of(inputTarget), newConsumers.keySet());
    Iterables.getOnlyElement(newConsumers.get(inputTarget)).accept(valueInGlobalWindow(CountingSource.upTo(2)));
    assertThat(outputValues, contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L)));
    // No finish function is expected to have been registered for this transform.
    assertThat(finishFunctions, empty());
}
Also used : Message(com.google.protobuf.Message) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) ThrowingRunnable(org.apache.beam.fn.harness.fn.ThrowingRunnable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedSource(org.apache.beam.sdk.io.BoundedSource) BeamFnApi(org.apache.beam.fn.v1.BeamFnApi) CloseableThrowingConsumer(org.apache.beam.fn.harness.fn.CloseableThrowingConsumer) ThrowingConsumer(org.apache.beam.fn.harness.fn.ThrowingConsumer) Test(org.junit.Test)

Example 50 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class ProcessBundleHandlerTest method testCreatingAndProcessingBeamFnDataWriteRunner.

/**
 * Registers a data-output primitive transform with the handler, then checks that the start
 * function asks the data client for an outbound consumer, that elements sent to the consumer
 * registered for the input target reach that outbound consumer, and that the finish function
 * closes it.
 */
@Test
public void testCreatingAndProcessingBeamFnDataWriteRunner() throws Exception {
    Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
    String bundleId = "57L";
    String primitiveTransformId = "100L";
    long outputId = 101L;
    BeamFnApi.Target inputTarget = BeamFnApi.Target.newBuilder().setPrimitiveTransformReference("1000L").setName("inputTarget").build();
    Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers = ImmutableMultimap.of();
    Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> newConsumers = HashMultimap.create();
    List<ThrowingRunnable> startFunctions = new ArrayList<>();
    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
    BeamFnApi.FunctionSpec functionSpec = BeamFnApi.FunctionSpec.newBuilder().setId("1L").setUrn(DATA_OUTPUT_URN).setData(Any.pack(REMOTE_PORT)).build();
    BeamFnApi.PrimitiveTransform primitiveTransform = BeamFnApi.PrimitiveTransform.newBuilder().setId(primitiveTransformId).setFunctionSpec(functionSpec).putInputs("input", BeamFnApi.Target.List.newBuilder().addTarget(inputTarget).build()).putOutputs(Long.toString(outputId), BeamFnApi.PCollection.newBuilder().setCoderReference(STRING_CODER_SPEC_ID).build()).build();
    ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);
    handler.createConsumersForPrimitiveTransform(primitiveTransform, Suppliers.ofInstance(bundleId)::get, existingConsumers::get, newConsumers::put, startFunctions::add, finishFunctions::add);
    // Creating the consumers alone must not touch the data client.
    verifyZeroInteractions(beamFnDataClient);
    // Stand-in outbound consumer that records what it receives and whether it was closed.
    List<WindowedValue<String>> outputValues = new ArrayList<>();
    AtomicBoolean wasCloseCalled = new AtomicBoolean();
    CloseableThrowingConsumer<WindowedValue<String>> outputConsumer = new CloseableThrowingConsumer<WindowedValue<String>>() {

        @Override
        public void close() throws Exception {
            wasCloseCalled.set(true);
        }

        @Override
        public void accept(WindowedValue<String> t) throws Exception {
            outputValues.add(t);
        }
    };
    when(beamFnDataClient.forOutboundConsumer(any(), any(), Matchers.<Coder<WindowedValue<String>>>any())).thenReturn(outputConsumer);
    Iterables.getOnlyElement(startFunctions).run();
    verify(beamFnDataClient).forOutboundConsumer(eq(REMOTE_PORT.getApiServiceDescriptor()), eq(KV.of(bundleId, BeamFnApi.Target.newBuilder().setPrimitiveTransformReference(primitiveTransformId).setName(Long.toString(outputId)).build())), eq(STRING_CODER));
    // JUnit's assertEquals takes (expected, actual); keeping that order makes a failure
    // message label the two values correctly.
    assertEquals(ImmutableSet.of(inputTarget), newConsumers.keySet());
    Iterables.getOnlyElement(newConsumers.get(inputTarget)).accept(valueInGlobalWindow("TestValue"));
    assertThat(outputValues, contains(valueInGlobalWindow("TestValue")));
    outputValues.clear();
    // The outbound consumer is closed by the finish function, not before.
    assertFalse(wasCloseCalled.get());
    Iterables.getOnlyElement(finishFunctions).run();
    assertTrue(wasCloseCalled.get());
    verifyNoMoreInteractions(beamFnDataClient);
}
Also used : CloseableThrowingConsumer(org.apache.beam.fn.harness.fn.CloseableThrowingConsumer) Message(com.google.protobuf.Message) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) ThrowingRunnable(org.apache.beam.fn.harness.fn.ThrowingRunnable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BeamFnApi(org.apache.beam.fn.v1.BeamFnApi) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CloseableThrowingConsumer(org.apache.beam.fn.harness.fn.CloseableThrowingConsumer) ThrowingConsumer(org.apache.beam.fn.harness.fn.ThrowingConsumer) Test(org.junit.Test)

Aggregations

- WindowedValue (org.apache.beam.sdk.util.WindowedValue): 89
- Test (org.junit.Test): 53
- Instant (org.joda.time.Instant): 47
- IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 36
- KV (org.apache.beam.sdk.values.KV): 19
- ArrayList (java.util.ArrayList): 17
- WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue): 17
- WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue): 17
- BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 17
- Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable): 16
- TupleTag (org.apache.beam.sdk.values.TupleTag): 13
- JavaRDD (org.apache.spark.api.java.JavaRDD): 8
- ByteString (com.google.protobuf.ByteString): 7
- BeamFnApi (org.apache.beam.fn.v1.BeamFnApi): 7
- ThrowingConsumer (org.apache.beam.fn.harness.fn.ThrowingConsumer): 6
- IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord): 6
- TimestampCombiner (org.apache.beam.sdk.transforms.windowing.TimestampCombiner): 6
- CloseableThrowingConsumer (org.apache.beam.fn.harness.fn.CloseableThrowingConsumer): 5
- MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl): 5
- EvaluationContext (org.apache.beam.runners.spark.translation.EvaluationContext): 5