Search in sources :

Example 6 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

The method get of the class SparkSideInputReader.

/**
 * Reads the side input identified by {@code view} for the side-input window that the given
 * main-input {@code window} maps to.
 *
 * <p>All broadcast values registered under the view's tag are scanned, the ones whose window set
 * contains the mapped side-input window are unwrapped, and the resulting values are handed to the
 * view's {@link ViewFn} using the materialization the view asks for (iterable or multimap).
 *
 * @param view the side input view to read; rejected by {@code checkNotNull} when null
 * @param window the main-input window, translated through the view's window mapping function
 * @return whatever the view's {@link ViewFn} produces for the matching elements
 * @throws IllegalStateException if the view requests a materialization URN other than the
 *     iterable or multimap ones
 */
@Override
@Nullable
public <T> T get(PCollectionView<T> view, BoundedWindow window) {
    // --- validate sideInput.
    checkNotNull(view, "The PCollectionView passed to sideInput cannot be null ");
    KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>> windowedBroadcastHelper = sideInputs.get(view.getTagInternal());
    checkNotNull(windowedBroadcastHelper, "SideInput for view " + view + " is not available.");
    // --- sideInput window
    final BoundedWindow sideInputWindow = view.getWindowMappingFn().getSideInputWindow(window);
    // --- match the appropriate sideInput window.
    // a tag will point to all matching sideInputs, that is all windows.
    // now that we've obtained the appropriate sideInputWindow, all that's left is to filter by it.
    Iterable<WindowedValue<?>> availableSideInputs = (Iterable<WindowedValue<?>>) windowedBroadcastHelper.getValue().getValue();
    // Single pass: drop null candidates and candidates from other windows, then unwrap the values.
    // (No intermediate list is materialized between the filter and the map.)
    Iterable<?> sideInputForWindow = StreamSupport.stream(availableSideInputs.spliterator(), false)
        .filter(sideInputCandidate -> sideInputCandidate != null && Iterables.contains(sideInputCandidate.getWindows(), sideInputWindow))
        .map(WindowedValue::getValue)
        .collect(Collectors.toList());
    // Resolve the URN once; it drives the dispatch and, on failure, the error message.
    final String materializationUrn = view.getViewFn().getMaterialization().getUrn();
    switch(materializationUrn) {
        case Materializations.ITERABLE_MATERIALIZATION_URN:
            {
                ViewFn<IterableView, T> viewFn = (ViewFn<IterableView, T>) view.getViewFn();
                return viewFn.apply(() -> sideInputForWindow);
            }
        case Materializations.MULTIMAP_MATERIALIZATION_URN:
            {
                ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn();
                // The view's coder is cast to a KvCoder here to obtain the key coder for the multimap.
                Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
                return viewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) sideInputForWindow));
            }
        default:
            throw new IllegalStateException(String.format("Unknown side input materialization format requested '%s'", materializationUrn));
    }
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) IterableView(org.apache.beam.sdk.transforms.Materializations.IterableView) MultimapView(org.apache.beam.sdk.transforms.Materializations.MultimapView) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) ViewFn(org.apache.beam.sdk.transforms.ViewFn) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 7 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

The method getMultimapSideInput of the class Twister2SideInputReader.

/**
 * Builds the multimap side input for {@code view} and returns the entry for {@code window}.
 *
 * <p>A view is materialized for every window returned by {@code getPartitionedElements(view)};
 * the one keyed by {@code window} is returned. When no such entry exists (or it is null), the
 * view function is applied to an empty multimap instead.
 *
 * @param view the side input view whose view function and coder are used
 * @param window the window whose materialized view is requested
 * @return the view for {@code window}, or the view of an empty multimap if none was built
 */
private <T> T getMultimapSideInput(PCollectionView<T> view, BoundedWindow window) {
    Map<BoundedWindow, List<WindowedValue<?>>> elementsByWindow = getPartitionedElements(view);
    Map<BoundedWindow, T> viewsByWindow = new HashMap<>();
    ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn();
    // NOTE(review): views are built for every window although only one is returned, and the key
    // coder lookup does not depend on the loop variable — candidates for a follow-up cleanup.
    for (Map.Entry<BoundedWindow, List<WindowedValue<?>>> entry : elementsByWindow.entrySet()) {
        Coder keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
        // Strip the WindowedValue wrapper so only the raw elements reach the multimap view.
        List<?> unwrappedValues = entry.getValue().stream()
            .map(WindowedValue::getValue)
            .collect(Collectors.toList());
        T viewForWindow = viewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) unwrappedValues));
        viewsByWindow.put(entry.getKey(), viewForWindow);
    }
    T requested = viewsByWindow.get(window);
    if (requested != null) {
        return requested;
    }
    // Nothing available for the requested window: fall back to an empty multimap view.
    return viewFn.apply(InMemoryMultimapSideInputView.empty());
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) MultimapView(org.apache.beam.sdk.transforms.Materializations.MultimapView) KvCoder(org.apache.beam.sdk.coders.KvCoder) ViewFn(org.apache.beam.sdk.transforms.ViewFn) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 8 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

The method translateParDo of the class ParDoTranslation.

/**
 * Translate a ParDo.
 *
 * <p>Reads the {@link DoFnSignature} of the ParDo's {@code DoFn} and builds the payload through
 * {@code payloadForParDoLike}, supplying an anonymous {@code ParDoLike} that answers each question
 * (side inputs, state specs, timer family specs, flags) from that signature.
 *
 * @param parDo the multi-output ParDo to translate
 * @param mainInput the main input collection; its windowing strategy supplies the window coder
 *     and, when the DoFn uses state or timers, its coder must be a {@link KvCoder}
 * @param doFnSchemaInformation schema information forwarded to the DoFn translation
 * @param pipeline the pipeline whose coder registry is used for splittable DoFn coders
 * @param components the components into which coders are registered
 * @return the payload produced by {@code payloadForParDoLike}
 * @throws IOException declared by this method; it is also declared by the state-spec translation
 *     callback below
 */
public static <InputT> ParDoPayload translateParDo(ParDo.MultiOutput<InputT, ?> parDo, PCollection<InputT> mainInput, DoFnSchemaInformation doFnSchemaInformation, Pipeline pipeline, SdkComponents components) throws IOException {
    final DoFn<?, ?> doFn = parDo.getFn();
    final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
    // Splittable DoFns register a KV coder of (restriction, watermark estimator state);
    // everything else reports an empty coder id.
    final String restrictionCoderId;
    if (signature.processElement().isSplittable()) {
        DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(doFn);
        final Coder<?> restrictionAndWatermarkStateCoder = KvCoder.of(doFnInvoker.invokeGetRestrictionCoder(pipeline.getCoderRegistry()), doFnInvoker.invokeGetWatermarkEstimatorStateCoder(pipeline.getCoderRegistry()));
        restrictionCoderId = components.registerCoder(restrictionAndWatermarkStateCoder);
    } else {
        restrictionCoderId = "";
    }
    Coder<BoundedWindow> windowCoder = (Coder<BoundedWindow>) mainInput.getWindowingStrategy().getWindowFn().windowCoder();
    // The key coder is only resolved for DoFns that use state or timers; otherwise it stays null.
    Coder<?> keyCoder;
    if (signature.usesState() || signature.usesTimers()) {
        checkArgument(mainInput.getCoder() instanceof KvCoder, "DoFn's that use state or timers must have an input PCollection with a KvCoder but received %s", mainInput.getCoder());
        keyCoder = ((KvCoder) mainInput.getCoder()).getKeyCoder();
    } else {
        keyCoder = null;
    }
    return payloadForParDoLike(new ParDoLike() {

        @Override
        public FunctionSpec translateDoFn(SdkComponents newComponents) {
            return ParDoTranslation.translateDoFn(parDo.getFn(), parDo.getMainOutputTag(), parDo.getSideInputs(), doFnSchemaInformation, newComponents);
        }

        @Override
        public Map<String, SideInput> translateSideInputs(SdkComponents components) {
            // Keyed by the id of each side input's internal tag.
            Map<String, SideInput> sideInputs = new HashMap<>();
            for (PCollectionView<?> sideInput : parDo.getSideInputs().values()) {
                sideInputs.put(sideInput.getTagInternal().getId(), translateView(sideInput, components));
            }
            return sideInputs;
        }

        @Override
        public Map<String, RunnerApi.StateSpec> translateStateSpecs(SdkComponents components) throws IOException {
            Map<String, RunnerApi.StateSpec> stateSpecs = new HashMap<>();
            for (Map.Entry<String, StateDeclaration> state : signature.stateDeclarations().entrySet()) {
                RunnerApi.StateSpec spec = translateStateSpec(getStateSpecOrThrow(state.getValue(), doFn), components);
                stateSpecs.put(state.getKey(), spec);
            }
            return stateSpecs;
        }

        @Override
        public ParDoLikeTimerFamilySpecs translateTimerFamilySpecs(SdkComponents newComponents) {
            Map<String, RunnerApi.TimerFamilySpec> timerFamilySpecs = new HashMap<>();
            for (Map.Entry<String, TimerDeclaration> timer : signature.timerDeclarations().entrySet()) {
                RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(getTimerSpecOrThrow(timer.getValue(), doFn), newComponents, keyCoder, windowCoder);
                timerFamilySpecs.put(timer.getKey(), spec);
            }
            for (Map.Entry<String, DoFnSignature.TimerFamilyDeclaration> timerFamily : signature.timerFamilyDeclarations().entrySet()) {
                RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(DoFnSignatures.getTimerFamilySpecOrThrow(timerFamily.getValue(), doFn), newComponents, keyCoder, windowCoder);
                timerFamilySpecs.put(timerFamily.getKey(), spec);
            }
            String onWindowExpirationTimerFamilySpec = null;
            if (signature.onWindowExpiration() != null) {
                // Register the timer coder in the components handed to this callback, like the
                // timer and timer-family specs above, rather than in the captured outer parameter.
                RunnerApi.TimerFamilySpec spec = RunnerApi.TimerFamilySpec.newBuilder().setTimeDomain(translateTimeDomain(TimeDomain.EVENT_TIME)).setTimerFamilyCoderId(registerCoderOrThrow(newComponents, Timer.Coder.of(keyCoder, windowCoder))).build();
                // Pick the first "onWindowExpiration<i>" id that is not already used as a key.
                for (int i = 0; i < Integer.MAX_VALUE; ++i) {
                    onWindowExpirationTimerFamilySpec = "onWindowExpiration" + i;
                    if (!timerFamilySpecs.containsKey(onWindowExpirationTimerFamilySpec)) {
                        break;
                    }
                }
                timerFamilySpecs.put(onWindowExpirationTimerFamilySpec, spec);
            }
            return ParDoLikeTimerFamilySpecs.create(timerFamilySpecs, onWindowExpirationTimerFamilySpec);
        }

        @Override
        public boolean isStateful() {
            // Any state, timer, or timer-family declaration, or an onWindowExpiration method.
            return !signature.stateDeclarations().isEmpty() || !signature.timerDeclarations().isEmpty() || !signature.timerFamilyDeclarations().isEmpty() || signature.onWindowExpiration() != null;
        }

        @Override
        public boolean isSplittable() {
            return signature.processElement().isSplittable();
        }

        @Override
        public boolean isRequiresStableInput() {
            return signature.processElement().requiresStableInput();
        }

        @Override
        public boolean isRequiresTimeSortedInput() {
            return signature.processElement().requiresTimeSortedInput();
        }

        @Override
        public boolean requestsFinalization() {
            // True when startBundle, processElement, or finishBundle takes a bundle finalizer parameter.
            return (signature.startBundle() != null && signature.startBundle().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.processElement() != null && signature.processElement().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.finishBundle() != null && signature.finishBundle().extraParameters().contains(Parameter.bundleFinalizer()));
        }

        @Override
        public String translateRestrictionCoderId(SdkComponents newComponents) {
            // Computed once up front, before the ParDoLike was created.
            return restrictionCoderId;
        }
    }, components);
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) KvCoder(org.apache.beam.sdk.coders.KvCoder) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) IOException(java.io.IOException) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) StateSpec(org.apache.beam.sdk.state.StateSpec) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Map(java.util.Map) HashMap(java.util.HashMap) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 9 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

The method testCreateWithCombinerAndStreamingSideInputs of the class PartialGroupByKeyParDoFnsTest.

/**
 * With streaming enabled, a combiner supplied, and a side input reader that reports itself as
 * non-empty, {@code PartialGroupByKeyParDoFns.create} is expected to return a
 * {@code StreamingSideInputPGBKParDoFn}.
 */
@Test
public void testCreateWithCombinerAndStreamingSideInputs() throws Exception {
    StreamingOptions streamingOptions = PipelineOptionsFactory.as(StreamingOptions.class);
    streamingOptions.setStreaming(true);

    KvCoder<String, Integer> inputCoder = KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of());
    TestOutputReceiver outputReceiver = new TestOutputReceiver(
        new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(inputCoder)),
        counterSet,
        NameContextsForTests.nameContextForTest());

    // Stub the collaborators: a non-empty side input reader and state that reads as an empty map.
    when(mockSideInputReader.isEmpty()).thenReturn(false);
    when(mockStreamingStepContext.stateInternals()).thenReturn((StateInternals) mockStateInternals);
    when(mockStateInternals.state(Matchers.<StateNamespace>any(), Matchers.<StateTag>any())).thenReturn(mockState);
    when(mockState.read()).thenReturn(Maps.newHashMap());

    ParDoFn created = PartialGroupByKeyParDoFns.create(
        streamingOptions,
        inputCoder,
        AppliedCombineFn.withInputCoder(
            Sum.ofIntegers(),
            CoderRegistry.createDefault(),
            inputCoder,
            ImmutableList.<PCollectionView<?>>of(),
            WindowingStrategy.globalDefault()),
        mockSideInputReader,
        outputReceiver,
        mockStreamingStepContext);

    assertTrue(created instanceof StreamingSideInputPGBKParDoFn);
}
Also used : ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) PCollectionView(org.apache.beam.sdk.values.PCollectionView) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) Test(org.junit.Test)

Example 10 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

The method kvCoderComponentsToConstructor of the class ModelCodersTest.

/**
 * Round-trips a KV coder proto: extracts its key/value component ids with
 * {@code ModelCoders.getKvCoderComponents} and checks that {@code ModelCoders.kvCoder} rebuilds
 * a coder equal to the one produced by {@code CoderTranslation.toProto}.
 */
@Test
public void kvCoderComponentsToConstructor() throws IOException {
    IterableCoder<Long> valuesCoder = IterableCoder.of(LengthPrefixCoder.of(VarLongCoder.of()));
    KvCoder<byte[], Iterable<Long>> javaCoder = KvCoder.of(ByteArrayCoder.of(), valuesCoder);

    MessageWithComponents translated = CoderTranslation.toProto(javaCoder);
    KvCoderComponents parts = ModelCoders.getKvCoderComponents(translated.getCoder());

    // Rebuild the coder proto purely from the extracted component ids.
    Coder rebuilt = ModelCoders.kvCoder(parts.keyCoderId(), parts.valueCoderId());
    assertThat(rebuilt, equalTo(translated.getCoder()));
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.model.pipeline.v1.RunnerApi.Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) IntervalWindowCoder(org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder) LengthPrefixCoder(org.apache.beam.sdk.coders.LengthPrefixCoder) MessageWithComponents(org.apache.beam.model.pipeline.v1.RunnerApi.MessageWithComponents) KvCoderComponents(org.apache.beam.runners.core.construction.ModelCoders.KvCoderComponents) Test(org.junit.Test)

Aggregations

KvCoder (org.apache.beam.sdk.coders.KvCoder)44 Coder (org.apache.beam.sdk.coders.Coder)26 WindowedValue (org.apache.beam.sdk.util.WindowedValue)25 KV (org.apache.beam.sdk.values.KV)21 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)20 Map (java.util.Map)17 List (java.util.List)16 ArrayList (java.util.ArrayList)15 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)15 IOException (java.io.IOException)14 HashMap (java.util.HashMap)14 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)13 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)11 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)10 VoidCoder (org.apache.beam.sdk.coders.VoidCoder)10 PCollectionView (org.apache.beam.sdk.values.PCollectionView)10 Test (org.junit.Test)10 WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder)8 ViewFn (org.apache.beam.sdk.transforms.ViewFn)7 PCollection (org.apache.beam.sdk.values.PCollection)7