Search in sources :

Example 6 with PCollectionView

use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.

the class SideInputContainerTest method getAfterWriteReturnsPaneInWindow.

/**
 * Writes two key/value elements to {@code mapView} in {@code FIRST_WINDOW} and checks that a
 * reader created afterwards returns a map holding exactly those two entries for that window.
 */
@Test
public void getAfterWriteReturnsPaneInWindow() throws Exception {
    WindowedValue<KV<String, Integer>> firstElement =
        WindowedValue.of(
            KV.of("one", 1), new Instant(1L), FIRST_WINDOW, PaneInfo.ON_TIME_AND_ONLY_FIRING);
    WindowedValue<KV<String, Integer>> secondElement =
        WindowedValue.of(
            KV.of("two", 2), new Instant(20L), FIRST_WINDOW, PaneInfo.ON_TIME_AND_ONLY_FIRING);

    // Both elements go into the view in a single write.
    ImmutableList<WindowedValue<?>> written =
        ImmutableList.<WindowedValue<?>>of(firstElement, secondElement);
    container.write(mapView, written);

    // The reader is created only after the write has happened.
    ImmutableList<PCollectionView<?>> views = ImmutableList.<PCollectionView<?>>of(mapView);
    Map<String, Integer> contents =
        container.createReaderForViews(views).get(mapView, FIRST_WINDOW);

    assertThat(contents, hasEntry("one", 1));
    assertThat(contents, hasEntry("two", 2));
    assertThat(contents.size(), is(2));
}
Also used : PCollectionView(org.apache.beam.sdk.values.PCollectionView) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 7 with PCollectionView

use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.

the class SideInputContainerTest method finishDoesNotOverwriteWrittenElements.

/**
 * Writes two early-pane elements to {@code mapView} in {@code SECOND_WINDOW}, invokes the
 * callback for that view and window via {@code immediatelyInvokeCallback}, and then checks that
 * a newly created reader still returns exactly the two written entries.
 */
@Test
public void finishDoesNotOverwriteWrittenElements() throws Exception {
    // Both elements carry the same pane: createPane(true, false, Timing.EARLY).
    PaneInfo earlyPane = PaneInfo.createPane(true, false, Timing.EARLY);
    WindowedValue<KV<String, Integer>> firstElement =
        WindowedValue.of(KV.of("one", 1), new Instant(1L), SECOND_WINDOW, earlyPane);
    WindowedValue<KV<String, Integer>> secondElement =
        WindowedValue.of(KV.of("two", 2), new Instant(20L), SECOND_WINDOW, earlyPane);

    ImmutableList<WindowedValue<?>> written =
        ImmutableList.<WindowedValue<?>>of(firstElement, secondElement);
    container.write(mapView, written);

    // Run the callback after the write; the assertions below check the contents survive it.
    immediatelyInvokeCallback(mapView, SECOND_WINDOW);

    ImmutableList<PCollectionView<?>> views = ImmutableList.<PCollectionView<?>>of(mapView);
    Map<String, Integer> contents =
        container.createReaderForViews(views).get(mapView, SECOND_WINDOW);

    assertThat(contents, hasEntry("one", 1));
    assertThat(contents, hasEntry("two", 2));
    assertThat(contents.size(), is(2));
}
Also used : PCollectionView(org.apache.beam.sdk.values.PCollectionView) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 8 with PCollectionView

use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.

the class ViewOverrideFactoryTest method replacementGetViewReturnsOriginal.

/**
 * Applies the replacement transform that the factory produces for a
 * {@code CreatePCollectionView} of a list view, then walks the pipeline and checks that exactly
 * one {@code WriteView} primitive is visited, that it reports the very same view instance that
 * was handed to the factory, and that the node has a single input.
 */
@Test
public void replacementGetViewReturnsOriginal() {
    final PCollection<Integer> numbers = p.apply("CreateContents", Create.of(1, 2, 3));
    final PCollectionView<List<Integer>> view =
        PCollectionViews.listView(numbers, WindowingStrategy.globalDefault(), numbers.getCoder());

    // The applied transform that the factory is asked to replace.
    AppliedPTransform<
            PCollection<Integer>,
            PCollectionView<List<Integer>>,
            CreatePCollectionView<Integer, List<Integer>>>
        original =
            AppliedPTransform
                .<PCollection<Integer>,
                    PCollectionView<List<Integer>>,
                    CreatePCollectionView<Integer, List<Integer>>>
                    of(
                        "foo",
                        numbers.expand(),
                        view.expand(),
                        CreatePCollectionView.<Integer, List<Integer>>of(view),
                        p);
    PTransformReplacement<PCollection<Integer>, PCollectionView<List<Integer>>> replacement =
        factory.getReplacementTransform(original);
    numbers.apply(replacement.getTransform());

    final AtomicBoolean sawWriteView = new AtomicBoolean();
    p.traverseTopologically(new PipelineVisitor.Defaults() {

        @Override
        public void visitPrimitiveTransform(Node node) {
            // Only WriteView primitives are of interest here.
            if (!(node.getTransform() instanceof WriteView)) {
                return;
            }
            // getAndSet returns the previous flag, so a second WriteView fails this assertion.
            assertThat("There should only be one WriteView primitive in the graph", sawWriteView.getAndSet(true), is(false));
            WriteView writeView = (WriteView) node.getTransform();
            PCollectionView replacementView = writeView.getView();
            assertThat(replacementView, Matchers.<PCollectionView>theInstance(view));
            assertThat(node.getInputs().entrySet(), hasSize(1));
        }
    });

    // The traversal must have encountered the WriteView at least once.
    assertThat(sawWriteView.get(), is(true));
}
Also used : WriteView(org.apache.beam.runners.direct.ViewOverrideFactory.WriteView) Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) PCollection(org.apache.beam.sdk.values.PCollection) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PCollectionView(org.apache.beam.sdk.values.PCollectionView) CreatePCollectionView(org.apache.beam.sdk.transforms.View.CreatePCollectionView) PipelineVisitor(org.apache.beam.sdk.Pipeline.PipelineVisitor) List(java.util.List) Test(org.junit.Test)

Example 9 with PCollectionView

use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.

the class ParDoTranslator method translate.

/**
 * Translates a multi-output {@code ParDo} into an {@code ApexParDoOperator} and registers it,
 * its output ports, its input stream and (when present) its side inputs with the context.
 *
 * <p>The main output tag is mapped to {@code operator.output}; every other output is mapped to
 * the entry of {@code operator.additionalOutputPorts} at the position its tag occupies in the
 * transform's additional output tag list.
 *
 * @param transform the ParDo to translate
 * @param context source of the transform's input, outputs, pipeline options and state backend
 * @throws UnsupportedOperationException if the DoFn is splittable or declares state or timers
 */
@Override
public void translate(ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
    DoFn<InputT, OutputT> doFn = transform.getFn();
    DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());

    // Reject DoFn features this translator does not handle before building anything.
    if (signature.processElement().isSplittable()) {
        throw new UnsupportedOperationException(
            String.format(
                "%s does not support splittable DoFn: %s",
                ApexRunner.class.getSimpleName(),
                doFn));
    }
    if (!signature.stateDeclarations().isEmpty()) {
        throw new UnsupportedOperationException(
            String.format(
                "Found %s annotations on %s, but %s cannot yet be used with state in the %s.",
                DoFn.StateId.class.getSimpleName(),
                doFn.getClass().getName(),
                DoFn.class.getSimpleName(),
                ApexRunner.class.getSimpleName()));
    }
    if (!signature.timerDeclarations().isEmpty()) {
        throw new UnsupportedOperationException(
            String.format(
                "Found %s annotations on %s, but %s cannot yet be used with timers in the %s.",
                DoFn.TimerId.class.getSimpleName(),
                doFn.getClass().getName(),
                DoFn.class.getSimpleName(),
                ApexRunner.class.getSimpleName()));
    }

    Map<TupleTag<?>, PValue> outputs = context.getOutputs();
    PCollection<InputT> input = context.getInput();
    List<PCollectionView<?>> sideInputs = transform.getSideInputs();
    TupleTag<OutputT> mainTag = transform.getMainOutputTag();

    // The operator receives windowed values encoded with the input's element and window coders.
    Coder<InputT> elementCoder = input.getCoder();
    WindowedValueCoder<InputT> windowedInputCoder =
        FullWindowedValueCoder.of(
            elementCoder, input.getWindowingStrategy().getWindowFn().windowCoder());
    ApexParDoOperator<InputT, OutputT> operator =
        new ApexParDoOperator<>(
            context.getPipelineOptions(),
            doFn,
            mainTag,
            transform.getAdditionalOutputTags().getAll(),
            input.getWindowingStrategy(),
            sideInputs,
            windowedInputCoder,
            context.getStateBackend());

    Map<PCollection<?>, OutputPort<?>> ports = Maps.newHashMapWithExpectedSize(outputs.size());
    for (Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
        checkArgument(output.getValue() instanceof PCollection, "%s %s outputs non-PCollection %s of type %s", ParDo.MultiOutput.class.getSimpleName(), context.getFullName(), output.getValue(), output.getValue().getClass().getSimpleName());
        PCollection<?> pc = (PCollection<?>) output.getValue();
        if (output.getKey().equals(mainTag)) {
            ports.put(pc, operator.output);
            continue;
        }
        // Additional outputs use the port at the same position as their tag in the tag list;
        // a tag that is not in the list gets no port.
        List<TupleTag<?>> additionalTags = transform.getAdditionalOutputTags().getAll();
        for (int i = 0; i < additionalTags.size(); i++) {
            if (additionalTags.get(i).equals(output.getKey())) {
                ports.put(pc, operator.additionalOutputPorts[i]);
                break;
            }
        }
    }

    context.addOperator(operator, ports);
    context.addStream(context.getInput(), operator.input);
    if (!sideInputs.isEmpty()) {
        addSideInputs(operator.sideInput1, sideInputs, context);
    }
}
Also used : OutputPort(com.datatorrent.api.Operator.OutputPort) TupleTag(org.apache.beam.sdk.values.TupleTag) ApexParDoOperator(org.apache.beam.runners.apex.translation.operators.ApexParDoOperator) PValue(org.apache.beam.sdk.values.PValue) PCollection(org.apache.beam.sdk.values.PCollection) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFn(org.apache.beam.sdk.transforms.DoFn) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 10 with PCollectionView

use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.

the class EvaluationContextTest method writeToViewWriterThenReadReads.

/**
 * Adds one value per window through a view writer and checks that a side input reader returns
 * each value for its window. Then adds a later pane for the second window and checks that the
 * reader created before that write still returns the old value, while a reader created after
 * it returns the new one.
 */
@Test
public void writeToViewWriterThenReadReads() {
    PCollectionViewWriter<Integer, Iterable<Integer>> viewWriter =
        context.createPCollectionViewWriter(
            PCollection.<Iterable<Integer>>createPrimitiveOutputInternal(
                p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED),
            view);
    ImmutableList<PCollectionView<?>> views = ImmutableList.<PCollectionView<?>>of(view);

    BoundedWindow window = new TestBoundedWindow(new Instant(1024L));
    BoundedWindow second = new TestBoundedWindow(new Instant(899999L));
    WindowedValue<Integer> firstValue =
        WindowedValue.of(1, new Instant(1222), window, PaneInfo.ON_TIME_AND_ONLY_FIRING);
    WindowedValue<Integer> secondValue =
        WindowedValue.of(
            2, new Instant(8766L), second, PaneInfo.createPane(true, false, Timing.ON_TIME, 0, 0));
    Iterable<WindowedValue<Integer>> initialValues = ImmutableList.of(firstValue, secondValue);
    viewWriter.add(initialValues);

    SideInputReader reader = context.createSideInputReader(views);
    assertThat(reader.get(view, window), containsInAnyOrder(1));
    assertThat(reader.get(view, second), containsInAnyOrder(2));

    // Write a later pane for the second window.
    WindowedValue<Integer> overwrittenSecondValue =
        WindowedValue.of(
            4444, new Instant(8677L), second, PaneInfo.createPane(false, true, Timing.LATE, 1, 1));
    viewWriter.add(Collections.singleton(overwrittenSecondValue));

    // The reader created before this write keeps serving the value it already returned.
    assertThat(reader.get(view, second), containsInAnyOrder(2));

    // A reader created after the write observes the new contents.
    reader = context.createSideInputReader(views);
    assertThat(reader.get(view, second), containsInAnyOrder(4444));
}
Also used : PCollectionView(org.apache.beam.sdk.values.PCollectionView) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Instant(org.joda.time.Instant) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SideInputReader(org.apache.beam.runners.core.SideInputReader) Test(org.junit.Test)

Aggregations

PCollectionView (org.apache.beam.sdk.values.PCollectionView)20 Test (org.junit.Test)12 Instant (org.joda.time.Instant)10 TupleTag (org.apache.beam.sdk.values.TupleTag)9 KV (org.apache.beam.sdk.values.KV)8 DoFn (org.apache.beam.sdk.transforms.DoFn)7 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)6 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)6 WindowedValue (org.apache.beam.sdk.util.WindowedValue)5 PCollection (org.apache.beam.sdk.values.PCollection)5 Pipeline (org.apache.beam.sdk.Pipeline)4 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)4 Duration (org.joda.time.Duration)4 HashMap (java.util.HashMap)3 Map (java.util.Map)3 List (java.util.List)2 TimeZone (java.util.TimeZone)2 FlinkPipelineOptions (org.apache.beam.runners.flink.FlinkPipelineOptions)2 DoFnOperator (org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator)2 PipelineResult (org.apache.beam.sdk.PipelineResult)2