use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class TransformTranslator method flattenPColl.
/**
 * Creates the evaluator that translates a {@link Flatten.PCollections} transform.
 *
 * <p>When the transform has no inputs, an empty RDD obtained from the Spark context is used.
 * Otherwise the RDD of every input is gathered into an array and passed to the Spark context's
 * {@code union}. In both cases the resulting RDD is wrapped in a {@link BoundedDataset} and
 * stored in the evaluation context under the transform.
 */
private static <T> TransformEvaluator<Flatten.PCollections<T>> flattenPColl() {
  return new TransformEvaluator<Flatten.PCollections<T>>() {
    @SuppressWarnings("unchecked")
    @Override
    public void evaluate(Flatten.PCollections<T> transform, EvaluationContext context) {
      Collection<PValue> inputs = context.getInputs(transform).values();
      JavaRDD<WindowedValue<T>> flattened;
      if (!inputs.isEmpty()) {
        // Java cannot instantiate a generic array, so a raw JavaRDD[] is allocated here; this and
        // the dataset cast below are what the unchecked suppression covers.
        JavaRDD<WindowedValue<T>>[] inputRdds = new JavaRDD[inputs.size()];
        int slot = 0;
        for (PValue input : inputs) {
          checkArgument(
              input instanceof PCollection,
              "Flatten had non-PCollection value in input: %s of type %s",
              input,
              input.getClass().getSimpleName());
          BoundedDataset<T> dataset = (BoundedDataset<T>) context.borrowDataset(input);
          inputRdds[slot++] = dataset.getRDD();
        }
        flattened = context.getSparkContext().union(inputRdds);
      } else {
        flattened = context.getSparkContext().emptyRDD();
      }
      context.putDataset(transform, new BoundedDataset<>(flattened));
    }

    @Override
    public String toNativeString() {
      return "sparkContext.union(...)";
    }
  };
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class ProcessBundleHandlerTest method testCreatingAndProcessingBeamFnDataReadRunner.
/**
 * Builds a primitive transform whose function spec uses {@code DATA_INPUT_URN} and checks that:
 *
 * <ul>
 *   <li>creating the consumers does not touch the data client,
 *   <li>running the start function requests an inbound consumer from the data client,
 *   <li>a value given to that inbound consumer reaches the consumer already registered for the
 *       output target,
 *   <li>no new consumers are registered, and
 *   <li>the finish function runs once the inbound future has completed.
 * </ul>
 */
@Test
public void testCreatingAndProcessingBeamFnDataReadRunner() throws Exception {
  String bundleId = "57L";
  String primitiveTransformId = "100L";
  long outputId = 101L;
  Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);

  // Downstream side: one pre-existing consumer that records everything it receives.
  List<WindowedValue<String>> receivedValues = new ArrayList<>();
  BeamFnApi.Target outputTarget =
      BeamFnApi.Target.newBuilder()
          .setPrimitiveTransformReference(primitiveTransformId)
          .setName(Long.toString(outputId))
          .build();
  Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> downstreamConsumers =
      ImmutableMultimap.of(outputTarget, receivedValues::add);
  Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> registeredConsumers =
      HashMultimap.create();
  List<ThrowingRunnable> startCallbacks = new ArrayList<>();
  List<ThrowingRunnable> finishCallbacks = new ArrayList<>();

  BeamFnApi.FunctionSpec readSpec =
      BeamFnApi.FunctionSpec.newBuilder()
          .setId("1L")
          .setUrn(DATA_INPUT_URN)
          .setData(Any.pack(REMOTE_PORT))
          .build();
  BeamFnApi.PCollection stringOutput =
      BeamFnApi.PCollection.newBuilder().setCoderReference(STRING_CODER_SPEC_ID).build();
  BeamFnApi.PrimitiveTransform readTransform =
      BeamFnApi.PrimitiveTransform.newBuilder()
          .setId(primitiveTransformId)
          .setFunctionSpec(readSpec)
          .putInputs("input", BeamFnApi.Target.List.getDefaultInstance())
          .putOutputs(Long.toString(outputId), stringOutput)
          .build();

  ProcessBundleHandler handler =
      new ProcessBundleHandler(
          PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);
  handler.createConsumersForPrimitiveTransform(
      readTransform,
      Suppliers.ofInstance(bundleId)::get,
      downstreamConsumers::get,
      registeredConsumers::put,
      startCallbacks::add,
      finishCallbacks::add);
  // Wiring up the transform alone must not reach out to the data client.
  verifyZeroInteractions(beamFnDataClient);

  CompletableFuture<Void> inboundDone = new CompletableFuture<>();
  when(beamFnDataClient.forInboundConsumer(any(), any(), any(), any())).thenReturn(inboundDone);
  Iterables.getOnlyElement(startCallbacks).run();

  BeamFnApi.Target expectedInboundTarget =
      BeamFnApi.Target.newBuilder()
          .setPrimitiveTransformReference(primitiveTransformId)
          .setName("input")
          .build();
  verify(beamFnDataClient)
      .forInboundConsumer(
          eq(REMOTE_PORT.getApiServiceDescriptor()),
          eq(KV.of(bundleId, expectedInboundTarget)),
          eq(STRING_CODER),
          consumerCaptor.capture());

  // Whatever arrives on the captured inbound consumer is expected on the downstream consumer.
  consumerCaptor.getValue().accept(valueInGlobalWindow("TestValue"));
  assertThat(receivedValues, contains(valueInGlobalWindow("TestValue")));
  receivedValues.clear();

  assertThat(registeredConsumers.keySet(), empty());

  // Complete the inbound future before invoking the finish function.
  inboundDone.complete(null);
  Iterables.getOnlyElement(finishCallbacks).run();

  verifyNoMoreInteractions(beamFnDataClient);
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class ProcessBundleHandlerTest method testCreatingAndProcessingSource.
/**
 * Builds a primitive transform whose function spec uses {@code JAVA_SOURCE_URN} with a serialized
 * {@code CountingSource.upTo(3)} and checks that:
 *
 * <ul>
 *   <li>running the start function emits 0, 1 and 2 to the consumer registered for the output
 *       target,
 *   <li>a consumer is registered for the input target, and a source handed to it
 *       ({@code CountingSource.upTo(2)}) is read as well, emitting 0 and 1, and
 *   <li>no finish functions are registered.
 * </ul>
 */
@Test
public void testCreatingAndProcessingSource() throws Exception {
  Map<String, Message> fnApiRegistry = ImmutableMap.of(LONG_CODER_SPEC_ID, LONG_CODER_SPEC);
  String primitiveTransformId = "100L";
  long outputId = 101L;

  BeamFnApi.Target inputTarget =
      BeamFnApi.Target.newBuilder()
          .setPrimitiveTransformReference("1000L")
          .setName("inputTarget")
          .build();

  // NOTE(review): the element type is declared as String, yet the assertions below compare the
  // collected values against Long elements. Presumably this only works through erasure; confirm
  // whether WindowedValue<Long> was intended.
  List<WindowedValue<String>> outputValues = new ArrayList<>();
  BeamFnApi.Target outputTarget =
      BeamFnApi.Target.newBuilder()
          .setPrimitiveTransformReference(primitiveTransformId)
          .setName(Long.toString(outputId))
          .build();

  Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers =
      ImmutableMultimap.of(outputTarget, outputValues::add);
  Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<BoundedSource<Long>>>> newConsumers =
      HashMultimap.create();
  List<ThrowingRunnable> startFunctions = new ArrayList<>();
  List<ThrowingRunnable> finishFunctions = new ArrayList<>();

  BeamFnApi.FunctionSpec functionSpec =
      BeamFnApi.FunctionSpec.newBuilder()
          .setId("1L")
          .setUrn(JAVA_SOURCE_URN)
          .setData(
              Any.pack(
                  BytesValue.newBuilder()
                      .setValue(
                          ByteString.copyFrom(
                              SerializableUtils.serializeToByteArray(CountingSource.upTo(3))))
                      .build()))
          .build();

  BeamFnApi.PrimitiveTransform primitiveTransform =
      BeamFnApi.PrimitiveTransform.newBuilder()
          .setId(primitiveTransformId)
          .setFunctionSpec(functionSpec)
          .putInputs("input", BeamFnApi.Target.List.newBuilder().addTarget(inputTarget).build())
          .putOutputs(
              Long.toString(outputId),
              BeamFnApi.PCollection.newBuilder().setCoderReference(LONG_CODER_SPEC_ID).build())
          .build();

  ProcessBundleHandler handler =
      new ProcessBundleHandler(
          PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);

  handler.createConsumersForPrimitiveTransform(
      primitiveTransform,
      Suppliers.ofInstance("57L")::get,
      existingConsumers::get,
      newConsumers::put,
      startFunctions::add,
      finishFunctions::add);

  // This is testing a deprecated way of running sources and should be removed
  // once all source definitions are instead propagated along the input edge.
  Iterables.getOnlyElement(startFunctions).run();
  assertThat(
      outputValues,
      contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
  outputValues.clear();

  // Check that when passing a source along as an input, the source is processed.
  // assertEquals takes (expected, actual); keeping that order makes a failure message report
  // the registered targets as the actual value.
  assertEquals(ImmutableSet.of(inputTarget), newConsumers.keySet());
  Iterables.getOnlyElement(newConsumers.get(inputTarget))
      .accept(valueInGlobalWindow(CountingSource.upTo(2)));
  assertThat(outputValues, contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L)));

  assertThat(finishFunctions, empty());
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class ProcessBundleHandlerTest method testCreatingAndProcessingBeamFnDataWriteRunner.
/**
 * Builds a primitive transform whose function spec uses {@code DATA_OUTPUT_URN} and checks that:
 *
 * <ul>
 *   <li>creating the consumers does not touch the data client,
 *   <li>running the start function requests an outbound consumer from the data client,
 *   <li>a consumer is registered for the input target and values given to it reach the outbound
 *       consumer, and
 *   <li>the outbound consumer is closed only when the finish function runs.
 * </ul>
 */
@Test
public void testCreatingAndProcessingBeamFnDataWriteRunner() throws Exception {
  Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
  String bundleId = "57L";
  String primitiveTransformId = "100L";
  long outputId = 101L;

  BeamFnApi.Target inputTarget =
      BeamFnApi.Target.newBuilder()
          .setPrimitiveTransformReference("1000L")
          .setName("inputTarget")
          .build();

  Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers =
      ImmutableMultimap.of();
  Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> newConsumers =
      HashMultimap.create();
  List<ThrowingRunnable> startFunctions = new ArrayList<>();
  List<ThrowingRunnable> finishFunctions = new ArrayList<>();

  BeamFnApi.FunctionSpec functionSpec =
      BeamFnApi.FunctionSpec.newBuilder()
          .setId("1L")
          .setUrn(DATA_OUTPUT_URN)
          .setData(Any.pack(REMOTE_PORT))
          .build();

  BeamFnApi.PrimitiveTransform primitiveTransform =
      BeamFnApi.PrimitiveTransform.newBuilder()
          .setId(primitiveTransformId)
          .setFunctionSpec(functionSpec)
          .putInputs("input", BeamFnApi.Target.List.newBuilder().addTarget(inputTarget).build())
          .putOutputs(
              Long.toString(outputId),
              BeamFnApi.PCollection.newBuilder().setCoderReference(STRING_CODER_SPEC_ID).build())
          .build();

  ProcessBundleHandler handler =
      new ProcessBundleHandler(
          PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);

  handler.createConsumersForPrimitiveTransform(
      primitiveTransform,
      Suppliers.ofInstance(bundleId)::get,
      existingConsumers::get,
      newConsumers::put,
      startFunctions::add,
      finishFunctions::add);

  // Wiring up the transform alone must not reach out to the data client.
  verifyZeroInteractions(beamFnDataClient);

  // Stand-in outbound consumer: records accepted values and remembers whether close() ran.
  List<WindowedValue<String>> outputValues = new ArrayList<>();
  AtomicBoolean wasCloseCalled = new AtomicBoolean();
  CloseableThrowingConsumer<WindowedValue<String>> outputConsumer =
      new CloseableThrowingConsumer<WindowedValue<String>>() {
        @Override
        public void close() throws Exception {
          wasCloseCalled.set(true);
        }

        @Override
        public void accept(WindowedValue<String> t) throws Exception {
          outputValues.add(t);
        }
      };

  when(beamFnDataClient.forOutboundConsumer(
          any(), any(), Matchers.<Coder<WindowedValue<String>>>any()))
      .thenReturn(outputConsumer);
  Iterables.getOnlyElement(startFunctions).run();
  verify(beamFnDataClient)
      .forOutboundConsumer(
          eq(REMOTE_PORT.getApiServiceDescriptor()),
          eq(
              KV.of(
                  bundleId,
                  BeamFnApi.Target.newBuilder()
                      .setPrimitiveTransformReference(primitiveTransformId)
                      .setName(Long.toString(outputId))
                      .build())),
          eq(STRING_CODER));

  // assertEquals takes (expected, actual); keeping that order makes a failure message report
  // the registered targets as the actual value.
  assertEquals(ImmutableSet.of(inputTarget), newConsumers.keySet());
  Iterables.getOnlyElement(newConsumers.get(inputTarget))
      .accept(valueInGlobalWindow("TestValue"));
  assertThat(outputValues, contains(valueInGlobalWindow("TestValue")));
  outputValues.clear();

  // The outbound consumer stays open until the finish function has run.
  assertFalse(wasCloseCalled.get());
  Iterables.getOnlyElement(finishFunctions).run();
  assertTrue(wasCloseCalled.get());

  verifyNoMoreInteractions(beamFnDataClient);
}
use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
the class BatchViewOverridesTest method testToIsmRecordForMapLikeDoFn.
/**
 * Runs {@code ToIsmRecordForMapLikeDoFn} over two shards of (key, window)-tagged values and
 * checks three things: the ISM records on the main output (in processing order), the per-window
 * unique-key counts emitted to the size tag, and the per-window unique keys emitted to the
 * entry-set tag.
 */
@Test
public void testToIsmRecordForMapLikeDoFn() throws Exception {
  TupleTag<KV<Integer, KV<IntervalWindow, Long>>> sizeTag = new TupleTag<>();
  TupleTag<KV<Integer, KV<IntervalWindow, Long>>> entrySetTag = new TupleTag<>();

  Coder<Long> keyCoder = VarLongCoder.of();
  Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
  IsmRecordCoder<WindowedValue<Long>> ismCoder =
      IsmRecordCoder.of(
          1,
          2,
          ImmutableList.<Coder<?>>of(
              MetadataKeyCoder.of(keyCoder), IntervalWindow.getCoder(), BigEndianLongCoder.of()),
          FullWindowedValueCoder.of(VarLongCoder.of(), windowCoder));

  DoFnTester<
          KV<Integer, Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>,
          IsmRecord<WindowedValue<Long>>>
      tester =
          DoFnTester.of(
              new BatchViewOverrides.BatchViewAsMultimap.ToIsmRecordForMapLikeDoFn<>(
                  sizeTag, entrySetTag, windowCoder, keyCoder, ismCoder, false));

  IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
  IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
  IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));

  // Elements of the first shard; each comment relates an element to the one before it.
  Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>> firstShard =
      ImmutableList.of(
          KV.of(
              KV.of(1L, windowA),
              WindowedValue.of(110L, new Instant(1), windowA, PaneInfo.NO_FIRING)),
          // same window same key as to previous
          KV.of(
              KV.of(1L, windowA),
              WindowedValue.of(111L, new Instant(2), windowA, PaneInfo.NO_FIRING)),
          // same window different key as to previous
          KV.of(
              KV.of(2L, windowA),
              WindowedValue.of(120L, new Instant(3), windowA, PaneInfo.NO_FIRING)),
          // different window same key as to previous
          KV.of(
              KV.of(2L, windowB),
              WindowedValue.of(210L, new Instant(11), windowB, PaneInfo.NO_FIRING)),
          // different window and different key as to previous
          KV.of(
              KV.of(3L, windowB),
              WindowedValue.of(220L, new Instant(12), windowB, PaneInfo.NO_FIRING)));

  // different shard
  Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>> secondShard =
      ImmutableList.of(
          KV.of(
              KV.of(4L, windowC),
              WindowedValue.of(330L, new Instant(21), windowC, PaneInfo.NO_FIRING)));

  Iterable<KV<Integer, Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>> shardedInput =
      ImmutableList.of(KV.of(1, firstShard), KV.of(2, secondShard));

  // The order of the output elements is important relative to processing order
  assertThat(
      tester.processBundle(shardedInput),
      contains(
          IsmRecord.of(
              ImmutableList.of(1L, windowA, 0L),
              WindowedValue.of(110L, new Instant(1), windowA, PaneInfo.NO_FIRING)),
          IsmRecord.of(
              ImmutableList.of(1L, windowA, 1L),
              WindowedValue.of(111L, new Instant(2), windowA, PaneInfo.NO_FIRING)),
          IsmRecord.of(
              ImmutableList.of(2L, windowA, 0L),
              WindowedValue.of(120L, new Instant(3), windowA, PaneInfo.NO_FIRING)),
          IsmRecord.of(
              ImmutableList.of(2L, windowB, 0L),
              WindowedValue.of(210L, new Instant(11), windowB, PaneInfo.NO_FIRING)),
          IsmRecord.of(
              ImmutableList.of(3L, windowB, 0L),
              WindowedValue.of(220L, new Instant(12), windowB, PaneInfo.NO_FIRING)),
          IsmRecord.of(
              ImmutableList.of(4L, windowC, 0L),
              WindowedValue.of(330L, new Instant(21), windowC, PaneInfo.NO_FIRING))));

  // Verify the number of unique keys per window.
  assertThat(
      tester.takeOutputElements(sizeTag),
      contains(
          KV.of(
              ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowA)),
              KV.of(windowA, 2L)),
          KV.of(
              ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)),
              KV.of(windowB, 2L)),
          KV.of(
              ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowC)),
              KV.of(windowC, 1L))));

  // Verify the output for the unique keys.
  assertThat(
      tester.takeOutputElements(entrySetTag),
      contains(
          KV.of(
              ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowA)),
              KV.of(windowA, 1L)),
          KV.of(
              ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowA)),
              KV.of(windowA, 2L)),
          KV.of(
              ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)),
              KV.of(windowB, 2L)),
          KV.of(
              ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)),
              KV.of(windowB, 3L)),
          KV.of(
              ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowC)),
              KV.of(windowC, 4L))));
}
Aggregations