use of com.google.api.services.dataflow.model.SideInputInfo in project beam by apache.
the class LengthPrefixUnknownCodersTest method createSideInputInfosWithCoders.
/**
 * Builds a {@link SideInputInfo} containing one {@link Source} per supplied coder. Each source's
 * codec is the cloud-object form of the corresponding coder, and both the side input info and
 * every source are given their own {@link JacksonFactory}.
 */
private static SideInputInfo createSideInputInfosWithCoders(Coder<?>... coders) {
  SideInputInfo result = new SideInputInfo();
  result.setSources(new ArrayList<>());
  result.setFactory(new JacksonFactory());
  for (int i = 0; i < coders.length; i++) {
    Source codedSource = new Source();
    codedSource.setCodec(CloudObjects.asCloudObject(coders[i], /*sdkComponents=*/ null));
    codedSource.setFactory(new JacksonFactory());
    result.getSources().add(codedSource);
  }
  return result;
}
use of com.google.api.services.dataflow.model.SideInputInfo in project beam by apache.
the class LazilyInitializedSideInputReaderTest method testLazyInitialization.
@Test
public void testLazyInitialization() {
  final AtomicInteger initializationCount = new AtomicInteger();
  SideInputReader reader =
      new LazilyInitializedSideInputReader(
          ImmutableList.of(new SideInputInfo().setTag(TEST_TAG)),
          () -> {
            initializationCount.incrementAndGet();
            return mockSideInputReader;
          });

  // Merely constructing the reader must not invoke the supplier.
  assertEquals(0, initializationCount.get());

  // Tag-only queries (isEmpty / contains) must not invoke the supplier either.
  assertFalse(reader.isEmpty());
  assertEquals(0, initializationCount.get());
  when(mockPCollectionView.getTagInternal()).thenReturn(new TupleTag(TEST_TAG));
  assertTrue(reader.contains(mockPCollectionView));
  assertEquals(0, initializationCount.get());

  // The first get() invokes the supplier exactly once; later calls reuse the same delegate and
  // see whatever the delegate returns.
  when(mockSideInputReader.get(any(PCollectionView.class), any(BoundedWindow.class)))
      .thenReturn(42)
      .thenReturn(43);
  assertEquals(42, reader.get(mockPCollectionView, GlobalWindow.INSTANCE));
  assertEquals(1, initializationCount.get());
  assertEquals(43, reader.get(mockPCollectionView, GlobalWindow.INSTANCE));
  assertEquals(1, initializationCount.get());
}
use of com.google.api.services.dataflow.model.SideInputInfo in project beam by apache.
the class IsmSideInputReaderTest method toSideInputInfo.
/**
 * Creates a {@link SideInputInfo} with the given tag and sources. The kind's object type name is
 * {@code "singleton"} when exactly one source is supplied and {@code "collection"} otherwise.
 */
private SideInputInfo toSideInputInfo(String tagId, Source... sources) {
  String kindName = (sources.length == 1) ? "singleton" : "collection";

  SideInputInfo info = new SideInputInfo();
  info.setTag(tagId);
  info.setKind(new HashMap<String, Object>());
  info.getKind().put(PropertyNames.OBJECT_TYPE_NAME, kindName);
  // Copy into an ArrayList so the stored list is not the fixed-size Arrays.asList view.
  info.setSources(new ArrayList<>(Arrays.asList(sources)));
  return info;
}
use of com.google.api.services.dataflow.model.SideInputInfo in project beam by apache.
the class LengthPrefixUnknownCoders method forSideInputInfos.
/**
 * Produces a list in which every {@link SideInputInfo} has been cloned and each source codec of
 * the clone has been passed through {@code forCodec}, i.e. unknown coders are wrapped with length
 * prefixes and, if requested, the wrapped coder is replaced with a byte array coder.
 *
 * @param sideInputInfos the side input descriptions to process
 * @param replaceWithByteArrayCoder forwarded to {@code forCodec} for every source codec
 * @return an immutable list of the updated clones, in input order
 * @throws RuntimeException wrapping the {@link IOException} if cloning or codec replacement fails
 */
public static List<SideInputInfo> forSideInputInfos(
    List<SideInputInfo> sideInputInfos, boolean replaceWithByteArrayCoder) {
  ImmutableList.Builder<SideInputInfo> result = ImmutableList.builder();
  for (SideInputInfo original : sideInputInfos) {
    SideInputInfo copy;
    try {
      // Work on a clone; the codecs are rewritten on the clone's sources.
      copy = clone(original, SideInputInfo.class);
      for (Source source : copy.getSources()) {
        source.setCodec(forCodec(source.getCodec(), replaceWithByteArrayCoder));
      }
    } catch (IOException e) {
      throw new RuntimeException(
          String.format(
              "Failed to replace unknown coder with LengthPrefixCoder for : {%s}", original),
          e);
    }
    result.add(copy);
  }
  return result.build();
}
use of com.google.api.services.dataflow.model.SideInputInfo in project beam by apache.
the class GroupAlsoByWindowParDoFnFactory method create.
/**
 * Builds a {@link GroupAlsoByWindowsParDoFn} from the given cloud function specification.
 *
 * <p>The method, in order:
 *
 * <ol>
 *   <li>checks that the single entry of {@code outputTupleTagsToReceiverIndices} maps the main
 *       output tag to receiver index 0;
 *   <li>deserializes the windowing strategy stored under {@code PropertyNames.SERIALIZED_FN},
 *       falling back to {@code WindowingStrategy.globalDefault()} only when the
 *       {@code "beam_fn_api"} experiment is enabled;
 *   <li>deserializes an optional {@link AppliedCombineFn} stored under
 *       {@code WorkerPropertyNames.COMBINE_FN};
 *   <li>decodes the input coder and checks it is a {@link WindowedValueCoder} whose value coder
 *       is a {@link KvCoder};
 *   <li>selects a streaming or batch group-also-by-window function, with or without the combine
 *       function, and wraps it in a {@link GroupAlsoByWindowsParDoFn}.
 * </ol>
 *
 * @param options pipeline options; read for the experiment flag and the streaming flag
 * @param cloudUserFn cloud object carrying the serialized windowing strategy, optional combine
 *     fn, combine phase and input coder
 * @param sideInputInfos side input descriptions; only consulted when a combine fn is present
 * @param mainOutputTag the tag the sole output entry must match
 * @param outputTupleTagsToReceiverIndices expected to hold exactly one entry (read via
 *     {@code Iterables.getOnlyElement})
 * @param executionContext supplies the side input reader and step context
 * @param operationContext passed through to {@code executionContext} lookups
 * @return the configured {@link GroupAlsoByWindowsParDoFn}
 * @throws Exception if windowing strategy deserialization fails and the {@code "beam_fn_api"}
 *     experiment is not enabled, or if any called helper throws
 */
@Override
public ParDoFn create(PipelineOptions options, CloudObject cloudUserFn, @Nullable List<SideInputInfo> sideInputInfos, TupleTag<?> mainOutputTag, Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices, final DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
// Validate the output wiring: one entry, keyed by the main output tag, at receiver index 0.
Map.Entry<TupleTag<?>, Integer> entry = Iterables.getOnlyElement(outputTupleTagsToReceiverIndices.entrySet());
checkArgument(entry.getKey().equals(mainOutputTag), "Output tags should reference only the main output tag: %s vs %s", entry.getKey(), mainOutputTag);
checkArgument(entry.getValue() == 0, "There should be a single receiver, but using receiver index %s", entry.getValue());
byte[] encodedWindowingStrategy = getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN);
WindowingStrategy windowingStrategy;
try {
windowingStrategy = deserializeWindowingStrategy(encodedWindowingStrategy);
} catch (Exception e) {
// TODO: Catch block disappears, becoming an error once Python SDK is compliant.
if (DataflowRunner.hasExperiment(options.as(DataflowPipelineDebugOptions.class), "beam_fn_api")) {
LOG.info("FnAPI: Unable to deserialize windowing strategy, assuming default", e);
windowingStrategy = WindowingStrategy.globalDefault();
} else {
throw e;
}
}
// The combine fn is optional. NOTE(review): the trailing null is presumably the default returned
// when the property is absent - confirm against getBytes.
byte[] serializedCombineFn = getBytes(cloudUserFn, WorkerPropertyNames.COMBINE_FN, null);
AppliedCombineFn<?, ?, ?, ?> combineFn = null;
if (serializedCombineFn != null) {
Object combineFnObj = SerializableUtils.deserializeFromByteArray(serializedCombineFn, "serialized combine fn");
checkArgument(combineFnObj instanceof AppliedCombineFn, "unexpected kind of AppliedCombineFn: " + combineFnObj.getClass().getName());
combineFn = (AppliedCombineFn<?, ?, ?, ?>) combineFnObj;
}
// Decode the input coder and unwrap it: WindowedValueCoder -> value coder, which must be a KvCoder.
Map<String, Object> inputCoderObject = getObject(cloudUserFn, WorkerPropertyNames.INPUT_CODER);
Coder<?> inputCoder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(inputCoderObject));
checkArgument(inputCoder instanceof WindowedValueCoder, "Expected WindowedValueCoder for inputCoder, got: " + inputCoder.getClass().getName());
@SuppressWarnings("unchecked") WindowedValueCoder<?> windowedValueCoder = (WindowedValueCoder<?>) inputCoder;
Coder<?> elemCoder = windowedValueCoder.getValueCoder();
checkArgument(elemCoder instanceof KvCoder, "Expected KvCoder for inputCoder, got: " + elemCoder.getClass().getName());
@SuppressWarnings("unchecked") KvCoder<?, ?> kvCoder = (KvCoder<?, ?>) elemCoder;
boolean isStreamingPipeline = options.as(StreamingOptions.class).isStreaming();
// Without a combine fn the reader stays empty and maybeMergingCombineFn stays null.
SideInputReader sideInputReader = NullSideInputReader.empty();
@Nullable AppliedCombineFn<?, ?, ?, ?> maybeMergingCombineFn = null;
if (combineFn != null) {
sideInputReader = executionContext.getSideInputReader(sideInputInfos, combineFn.getSideInputViews(), operationContext);
// NOTE(review): CombinePhase.ALL is presumably the default when no phase is set - confirm
// against getString.
String phase = getString(cloudUserFn, WorkerPropertyNames.PHASE, CombinePhase.ALL);
checkArgument(phase.equals(CombinePhase.ALL) || phase.equals(CombinePhase.MERGE), "Unexpected phase: %s", phase);
// MERGE converts the combine fn via makeAppliedMergingFunction; ALL uses it unchanged.
if (phase.equals(CombinePhase.MERGE)) {
maybeMergingCombineFn = makeAppliedMergingFunction(combineFn);
} else {
maybeMergingCombineFn = combineFn;
}
}
// The key argument is ignored; state internals always come from this operation's step context.
StateInternalsFactory<?> stateInternalsFactory = key -> executionContext.getStepContext(operationContext).stateInternals();
// This will be a GABW Fn for either batch or streaming, with combiner in it or not
GroupAlsoByWindowFn<?, ?> fn;
// This will be a FakeKeyedWorkItemCoder for streaming or null for batch
Coder<?> gabwInputCoder;
// TODO: do not do this with mess of "if"
// Four cases: {streaming, batch} x {no combine fn, combine fn}. The raw-type casts below are
// needed because kvCoder and maybeMergingCombineFn are declared with wildcard type arguments.
if (isStreamingPipeline) {
if (maybeMergingCombineFn == null) {
fn = StreamingGroupAlsoByWindowsDoFns.createForIterable(windowingStrategy, stateInternalsFactory, ((KvCoder) kvCoder).getValueCoder());
gabwInputCoder = WindmillKeyedWorkItem.FakeKeyedWorkItemCoder.of(kvCoder);
} else {
fn = StreamingGroupAlsoByWindowsDoFns.create(windowingStrategy, stateInternalsFactory, (AppliedCombineFn) maybeMergingCombineFn, ((KvCoder) kvCoder).getKeyCoder());
gabwInputCoder = WindmillKeyedWorkItem.FakeKeyedWorkItemCoder.of(((AppliedCombineFn) maybeMergingCombineFn).getKvCoder());
}
} else {
if (maybeMergingCombineFn == null) {
fn = BatchGroupAlsoByWindowsDoFns.createForIterable(windowingStrategy, stateInternalsFactory, ((KvCoder) kvCoder).getValueCoder());
gabwInputCoder = null;
} else {
fn = BatchGroupAlsoByWindowsDoFns.create(windowingStrategy, (AppliedCombineFn) maybeMergingCombineFn);
gabwInputCoder = null;
}
}
// TODO: or anyhow related to it, do not do this with mess of "if"
// The two calls differ only in the side-input-views argument: the combine fn's views, or null.
if (maybeMergingCombineFn != null) {
return new GroupAlsoByWindowsParDoFn(options, fn, windowingStrategy, ((AppliedCombineFn) maybeMergingCombineFn).getSideInputViews(), gabwInputCoder, sideInputReader, mainOutputTag, executionContext.getStepContext(operationContext));
} else {
return new GroupAlsoByWindowsParDoFn(options, fn, windowingStrategy, null, gabwInputCoder, sideInputReader, mainOutputTag, executionContext.getStepContext(operationContext));
}
}
Aggregations