use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class RemoteExecutionTest method testExecutionWithMultipleStages.
/**
 * Fuses a two-branch pipeline into executable stages and runs every stage as a remote bundle.
 *
 * <p>Each branch is: impulse, decode the bytes as a UTF-8 string, then prepend {@code "stream" +
 * suffix}. The two branches are flattened, mapped to KVs with an empty-string value, and grouped
 * by key. The test asserts that fusion yields exactly two stages, sends the encoded string
 * {@code "X"} to the single input receiver of each stage, and finally asserts that the collected
 * outputs are the KVs keyed by {@code "stream1X"} and {@code "stream2X"}, in any order.
 */
@Test
public void testExecutionWithMultipleStages() throws Exception {
  launchSdkHarness(PipelineOptionsFactory.create());
  Pipeline pipeline = Pipeline.create();

  // Builds one impulse-rooted branch; the suffix keeps transform names and outputs distinct.
  Function<String, PCollection<String>> branchFor =
      suffix -> {
        PCollection<String> decoded =
            pipeline
                .apply("impulse" + suffix, Impulse.create())
                .apply(
                    "create" + suffix,
                    ParDo.of(
                        new DoFn<byte[], String>() {
                          @ProcessElement
                          public void process(ProcessContext c) {
                            try {
                              c.output(
                                  CoderUtils.decodeFromByteArray(
                                      StringUtf8Coder.of(), c.element()));
                            } catch (CoderException e) {
                              throw new RuntimeException(e);
                            }
                          }
                        }))
                .setCoder(StringUtf8Coder.of());
        return decoded.apply(
            ParDo.of(
                new DoFn<String, String>() {
                  @ProcessElement
                  public void processElement(ProcessContext c) {
                    c.output("stream" + suffix + c.element());
                  }
                }));
      };

  PCollection<String> firstBranch = branchFor.apply("1");
  PCollection<String> secondBranch = branchFor.apply("2");
  PCollection<String> flattened =
      PCollectionList.of(firstBranch).and(secondBranch).apply(Flatten.pCollections());

  PCollection<KV<String, String>> keyed =
      flattened
          .apply(
              "createKV",
              ParDo.of(
                  new DoFn<String, KV<String, String>>() {
                    @ProcessElement
                    public void process(ProcessContext c) {
                      c.output(KV.of(c.element(), ""));
                    }
                  }))
          .setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
  keyed.apply("gbk", GroupByKey.create());

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  FusedPipeline fusedPipeline = GreedyPipelineFuser.fuse(pipelineProto);
  Set<ExecutableStage> fusedStages = fusedPipeline.getFusedStages();
  assertThat(fusedStages.size(), equalTo(2));

  // Shared sink for the outputs of every stage; wrapped in a synchronized list because the
  // receivers registered below append to it.
  List<WindowedValue<?>> collectedOutputs = Collections.synchronizedList(new ArrayList<>());

  for (ExecutableStage fusedStage : fusedStages) {
    ExecutableProcessBundleDescriptor bundleDescriptor =
        ProcessBundleDescriptors.fromExecutableStage(
            fusedStage.toString(),
            fusedStage,
            dataServer.getApiServiceDescriptor(),
            stateServer.getApiServiceDescriptor());
    BundleProcessor bundleProcessor =
        controlClient.getProcessor(
            bundleDescriptor.getProcessBundleDescriptor(),
            bundleDescriptor.getRemoteInputDestinations(),
            stateDelegator);

    // Register one receiver per remote output, all of them appending to the shared sink.
    Map<String, Coder> outputCoders = bundleDescriptor.getRemoteOutputCoders();
    Map<String, RemoteOutputReceiver<?>> receiversByOutput = new HashMap<>();
    for (Entry<String, Coder> coderEntry : outputCoders.entrySet()) {
      String outputId = coderEntry.getKey();
      Coder<WindowedValue<?>> outputCoder = (Coder<WindowedValue<?>>) coderEntry.getValue();
      receiversByOutput.putIfAbsent(
          outputId, RemoteOutputReceiver.of(outputCoder, collectedOutputs::add));
    }

    // The bundle is closed by try-with-resources after the single input has been sent.
    try (RemoteBundle remoteBundle =
        bundleProcessor.newBundle(
            receiversByOutput,
            StateRequestHandler.unsupported(),
            BundleProgressHandler.ignored())) {
      Iterables.getOnlyElement(remoteBundle.getInputReceivers().values())
          .accept(
              valueInGlobalWindow(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "X")));
    }
  }

  assertThat(
      collectedOutputs,
      containsInAnyOrder(
          valueInGlobalWindow(KV.of("stream1X", "")),
          valueInGlobalWindow(KV.of("stream2X", ""))));
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class ConfigGeneratorTest method testDuplicateStateIdConfig.
/**
 * Checks that config creation fails when two ParDos in one pipeline declare the same state id.
 *
 * <p>Both DoFns below declare a {@code ValueState<Integer>} under the id {@code "testState"}.
 * The test expects {@code SamzaPipelineTranslator.createConfig} to throw {@link
 * IllegalStateException} for such a pipeline.
 */
@Test
public void testDuplicateStateIdConfig() {
  SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setJobName("TestStoreConfig");
  options.setRunner(SamzaRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  // First stateful DoFn: forwards every element unchanged.
  DoFn<KV<String, String>, KV<String, String>> forwardingFn =
      new DoFn<KV<String, String>, KV<String, String>>() {
        private static final String testState = "testState";

        @StateId(testState)
        private final StateSpec<ValueState<Integer>> state = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId(testState) ValueState<Integer> state) {
          context.output(context.element());
        }
      };

  // Second stateful DoFn: emits nothing, but reuses the same state id as the first one.
  DoFn<KV<String, String>, Void> discardingFn =
      new DoFn<KV<String, String>, Void>() {
        private static final String testState = "testState";

        @StateId(testState)
        private final StateSpec<ValueState<Integer>> state = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId(testState) ValueState<Integer> state) {}
      };

  pipeline
      .apply(Create.empty(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.strings())))
      .apply(ParDo.of(forwardingFn))
      .apply(ParDo.of(discardingFn));

  final Map<PValue, String> viewIds = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder builder = new ConfigBuilder(options);
  assertThrows(
      IllegalStateException.class,
      () -> SamzaPipelineTranslator.createConfig(pipeline, options, viewIds, builder));
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class DoFnSignatures method analyzeSplitRestrictionMethod.
/**
 * Analyzes a {@code @SplitRestriction} method and builds its signature.
 *
 * <p>The method must return {@code void}. Every declared parameter is analyzed as an extra
 * parameter: a schema element parameter is reported as an illegal argument, and a restriction
 * parameter must use exactly {@code restrictionT}. Once all parameters are collected, each one
 * is checked against {@code ALLOWED_SPLIT_RESTRICTION_PARAMETERS}.
 *
 * @param errors reporter that receives every validation failure
 * @param fnT DoFn type used to resolve generic parameter types and the window type
 * @param m the method under analysis
 * @param inputT element type forwarded to the parameter analysis and used in error messages
 * @param outputT not read by this method
 * @param restrictionT restriction type that any restriction parameter must match
 * @param fnContext analysis context forwarded to the parameter analysis
 * @return the signature built from {@code m}, its window type and its extra parameters
 */
@VisibleForTesting
static DoFnSignature.SplitRestrictionMethod analyzeSplitRestrictionMethod(
    ErrorReporter errors,
    TypeDescriptor<? extends DoFn<?, ?>> fnT,
    Method m,
    TypeDescriptor<?> inputT,
    TypeDescriptor<?> outputT,
    TypeDescriptor<?> restrictionT,
    FnAnalysisContext fnContext) {
  // Method is of the form:
  // @SplitRestriction
  // void splitRestriction(... parameters ...);
  errors.checkArgument(void.class.equals(m.getReturnType()), "Must return void");

  Type[] genericParamTypes = m.getGenericParameterTypes();
  MethodAnalysisContext methodContext = MethodAnalysisContext.create();
  TypeDescriptor<? extends BoundedWindow> windowT = getWindowType(fnT, m);

  for (int index = 0; index < genericParamTypes.length; ++index) {
    ParameterDescription description =
        ParameterDescription.of(
            m,
            index,
            fnT.resolveType(genericParamTypes[index]),
            Arrays.asList(m.getParameterAnnotations()[index]));
    // NOTE(review): restrictionT is passed in the last position, where the other analyze*
    // methods pass outputT; this looks intentional but should be confirmed.
    Parameter analyzed =
        analyzeExtraParameter(
            errors, fnContext, methodContext, description, inputT, restrictionT);

    if (analyzed instanceof SchemaElementParameter) {
      SchemaElementParameter schemaParam = (SchemaElementParameter) analyzed;
      errors.throwIllegalArgument(
          "Schema @%s are not supported for @%s method. Found %s, did you mean to use %s?",
          format(DoFn.Element.class),
          format(DoFn.SplitRestriction.class),
          format(schemaParam.elementT()),
          format(inputT));
    } else if (analyzed instanceof RestrictionParameter) {
      RestrictionParameter restrictionParam = (RestrictionParameter) analyzed;
      errors.checkArgument(
          restrictionT.equals(restrictionParam.restrictionT()),
          "Uses restriction type %s, but @%s method uses restriction type %s",
          format(restrictionParam.restrictionT()),
          format(DoFn.GetInitialRestriction.class),
          format(restrictionT));
    }
    methodContext.addParameter(analyzed);
  }

  // The allow-list check runs only after every parameter has been analyzed and recorded.
  methodContext
      .getExtraParameters()
      .forEach(
          parameter ->
              checkParameterOneOf(errors, parameter, ALLOWED_SPLIT_RESTRICTION_PARAMETERS));

  return DoFnSignature.SplitRestrictionMethod.create(
      m, windowT, methodContext.getExtraParameters());
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class DoFnSignatures method analyzeProcessElementMethod.
/**
 * Analyzes a {@code @ProcessElement} method and builds its signature.
 *
 * <p>The method must return {@code void} or {@code DoFn.ProcessContinuation}. Every declared
 * parameter is analyzed as an extra parameter; schema element parameters are then numbered in
 * declaration order starting at zero. If a restriction tracker parameter is present, all
 * parameters are checked against {@code ALLOWED_SPLITTABLE_PROCESS_ELEMENT_PARAMETERS};
 * otherwise against {@code ALLOWED_NON_SPLITTABLE_PROCESS_ELEMENT_PARAMETERS}.
 *
 * @param errors reporter that receives every validation failure
 * @param fnClass DoFn type used to resolve generic parameter types and the window type
 * @param m the method under analysis
 * @param inputT element type forwarded to the parameter analysis
 * @param outputT output type forwarded to the parameter analysis
 * @param fnContext analysis context forwarded to the parameter analysis
 * @return the signature built from {@code m}; its tracker and watermark estimator types are
 *     {@code null} when no such parameter is declared
 */
@VisibleForTesting
static DoFnSignature.ProcessElementMethod analyzeProcessElementMethod(
    ErrorReporter errors,
    TypeDescriptor<? extends DoFn<?, ?>> fnClass,
    Method m,
    TypeDescriptor<?> inputT,
    TypeDescriptor<?> outputT,
    FnAnalysisContext fnContext) {
  Class<?> returnType = m.getReturnType();
  boolean returnsContinuation = DoFn.ProcessContinuation.class.equals(returnType);
  errors.checkArgument(
      void.class.equals(returnType) || returnsContinuation,
      "Must return void or %s",
      format(DoFn.ProcessContinuation.class));

  MethodAnalysisContext methodContext = MethodAnalysisContext.create();
  boolean requiresStableInput = m.isAnnotationPresent(DoFn.RequiresStableInput.class);
  boolean requiresTimeSortedInput = m.isAnnotationPresent(DoFn.RequiresTimeSortedInput.class);
  TypeDescriptor<? extends BoundedWindow> windowT = getWindowType(fnClass, m);

  Type[] genericParamTypes = m.getGenericParameterTypes();
  for (int index = 0; index < genericParamTypes.length; ++index) {
    ErrorReporter paramErrors = errors.forMethod(DoFn.ProcessElement.class, m);
    ParameterDescription description =
        ParameterDescription.of(
            m,
            index,
            fnClass.resolveType(genericParamTypes[index]),
            Arrays.asList(m.getParameterAnnotations()[index]));
    Parameter analyzed =
        analyzeExtraParameter(
            paramErrors, fnContext, methodContext, description, inputT, outputT);
    methodContext.addParameter(analyzed);
  }

  // Give each schema element parameter its position among the schema element parameters.
  int nextSchemaIndex = 0;
  for (int pos = 0; pos < methodContext.getExtraParameters().size(); ++pos) {
    Parameter candidate = methodContext.getExtraParameters().get(pos);
    if (!(candidate instanceof SchemaElementParameter)) {
      continue;
    }
    SchemaElementParameter indexed =
        ((SchemaElementParameter) candidate).toBuilder().setIndex(nextSchemaIndex).build();
    methodContext.setParameter(pos, indexed);
    ++nextSchemaIndex;
  }

  TypeDescriptor<?> trackerT =
      methodContext
          .findParameter(RestrictionTrackerParameter.class)
          .map(RestrictionTrackerParameter::trackerT)
          .orElse(null);
  TypeDescriptor<?> watermarkEstimatorT =
      methodContext
          .findParameter(WatermarkEstimatorParameter.class)
          .map(WatermarkEstimatorParameter::estimatorT)
          .orElse(null);

  // The allowed parameters depend on whether this DoFn is splittable, i.e. whether it declares
  // a restriction tracker parameter.
  if (trackerT == null) {
    methodContext
        .getExtraParameters()
        .forEach(
            parameter ->
                checkParameterOneOf(
                    errors, parameter, ALLOWED_NON_SPLITTABLE_PROCESS_ELEMENT_PARAMETERS));
  } else {
    methodContext
        .getExtraParameters()
        .forEach(
            parameter ->
                checkParameterOneOf(
                    errors, parameter, ALLOWED_SPLITTABLE_PROCESS_ELEMENT_PARAMETERS));
  }

  return DoFnSignature.ProcessElementMethod.create(
      m,
      methodContext.getExtraParameters(),
      requiresStableInput,
      requiresTimeSortedInput,
      trackerT,
      watermarkEstimatorT,
      windowT,
      returnsContinuation);
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class DoFnSignatures method analyzeOnTimerFamilyMethod.
/**
 * Analyzes an {@code @OnTimerFamily} method and builds its signature.
 *
 * <p>The method must return {@code void}. Every declared parameter is analyzed as an extra
 * parameter and immediately checked against {@code ALLOWED_ON_TIMER_FAMILY_PARAMETERS}.
 *
 * @param errors reporter that receives every validation failure
 * @param fnClass DoFn type used to resolve generic parameter types and the window type
 * @param m the method under analysis
 * @param timerFamilyId timer family id stored in the resulting signature
 * @param inputT element type forwarded to the parameter analysis
 * @param outputT output type forwarded to the parameter analysis
 * @param fnContext analysis context forwarded to the parameter analysis
 * @return the signature built from {@code m}, the timer family id, the stable-input flag, the
 *     window type and the analyzed parameters
 */
@VisibleForTesting
static DoFnSignature.OnTimerFamilyMethod analyzeOnTimerFamilyMethod(
    ErrorReporter errors,
    TypeDescriptor<? extends DoFn<?, ?>> fnClass,
    Method m,
    String timerFamilyId,
    TypeDescriptor<?> inputT,
    TypeDescriptor<?> outputT,
    FnAnalysisContext fnContext) {
  errors.checkArgument(void.class.equals(m.getReturnType()), "Must return void");

  Type[] genericParamTypes = m.getGenericParameterTypes();
  MethodAnalysisContext methodContext = MethodAnalysisContext.create();
  boolean requiresStableInput = m.isAnnotationPresent(DoFn.RequiresStableInput.class);
  @Nullable TypeDescriptor<? extends BoundedWindow> windowT = getWindowType(fnClass, m);
  List<DoFnSignature.Parameter> analyzedParameters = new ArrayList<>();

  // Parameter analysis reports through a reporter scoped to this method; the allow-list check
  // below uses the reporter that was passed in.
  ErrorReporter onTimerErrors = errors.forMethod(DoFn.OnTimerFamily.class, m);
  for (int index = 0; index < genericParamTypes.length; ++index) {
    ParameterDescription description =
        ParameterDescription.of(
            m,
            index,
            fnClass.resolveType(genericParamTypes[index]),
            Arrays.asList(m.getParameterAnnotations()[index]));
    Parameter analyzed =
        analyzeExtraParameter(
            onTimerErrors, fnContext, methodContext, description, inputT, outputT);
    checkParameterOneOf(errors, analyzed, ALLOWED_ON_TIMER_FAMILY_PARAMETERS);
    analyzedParameters.add(analyzed);
  }

  return DoFnSignature.OnTimerFamilyMethod.create(
      m, timerFamilyId, requiresStableInput, windowT, analyzedParameters);
}
Aggregations