Search in sources :

Example 1 with DoFnSignature

use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.

the class ParDoTranslator method translate.

/**
 * Translates a multi-output {@link ParDo} into an {@link ApexParDoOperator} and registers the
 * operator, its output ports, its input stream and (if any) its side inputs with the context.
 *
 * <p>DoFns that are splittable, or that declare state or timers, are rejected with an
 * {@link UnsupportedOperationException} before any operator is created.
 */
@Override
public void translate(ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
    DoFn<InputT, OutputT> doFn = transform.getFn();
    DoFnSignature fnSignature = DoFnSignatures.getSignature(doFn.getClass());

    // Reject DoFn features this translator does not handle.
    if (fnSignature.processElement().isSplittable()) {
        throw new UnsupportedOperationException(String.format("%s does not support splittable DoFn: %s", ApexRunner.class.getSimpleName(), doFn));
    }
    if (!fnSignature.stateDeclarations().isEmpty()) {
        throw new UnsupportedOperationException(String.format("Found %s annotations on %s, but %s cannot yet be used with state in the %s.", DoFn.StateId.class.getSimpleName(), doFn.getClass().getName(), DoFn.class.getSimpleName(), ApexRunner.class.getSimpleName()));
    }
    if (!fnSignature.timerDeclarations().isEmpty()) {
        throw new UnsupportedOperationException(String.format("Found %s annotations on %s, but %s cannot yet be used with timers in the %s.", DoFn.TimerId.class.getSimpleName(), doFn.getClass().getName(), DoFn.class.getSimpleName(), ApexRunner.class.getSimpleName()));
    }

    Map<TupleTag<?>, PValue> outputs = context.getOutputs();
    PCollection<InputT> input = context.getInput();
    List<PCollectionView<?>> sideInputs = transform.getSideInputs();

    // The operator receives windowed values, so wrap the element coder with the window coder.
    Coder<InputT> elementCoder = input.getCoder();
    WindowedValueCoder<InputT> windowedInputCoder = FullWindowedValueCoder.of(elementCoder, input.getWindowingStrategy().getWindowFn().windowCoder());
    ApexParDoOperator<InputT, OutputT> operator = new ApexParDoOperator<>(context.getPipelineOptions(), doFn, transform.getMainOutputTag(), transform.getAdditionalOutputTags().getAll(), input.getWindowingStrategy(), sideInputs, windowedInputCoder, context.getStateBackend());

    Map<PCollection<?>, OutputPort<?>> ports = Maps.newHashMapWithExpectedSize(outputs.size());
    for (Entry<TupleTag<?>, PValue> entry : outputs.entrySet()) {
        TupleTag<?> outputTag = entry.getKey();
        PValue outputValue = entry.getValue();
        checkArgument(outputValue instanceof PCollection, "%s %s outputs non-PCollection %s of type %s", ParDo.MultiOutput.class.getSimpleName(), context.getFullName(), outputValue, outputValue.getClass().getSimpleName());
        PCollection<?> collection = (PCollection<?>) outputValue;

        if (outputTag.equals(transform.getMainOutputTag())) {
            ports.put(collection, operator.output);
            continue;
        }

        // An additional output uses the port at the position of its tag in the additional tag list.
        int position = 0;
        for (TupleTag<?> candidate : transform.getAdditionalOutputTags().getAll()) {
            if (candidate.equals(outputTag)) {
                ports.put(collection, operator.additionalOutputPorts[position]);
                break;
            }
            position++;
        }
    }

    context.addOperator(operator, ports);
    context.addStream(context.getInput(), operator.input);
    if (!sideInputs.isEmpty()) {
        addSideInputs(operator.sideInput1, sideInputs, context);
    }
}
Also used : OutputPort(com.datatorrent.api.Operator.OutputPort) TupleTag(org.apache.beam.sdk.values.TupleTag) ApexParDoOperator(org.apache.beam.runners.apex.translation.operators.ApexParDoOperator) PValue(org.apache.beam.sdk.values.PValue) PCollection(org.apache.beam.sdk.values.PCollection) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFn(org.apache.beam.sdk.transforms.DoFn) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 2 with DoFnSignature

use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.

the class ParDo method validateSideInputTypes.

/**
 * Checks every side input parameter declared on the ProcessElement method of {@code fn}: a view
 * must be registered in {@code sideInputs} under the parameter's id, and the view's type
 * descriptor must equal the parameter's declared type.
 *
 * <p>Violations are reported through {@code checkArgument}.
 */
private static void validateSideInputTypes(Map<String, PCollectionView<?>> sideInputs, DoFn<?, ?> fn) {
    DoFnSignature.ProcessElementMethod processElement = DoFnSignatures.getSignature(fn.getClass()).processElement();
    for (SideInputParameter parameter : processElement.getSideInputParameters()) {
        String sideInputId = parameter.sideInputId();
        PCollectionView<?> boundView = sideInputs.get(sideInputId);
        checkArgument(boundView != null, "the ProcessElement method expects a side input identified with the tag %s, but no such side input was" + " supplied. Use withSideInput(String, PCollectionView) to supply this side input.", sideInputId);

        // Types must currently match exactly; convertible types are not accepted.
        TypeDescriptor<?> boundType = boundView.getViewFn().getTypeDescriptor();
        checkArgument(boundType.equals(parameter.elementT()), "Side Input with tag %s and type %s cannot be bound to ProcessElement parameter with type %s", sideInputId, boundType, parameter.elementT());
    }
}
Also used : SideInputParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SideInputParameter) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 3 with DoFnSignature

use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.

the class ParDo method getDoFnSchemaInformation.

/**
 * Extract information on how the DoFn uses schemas. In particular, if the schema of an element
 * parameter does not match the input PCollection's schema, convert.
 *
 * <p>For each schema element parameter of the ProcessElement method, the field access
 * descriptor is recorded and either a select-from-schema conversion or a primitive unboxing is
 * registered. If the method also takes a process context or a plain element parameter, access
 * to all fields is recorded.
 *
 * @param fn the DoFn whose ProcessElement signature is inspected
 * @param input the PCollection the DoFn is applied to
 * @return the schema information accumulated for {@code fn}
 * @throws IllegalArgumentException if the ProcessElement method declares schema element
 *     parameters but {@code input} has no schema, or if a selected schema yields neither a
 *     converted schema coder nor an unboxed type for the parameter
 */
@Internal
public static DoFnSchemaInformation getDoFnSchemaInformation(DoFn<?, ?> fn, PCollection<?> input) {
    DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
    DoFnSignature.ProcessElementMethod processElementMethod = signature.processElement();
    if (!processElementMethod.getSchemaElementParameters().isEmpty() && !input.hasSchema()) {
        // Separate the offending PCollection from the message text so the error is readable.
        throw new IllegalArgumentException("Type of @Element must match the DoFn type: " + input);
    }
    SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
    DoFnSchemaInformation doFnSchemaInformation = DoFnSchemaInformation.create();
    for (SchemaElementParameter parameter : processElementMethod.getSchemaElementParameters()) {
        TypeDescriptor<?> elementT = parameter.elementT();
        FieldAccessDescriptor accessDescriptor = getFieldAccessDescriptorFromParameter(parameter.fieldAccessString(), input.getSchema(), signature.fieldAccessDeclarations(), fn);
        doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(accessDescriptor);
        Schema selectedSchema = SelectHelpers.getOutputSchema(input.getSchema(), accessDescriptor);
        ConvertHelpers.ConvertedSchemaInformation converted = ConvertHelpers.getConvertedSchemaInformation(selectedSchema, elementT, schemaRegistry);
        if (converted.outputSchemaCoder != null) {
            doFnSchemaInformation = doFnSchemaInformation.withSelectFromSchemaParameter((SchemaCoder<?>) input.getCoder(), accessDescriptor, selectedSchema, converted.outputSchemaCoder, converted.unboxedType != null);
        } else {
            // If the selected schema is a Row containing a single primitive type (which is the output
            // of Select when selecting a primitive), attempt to unbox it and match against the
            // parameter.
            checkArgument(converted.unboxedType != null, "Selected schema %s cannot be bound to a parameter of type %s: no converted schema coder and no unboxed type are available", selectedSchema, elementT);
            doFnSchemaInformation = doFnSchemaInformation.withUnboxPrimitiveParameter((SchemaCoder<?>) input.getCoder(), accessDescriptor, selectedSchema, elementT);
        }
    }
    // A process context or whole-element parameter is recorded as access to all fields; one
    // match is enough, so stop at the first.
    for (DoFnSignature.Parameter p : processElementMethod.extraParameters()) {
        if (p instanceof ProcessContextParameter || p instanceof ElementParameter) {
            doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(FieldAccessDescriptor.withAllFields());
            break;
        }
    }
    return doFnSchemaInformation;
}
Also used : FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) ConvertHelpers(org.apache.beam.sdk.schemas.utils.ConvertHelpers) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Schema(org.apache.beam.sdk.schemas.Schema) ProcessContextParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ProcessContextParameter) ElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ElementParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) Internal(org.apache.beam.sdk.annotations.Internal)

Example 4 with DoFnSignature

use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.

the class ParDo method finishSpecifyingStateSpecs.

/**
 * Offers coders to every state spec declared on {@code fn} and finishes specifying it.
 *
 * <p>Coders are first looked up through the schema registry; when that lookup raises
 * {@link NoSuchSchemaException}, they are obtained from the coder registry and the input coder
 * instead. An {@link IllegalAccessException} is rethrown wrapped in a
 * {@link RuntimeException}.
 */
private static void finishSpecifyingStateSpecs(DoFn<?, ?> fn, CoderRegistry coderRegistry, SchemaRegistry schemaRegistry, Coder<?> inputCoder) {
    DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());
    for (DoFnSignature.StateDeclaration declaration : fnSignature.stateDeclarations().values()) {
        try {
            // Read the StateSpec instance held in the declared field of this DoFn.
            StateSpec<?> spec = (StateSpec<?>) declaration.field().get(fn);
            Coder[] offeredCoders;
            try {
                offeredCoders = schemasForStateSpecTypes(declaration, schemaRegistry);
            } catch (NoSuchSchemaException noSchema) {
                // No schema available: fall back to the coder registry.
                offeredCoders = codersForStateSpecTypes(declaration, coderRegistry, inputCoder);
            }
            spec.offerCoders(offeredCoders);
            spec.finishSpecifying();
        } catch (IllegalAccessException accessFailure) {
            throw new RuntimeException(accessFailure);
        }
    }
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) KvCoder(org.apache.beam.sdk.coders.KvCoder) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Coder(org.apache.beam.sdk.coders.Coder) NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 5 with DoFnSignature

use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.

the class ParDo method validateWindowType.

/**
 * Validates the window type of the input {@link PCollection} against the {@link DoFn}: the
 * window type descriptor of the input's window function is checked against the ProcessElement
 * method, then against each OnTimer method, then against each OnTimerFamily method.
 */
private static <InputT, OutputT> void validateWindowType(PCollection<? extends InputT> input, DoFn<InputT, OutputT> fn) {
    DoFnSignature fnSignature = DoFnSignatures.getSignature((Class) fn.getClass());
    TypeDescriptor<? extends BoundedWindow> inputWindowType = input.getWindowingStrategy().getWindowFn().getWindowTypeDescriptor();

    DoFnSignature.ProcessElementMethod processElement = fnSignature.processElement();
    validateWindowTypeForMethod(inputWindowType, processElement);

    for (OnTimerMethod timerMethod : fnSignature.onTimerMethods().values()) {
        validateWindowTypeForMethod(inputWindowType, timerMethod);
    }

    for (DoFnSignature.OnTimerFamilyMethod timerFamilyMethod : fnSignature.onTimerFamilyMethods().values()) {
        validateWindowTypeForMethod(inputWindowType, timerFamilyMethod);
    }
}
Also used : DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) OnTimerMethod(org.apache.beam.sdk.transforms.reflect.DoFnSignature.OnTimerMethod)

Aggregations

DoFnSignature (org.apache.beam.sdk.transforms.reflect.DoFnSignature)25 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)8 PTransformMatcher (org.apache.beam.sdk.runners.PTransformMatcher)8 HashMap (java.util.HashMap)4 Map (java.util.Map)4 StateSpec (org.apache.beam.sdk.state.StateSpec)4 Coder (org.apache.beam.sdk.coders.Coder)3 KvCoder (org.apache.beam.sdk.coders.KvCoder)3 PCollectionView (org.apache.beam.sdk.values.PCollectionView)3 StatefulDoFnRunner (org.apache.beam.runners.core.StatefulDoFnRunner)2 SplittableParDo (org.apache.beam.runners.core.construction.SplittableParDo)2 SamzaExecutionContext (org.apache.beam.runners.samza.SamzaExecutionContext)2 SchemaCoder (org.apache.beam.sdk.schemas.SchemaCoder)2 DoFn (org.apache.beam.sdk.transforms.DoFn)2 ParDo (org.apache.beam.sdk.transforms.ParDo)2 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)2 PCollection (org.apache.beam.sdk.values.PCollection)2 TupleTag (org.apache.beam.sdk.values.TupleTag)2 OutputPort (com.datatorrent.api.Operator.OutputPort)1 ByteString (com.google.protobuf.ByteString)1