Use of org.apache.beam.sdk.schemas.SchemaCoder in the Apache Beam project.
Class ParDo, method getDoFnSchemaInformation.
/**
 * Extract information on how the DoFn uses schemas. In particular, if the schema of an element
 * parameter does not match the input PCollection's schema, convert.
 *
 * <p>For every schema element parameter of the process-element method, the field access
 * descriptor is resolved against the input schema, the selected output schema is computed, and
 * either a select-from-schema parameter (when a converted output coder is available) or an
 * unbox-primitive parameter is registered. If the method also takes a process-context or plain
 * element parameter, access to all fields is recorded.
 *
 * @param fn the DoFn whose process-element signature is inspected
 * @param input the PCollection the DoFn is applied to; supplies the schema, coder and registry
 * @return the accumulated schema information for {@code fn}
 * @throws IllegalArgumentException if the process-element method declares schema element
 *     parameters but {@code input} has no schema
 */
@Internal
public static DoFnSchemaInformation getDoFnSchemaInformation(DoFn<?, ?> fn, PCollection<?> input) {
  DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
  DoFnSignature.ProcessElementMethod processElementMethod = signature.processElement();
  // Schema element parameters can only be satisfied by an input that carries a schema.
  if (!processElementMethod.getSchemaElementParameters().isEmpty() && !input.hasSchema()) {
    // Keep the historical message prefix, but separate it from the PCollection description.
    throw new IllegalArgumentException("Type of @Element must match the DoFn type: " + input);
  }
  SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
  DoFnSchemaInformation doFnSchemaInformation = DoFnSchemaInformation.create();
  for (SchemaElementParameter parameter : processElementMethod.getSchemaElementParameters()) {
    // Looked up inside the loop: it is only reached after the hasSchema() check above passed.
    Schema inputSchema = input.getSchema();
    TypeDescriptor<?> elementT = parameter.elementT();
    FieldAccessDescriptor accessDescriptor =
        getFieldAccessDescriptorFromParameter(
            parameter.fieldAccessString(), inputSchema, signature.fieldAccessDeclarations(), fn);
    doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(accessDescriptor);
    Schema selectedSchema = SelectHelpers.getOutputSchema(inputSchema, accessDescriptor);
    ConvertHelpers.ConvertedSchemaInformation converted =
        ConvertHelpers.getConvertedSchemaInformation(selectedSchema, elementT, schemaRegistry);
    SchemaCoder<?> inputCoder = (SchemaCoder<?>) input.getCoder();
    if (converted.outputSchemaCoder != null) {
      doFnSchemaInformation =
          doFnSchemaInformation.withSelectFromSchemaParameter(
              inputCoder,
              accessDescriptor,
              selectedSchema,
              converted.outputSchemaCoder,
              converted.unboxedType != null);
    } else {
      // If the selected schema is a Row containing a single primitive type (which is the output
      // of Select when selecting a primitive), attempt to unbox it and match against the
      // parameter.
      checkArgument(
          converted.unboxedType != null,
          "Cannot convert or unbox selected schema %s to element parameter type %s",
          selectedSchema,
          elementT);
      doFnSchemaInformation =
          doFnSchemaInformation.withUnboxPrimitiveParameter(
              inputCoder, accessDescriptor, selectedSchema, elementT);
    }
  }
  // A process-context or whole-element parameter means every field is accessed; one hit is enough.
  for (DoFnSignature.Parameter p : processElementMethod.extraParameters()) {
    if (p instanceof ProcessContextParameter || p instanceof ElementParameter) {
      doFnSchemaInformation =
          doFnSchemaInformation.withFieldAccessDescriptor(FieldAccessDescriptor.withAllFields());
      break;
    }
  }
  return doFnSchemaInformation;
}
Use of org.apache.beam.sdk.schemas.SchemaCoder in the Apache Beam project.
Class ParDo, method schemasForStateSpecTypes.
/**
 * Builds one {@link SchemaCoder} per actual type argument of the declared state type.
 *
 * <p>A state type that is not a {@link ParameterizedType} has no type arguments, so an empty
 * array is returned for it.
 *
 * @param stateDeclaration the state declaration whose state type is examined
 * @param schemaRegistry registry queried for the schema and the to-row / from-row functions of
 *     each type argument
 * @return coders in the same order as the state type's actual type arguments
 * @throws NoSuchSchemaException declared for the registry lookups (presumably raised when a type
 *     argument has no schema available; confirm against SchemaRegistry)
 */
private static SchemaCoder[] schemasForStateSpecTypes(
    DoFnSignature.StateDeclaration stateDeclaration, SchemaRegistry schemaRegistry)
    throws NoSuchSchemaException {
  Type declaredType = stateDeclaration.stateType().getType();
  // Only a parameterized type carries type arguments; anything else yields no coders.
  Type[] typeArgs =
      declaredType instanceof ParameterizedType
          ? ((ParameterizedType) declaredType).getActualTypeArguments()
          : new Type[0];
  SchemaCoder[] result = new SchemaCoder[typeArgs.length];
  int slot = 0;
  for (Type typeArg : typeArgs) {
    TypeDescriptor descriptor = TypeDescriptor.of(typeArg);
    result[slot++] =
        SchemaCoder.of(
            schemaRegistry.getSchema(descriptor),
            descriptor,
            schemaRegistry.getToRowFunction(descriptor),
            schemaRegistry.getFromRowFunction(descriptor));
  }
  return result;
}
Aggregations