use of org.apache.beam.sdk.values.Row in project beam by apache.
the class DoFnSignatures method analyzeExtraParameter.
private static Parameter analyzeExtraParameter(ErrorReporter methodErrors, FnAnalysisContext fnContext, MethodAnalysisContext methodContext, ParameterDescription param, TypeDescriptor<?> inputT, TypeDescriptor<?> outputT) {
TypeDescriptor<?> expectedProcessContextT = doFnProcessContextTypeOf(inputT, outputT);
TypeDescriptor<?> expectedStartBundleContextT = doFnStartBundleContextTypeOf(inputT, outputT);
TypeDescriptor<?> expectedFinishBundleContextT = doFnFinishBundleContextTypeOf(inputT, outputT);
TypeDescriptor<?> expectedOnTimerContextT = doFnOnTimerContextTypeOf(inputT, outputT);
TypeDescriptor<?> expectedOnWindowExpirationContextT = doFnOnWindowExpirationContextTypeOf(inputT, outputT);
TypeDescriptor<?> paramT = param.getType();
Class<?> rawType = paramT.getRawType();
ErrorReporter paramErrors = methodErrors.forParameter(param);
String fieldAccessString = getFieldAccessId(param.getAnnotations());
if (fieldAccessString != null) {
return Parameter.schemaElementParameter(paramT, fieldAccessString, param.getIndex());
} else if (hasAnnotation(DoFn.Element.class, param.getAnnotations())) {
return paramT.equals(inputT) ? Parameter.elementParameter(paramT) : Parameter.schemaElementParameter(paramT, null, param.getIndex());
} else if (hasAnnotation(DoFn.Restriction.class, param.getAnnotations())) {
return Parameter.restrictionParameter(paramT);
} else if (hasAnnotation(DoFn.WatermarkEstimatorState.class, param.getAnnotations())) {
return Parameter.watermarkEstimatorState(paramT);
} else if (hasAnnotation(DoFn.Timestamp.class, param.getAnnotations())) {
methodErrors.checkArgument(rawType.equals(Instant.class), "@Timestamp argument must have type org.joda.time.Instant.");
return Parameter.timestampParameter();
} else if (hasAnnotation(DoFn.Key.class, param.getAnnotations())) {
methodErrors.checkArgument(KV.class.equals(inputT.getRawType()), "@Key argument is expected to be use with input element of type KV.");
Type keyType = ((ParameterizedType) inputT.getType()).getActualTypeArguments()[0];
methodErrors.checkArgument(TypeDescriptor.of(keyType).equals(paramT), "@Key argument is expected to be type of %s, but found %s.", keyType, rawType);
return Parameter.keyT(paramT);
} else if (rawType.equals(TimeDomain.class)) {
return Parameter.timeDomainParameter();
} else if (hasAnnotation(DoFn.SideInput.class, param.getAnnotations())) {
String sideInputId = getSideInputId(param.getAnnotations());
paramErrors.checkArgument(sideInputId != null, "%s missing %s annotation", sideInputId, format(SideInput.class));
return Parameter.sideInputParameter(paramT, sideInputId);
} else if (rawType.equals(PaneInfo.class)) {
return Parameter.paneInfoParameter();
} else if (rawType.equals(DoFn.BundleFinalizer.class)) {
return Parameter.bundleFinalizer();
} else if (rawType.equals(DoFn.ProcessContext.class)) {
paramErrors.checkArgument(paramT.equals(expectedProcessContextT), "ProcessContext argument must have type %s", format(expectedProcessContextT));
return Parameter.processContext();
} else if (rawType.equals(DoFn.StartBundleContext.class)) {
paramErrors.checkArgument(paramT.equals(expectedStartBundleContextT), "StartBundleContext argument must have type %s", format(expectedProcessContextT));
return Parameter.startBundleContext();
} else if (rawType.equals(DoFn.FinishBundleContext.class)) {
paramErrors.checkArgument(paramT.equals(expectedFinishBundleContextT), "FinishBundleContext argument must have type %s", format(expectedProcessContextT));
return Parameter.finishBundleContext();
} else if (rawType.equals(DoFn.OnTimerContext.class)) {
paramErrors.checkArgument(paramT.equals(expectedOnTimerContextT), "OnTimerContext argument must have type %s", format(expectedOnTimerContextT));
return Parameter.onTimerContext();
} else if (rawType.equals(DoFn.OnWindowExpirationContext.class)) {
paramErrors.checkArgument(paramT.equals(expectedOnWindowExpirationContextT), "OnWindowExpirationContext argument must have type %s", format(expectedOnWindowExpirationContextT));
return Parameter.onWindowExpirationContext();
} else if (BoundedWindow.class.isAssignableFrom(rawType)) {
methodErrors.checkArgument(!methodContext.hasParameter(WindowParameter.class), "Multiple %s parameters", format(BoundedWindow.class));
return Parameter.boundedWindow((TypeDescriptor<? extends BoundedWindow>) paramT);
} else if (rawType.equals(OutputReceiver.class)) {
// It's a schema row receiver if it's an OutputReceiver<Row> _and_ the output type is not
// already Row.
boolean schemaRowReceiver = paramT.equals(outputReceiverTypeOf(TypeDescriptor.of(Row.class))) && !outputT.equals(TypeDescriptor.of(Row.class));
if (!schemaRowReceiver) {
TypeDescriptor<?> expectedReceiverT = outputReceiverTypeOf(outputT);
paramErrors.checkArgument(paramT.equals(expectedReceiverT), "OutputReceiver should be parameterized by %s", outputT);
}
return Parameter.outputReceiverParameter(schemaRowReceiver);
} else if (rawType.equals(MultiOutputReceiver.class)) {
return Parameter.taggedOutputReceiverParameter();
} else if (PipelineOptions.class.equals(rawType)) {
methodErrors.checkArgument(!methodContext.hasParameter(PipelineOptionsParameter.class), "Multiple %s parameters", format(PipelineOptions.class));
return Parameter.pipelineOptions();
} else if (RestrictionTracker.class.isAssignableFrom(rawType)) {
methodErrors.checkArgument(!methodContext.hasParameter(RestrictionTrackerParameter.class), "Multiple %s parameters", format(RestrictionTracker.class));
return Parameter.restrictionTracker(paramT);
} else if (WatermarkEstimator.class.isAssignableFrom(rawType)) {
methodErrors.checkArgument(!methodContext.hasParameter(WatermarkEstimatorParameter.class), "Multiple %s parameters", format(WatermarkEstimator.class));
return Parameter.watermarkEstimator(paramT);
} else if (rawType.equals(Timer.class)) {
// m.getParameters() is not available until Java 8
String id = getTimerId(param.getAnnotations());
paramErrors.checkArgument(id != null, "%s missing %s annotation", format(Timer.class), format(TimerId.class));
paramErrors.checkArgument(!methodContext.getTimerParameters().containsKey(id), "duplicate %s: \"%s\"", format(TimerId.class), id);
TimerDeclaration timerDecl = fnContext.getTimerDeclarations().get(id);
paramErrors.checkArgument(timerDecl != null, "reference to undeclared %s: \"%s\"", format(TimerId.class), id);
paramErrors.checkArgument(timerDecl.field().getDeclaringClass().equals(getDeclaringClass(param.getMethod())), "%s %s declared in a different class %s." + " Timers may be referenced only in the lexical scope where they are declared.", format(TimerId.class), id, timerDecl.field().getDeclaringClass().getName());
return Parameter.timerParameter(timerDecl);
} else if (hasAnnotation(DoFn.TimerId.class, param.getAnnotations())) {
boolean isValidTimerIdForTimerFamily = fnContext.getTimerFamilyDeclarations().size() > 0 && rawType.equals(String.class);
paramErrors.checkArgument(isValidTimerIdForTimerFamily, "%s not allowed here", format(DoFn.TimerId.class));
return Parameter.timerIdParameter();
} else if (rawType.equals(TimerMap.class)) {
String id = getTimerFamilyId(param.getAnnotations());
paramErrors.checkArgument(id != null, "%s missing %s annotation", format(TimerMap.class), format(DoFn.TimerFamily.class));
paramErrors.checkArgument(!methodContext.getTimerFamilyParameters().containsKey(id), "duplicate %s: \"%s\"", format(DoFn.TimerFamily.class), id);
TimerFamilyDeclaration timerDecl = fnContext.getTimerFamilyDeclarations().get(id);
paramErrors.checkArgument(timerDecl != null, "reference to undeclared %s: \"%s\"", format(DoFn.TimerFamily.class), id);
paramErrors.checkArgument(timerDecl.field().getDeclaringClass().equals(getDeclaringClass(param.getMethod())), "%s %s declared in a different class %s." + " Timers may be referenced only in the lexical scope where they are declared.", format(DoFn.TimerFamily.class), id, timerDecl.field().getDeclaringClass().getName());
return Parameter.timerFamilyParameter(timerDecl);
} else if (State.class.isAssignableFrom(rawType)) {
// m.getParameters() is not available until Java 8
String id = getStateId(param.getAnnotations());
paramErrors.checkArgument(id != null, "missing %s annotation", format(DoFn.StateId.class));
paramErrors.checkArgument(!methodContext.getStateParameters().containsKey(id), "duplicate %s: \"%s\"", format(DoFn.StateId.class), id);
// By static typing this is already a well-formed State subclass
TypeDescriptor<? extends State> stateType = (TypeDescriptor<? extends State>) param.getType();
StateDeclaration stateDecl = fnContext.getStateDeclarations().get(id);
paramErrors.checkArgument(stateDecl != null, "reference to undeclared %s: \"%s\"", format(DoFn.StateId.class), id);
paramErrors.checkArgument(stateDecl.stateType().isSubtypeOf(stateType), "data type of reference to %s %s must be a supertype of %s", format(StateId.class), id, format(stateDecl.stateType()));
paramErrors.checkArgument(stateDecl.field().getDeclaringClass().equals(getDeclaringClass(param.getMethod())), "%s %s declared in a different class %s." + " State may be referenced only in the class where it is declared.", format(StateId.class), id, stateDecl.field().getDeclaringClass().getName());
boolean alwaysFetched = getStateAlwaysFetched(param.getAnnotations());
if (alwaysFetched) {
paramErrors.checkArgument(ReadableState.class.isAssignableFrom(rawType), "@AlwaysFetched can only be used on ReadableStates. It cannot be used on %s", format(stateDecl.stateType()));
}
return Parameter.stateParameter(stateDecl, alwaysFetched);
} else {
paramErrors.throwIllegalArgument("%s is not a valid context parameter.", format(paramT));
// Unreachable
return null;
}
}
use of org.apache.beam.sdk.values.Row in project beam by apache.
the class AvroUtils method getFieldSchema.
private static org.apache.avro.Schema getFieldSchema(Schema.FieldType fieldType, String fieldName, String namespace) {
org.apache.avro.Schema baseType;
switch(fieldType.getTypeName()) {
case BYTE:
case INT16:
case INT32:
baseType = org.apache.avro.Schema.create(Type.INT);
break;
case INT64:
baseType = org.apache.avro.Schema.create(Type.LONG);
break;
case DECIMAL:
baseType = LogicalTypes.decimal(Integer.MAX_VALUE).addToSchema(org.apache.avro.Schema.create(Type.BYTES));
break;
case FLOAT:
baseType = org.apache.avro.Schema.create(Type.FLOAT);
break;
case DOUBLE:
baseType = org.apache.avro.Schema.create(Type.DOUBLE);
break;
case STRING:
baseType = org.apache.avro.Schema.create(Type.STRING);
break;
case DATETIME:
// TODO: There is a desire to move Beam schema DATETIME to a micros representation. When
// this is done, this logical type needs to be changed.
baseType = LogicalTypes.timestampMillis().addToSchema(org.apache.avro.Schema.create(Type.LONG));
break;
case BOOLEAN:
baseType = org.apache.avro.Schema.create(Type.BOOLEAN);
break;
case BYTES:
baseType = org.apache.avro.Schema.create(Type.BYTES);
break;
case LOGICAL_TYPE:
switch(fieldType.getLogicalType().getIdentifier()) {
case FixedBytes.IDENTIFIER:
FixedBytesField fixedBytesField = checkNotNull(FixedBytesField.fromBeamFieldType(fieldType));
baseType = fixedBytesField.toAvroType("fixed", namespace + "." + fieldName);
break;
case EnumerationType.IDENTIFIER:
EnumerationType enumerationType = fieldType.getLogicalType(EnumerationType.class);
baseType = org.apache.avro.Schema.createEnum(fieldName, "", "", enumerationType.getValues());
break;
case OneOfType.IDENTIFIER:
OneOfType oneOfType = fieldType.getLogicalType(OneOfType.class);
baseType = org.apache.avro.Schema.createUnion(oneOfType.getOneOfSchema().getFields().stream().map(x -> getFieldSchema(x.getType(), x.getName(), namespace)).collect(Collectors.toList()));
break;
case "CHAR":
case "NCHAR":
baseType = buildHiveLogicalTypeSchema("char", (int) fieldType.getLogicalType().getArgument());
break;
case "NVARCHAR":
case "VARCHAR":
case "LONGNVARCHAR":
case "LONGVARCHAR":
baseType = buildHiveLogicalTypeSchema("varchar", (int) fieldType.getLogicalType().getArgument());
break;
case "DATE":
baseType = LogicalTypes.date().addToSchema(org.apache.avro.Schema.create(Type.INT));
break;
case "TIME":
baseType = LogicalTypes.timeMillis().addToSchema(org.apache.avro.Schema.create(Type.INT));
break;
default:
throw new RuntimeException("Unhandled logical type " + fieldType.getLogicalType().getIdentifier());
}
break;
case ARRAY:
case ITERABLE:
baseType = org.apache.avro.Schema.createArray(getFieldSchema(fieldType.getCollectionElementType(), fieldName, namespace));
break;
case MAP:
if (fieldType.getMapKeyType().getTypeName().isStringType()) {
// Avro only supports string keys in maps.
baseType = org.apache.avro.Schema.createMap(getFieldSchema(fieldType.getMapValueType(), fieldName, namespace));
} else {
throw new IllegalArgumentException("Avro only supports maps with string keys");
}
break;
case ROW:
baseType = toAvroSchema(fieldType.getRowSchema(), fieldName, namespace);
break;
default:
throw new IllegalArgumentException("Unexpected type " + fieldType);
}
return fieldType.getNullable() ? ReflectData.makeNullable(baseType) : baseType;
}
use of org.apache.beam.sdk.values.Row in project beam by apache.
the class ProtoDynamicMessageSchemaTest method testNestedRowToProto.
@Test
public void testNestedRowToProto() throws InvalidProtocolBufferException {
ProtoDynamicMessageSchema schemaProvider = schemaFromDescriptor(Nested.getDescriptor());
SerializableFunction<Row, DynamicMessage> fromRow = schemaProvider.getFromRowFunction();
Nested proto = parseFrom(fromRow.apply(NESTED_ROW).toString(), Nested.newBuilder()).build();
assertEquals(NESTED_PROTO, proto);
}
use of org.apache.beam.sdk.values.Row in project beam by apache.
the class ProtoSchemaTranslatorTest method testOptionsMessageOnField.
@Test
public void testOptionsMessageOnField() {
Schema schema = ProtoSchemaTranslator.getSchema(Proto3SchemaMessages.OptionMessage.class);
Schema.Options options = schema.getField("field_one").getOptions();
Row optionMessage = options.getValue("beam:option:proto:field:proto3_schema_options.field_option_message");
assertEquals("foobar in field", optionMessage.getString("single_string"));
assertEquals(Integer.valueOf(56), optionMessage.getInt32("single_int32"));
assertEquals(Long.valueOf(78L), optionMessage.getInt64("single_int64"));
}
use of org.apache.beam.sdk.values.Row in project beam by apache.
the class ProtoDynamicMessageSchemaTest method testMapRowToProto.
@Test
public void testMapRowToProto() {
ProtoDynamicMessageSchema schemaProvider = schemaFromDescriptor(MapPrimitive.getDescriptor());
SerializableFunction<Row, DynamicMessage> fromRow = schemaProvider.getFromRowFunction();
MapPrimitive proto = parseFrom(fromRow.apply(MAP_PRIMITIVE_ROW).toString(), MapPrimitive.newBuilder()).build();
assertEquals(MAP_PRIMITIVE_PROTO, proto);
}
Aggregations