Use of org.apache.beam.sdk.schemas.SchemaRegistry in project beam by apache.
From the class SqlQuery1Test, the method testConvertsPriceToEur:
@Test
public void testConvertsPriceToEur() throws Exception {
  SchemaRegistry registry = SchemaRegistry.createDefault();
  PCollection<Event> bids =
      testPipeline.apply(
          TestStream.create(
                  registry.getSchema(Event.class),
                  TypeDescriptor.of(Event.class),
                  registry.getToRowFunction(Event.class),
                  registry.getFromRowFunction(Event.class))
              .addElements(new Event(BID1_USD))
              .addElements(new Event(BID2_USD))
              .advanceWatermarkToInfinity());
  PAssert.that(bids.apply(new SqlQuery1())).containsInAnyOrder(BID1_EUR, BID2_EUR);
  testPipeline.run();
}
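The four arguments fed to TestStream.create above (schema, type descriptor, to-Row and from-Row functions) all come from the same registry lookups. A minimal standalone sketch of those lookups, using a hypothetical Bid POJO in place of the Nexmark Event type:

import org.apache.beam.sdk.schemas.JavaFieldSchema;
import org.apache.beam.sdk.schemas.NoSuchSchemaException;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.SchemaRegistry;
import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TypeDescriptor;

public class RegistryLookupSketch {
  // Hypothetical POJO; @DefaultSchema lets the default registry infer its schema from fields.
  @DefaultSchema(JavaFieldSchema.class)
  public static class Bid {
    public long price;
    public String bidder;
  }

  public static void main(String[] args) throws NoSuchSchemaException {
    SchemaRegistry registry = SchemaRegistry.createDefault();
    // The same four pieces the test passes to TestStream.create:
    Schema schema = registry.getSchema(Bid.class);
    TypeDescriptor<Bid> type = TypeDescriptor.of(Bid.class);
    SerializableFunction<Bid, Row> toRow = registry.getToRowFunction(Bid.class);
    SerializableFunction<Row, Bid> fromRow = registry.getFromRowFunction(Bid.class);

    Bid bid = new Bid();
    bid.price = 100L;
    bid.bidder = "alice";
    Row row = toRow.apply(bid);          // POJO -> Row under the inferred schema
    System.out.println(schema);
    System.out.println(fromRow.apply(row).bidder);  // Row -> POJO round trip
  }
}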
Use of org.apache.beam.sdk.schemas.SchemaRegistry in project beam by apache.
From the class ParDo, the method getDoFnSchemaInformation:
/**
 * Extract information on how the DoFn uses schemas. In particular, if the schema of an element
 * parameter does not match the input PCollection's schema, a conversion is applied.
 */
@Internal
public static DoFnSchemaInformation getDoFnSchemaInformation(
    DoFn<?, ?> fn, PCollection<?> input) {
  DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
  DoFnSignature.ProcessElementMethod processElementMethod = signature.processElement();
  // Schema @Element parameters are only valid if the input PCollection has a schema.
  if (!processElementMethod.getSchemaElementParameters().isEmpty()) {
    if (!input.hasSchema()) {
      throw new IllegalArgumentException(
          "A schema @Element parameter requires the input PCollection to have a schema: " + input);
    }
  }

  SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
  DoFnSchemaInformation doFnSchemaInformation = DoFnSchemaInformation.create();
  // For each schema @Element parameter, record which fields it accesses and how to convert the
  // selected sub-schema to the parameter's type.
  for (SchemaElementParameter parameter : processElementMethod.getSchemaElementParameters()) {
    TypeDescriptor<?> elementT = parameter.elementT();
    FieldAccessDescriptor accessDescriptor =
        getFieldAccessDescriptorFromParameter(
            parameter.fieldAccessString(),
            input.getSchema(),
            signature.fieldAccessDeclarations(),
            fn);
    doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(accessDescriptor);
    Schema selectedSchema = SelectHelpers.getOutputSchema(input.getSchema(), accessDescriptor);
    ConvertHelpers.ConvertedSchemaInformation converted =
        ConvertHelpers.getConvertedSchemaInformation(selectedSchema, elementT, schemaRegistry);
    if (converted.outputSchemaCoder != null) {
      doFnSchemaInformation =
          doFnSchemaInformation.withSelectFromSchemaParameter(
              (SchemaCoder<?>) input.getCoder(),
              accessDescriptor,
              selectedSchema,
              converted.outputSchemaCoder,
              converted.unboxedType != null);
    } else {
      // If the selected schema is a Row containing a single primitive type (which is the output
      // of Select when selecting a primitive), attempt to unbox it and match against the
      // parameter.
      checkArgument(converted.unboxedType != null);
      doFnSchemaInformation =
          doFnSchemaInformation.withUnboxPrimitiveParameter(
              (SchemaCoder<?>) input.getCoder(), accessDescriptor, selectedSchema, elementT);
    }
  }

  // A ProcessContext or plain @Element parameter observes the whole element, so every field of
  // the input schema must be preserved.
  for (DoFnSignature.Parameter p : processElementMethod.extraParameters()) {
    if (p instanceof ProcessContextParameter || p instanceof ElementParameter) {
      doFnSchemaInformation =
          doFnSchemaInformation.withFieldAccessDescriptor(FieldAccessDescriptor.withAllFields());
      break;
    }
  }
  return doFnSchemaInformation;
}
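For context, a sketch of the kind of DoFn that drives the first loop: a @FieldAccess declaration registers a FieldAccessDescriptor in the signature's fieldAccessDeclarations(), and the annotated Row parameter becomes a schema element parameter. The field names here are hypothetical, not taken from the Beam source:

import org.apache.beam.sdk.schemas.FieldAccessDescriptor;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.Row;

// Hypothetical DoFn over a schema PCollection: only "userId" and "cost" are accessed, so
// getDoFnSchemaInformation records a FieldAccessDescriptor selecting just those two fields.
class ExtractUserAndCost extends DoFn<Row, String> {
  @FieldAccess("userAndCost")
  final FieldAccessDescriptor userAndCost = FieldAccessDescriptor.withFieldNames("userId", "cost");

  @ProcessElement
  public void process(@FieldAccess("userAndCost") Row row, OutputReceiver<String> out) {
    out.output(row.getString("userId") + ":" + row.getDouble("cost"));
  }
}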
Use of org.apache.beam.sdk.schemas.SchemaRegistry in project beam by apache.
From the class StorageApiWritesShardedRecords, the method expand:
@Override
public PCollection<Void> expand(
    PCollection<KV<ShardedKey<DestinationT>, Iterable<byte[]>>> input) {
  String operationName = input.getName() + "/" + getName();
  // Append records to the Storage API streams.
  PCollection<KV<String, Operation>> written =
      input.apply(
          "Write Records",
          ParDo.of(new WriteRecordsDoFn(operationName))
              .withSideInputs(dynamicDestinations.getSideInputs()));

  // Build a SchemaCoder for Operation from the pipeline's SchemaRegistry.
  SchemaCoder<Operation> operationCoder;
  try {
    SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
    operationCoder =
        SchemaCoder.of(
            schemaRegistry.getSchema(Operation.class),
            TypeDescriptor.of(Operation.class),
            schemaRegistry.getToRowFunction(Operation.class),
            schemaRegistry.getFromRowFunction(Operation.class));
  } catch (NoSuchSchemaException e) {
    throw new RuntimeException(e);
  }

  // Send all successful writes to be flushed.
  return written
      .setCoder(KvCoder.of(StringUtf8Coder.of(), operationCoder))
      .apply(
          Window.<KV<String, Operation>>configure()
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.standardSeconds(1))))
              .discardingFiredPanes())
      .apply("maxFlushPosition", Combine.perKey(Max.naturalOrder(new Operation(-1, false))))
      .apply(
          "Flush and finalize writes", ParDo.of(new StorageApiFlushAndFinalizeDoFn(bqServices)));
}
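The try/catch around the registry lookup is a recurring idiom in these examples; a reusable sketch of it under our own, hypothetical helper name (SchemaCoders is not part of Beam):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.schemas.NoSuchSchemaException;
import org.apache.beam.sdk.schemas.SchemaCoder;
import org.apache.beam.sdk.schemas.SchemaRegistry;
import org.apache.beam.sdk.values.TypeDescriptor;

final class SchemaCoders {
  private SchemaCoders() {}

  // Hypothetical helper: resolve a SchemaCoder for clazz from the pipeline's registry,
  // rethrowing the checked NoSuchSchemaException unchecked, exactly as expand() does above.
  static <T> SchemaCoder<T> schemaCoderFor(Pipeline pipeline, Class<T> clazz) {
    SchemaRegistry registry = pipeline.getSchemaRegistry();
    try {
      return SchemaCoder.of(
          registry.getSchema(clazz),
          TypeDescriptor.of(clazz),
          registry.getToRowFunction(clazz),
          registry.getFromRowFunction(clazz));
    } catch (NoSuchSchemaException e) {
      throw new RuntimeException(e);
    }
  }
}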
Use of org.apache.beam.sdk.schemas.SchemaRegistry in project beam by apache.
From the class Neo4jIO, the method getOutputPCollection:
private static <ParameterT, OutputT> PCollection<OutputT> getOutputPCollection(
    PCollection<ParameterT> input, DoFn<ParameterT, OutputT> writeFn, @Nullable Coder<OutputT> coder) {
  PCollection<OutputT> output = input.apply(ParDo.of(writeFn));
  if (coder != null) {
    output.setCoder(coder);
    // Best effort: additionally attach a schema so that schema-aware transforms can consume the
    // output, but only if the registry knows the output type.
    try {
      TypeDescriptor<OutputT> typeDesc = coder.getEncodedTypeDescriptor();
      SchemaRegistry registry = input.getPipeline().getSchemaRegistry();
      Schema schema = registry.getSchema(typeDesc);
      output.setSchema(
          schema, typeDesc, registry.getToRowFunction(typeDesc), registry.getFromRowFunction(typeDesc));
    } catch (NoSuchSchemaException e) {
      // No schema registered for OutputT; the explicit coder alone is sufficient.
    }
  }
  return output;
}
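Here the NoSuchSchemaException is swallowed deliberately: schema attachment is best effort, and the explicit coder already makes the output usable. A sketch of the same lookup-or-skip pattern as a standalone helper (BestEffortSchema is our name, not Beam's):

import org.apache.beam.sdk.schemas.NoSuchSchemaException;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.SchemaRegistry;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptor;

final class BestEffortSchema {
  private BestEffortSchema() {}

  // Hypothetical helper mirroring getOutputPCollection: attach a schema if the registry knows
  // the type; otherwise leave the PCollection with its existing coder only.
  static <T> void trySetSchema(PCollection<T> pc, TypeDescriptor<T> type, SchemaRegistry registry) {
    try {
      Schema schema = registry.getSchema(type);
      pc.setSchema(schema, type, registry.getToRowFunction(type), registry.getFromRowFunction(type));
    } catch (NoSuchSchemaException e) {
      // No schema registered for T: skip; schema-aware transforms simply won't be available.
    }
  }
}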