Search in sources:

Example 1 with SchemaRegistry

use of org.apache.beam.sdk.schemas.SchemaRegistry in project beam by apache.

the class SqlQuery1Test method testConvertsPriceToEur.

/**
 * Feeds two events wrapping BID1_USD and BID2_USD through SqlQuery1 and asserts that the output
 * contains BID1_EUR and BID2_EUR, in any order.
 */
@Test
public void testConvertsPriceToEur() throws Exception {
    SchemaRegistry schemaRegistry = SchemaRegistry.createDefault();

    // Schema-aware test stream: two single-element batches, then close the watermark.
    TestStream<Event> usdBidStream =
        TestStream.create(
                schemaRegistry.getSchema(Event.class),
                TypeDescriptor.of(Event.class),
                schemaRegistry.getToRowFunction(Event.class),
                schemaRegistry.getFromRowFunction(Event.class))
            .addElements(new Event(BID1_USD))
            .addElements(new Event(BID2_USD))
            .advanceWatermarkToInfinity();

    PCollection<Event> bids = testPipeline.apply(usdBidStream);

    PAssert.that(bids.apply(new SqlQuery1())).containsInAnyOrder(BID1_EUR, BID2_EUR);
    testPipeline.run();
}
Also used : Event(org.apache.beam.sdk.nexmark.model.Event) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) Test(org.junit.Test)

Example 2 with SchemaRegistry

use of org.apache.beam.sdk.schemas.SchemaRegistry in project beam by apache.

the class ParDo method getDoFnSchemaInformation.

/**
 * Extract information on how the DoFn uses schemas. In particular, if the schema of an element
 * parameter does not match the input PCollection's schema, convert.
 *
 * <p>For every schema element parameter of the process-element method, the field access
 * descriptor is recorded and either a select-from-schema conversion or a primitive unboxing is
 * registered. If the method also takes a process-context or plain element parameter, access to
 * all fields is recorded.
 *
 * @param fn the DoFn whose process-element signature is inspected
 * @param input the PCollection the DoFn is applied to
 * @return the schema information accumulated for {@code fn}
 * @throws IllegalArgumentException if {@code fn} declares schema element parameters but
 *     {@code input} has no schema, or if a selected schema can be neither converted nor unboxed
 *     to the parameter type
 */
@Internal
public static DoFnSchemaInformation getDoFnSchemaInformation(DoFn<?, ?> fn, PCollection<?> input) {
    DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
    DoFnSignature.ProcessElementMethod processElementMethod = signature.processElement();
    // Schema element parameters can only be resolved against an input that carries a schema.
    if (!processElementMethod.getSchemaElementParameters().isEmpty() && !input.hasSchema()) {
        throw new IllegalArgumentException("Type of @Element must match the DoFn type: input " + input + " has no schema");
    }
    SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
    DoFnSchemaInformation doFnSchemaInformation = DoFnSchemaInformation.create();
    for (SchemaElementParameter parameter : processElementMethod.getSchemaElementParameters()) {
        TypeDescriptor<?> elementT = parameter.elementT();
        FieldAccessDescriptor accessDescriptor = getFieldAccessDescriptorFromParameter(parameter.fieldAccessString(), input.getSchema(), signature.fieldAccessDeclarations(), fn);
        doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(accessDescriptor);
        Schema selectedSchema = SelectHelpers.getOutputSchema(input.getSchema(), accessDescriptor);
        ConvertHelpers.ConvertedSchemaInformation converted = ConvertHelpers.getConvertedSchemaInformation(selectedSchema, elementT, schemaRegistry);
        if (converted.outputSchemaCoder != null) {
            doFnSchemaInformation = doFnSchemaInformation.withSelectFromSchemaParameter((SchemaCoder<?>) input.getCoder(), accessDescriptor, selectedSchema, converted.outputSchemaCoder, converted.unboxedType != null);
        } else {
            // If the selected schema is a Row containing a single primitive type (which is the output
            // of Select when selecting a primitive), attempt to unbox it and match against the
            // parameter.
            checkArgument(converted.unboxedType != null, "Selected schema %s can be neither converted nor unboxed to @Element parameter type %s", selectedSchema, elementT);
            doFnSchemaInformation = doFnSchemaInformation.withUnboxPrimitiveParameter((SchemaCoder<?>) input.getCoder(), accessDescriptor, selectedSchema, elementT);
        }
    }
    // A process-context or plain element parameter sees the whole element, so record access to
    // all fields; one such parameter is enough.
    for (DoFnSignature.Parameter p : processElementMethod.extraParameters()) {
        if (p instanceof ProcessContextParameter || p instanceof ElementParameter) {
            doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(FieldAccessDescriptor.withAllFields());
            break;
        }
    }
    return doFnSchemaInformation;
}
Also used : FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) ConvertHelpers(org.apache.beam.sdk.schemas.utils.ConvertHelpers) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Schema(org.apache.beam.sdk.schemas.Schema) ProcessContextParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ProcessContextParameter) ElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ElementParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) Internal(org.apache.beam.sdk.annotations.Internal)

Example 3 with SchemaRegistry

use of org.apache.beam.sdk.schemas.SchemaRegistry in project beam by apache.

the class StorageApiWritesShardedRecords method expand.

/**
 * Writes the sharded records, then windows the resulting per-key operations, combines them per
 * key with {@code Max.naturalOrder}, and passes the result to the flush-and-finalize DoFn.
 */
@Override
public PCollection<Void> expand(PCollection<KV<ShardedKey<DestinationT>, Iterable<byte[]>>> input) {
    String operationName = input.getName() + "/" + getName();

    // Append records to the Storage API streams.
    PCollection<KV<String, Operation>> written =
        input.apply(
            "Write Records",
            ParDo.of(new WriteRecordsDoFn(operationName))
                .withSideInputs(dynamicDestinations.getSideInputs()));

    // Build a schema-based coder for Operation from the pipeline's schema registry.
    SchemaRegistry registry = input.getPipeline().getSchemaRegistry();
    SchemaCoder<Operation> operationCoder;
    try {
        operationCoder =
            SchemaCoder.of(
                registry.getSchema(Operation.class),
                TypeDescriptor.of(Operation.class),
                registry.getToRowFunction(Operation.class),
                registry.getFromRowFunction(Operation.class));
    } catch (NoSuchSchemaException e) {
        throw new RuntimeException(e);
    }

    // Send all successful writes to be flushed.
    PCollection<KV<String, Operation>> coded =
        written.setCoder(KvCoder.of(StringUtf8Coder.of(), operationCoder));

    PCollection<KV<String, Operation>> windowed =
        coded.apply(
            Window.<KV<String, Operation>>configure()
                .triggering(
                    Repeatedly.forever(
                        AfterProcessingTime.pastFirstElementInPane()
                            .plusDelayOf(Duration.standardSeconds(1))))
                .discardingFiredPanes());

    PCollection<KV<String, Operation>> maxPositions =
        windowed.apply(
            "maxFlushPosition", Combine.perKey(Max.naturalOrder(new Operation(-1, false))));

    return maxPositions.apply(
        "Flush and finalize writes", ParDo.of(new StorageApiFlushAndFinalizeDoFn(bqServices)));
}
Also used : NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException) KV(org.apache.beam.sdk.values.KV) Operation(org.apache.beam.sdk.io.gcp.bigquery.StorageApiFlushAndFinalizeDoFn.Operation) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry)

Example 4 with SchemaRegistry

use of org.apache.beam.sdk.schemas.SchemaRegistry in project beam by apache.

the class Neo4jIO method getOutputPCollection.

/**
 * Applies {@code writeFn} to {@code input} and returns the resulting collection.
 *
 * <p>When {@code coder} is non-null it is set on the result, and a schema for the coder's encoded
 * type is looked up in the pipeline's schema registry and attached as well. If the registry has
 * no such schema, the result is returned with only the coder set.
 *
 * @param input the collection to apply the function to
 * @param writeFn the DoFn to run over {@code input}
 * @param coder optional coder for the output; {@code null} leaves coder and schema untouched
 * @return the output of applying {@code writeFn}
 */
private static <ParameterT, OutputT> PCollection<OutputT> getOutputPCollection(PCollection<ParameterT> input, DoFn<ParameterT, OutputT> writeFn, @Nullable Coder<OutputT> coder) {
    PCollection<OutputT> result = input.apply(ParDo.of(writeFn));
    if (coder == null) {
        return result;
    }

    result.setCoder(coder);
    try {
        TypeDescriptor<OutputT> outputType = coder.getEncodedTypeDescriptor();
        SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
        Schema outputSchema = schemaRegistry.getSchema(outputType);
        result.setSchema(
            outputSchema,
            outputType,
            schemaRegistry.getToRowFunction(outputType),
            schemaRegistry.getFromRowFunction(outputType));
    } catch (NoSuchSchemaException ignored) {
        // Attaching a schema is best-effort: without a registered schema the output keeps
        // only the coder set above.
    }
    return result;
}
Also used : NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException) Schema(org.apache.beam.sdk.schemas.Schema) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry)

Aggregations

SchemaRegistry (org.apache.beam.sdk.schemas.SchemaRegistry)4 NoSuchSchemaException (org.apache.beam.sdk.schemas.NoSuchSchemaException)2 Schema (org.apache.beam.sdk.schemas.Schema)2 Internal (org.apache.beam.sdk.annotations.Internal)1 Operation (org.apache.beam.sdk.io.gcp.bigquery.StorageApiFlushAndFinalizeDoFn.Operation)1 Event (org.apache.beam.sdk.nexmark.model.Event)1 FieldAccessDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor)1 SchemaCoder (org.apache.beam.sdk.schemas.SchemaCoder)1 ConvertHelpers (org.apache.beam.sdk.schemas.utils.ConvertHelpers)1 DoFnSignature (org.apache.beam.sdk.transforms.reflect.DoFnSignature)1 ElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ElementParameter)1 ProcessContextParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ProcessContextParameter)1 SchemaElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter)1 KV (org.apache.beam.sdk.values.KV)1 Test (org.junit.Test)1