Search in sources :

Example 1 with Internal

use of org.apache.beam.sdk.annotations.Internal in project beam by apache.

the class ParDo method getDoFnSchemaInformation.

/**
 * Extract information on how the DoFn uses schemas. In particular, if the schema of an element
 * parameter does not match the input PCollection's schema, convert.
 *
 * @param fn the DoFn whose process-element signature is inspected
 * @param input the PCollection the DoFn is applied to
 * @return the schema information accumulated from every schema element parameter of the
 *     process-element method, plus an all-fields access descriptor when the method takes a
 *     process context or a plain element parameter
 * @throws IllegalArgumentException if the process-element method declares schema element
 *     parameters but {@code input} has no schema, or if a selected schema can neither be
 *     converted nor unboxed to the parameter type
 */
@Internal
public static DoFnSchemaInformation getDoFnSchemaInformation(DoFn<?, ?> fn, PCollection<?> input) {
    DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
    DoFnSignature.ProcessElementMethod processElementMethod = signature.processElement();
    // Schema element parameters are only meaningful when the input actually carries a schema.
    if (!processElementMethod.getSchemaElementParameters().isEmpty() && !input.hasSchema()) {
        throw new IllegalArgumentException("Type of @Element must match the DoFn type: " + input);
    }
    SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
    DoFnSchemaInformation doFnSchemaInformation = DoFnSchemaInformation.create();
    for (SchemaElementParameter parameter : processElementMethod.getSchemaElementParameters()) {
        TypeDescriptor<?> elementT = parameter.elementT();
        // The guard above guarantees the input has a schema whenever this loop body runs.
        Schema inputSchema = input.getSchema();
        FieldAccessDescriptor accessDescriptor = getFieldAccessDescriptorFromParameter(parameter.fieldAccessString(), inputSchema, signature.fieldAccessDeclarations(), fn);
        doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(accessDescriptor);
        Schema selectedSchema = SelectHelpers.getOutputSchema(inputSchema, accessDescriptor);
        ConvertHelpers.ConvertedSchemaInformation converted = ConvertHelpers.getConvertedSchemaInformation(selectedSchema, elementT, schemaRegistry);
        if (converted.outputSchemaCoder != null) {
            // The selected schema can be converted to the parameter type through a schema coder.
            doFnSchemaInformation = doFnSchemaInformation.withSelectFromSchemaParameter((SchemaCoder<?>) input.getCoder(), accessDescriptor, selectedSchema, converted.outputSchemaCoder, converted.unboxedType != null);
        } else {
            // If the selected schema is a Row containing a single primitive type (which is the output
            // of Select when selecting a primitive), attempt to unbox it and match against the
            // parameter.
            checkArgument(converted.unboxedType != null);
            doFnSchemaInformation = doFnSchemaInformation.withUnboxPrimitiveParameter((SchemaCoder<?>) input.getCoder(), accessDescriptor, selectedSchema, elementT);
        }
    }
    // A process context or plain element parameter registers an all-fields access descriptor;
    // one such registration is enough, so stop at the first match.
    for (DoFnSignature.Parameter p : processElementMethod.extraParameters()) {
        if (p instanceof ProcessContextParameter || p instanceof ElementParameter) {
            doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(FieldAccessDescriptor.withAllFields());
            break;
        }
    }
    return doFnSchemaInformation;
}
Also used : FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) ConvertHelpers(org.apache.beam.sdk.schemas.utils.ConvertHelpers) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Schema(org.apache.beam.sdk.schemas.Schema) ProcessContextParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ProcessContextParameter) ElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ElementParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) Internal(org.apache.beam.sdk.annotations.Internal)

Example 2 with Internal

use of org.apache.beam.sdk.annotations.Internal in project beam by apache.

the class DataCatalogTableProvider method setSchemaIfNotPresent.

/**
 * Stores {@code schema} on the Data Catalog entry looked up by {@code resource}, but only when
 * that entry currently reports zero schema columns.
 *
 * @param resource SQL resource name used to look up the entry
 * @param schema schema to convert and store on the entry
 * @return {@code true} if the entry was updated, {@code false} if it already had a schema
 */
@Internal
public boolean setSchemaIfNotPresent(String resource, Schema schema) {
    com.google.cloud.datacatalog.v1beta1.Schema catalogSchema = SchemaUtils.toDataCatalog(schema);
    Entry existing =
        dataCatalog.lookupEntry(
            LookupEntryRequest.newBuilder().setSqlResource(resource).build());

    // Leave entries that already declare columns untouched.
    if (existing.getSchema().getColumnsCount() != 0) {
        LOG.info(String.format("Not updating schema for '%s' since it already has one.", resource));
        return false;
    }

    Entry withSchema = existing.toBuilder().setSchema(catalogSchema).build();
    // The update mask restricts the write to the "schema" path only.
    dataCatalog.updateEntry(
        UpdateEntryRequest.newBuilder()
            .setEntry(withSchema)
            .setUpdateMask(FieldMask.newBuilder().addPaths("schema").build())
            .build());
    return true;
}
Also used : Entry(com.google.cloud.datacatalog.v1beta1.Entry) Internal(org.apache.beam.sdk.annotations.Internal)

Example 3 with Internal

use of org.apache.beam.sdk.annotations.Internal in project beam by apache.

the class PubsubOptions method targetForRootUrl.

/**
 * Internal only utility for converting {@link #getPubsubRootUrl()} (e.g. {@code https://<host>})
 * to an endpoint target, usable by GCP client libraries (e.g. {@code <host>:443}).
 *
 * <p>When the URL carries no explicit port, the port is derived from the protocol: 443 for
 * {@code https} and 80 for {@code http}.
 *
 * @param urlString the root URL to convert
 * @return the target in {@code host:port} form, with the port always rendered in ASCII digits
 * @throws IllegalArgumentException if {@code urlString} cannot be parsed as a URL, or if it has
 *     no explicit port and its protocol is neither {@code https} nor {@code http}
 */
@Internal
static String targetForRootUrl(String urlString) {
    URL url;
    try {
        url = new URL(urlString);
    } catch (MalformedURLException e) {
        throw new IllegalArgumentException(String.format("Could not parse pubsub root url \"%s\"", urlString), e);
    }
    int port = url.getPort();
    // A negative port means the URL did not specify one; fall back to the protocol's default.
    if (port < 0) {
        switch(url.getProtocol()) {
            case "https":
                port = 443;
                break;
            case "http":
                port = 80;
                break;
            default:
                throw new IllegalArgumentException(String.format("Could not determine port for pubsub root url \"%s\". You must either specify the port or use the protocol \"https\" or \"http\"", urlString));
        }
    }
    // Plain concatenation instead of String.format("%d"): %d renders digits for the default
    // locale, which would produce a non-ASCII (and therefore unusable) port in some locales.
    return url.getHost() + ":" + port;
}
Also used : MalformedURLException(java.net.MalformedURLException) URL(java.net.URL) Internal(org.apache.beam.sdk.annotations.Internal)

Aggregations

Internal (org.apache.beam.sdk.annotations.Internal)3 Entry (com.google.cloud.datacatalog.v1beta1.Entry)1 MalformedURLException (java.net.MalformedURLException)1 URL (java.net.URL)1 FieldAccessDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor)1 Schema (org.apache.beam.sdk.schemas.Schema)1 SchemaCoder (org.apache.beam.sdk.schemas.SchemaCoder)1 SchemaRegistry (org.apache.beam.sdk.schemas.SchemaRegistry)1 ConvertHelpers (org.apache.beam.sdk.schemas.utils.ConvertHelpers)1 DoFnSignature (org.apache.beam.sdk.transforms.reflect.DoFnSignature)1 ElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ElementParameter)1 ProcessContextParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.ProcessContextParameter)1 SchemaElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter)1