use of org.apache.beam.sdk.annotations.Internal in project beam by apache.
the class ParDo method getDoFnSchemaInformation.
/**
 * Extracts information on how a {@link DoFn} uses schemas. For every schema element parameter of
 * the {@code @ProcessElement} method, this records which fields of the input schema are accessed
 * and how the selected fields are turned into the parameter's type: either through a converted
 * schema coder or, when no such coder is available, by unboxing a single primitive.
 *
 * @param fn the DoFn whose signature is inspected
 * @param input the PCollection the DoFn is applied to
 * @return the accumulated {@link DoFnSchemaInformation}
 * @throws IllegalArgumentException if the DoFn declares schema element parameters but {@code
 *     input} has no schema, or if a selected schema can be neither converted nor unboxed to the
 *     parameter type
 */
@Internal
public static DoFnSchemaInformation getDoFnSchemaInformation(DoFn<?, ?> fn, PCollection<?> input) {
  DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
  DoFnSignature.ProcessElementMethod processElementMethod = signature.processElement();

  // Schema element parameters can only be served from an input that carries a schema.
  if (!processElementMethod.getSchemaElementParameters().isEmpty() && !input.hasSchema()) {
    throw new IllegalArgumentException(
        "Type of @Element must match the DoFn type when the input has no schema: " + input);
  }

  SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
  DoFnSchemaInformation doFnSchemaInformation = DoFnSchemaInformation.create();

  for (SchemaElementParameter parameter : processElementMethod.getSchemaElementParameters()) {
    TypeDescriptor<?> elementT = parameter.elementT();
    // Read the schema inside the loop so it is never requested when there are no schema
    // element parameters (the input may not have a schema in that case).
    Schema inputSchema = input.getSchema();
    FieldAccessDescriptor accessDescriptor =
        getFieldAccessDescriptorFromParameter(
            parameter.fieldAccessString(), inputSchema, signature.fieldAccessDeclarations(), fn);
    doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(accessDescriptor);

    Schema selectedSchema = SelectHelpers.getOutputSchema(inputSchema, accessDescriptor);
    ConvertHelpers.ConvertedSchemaInformation converted =
        ConvertHelpers.getConvertedSchemaInformation(selectedSchema, elementT, schemaRegistry);

    if (converted.outputSchemaCoder != null) {
      doFnSchemaInformation =
          doFnSchemaInformation.withSelectFromSchemaParameter(
              (SchemaCoder<?>) input.getCoder(),
              accessDescriptor,
              selectedSchema,
              converted.outputSchemaCoder,
              converted.unboxedType != null);
    } else {
      // If the selected schema is a Row containing a single primitive type (which is the output
      // of Select when selecting a primitive), attempt to unbox it and match against the
      // parameter.
      checkArgument(
          converted.unboxedType != null,
          "Selected schema %s can neither be converted nor unboxed to @Element parameter type %s",
          selectedSchema,
          elementT);
      doFnSchemaInformation =
          doFnSchemaInformation.withUnboxPrimitiveParameter(
              (SchemaCoder<?>) input.getCoder(), accessDescriptor, selectedSchema, elementT);
    }
  }

  // A ProcessContext or plain element parameter sees the whole element, so all fields count as
  // accessed; one such parameter is enough.
  for (DoFnSignature.Parameter p : processElementMethod.extraParameters()) {
    if (p instanceof ProcessContextParameter || p instanceof ElementParameter) {
      doFnSchemaInformation =
          doFnSchemaInformation.withFieldAccessDescriptor(FieldAccessDescriptor.withAllFields());
      break;
    }
  }
  return doFnSchemaInformation;
}
use of org.apache.beam.sdk.annotations.Internal in project beam by apache.
the class DataCatalogTableProvider method setSchemaIfNotPresent.
/**
 * Stores {@code schema} on the Data Catalog entry identified by {@code resource}, but only when
 * that entry currently reports zero schema columns.
 *
 * @param resource SQL resource string used to look up the entry
 * @param schema schema to convert to its Data Catalog form and write to the entry
 * @return {@code true} if the entry was updated, {@code false} if it already had columns and was
 *     left untouched
 */
@Internal
public boolean setSchemaIfNotPresent(String resource, Schema schema) {
  com.google.cloud.datacatalog.v1beta1.Schema catalogSchema = SchemaUtils.toDataCatalog(schema);

  LookupEntryRequest lookupRequest =
      LookupEntryRequest.newBuilder().setSqlResource(resource).build();
  Entry existingEntry = dataCatalog.lookupEntry(lookupRequest);

  // Guard: an entry that already lists columns is never overwritten.
  boolean hasColumns = existingEntry.getSchema().getColumnsCount() != 0;
  if (hasColumns) {
    LOG.info(String.format("Not updating schema for '%s' since it already has one.", resource));
    return false;
  }

  // Restrict the update to the "schema" path via the field mask.
  Entry entryWithSchema = existingEntry.toBuilder().setSchema(catalogSchema).build();
  FieldMask schemaOnlyMask = FieldMask.newBuilder().addPaths("schema").build();
  UpdateEntryRequest updateRequest =
      UpdateEntryRequest.newBuilder()
          .setEntry(entryWithSchema)
          .setUpdateMask(schemaOnlyMask)
          .build();
  dataCatalog.updateEntry(updateRequest);
  return true;
}
use of org.apache.beam.sdk.annotations.Internal in project beam by apache.
the class PubsubOptions method targetForRootUrl.
/**
* Internal only utility for converting {@link #getPubsubRootUrl()} (e.g. {@code https://<host>})
* to an endpoint target, usable by GCP client libraries (e.g. {@code <host>:443})
*/
@Internal
static String targetForRootUrl(String urlString) {
  // A string that cannot be parsed as a URL is reported as an IllegalArgumentException with the
  // parse failure attached as the cause.
  URL url;
  try {
    url = new URL(urlString);
  } catch (MalformedURLException e) {
    throw new IllegalArgumentException(
        String.format("Could not parse pubsub root url \"%s\"", urlString), e);
  }

  // Without a host the result would be a bare ":<port>", which is not a usable endpoint target,
  // so reject it up front instead of returning it.
  String host = url.getHost();
  if (host == null || host.isEmpty()) {
    throw new IllegalArgumentException(
        String.format("Could not determine host for pubsub root url \"%s\"", urlString));
  }

  // getPort() is negative when the URL carries no explicit port; fall back to the well-known
  // port of the protocol in that case.
  int port = url.getPort();
  if (port < 0) {
    switch (url.getProtocol()) {
      case "https":
        port = 443;
        break;
      case "http":
        port = 80;
        break;
      default:
        throw new IllegalArgumentException(
            String.format(
                "Could not determine port for pubsub root url \"%s\". You must either specify the port or use the protocol \"https\" or \"http\"",
                urlString));
    }
  }

  // Plain concatenation keeps the port in ASCII digits; a locale-sensitive "%d" could render it
  // with locale-specific digits under some default locales.
  return host + ":" + port;
}
Aggregations