Usage of com.google.api.services.dataflow.model.SourceMetadata in the Apache Beam project.
From the class CustomSources, method serializeToCloudSource.
/**
 * Converts the given source into a {@code com.google.api.services.dataflow.model.Source}.
 *
 * <p>The returned object's spec is the {@code CloudObject} for {@code CustomSources} and holds
 * the serialized, {@code encodeBase64String}-encoded source under {@code SERIALIZED_SOURCE}.
 * Its metadata depends on the kind of source:
 *
 * <ul>
 *   <li>{@code BoundedSource}: the estimated size in bytes is recorded. Estimation is best
 *       effort; if it throws, a warning is logged and the size is left unset.
 *   <li>{@code UnboundedSource}: the metadata is flagged as infinite, the source is split, and
 *       every split is serialized and stored under {@code SERIALIZED_SOURCE_SPLITS}.
 * </ul>
 *
 * @param source the source to serialize
 * @param options pipeline options, used for size estimation and for splitting; viewed as
 *     {@code DataflowPipelineOptions} to obtain the desired number of unbounded splits
 * @return the populated cloud source, with both spec and metadata set
 * @throws IllegalArgumentException if {@code source} is neither bounded nor unbounded
 *     (presumably also when no splits are produced, via {@code checkArgument} - confirm)
 * @throws Exception if serialization or splitting fails
 */
public static com.google.api.services.dataflow.model.Source serializeToCloudSource(Source<?> source, PipelineOptions options) throws Exception {
  com.google.api.services.dataflow.model.Source result = new com.google.api.services.dataflow.model.Source();
  // CustomSources itself plays the role of the SourceFormat.
  result.setSpec(CloudObject.forClass(CustomSources.class));
  String encodedSource = encodeBase64String(serializeToByteArray(source));
  addString(result.getSpec(), SERIALIZED_SOURCE, encodedSource);

  // Reject anything that is neither bounded nor unbounded before building metadata.
  final boolean bounded = source instanceof BoundedSource;
  if (!bounded && !(source instanceof UnboundedSource)) {
    throw new IllegalArgumentException("Unexpected source kind: " + source.getClass());
  }

  SourceMetadata meta = new SourceMetadata();
  if (bounded) {
    // A failed size estimate must not abort serialization, so it is only logged.
    try {
      meta.setEstimatedSizeBytes(((BoundedSource<?>) source).getEstimatedSizeBytes(options));
    } catch (Exception e) {
      LOG.warn("Size estimation of the source failed: " + source, e);
    }
  } else {
    UnboundedSource<?, ?> unbounded = (UnboundedSource<?, ?>) source;
    meta.setInfinite(true);
    List<String> serializedSplits = new ArrayList<>();
    int splitCount = getDesiredNumUnboundedSourceSplits(options.as(DataflowPipelineOptions.class));
    for (UnboundedSource<?, ?> part : unbounded.split(splitCount, options)) {
      String encodedPart = encodeBase64String(serializeToByteArray(part));
      serializedSplits.add(encodedPart);
    }
    checkArgument(!serializedSplits.isEmpty(), "UnboundedSources must have at least one split");
    addStringList(result.getSpec(), SERIALIZED_SOURCE_SPLITS, serializedSplits);
  }

  result.setMetadata(meta);
  return result;
}
Aggregations