Use of org.apache.beam.runners.dataflow.internal.CustomSources in the Apache Beam project.
The method serializeSplitToCloudSource of the class WorkerCustomSources.
/**
 * Version of {@link CustomSources#serializeToCloudSource(Source, PipelineOptions)} intended for
 * use on splits of {@link BoundedSource}.
 *
 * <p>The returned cloud source has a spec created via {@code CloudObject.forClass(CustomSources.class)}
 * that carries the Base64-encoded serialized bytes of {@code source} under the
 * {@code SERIALIZED_SOURCE} key. Its metadata contains the estimated size in bytes only when the
 * source reports a non-negative estimate; a negative estimate or an exception during estimation
 * is logged as a warning and the size is left unset.
 *
 * @param source the bounded source split to serialize
 * @return a cloud {@code Source} holding the serialized split and its best-effort metadata
 * @throws Exception if building the spec (serializing or encoding the source) fails; exceptions
 *     raised while estimating the size are caught and logged instead
 */
private static com.google.api.services.dataflow.model.Source serializeSplitToCloudSource(BoundedSource<?> source) throws Exception {
  com.google.api.services.dataflow.model.Source cloudSource = new com.google.api.services.dataflow.model.Source();
  cloudSource.setSpec(CloudObject.forClass(CustomSources.class));
  addString(cloudSource.getSpec(), SERIALIZED_SOURCE, encodeBase64String(serializeToByteArray(source)));

  SourceMetadata metadata = new SourceMetadata();
  // Size estimation is best effort so we continue even if it fails here.
  try {
    // The estimate is requested with freshly created default pipeline options.
    long estimatedSize = source.getEstimatedSizeBytes(PipelineOptionsFactory.create());
    if (estimatedSize >= 0) {
      metadata.setEstimatedSizeBytes(estimatedSize);
    } else {
      LOG.warn("Ignoring negative estimated size {} produced by source {}", estimatedSize, source);
    }
  } catch (Exception e) {
    // Placeholder form, consistent with the warning above: the source is formatted by the
    // logger rather than concatenated here, and the trailing exception is logged as the cause.
    LOG.warn("Size estimation of the source failed: {}", source, e);
  }
  cloudSource.setMetadata(metadata);
  return cloudSource;
}
Aggregations