Search in sources :

Example 1 with SourceMetadata

Use of com.google.api.services.dataflow.model.SourceMetadata in the Apache Beam project.

Class CustomSources, method serializeToCloudSource.

/**
 * Builds the Dataflow service representation of a Beam {@code Source}.
 *
 * <p>The returned cloud source carries a spec created for {@code CustomSources} with the
 * Base64-encoded serialized form of {@code source} stored under {@code SERIALIZED_SOURCE},
 * plus a {@code SourceMetadata} object:
 *
 * <ul>
 *   <li>For a {@code BoundedSource}, the metadata receives the estimated size in bytes.
 *       Estimation is best effort: a failure is logged as a warning and otherwise ignored.
 *   <li>For an {@code UnboundedSource}, the metadata is marked infinite, the source is split
 *       up front, and every split is serialized, Base64-encoded and stored in the spec under
 *       {@code SERIALIZED_SOURCE_SPLITS}.
 * </ul>
 *
 * @param source the source to translate; must be a {@code BoundedSource} or an
 *     {@code UnboundedSource}
 * @param options pipeline options handed to size estimation and splitting, and used to
 *     obtain the desired number of unbounded splits
 * @return the populated cloud source
 * @throws IllegalArgumentException if {@code source} is neither bounded nor unbounded
 * @throws Exception if serialization or splitting of the source fails; an unbounded source
 *     that yields no splits fails the {@code checkArgument} precondition
 */
public static com.google.api.services.dataflow.model.Source serializeToCloudSource(Source<?> source, PipelineOptions options) throws Exception {
    com.google.api.services.dataflow.model.Source result = new com.google.api.services.dataflow.model.Source();
    // CustomSources itself plays the role of the SourceFormat.
    result.setSpec(CloudObject.forClass(CustomSources.class));
    String encodedSource = encodeBase64String(serializeToByteArray(source));
    addString(result.getSpec(), SERIALIZED_SOURCE, encodedSource);

    // Reject unsupported kinds only after the source itself has been serialized,
    // so failures surface in the same order as before.
    final boolean bounded = source instanceof BoundedSource;
    if (!bounded && !(source instanceof UnboundedSource)) {
        throw new IllegalArgumentException("Unexpected source kind: " + source.getClass());
    }

    SourceMetadata sourceMetadata = new SourceMetadata();
    if (bounded) {
        // A failed size estimate must not abort the translation; log it and move on.
        try {
            long estimatedBytes = ((BoundedSource<?>) source).getEstimatedSizeBytes(options);
            sourceMetadata.setEstimatedSizeBytes(estimatedBytes);
        } catch (Exception e) {
            LOG.warn("Size estimation of the source failed: " + source, e);
        }
    } else {
        sourceMetadata.setInfinite(true);
        int splitCount = getDesiredNumUnboundedSourceSplits(options.as(DataflowPipelineOptions.class));
        List<String> serializedSplits = new ArrayList<>();
        for (UnboundedSource<?, ?> piece : ((UnboundedSource<?, ?>) source).split(splitCount, options)) {
            byte[] pieceBytes = serializeToByteArray(piece);
            serializedSplits.add(encodeBase64String(pieceBytes));
        }
        checkArgument(!serializedSplits.isEmpty(), "UnboundedSources must have at least one split");
        addStringList(result.getSpec(), SERIALIZED_SOURCE_SPLITS, serializedSplits);
    }

    result.setMetadata(sourceMetadata);
    return result;
}
Also used : BoundedSource(org.apache.beam.sdk.io.BoundedSource) SourceMetadata(com.google.api.services.dataflow.model.SourceMetadata) UnboundedSource(org.apache.beam.sdk.io.UnboundedSource) Source(org.apache.beam.sdk.io.Source) BoundedSource(org.apache.beam.sdk.io.BoundedSource) Structs.addStringList(org.apache.beam.runners.dataflow.util.Structs.addStringList) ArrayList(java.util.ArrayList) List(java.util.List) UnboundedSource(org.apache.beam.sdk.io.UnboundedSource)

Aggregations

SourceMetadata (com.google.api.services.dataflow.model.SourceMetadata)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Structs.addStringList (org.apache.beam.runners.dataflow.util.Structs.addStringList)1 BoundedSource (org.apache.beam.sdk.io.BoundedSource)1 Source (org.apache.beam.sdk.io.Source)1 UnboundedSource (org.apache.beam.sdk.io.UnboundedSource)1