Search in sources :

Example 1 with DataStreamWriter

Use of org.apache.spark.sql.streaming.DataStreamWriter in project beam by Apache.

The class AbstractTranslationContext, method startPipeline.

// --------------------------------------------------------------------------------------------
// Pipeline methods
// --------------------------------------------------------------------------------------------
/**
 * Starts the pipeline by triggering an action on every dataset in {@code leaves}.
 *
 * <p>In streaming mode each leaf is turned into a {@link DataStreamWriter} with a no-op foreach
 * writer and passed to {@code launchStreaming}. Otherwise a no-op {@code foreach} action is
 * applied to the dataset; when test mode is enabled, the dataset's execution plans are explained
 * first.
 */
public void startPipeline() {
    final SparkStructuredStreamingPipelineOptions pipelineOptions = serializablePipelineOptions.get().as(SparkStructuredStreamingPipelineOptions.class);
    // Counts only the datasets whose plans were explained (batch + test mode).
    int explainedCount = 0;
    for (Dataset<?> leaf : leaves) {
        if (!pipelineOptions.isStreaming()) {
            if (pipelineOptions.getTestMode()) {
                explainedCount++;
                LOG.debug("**** dataset {} catalyst execution plans ****", explainedCount);
                leaf.explain(true);
            }
            // The function body is intentionally empty: foreach is only used as an action so
            // that spark actually runs the pipeline.
            leaf.foreach((ForeachFunction) ignored -> {
            });
            continue;
        }
        // TODO: map Beam Discarding, Accumulating and Accumulating & Retracting output modes
        // onto DataStreamWriter.outputMode
        DataStreamWriter<?> streamWriter = leaf.writeStream();
        // Only set the checkpoint location when one is configured; spark sets a default
        // checkpoint dir otherwise.
        if (pipelineOptions.getCheckpointDir() != null) {
            streamWriter = streamWriter.option("checkpointLocation", pipelineOptions.getCheckpointDir());
        }
        launchStreaming(streamWriter.foreach(new NoOpForeachWriter<>()));
    }
}
Also used : SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Dataset(org.apache.spark.sql.Dataset) ForeachFunction(org.apache.spark.api.java.function.ForeachFunction) LoggerFactory(org.slf4j.LoggerFactory) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) TransformInputs(org.apache.beam.runners.core.construction.TransformInputs) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) EncoderHelpers(org.apache.beam.runners.spark.structuredstreaming.translation.helpers.EncoderHelpers) TupleTag(org.apache.beam.sdk.values.TupleTag) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) ForeachWriter(org.apache.spark.sql.ForeachWriter) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) SparkSession(org.apache.spark.sql.SparkSession) Logger(org.slf4j.Logger) SparkConf(org.apache.spark.SparkConf) Set(java.util.Set) PCollection(org.apache.beam.sdk.values.PCollection) Collectors(java.util.stream.Collectors) DataStreamWriter(org.apache.spark.sql.streaming.DataStreamWriter) SparkStructuredStreamingPipelineOptions(org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions) List(java.util.List) PValue(org.apache.beam.sdk.values.PValue) PCollectionView(org.apache.beam.sdk.values.PCollectionView) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) SparkStructuredStreamingPipelineOptions(org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions)

Aggregations

SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Collectors (java.util.stream.Collectors)1 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)1 TransformInputs (org.apache.beam.runners.core.construction.TransformInputs)1 SparkStructuredStreamingPipelineOptions (org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions)1 EncoderHelpers (org.apache.beam.runners.spark.structuredstreaming.translation.helpers.EncoderHelpers)1 Coder (org.apache.beam.sdk.coders.Coder)1 VoidCoder (org.apache.beam.sdk.coders.VoidCoder)1 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)1 PTransform (org.apache.beam.sdk.transforms.PTransform)1 WindowedValue (org.apache.beam.sdk.util.WindowedValue)1 PCollection (org.apache.beam.sdk.values.PCollection)1 PCollectionView (org.apache.beam.sdk.values.PCollectionView)1 PValue (org.apache.beam.sdk.values.PValue)1 TupleTag (org.apache.beam.sdk.values.TupleTag)1