Search in sources :

Example 76 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project DataflowJavaSDK-examples by GoogleCloudPlatform.

the class GameStats method main.

public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    ExampleUtils exampleUtils = new ExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);
    // Read Events from Pub/Sub using custom timestamps
    PCollection<GameActionInfo> rawEvents = pipeline
        .apply(PubsubIO.readStrings()
            .withTimestampAttribute(TIMESTAMP_ATTRIBUTE)
            .fromTopic(options.getTopic()))
        .apply("ParseGameEvent", ParDo.of(new ParseEventFn()));
    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents
        .apply("ExtractUserScore", MapElements
            .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
            .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));
    // Detect suspected spammers: calculate per-user scores over fixed windows, flag abnormally
    // high scorers, and materialize the result as a map-valued side input.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents
        .apply("FixedWindowsUser", Window.<KV<String, Integer>>into(
            FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
        .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
        .apply("CreateSpammersView", View.<String, Integer>asMap());
    // [START DocInclude_FilterAndCalc]
    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above (the set of
    // suspected robots) to filter those users' scores out of the sum.
    // Write the results to BigQuery.
    rawEvents.apply("WindowIntoFixedWindows", Window.<GameActionInfo>into(FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))).apply("FilterOutSpammers", ParDo.of(new DoFn<GameActionInfo, GameActionInfo>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            // If the user is not in the spammers Map, output the data element.
            if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
                c.output(c.element());
            }
        }
    }).withSideInputs(spammersView)).apply("ExtractTeamScore", new ExtractAndSumScore("team")).apply("WriteTeamSums", new WriteWindowedToBigQuery<KV<String, Integer>>(options.as(GcpOptions.class).getProject(), options.getDataset(), options.getGameStatsTablePrefix() + "_team", configureWindowedWrite()));
    // [START DocInclude_SessionCalc]
    // Detect user sessions, that is, a burst of activity separated by a gap from further
    // activity. Find and record the mean session lengths.
    // This information could help the game designers track the changing user engagement
    // as their set of games changes.
    userEvents.apply("WindowIntoSessions", Window.<KV<String, Integer>>into(Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap()))).withTimestampCombiner(TimestampCombiner.END_OF_WINDOW)).apply(Combine.perKey(x -> 0)).apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn())).apply("WindowToExtractSessionMean", Window.<Integer>into(FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration())))).apply(Mean.<Integer>globally().withoutDefaults()).apply("WriteAvgSessionLength", new WriteWindowedToBigQuery<Double>(options.as(GcpOptions.class).getProject(), options.getDataset(), options.getGameStatsTablePrefix() + "_sessions", configureSessionWindowWrite()));
    // [END DocInclude_Rewindow]
    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    exampleUtils.waitToFinish(result);
}
Also used : KV(org.apache.beam.sdk.values.KV) DateTimeZone(org.joda.time.DateTimeZone) TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) PipelineResult(org.apache.beam.sdk.PipelineResult) Default(org.apache.beam.sdk.options.Default) Combine(org.apache.beam.sdk.transforms.Combine) Duration(org.joda.time.Duration) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) View(org.apache.beam.sdk.transforms.View) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Metrics(org.apache.beam.sdk.metrics.Metrics) Description(org.apache.beam.sdk.options.Description) PTransform(org.apache.beam.sdk.transforms.PTransform) Sessions(org.apache.beam.sdk.transforms.windowing.Sessions) Map(java.util.Map) Window(org.apache.beam.sdk.transforms.windowing.Window) WriteWindowedToBigQuery(com.google.cloud.dataflow.examples.complete.game.utils.WriteWindowedToBigQuery) Pipeline(org.apache.beam.sdk.Pipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) MapElements(org.apache.beam.sdk.transforms.MapElements) DateTimeFormat(org.joda.time.format.DateTimeFormat) Logger(org.slf4j.Logger) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) TimeZone(java.util.TimeZone) Counter(org.apache.beam.sdk.metrics.Counter) Sum(org.apache.beam.sdk.transforms.Sum) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) PCollection(org.apache.beam.sdk.values.PCollection) Mean(org.apache.beam.sdk.transforms.Mean) ExampleUtils(com.google.cloud.dataflow.examples.common.ExampleUtils) PubsubIO(org.apache.beam.sdk.io.gcp.pubsub.PubsubIO) ParDo(org.apache.beam.sdk.transforms.ParDo) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) Instant(org.joda.time.Instant) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Values(org.apache.beam.sdk.transforms.Values)
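
The FilterOutSpammers step above is the core pattern of this example: a DoFn consults a map-valued side input and drops elements whose key appears in it. Below is a minimal, self-contained sketch of that pattern; it is illustrative only, using hypothetical String/Integer data in place of GameActionInfo, and relies on Create, View.asMap, ParDo.withSideInputs, and PAssert from the Beam SDK.

import java.util.Map;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;

public class SideInputFilterExample {

    public static void main(String[] args) {
        Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

        // Map-valued side input of users to ignore (a stand-in for the spammers view).
        final PCollectionView<Map<String, Integer>> ignoredView = p
            .apply("IgnoredUsers", Create.of(KV.of("robot", 1)))
            .apply("IgnoredView", View.<String, Integer>asMap());

        PCollection<KV<String, Integer>> scores =
            p.apply("Scores", Create.of(KV.of("alice", 10), KV.of("robot", 999)));

        // Same shape as FilterOutSpammers: emit only elements whose key is absent from the map.
        PCollection<KV<String, Integer>> kept = scores.apply("FilterIgnored",
            ParDo.of(new DoFn<KV<String, Integer>, KV<String, Integer>>() {

                @ProcessElement
                public void processElement(ProcessContext c) {
                    if (c.sideInput(ignoredView).get(c.element().getKey()) == null) {
                        c.output(c.element());
                    }
                }
            }).withSideInputs(ignoredView));

        PAssert.that(kept).containsInAnyOrder(KV.of("alice", 10));
        p.run().waitUntilFinish();
    }
}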

Example 77 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

the class WriteFiles method createWrite.

/**
   * A write is performed as a sequence of three {@link ParDo}s.
   *
   * <p>In the first, do-once initialization ParDo, the {@link WriteOperation} is initialized and
   * emitted as a singleton collection. This singleton collection containing the WriteOperation is
   * then used as a side input to a ParDo over the PCollection of elements to write. In this
   * bundle-writing phase, {@link WriteOperation#createWriter} is called to obtain a {@link Writer}.
   * {@link Writer#open} and {@link Writer#close} are called in
   * {@link DoFn.StartBundle} and {@link DoFn.FinishBundle}, respectively, and
   * the {@link Writer#write} method is called for every element in the bundle. The output
   * of this ParDo is a PCollection of <i>writer result</i> objects (see {@link FileBasedSink}
   * for a description of writer results), one for each bundle.
   *
   * <p>The final do-once ParDo uses a singleton collection as input and the collection of writer
   * results as a side-input. In this ParDo, {@link WriteOperation#finalize} is called
   * to finalize the write.
   *
   * <p>If the write of any element in the PCollection fails, {@link Writer#close} will be
   * called before the exception that caused the write to fail is propagated and the write result
   * will be discarded.
   *
   * <p>Since the {@link WriteOperation} is serialized after the initialization ParDo and
   * deserialized in the bundle-writing and finalization phases, any state change to the
   * WriteOperation object that occurs during initialization is visible in the latter
   * phases. However, the WriteOperation is not serialized after the bundle-writing
   * phase. This is why implementations should guarantee that
   * {@link WriteOperation#createWriter} does not mutate the WriteOperation.
   */
private PDone createWrite(PCollection<T> input) {
    Pipeline p = input.getPipeline();
    if (!windowedWrites) {
        // Re-window the data into the global window and remove any existing triggers.
        input = input.apply(Window.<T>into(new GlobalWindows()).triggering(DefaultTrigger.of()).discardingFiredPanes());
    }
    // Perform the per-bundle writes as a ParDo on the input PCollection (with the
    // WriteOperation as a side input) and collect the results of the writes in a
    // PCollection. There is a dependency between this ParDo and the first (the
    // WriteOperation PCollection as a side input), so this will happen after the
    // initial ParDo.
    PCollection<FileResult> results;
    final PCollectionView<Integer> numShardsView;
    Coder<BoundedWindow> shardedWindowCoder = (Coder<BoundedWindow>) input.getWindowingStrategy().getWindowFn().windowCoder();
    if (computeNumShards == null && numShardsProvider == null) {
        numShardsView = null;
        results = input.apply("WriteBundles", ParDo.of(windowedWrites ? new WriteWindowedBundles() : new WriteUnwindowedBundles()));
    } else {
        List<PCollectionView<?>> sideInputs = Lists.newArrayList();
        if (computeNumShards != null) {
            numShardsView = input.apply(computeNumShards);
            sideInputs.add(numShardsView);
        } else {
            numShardsView = null;
        }
        PCollection<KV<Integer, Iterable<T>>> sharded = input
            .apply("ApplyShardLabel", ParDo.of(new ApplyShardingKey<T>(
                numShardsView, (numShardsView != null) ? null : numShardsProvider))
                .withSideInputs(sideInputs))
            .apply("GroupIntoShards", GroupByKey.<Integer, T>create());
        shardedWindowCoder = (Coder<BoundedWindow>) sharded.getWindowingStrategy().getWindowFn().windowCoder();
        results = sharded.apply("WriteShardedBundles", ParDo.of(new WriteShardedBundles()));
    }
    results.setCoder(FileResultCoder.of(shardedWindowCoder));
    if (windowedWrites) {
        // When processing streaming windowed writes, results will arrive multiple times. This
        // means we can't share the below implementation that turns the results into a side input,
        // as new data arriving into a side input does not trigger the listening DoFn. Instead
        // we aggregate the result set using a singleton GroupByKey, so the DoFn will be triggered
        // whenever new data arrives.
        PCollection<KV<Void, FileResult>> keyedResults = results.apply("AttachSingletonKey", WithKeys.<Void, FileResult>of((Void) null));
        keyedResults.setCoder(KvCoder.of(VoidCoder.of(), FileResultCoder.of(shardedWindowCoder)));
        // Is the continuation trigger sufficient?
        keyedResults.apply("FinalizeGroupByKey", GroupByKey.<Void, FileResult>create()).apply("Finalize", ParDo.of(new DoFn<KV<Void, Iterable<FileResult>>, Integer>() {

            @ProcessElement
            public void processElement(ProcessContext c) throws Exception {
                LOG.info("Finalizing write operation {}.", writeOperation);
                List<FileResult> results = Lists.newArrayList(c.element().getValue());
                writeOperation.finalize(results);
                LOG.debug("Done finalizing write operation");
            }
        }));
    } else {
        final PCollectionView<Iterable<FileResult>> resultsView = results.apply(View.<FileResult>asIterable());
        ImmutableList.Builder<PCollectionView<?>> sideInputs = ImmutableList.<PCollectionView<?>>builder().add(resultsView);
        if (numShardsView != null) {
            sideInputs.add(numShardsView);
        }
        // Finalize the write in another do-once ParDo on the singleton collection containing the
        // Writer. The results from the per-bundle writes are given as an Iterable side input.
        // The WriteOperation's state is the same as after its initialization in the first
        // do-once ParDo. There is a dependency between this ParDo and the parallel write (the writer
        // results collection as a side input), so it will happen after the parallel write.
        // For the non-windowed case, we guarantee that if no data is written but the user has
        // set numShards, then all shards will be written out as empty files. For this reason we
        // use a side input here.
        PCollection<Void> singletonCollection = p.apply(Create.of((Void) null));
        singletonCollection.apply("Finalize", ParDo.of(new DoFn<Void, Integer>() {

            @ProcessElement
            public void processElement(ProcessContext c) throws Exception {
                LOG.info("Finalizing write operation {}.", writeOperation);
                List<FileResult> results = Lists.newArrayList(c.sideInput(resultsView));
                LOG.debug("Side input initialized to finalize write operation {}.", writeOperation);
                // We must always output at least 1 shard, and honor user-specified numShards if
                // set.
                int minShardsNeeded;
                if (numShardsView != null) {
                    minShardsNeeded = c.sideInput(numShardsView);
                } else if (numShardsProvider != null) {
                    minShardsNeeded = numShardsProvider.get();
                } else {
                    minShardsNeeded = 1;
                }
                int extraShardsNeeded = minShardsNeeded - results.size();
                if (extraShardsNeeded > 0) {
                    LOG.info("Creating {} empty output shards in addition to {} written for a total of {}.", extraShardsNeeded, results.size(), minShardsNeeded);
                    for (int i = 0; i < extraShardsNeeded; ++i) {
                        Writer<T> writer = writeOperation.createWriter();
                        writer.openUnwindowed(UUID.randomUUID().toString(), UNKNOWN_SHARDNUM);
                        FileResult emptyWrite = writer.close();
                        results.add(emptyWrite);
                    }
                    LOG.debug("Done creating extra shards.");
                }
                writeOperation.finalize(results);
                LOG.debug("Done finalizing write operation {}", writeOperation);
            }
        }).withSideInputs(sideInputs.build()));
    }
    return PDone.in(input.getPipeline());
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) List(java.util.List) Coder(org.apache.beam.sdk.coders.Coder) KvCoder(org.apache.beam.sdk.coders.KvCoder) FileResultCoder(org.apache.beam.sdk.io.FileBasedSink.FileResultCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFn(org.apache.beam.sdk.transforms.DoFn) FileResult(org.apache.beam.sdk.io.FileBasedSink.FileResult) Writer(org.apache.beam.sdk.io.FileBasedSink.Writer)
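
WriteFiles itself is internal plumbing; user pipelines reach the three-phase write above through a file-based sink such as TextIO. The sketch below is a rough usage illustration rather than code from the Beam repository: the output paths are placeholders, and it shows the two configurations the branches above distinguish, a fixed shard count (the numShardsProvider path, which may pad the output with empty shards) and windowed writes (finalized per window via the singleton-keyed GroupByKey).

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.joda.time.Duration;

public class WriteFilesUsageSketch {

    public static void main(String[] args) {
        Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

        // Unwindowed write with an explicit shard count: exercises the numShardsProvider branch,
        // so empty shards are created if fewer bundles than shards produce output.
        p.apply("Elements", Create.of("a", "b", "c"))
         .apply("WriteFixedShards", TextIO.write().to("/tmp/output/part").withNumShards(3));

        // Windowed write: exercises the windowedWrites branch, where results arrive repeatedly
        // and finalization runs per window.
        p.apply("MoreElements", Create.of("d", "e", "f"))
         .apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))))
         .apply("WriteWindowed", TextIO.write().to("/tmp/windowed/part")
             .withWindowedWrites().withNumShards(1));

        p.run().waitUntilFinish();
    }
}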

Example 78 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

the class ReadSourceITCase method runProgram.

private static void runProgram(String resultPath) throws Exception {
    Pipeline p = FlinkTestPipeline.createForBatch();
    PCollection<String> result = p
        .apply(GenerateSequence.from(0).to(10))
        .apply(ParDo.of(new DoFn<Long, String>() {

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            c.output(c.element().toString());
        }
    }));
    result.apply(TextIO.write().to(new URI(resultPath).getPath() + "/part"));
    p.run();
}
Also used : DoFn(org.apache.beam.sdk.transforms.DoFn) URI(java.net.URI) Pipeline(org.apache.beam.sdk.Pipeline)
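
Since this DoFn only converts each Long to its String form, the same step could be written more compactly with MapElements (as the GameStats example above does for user/score extraction). A sketch of just that step, assuming MapElements and TypeDescriptors are imported:

// Equivalent to the anonymous DoFn above: format each generated Long as a String.
PCollection<String> result = p
    .apply(GenerateSequence.from(0).to(10))
    .apply("FormatAsString", MapElements
        .into(TypeDescriptors.strings())
        .via((Long n) -> n.toString()));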

Example 79 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

the class DoFnSignatures method analyzeExtraParameter.

private static Parameter analyzeExtraParameter(ErrorReporter methodErrors, FnAnalysisContext fnContext, MethodAnalysisContext methodContext, TypeDescriptor<? extends DoFn<?, ?>> fnClass, ParameterDescription param, TypeDescriptor<?> inputT, TypeDescriptor<?> outputT) {
    TypeDescriptor<?> expectedProcessContextT = doFnProcessContextTypeOf(inputT, outputT);
    TypeDescriptor<?> expectedOnTimerContextT = doFnOnTimerContextTypeOf(inputT, outputT);
    TypeDescriptor<?> paramT = param.getType();
    Class<?> rawType = paramT.getRawType();
    ErrorReporter paramErrors = methodErrors.forParameter(param);
    if (rawType.equals(DoFn.ProcessContext.class)) {
        paramErrors.checkArgument(paramT.equals(expectedProcessContextT), "ProcessContext argument must have type %s", formatType(expectedProcessContextT));
        return Parameter.processContext();
    } else if (rawType.equals(DoFn.OnTimerContext.class)) {
        paramErrors.checkArgument(paramT.equals(expectedOnTimerContextT), "OnTimerContext argument must have type %s", formatType(expectedOnTimerContextT));
        return Parameter.onTimerContext();
    } else if (BoundedWindow.class.isAssignableFrom(rawType)) {
        methodErrors.checkArgument(!methodContext.hasWindowParameter(), "Multiple %s parameters", BoundedWindow.class.getSimpleName());
        return Parameter.boundedWindow((TypeDescriptor<? extends BoundedWindow>) paramT);
    } else if (RestrictionTracker.class.isAssignableFrom(rawType)) {
        methodErrors.checkArgument(!methodContext.hasRestrictionTrackerParameter(), "Multiple %s parameters", RestrictionTracker.class.getSimpleName());
        return Parameter.restrictionTracker(paramT);
    } else if (rawType.equals(Timer.class)) {
        // m.getParameters() is not available until Java 8
        String id = getTimerId(param.getAnnotations());
        paramErrors.checkArgument(id != null, "%s missing %s annotation", Timer.class.getSimpleName(), TimerId.class.getSimpleName());
        paramErrors.checkArgument(!methodContext.getTimerParameters().containsKey(id), "duplicate %s: \"%s\"", TimerId.class.getSimpleName(), id);
        TimerDeclaration timerDecl = fnContext.getTimerDeclarations().get(id);
        paramErrors.checkArgument(timerDecl != null, "reference to undeclared %s: \"%s\"", TimerId.class.getSimpleName(), id);
        paramErrors.checkArgument(timerDecl.field().getDeclaringClass().equals(param.getMethod().getDeclaringClass()), "%s %s declared in a different class %s." + " Timers may be referenced only in the lexical scope where they are declared.", TimerId.class.getSimpleName(), id, timerDecl.field().getDeclaringClass().getName());
        return Parameter.timerParameter(timerDecl);
    } else if (State.class.isAssignableFrom(rawType)) {
        // m.getParameters() is not available until Java 8
        String id = getStateId(param.getAnnotations());
        paramErrors.checkArgument(id != null, "missing %s annotation", DoFn.StateId.class.getSimpleName());
        paramErrors.checkArgument(!methodContext.getStateParameters().containsKey(id), "duplicate %s: \"%s\"", DoFn.StateId.class.getSimpleName(), id);
        // By static typing this is already a well-formed State subclass
        TypeDescriptor<? extends State> stateType = (TypeDescriptor<? extends State>) param.getType();
        StateDeclaration stateDecl = fnContext.getStateDeclarations().get(id);
        paramErrors.checkArgument(stateDecl != null, "reference to undeclared %s: \"%s\"", DoFn.StateId.class.getSimpleName(), id);
        paramErrors.checkArgument(stateDecl.stateType().equals(stateType), "reference to %s %s with different type %s", StateId.class.getSimpleName(), id, formatType(stateDecl.stateType()));
        paramErrors.checkArgument(stateDecl.field().getDeclaringClass().equals(param.getMethod().getDeclaringClass()), "%s %s declared in a different class %s." + " State may be referenced only in the class where it is declared.", StateId.class.getSimpleName(), id, stateDecl.field().getDeclaringClass().getName());
        return Parameter.stateParameter(stateDecl);
    } else {
        List<String> allowedParamTypes = Arrays.asList(formatType(new TypeDescriptor<BoundedWindow>() {
        }), formatType(new TypeDescriptor<RestrictionTracker<?>>() {
        }));
        paramErrors.throwIllegalArgument("%s is not a valid context parameter. Should be one of %s", formatType(paramT), allowedParamTypes);
        // Unreachable
        return null;
    }
}
Also used : RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) TimerId(org.apache.beam.sdk.transforms.DoFn.TimerId) TimerDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerDeclaration) DoFn(org.apache.beam.sdk.transforms.DoFn) Timer(org.apache.beam.sdk.state.Timer) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) State(org.apache.beam.sdk.state.State) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) StateDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration)
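
analyzeExtraParameter decides which extra @ProcessElement parameters a DoFn may declare: the window, a @StateId-annotated state cell, a @TimerId-annotated timer, and so on, each validated against a declaration in the same class. Below is a sketch of a DoFn whose signature exercises those branches; the ids "count" and "expiry" and the one-minute delay are arbitrary choices for illustration, and stateful/timer execution depends on runner support.

import org.apache.beam.sdk.state.StateSpec;
import org.apache.beam.sdk.state.StateSpecs;
import org.apache.beam.sdk.state.TimeDomain;
import org.apache.beam.sdk.state.Timer;
import org.apache.beam.sdk.state.TimerSpec;
import org.apache.beam.sdk.state.TimerSpecs;
import org.apache.beam.sdk.state.ValueState;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.KV;
import org.joda.time.Duration;

// Each extra @ProcessElement parameter below corresponds to a branch in analyzeExtraParameter:
// a BoundedWindow, a @StateId-annotated State, and a @TimerId-annotated Timer, with the
// matching declarations in the same class.
class CountingFn extends DoFn<KV<String, Integer>, String> {

    @StateId("count")
    private final StateSpec<ValueState<Integer>> countSpec = StateSpecs.value();

    @TimerId("expiry")
    private final TimerSpec expirySpec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

    @ProcessElement
    public void processElement(
        ProcessContext c,
        BoundedWindow window,
        @StateId("count") ValueState<Integer> count,
        @TimerId("expiry") Timer expiry) {
        int current = (count.read() == null) ? 0 : count.read();
        count.write(current + c.element().getValue());
        // Fire one minute after the current element's timestamp.
        expiry.set(c.timestamp().plus(Duration.standardMinutes(1)));
    }

    @OnTimer("expiry")
    public void onExpiry(OnTimerContext c, @StateId("count") ValueState<Integer> count) {
        c.output("total=" + count.read());
    }
}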

Example 80 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

the class BigQueryIOTest method testReadFromTable.

@Test
public void testReadFromTable() throws IOException, InterruptedException {
    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    bqOptions.setProject("defaultproject");
    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
    Job job = new Job();
    JobStatus status = new JobStatus();
    job.setStatus(status);
    JobStatistics jobStats = new JobStatistics();
    job.setStatistics(jobStats);
    JobStatistics4 extract = new JobStatistics4();
    jobStats.setExtract(extract);
    extract.setDestinationUriFileCounts(ImmutableList.of(1L));
    Table sometable = new Table();
    sometable.setSchema(new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("number").setType("INTEGER"))));
    sometable.setTableReference(new TableReference().setProjectId("non-executing-project").setDatasetId("somedataset").setTableId("sometable"));
    sometable.setNumBytes(1024L * 1024L);
    FakeDatasetService fakeDatasetService = new FakeDatasetService();
    fakeDatasetService.createDataset("non-executing-project", "somedataset", "", "");
    fakeDatasetService.createTable(sometable);
    List<TableRow> records = Lists.newArrayList(new TableRow().set("name", "a").set("number", 1L), new TableRow().set("name", "b").set("number", 2L), new TableRow().set("name", "c").set("number", 3L));
    fakeDatasetService.insertAll(sometable.getTableReference(), records, null);
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(new FakeJobService()).withDatasetService(fakeDatasetService);
    Pipeline p = TestPipeline.create(bqOptions);
    PCollection<KV<String, Long>> output = p
        .apply(BigQueryIO.read()
            .from("non-executing-project:somedataset.sometable")
            .withTestServices(fakeBqServices)
            .withoutValidation())
        .apply(ParDo.of(new DoFn<TableRow, KV<String, Long>>() {

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            c.output(KV.of((String) c.element().get("name"), Long.valueOf((String) c.element().get("number"))));
        }
    }));
    PAssert.that(output).containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L)));
    p.run();
}
Also used : JobStatistics(com.google.api.services.bigquery.model.JobStatistics) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) JobStatistics4(com.google.api.services.bigquery.model.JobStatistics4) TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) KV(org.apache.beam.sdk.values.KV) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) JobStatus(com.google.api.services.bigquery.model.JobStatus) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) DoFn(org.apache.beam.sdk.transforms.DoFn) TableRow(com.google.api.services.bigquery.model.TableRow) Job(com.google.api.services.bigquery.model.Job) Test(org.junit.Test)

Aggregations

DoFn (org.apache.beam.sdk.transforms.DoFn) 154
Test (org.junit.Test) 98
Pipeline (org.apache.beam.sdk.Pipeline) 60
KV (org.apache.beam.sdk.values.KV) 45
TupleTag (org.apache.beam.sdk.values.TupleTag) 28
StateSpec (org.apache.beam.sdk.state.StateSpec) 26
Instant (org.joda.time.Instant) 26
ArrayList (java.util.ArrayList) 23
TestPipeline (org.apache.beam.sdk.testing.TestPipeline) 23
BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow) 22
PCollection (org.apache.beam.sdk.values.PCollection) 21
TimerSpec (org.apache.beam.sdk.state.TimerSpec) 19
WindowedValue (org.apache.beam.sdk.util.WindowedValue) 18
PCollectionView (org.apache.beam.sdk.values.PCollectionView) 18
HashMap (java.util.HashMap) 17
Coder (org.apache.beam.sdk.coders.Coder) 17
List (java.util.List) 16
Map (java.util.Map) 14
ValueState (org.apache.beam.sdk.state.ValueState) 14
RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi) 13