Search in sources :

Example 1 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

the class UnboundedReadFromBoundedSourceTest method testReadFromCheckpointBeforeStart.

/**
 * Builds a reader from a checkpoint holding a single timestamped element and asks it for its
 * current element straight away; the test expects a {@link NoSuchElementException}.
 */
@Test
public void testReadFromCheckpointBeforeStart() throws Exception {
    // Registered up front (thrown is presumably a JUnit ExpectedException rule - confirm against
    // the enclosing class), so the expectation is in place for every statement below.
    thrown.expect(NoSuchElementException.class);

    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    BoundedSource<Long> boundedSource = CountingSource.upTo(100);
    BoundedToUnboundedSourceAdapter<Long> adapter = new BoundedToUnboundedSourceAdapter<>(boundedSource);

    // The checkpoint carries one element (value 1, timestamp 1 ms) plus the bounded source.
    List<TimestampedValue<Long>> checkpointedElements = ImmutableList.of(TimestampedValue.of(1L, new Instant(1L)));
    Checkpoint<Long> restored = new Checkpoint<>(checkpointedElements, boundedSource);

    // getCurrent() is invoked directly on the new reader, with no call in between.
    adapter.createReader(pipelineOptions, restored).getCurrent();
}
Also used : Checkpoint(org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter.Checkpoint) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) BoundedToUnboundedSourceAdapter(org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Instant(org.joda.time.Instant) Test(org.junit.Test)

Example 2 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

the class ReduceFnTester method injectElements.

/**
 * Feeds the given timestamped values to a runner obtained from {@code createRunner()} as one
 * chunk of a bundle (or work-unit), after assigning windows to each value with the window
 * function, and then persists the runner.
 *
 * @param values the timestamped values to inject
 * @throws Exception if creating the runner, processing the elements or persisting fails
 */
@SafeVarargs
public final void injectElements(TimestampedValue<InputT>... values) throws Exception {
    for (TimestampedValue<InputT> traced : values) {
        WindowTracing.trace("TriggerTester.injectElements: {}", traced);
    }

    ReduceFnRunner<String, InputT, OutputT, W> reduceFnRunner = createRunner();

    // Converts one timestamped value into a windowed value. It is applied lazily, when the
    // transformed iterable below is traversed.
    Function<TimestampedValue<InputT>, WindowedValue<InputT>> toWindowedValue = new Function<TimestampedValue<InputT>, WindowedValue<InputT>>() {

        @Override
        public WindowedValue<InputT> apply(TimestampedValue<InputT> element) {
            try {
                InputT payload = element.getValue();
                Instant eventTime = element.getTimestamp();
                Collection<W> assignedWindows = windowFn.assignWindows(new TestAssignContext<W>(windowFn, payload, eventTime, GlobalWindow.INSTANCE));
                return WindowedValue.of(payload, eventTime, assignedWindows, PaneInfo.NO_FIRING);
            } catch (Exception e) {
                // apply() cannot declare checked exceptions, so rethrow unchecked with the cause.
                throw new RuntimeException(e);
            }
        }
    };
    Iterable<WindowedValue<InputT>> windowedElements = Iterables.transform(Arrays.asList(values), toWindowedValue);
    reduceFnRunner.processElements(windowedElements);

    // Persist after each bundle.
    reduceFnRunner.persist();
}
Also used : Instant(org.joda.time.Instant) Function(com.google.common.base.Function) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue)

Example 3 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

the class StreamingTransformTranslator method createFromQueue.

/**
 * Returns the evaluator for {@link CreateStream}: every pre-baked batch of the transform becomes
 * one RDD of windowed values, the RDDs are served through a queue-backed input DStream, and the
 * transform's pre-baked watermarks are registered for that stream.
 */
private static <T> TransformEvaluator<CreateStream<T>> createFromQueue() {
    return new TransformEvaluator<CreateStream<T>>() {

        @Override
        public void evaluate(CreateStream<T> transform, EvaluationContext context) {
            Coder<T> elementCoder = context.getOutput(transform).getCoder();
            JavaStreamingContext streamingContext = context.getStreamingContext();
            Queue<Iterable<TimestampedValue<T>>> batches = transform.getBatches();
            WindowedValue.FullWindowedValueCoder<T> windowedCoder = WindowedValue.FullWindowedValueCoder.of(elementCoder, GlobalWindow.Coder.INSTANCE);

            // Stateless conversion: keep value and timestamp, place the element in the global
            // window with no pane firing. One instance is shared by all batches.
            com.google.common.base.Function<TimestampedValue<T>, WindowedValue<T>> inGlobalWindow = new com.google.common.base.Function<TimestampedValue<T>, WindowedValue<T>>() {

                @Override
                public WindowedValue<T> apply(@Nonnull TimestampedValue<T> timestampedValue) {
                    return WindowedValue.of(timestampedValue.getValue(), timestampedValue.getTimestamp(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
                }
            };

            // Build the queue of RDDs that backs the DStream, one RDD per batch.
            Queue<JavaRDD<WindowedValue<T>>> rddQueue = new LinkedBlockingQueue<>();
            for (Iterable<TimestampedValue<T>> batch : batches) {
                Iterable<WindowedValue<T>> windowedBatch = Iterables.transform(batch, inGlobalWindow);
                // Elements are encoded to bytes before parallelize() and decoded again afterwards.
                JavaRDD<WindowedValue<T>> batchRdd = streamingContext.sparkContext()
                        .parallelize(CoderHelpers.toByteArrays(windowedBatch, windowedCoder))
                        .map(CoderHelpers.fromByteFunction(windowedCoder));
                rddQueue.offer(batchRdd);
            }

            // Second argument is queueStream's oneAtATime flag.
            JavaInputDStream<WindowedValue<T>> queueStream = streamingContext.queueStream(rddQueue, true);
            UnboundedDataset<T> dataset = new UnboundedDataset<>(queueStream, Collections.singletonList(queueStream.inputDStream().id()));

            // Register the pre-baked watermarks under the dataset's first (and only) stream source.
            Queue<GlobalWatermarkHolder.SparkWatermarks> watermarks = transform.getTimes();
            GlobalWatermarkHolder.addAll(ImmutableMap.of(dataset.getStreamSources().get(0), watermarks));
            context.putDataset(transform, dataset);
        }

        @Override
        public String toNativeString() {
            return "streamingContext.queueStream(...)";
        }
    };
}
Also used : LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) WindowedValue(org.apache.beam.sdk.util.WindowedValue) CreateStream(org.apache.beam.runners.spark.io.CreateStream) TransformEvaluator(org.apache.beam.runners.spark.translation.TransformEvaluator) JavaRDD(org.apache.spark.api.java.JavaRDD) EvaluationContext(org.apache.beam.runners.spark.translation.EvaluationContext)

Example 4 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

the class SplittableDoFnTest method testPairWithIndexWindowedTimestamped.

/**
 * Tests that Splittable DoFn correctly propagates the windowing strategy, windows and timestamps
 * of the elements in the input collection.
 */
@Test
@Category({ ValidatesRunner.class, UsesSplittableParDo.class })
public void testPairWithIndexWindowedTimestamped() {
    // Take the current time with its millisecond-of-second field zeroed.
    MutableDateTime truncated = Instant.now().toMutableDateTime();
    truncated.setMillisOfSecond(0);
    Instant now = truncated.toInstant();
    Instant nowP1 = now.plus(Duration.standardSeconds(1));
    Instant nowP2 = now.plus(Duration.standardSeconds(2));

    // 5-second windows sliding every second.
    SlidingWindows windowFn = SlidingWindows.of(Duration.standardSeconds(5)).every(Duration.standardSeconds(1));

    PCollection<String> input = p.apply(Create.timestamped(TimestampedValue.of("a", now), TimestampedValue.of("bb", nowP1), TimestampedValue.of("ccccc", nowP2)));
    PCollection<String> windowedInput = input.apply(Window.<String>into(windowFn));
    PCollection<KV<String, Integer>> res = windowedInput
            .apply(ParDo.of(new PairStringWithIndexToLength()))
            .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));
    assertEquals(windowFn, res.getWindowingStrategy().getWindowFn());

    PCollection<TimestampedValue<KV<String, Integer>>> timestamped = res.apply("Reify timestamps", ParDo.of(new ReifyTimestampsFn<KV<String, Integer>>()));

    // Every (element, index) pair expected overall, each stamped with its input element's timestamp.
    List<TimestampedValue<KV<String, Integer>>> allOutputs = Arrays.asList(
            TimestampedValue.of(KV.of("a", 0), now),
            TimestampedValue.of(KV.of("bb", 0), nowP1),
            TimestampedValue.of(KV.of("bb", 1), nowP1),
            TimestampedValue.of(KV.of("ccccc", 0), nowP2),
            TimestampedValue.of(KV.of("ccccc", 1), nowP2),
            TimestampedValue.of(KV.of("ccccc", 2), nowP2),
            TimestampedValue.of(KV.of("ccccc", 3), nowP2),
            TimestampedValue.of(KV.of("ccccc", 4), nowP2));

    // Check the four windows that start 0, 1, 2 and 3 seconds before "now".
    for (int i = 0; i < 4; ++i) {
        Instant windowStart = now.minus(Duration.standardSeconds(i));
        IntervalWindow window = new IntervalWindow(windowStart, windowStart.plus(Duration.standardSeconds(5)));

        List<TimestampedValue<KV<String, Integer>>> expected = new ArrayList<>();
        for (TimestampedValue<KV<String, Integer>> candidate : allOutputs) {
            Instant eventTime = candidate.getTimestamp();
            // Skip outputs whose timestamp lies outside [window.start(), window.maxTimestamp()].
            if (window.start().isAfter(eventTime) || eventTime.isAfter(window.maxTimestamp())) {
                continue;
            }
            expected.add(candidate);
        }
        assertFalse(expected.isEmpty());
        PAssert.that(timestamped).inWindow(window).containsInAnyOrder(expected);
    }
    p.run();
}
Also used : Instant(org.joda.time.Instant) ArrayList(java.util.ArrayList) MutableDateTime(org.joda.time.MutableDateTime) KV(org.apache.beam.sdk.values.KV) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) SlidingWindows(org.apache.beam.sdk.transforms.windowing.SlidingWindows) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 5 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

the class CreateStream method nextBatch.

/**
 * Enqueues the elements of the next micro-batch.
 *
 * <p>This is backed by a {@link Queue}, so the stream input order keeps the population order
 * (FIFO).
 *
 * @param batchElements the timestamped elements that make up the batch
 * @return this {@code CreateStream}, so calls can be chained
 * @throws IllegalArgumentException presumably, via {@code checkArgument}, when an element's
 *     timestamp is not before {@link BoundedWindow#TIMESTAMP_MAX_VALUE} (assumes Guava's
 *     {@code Preconditions.checkArgument} - confirm against the file's imports)
 */
@SafeVarargs
public final CreateStream<T> nextBatch(TimestampedValue<T>... batchElements) {
    // Every element must carry a timestamp strictly before the maximum allowed timestamp.
    // The elements are already typed, so no (raw) cast is needed to read the timestamp.
    for (TimestampedValue<T> element : batchElements) {
        checkArgument(element.getTimestamp().isBefore(BoundedWindow.TIMESTAMP_MAX_VALUE), "Elements must have timestamps before %s. Got: %s", BoundedWindow.TIMESTAMP_MAX_VALUE, element.getTimestamp());
    }
    batches.offer(Arrays.asList(batchElements));
    return this;
}
Also used : TimestampedValue(org.apache.beam.sdk.values.TimestampedValue)

Aggregations

TimestampedValue (org.apache.beam.sdk.values.TimestampedValue): 10; Instant (org.joda.time.Instant): 8; Test (org.junit.Test): 7; Category (org.junit.experimental.categories.Category): 3; ArrayList (java.util.ArrayList): 2; IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 2; KV (org.apache.beam.sdk.values.KV): 2; Function (com.google.common.base.Function): 1; File (java.io.File): 1; Path (java.nio.file.Path): 1; Random (java.util.Random): 1; LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue): 1; DataFileReader (org.apache.avro.file.DataFileReader): 1; CompletionCandidate (org.apache.beam.examples.complete.AutoComplete.CompletionCandidate): 1; ComputeTopCompletions (org.apache.beam.examples.complete.AutoComplete.ComputeTopCompletions): 1; BoundedToUnboundedSourceAdapter (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter): 1; Checkpoint (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter.Checkpoint): 1; CreateStream (org.apache.beam.runners.spark.io.CreateStream): 1; EvaluationContext (org.apache.beam.runners.spark.translation.EvaluationContext): 1; TransformEvaluator (org.apache.beam.runners.spark.translation.TransformEvaluator): 1