use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class UnboundedReadFromBoundedSourceTest method testReadFromCheckpointBeforeStart.
/**
 * Creates a reader from a checkpoint and calls {@code getCurrent()} on it immediately; the
 * test expects that call to raise a {@link NoSuchElementException}.
 */
@Test
public void testReadFromCheckpointBeforeStart() throws Exception {
  thrown.expect(NoSuchElementException.class);

  // Bounded source and the unbounded adapter wrapping it.
  BoundedSource<Long> boundedSource = CountingSource.upTo(100);
  BoundedToUnboundedSourceAdapter<Long> adapter =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // Checkpoint holding a single timestamped element plus the bounded source.
  List<TimestampedValue<Long>> checkpointElements =
      ImmutableList.of(TimestampedValue.of(1L, new Instant(1L)));
  Checkpoint<Long> restoredCheckpoint = new Checkpoint<>(checkpointElements, boundedSource);

  // The reader is queried right after creation, with no other call on it in between.
  adapter.createReader(pipelineOptions, restoredCheckpoint).getCurrent();
}
use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class ReduceFnTester method injectElements.
/**
 * Feeds the given timestamped values to a newly created runner in a single
 * {@code processElements} call, after assigning each value to windows through the window
 * function, and then persists the runner.
 *
 * @param values the timestamped values to inject, in order
 * @throws Exception if creating the runner, processing the elements or persisting fails
 */
@SafeVarargs
public final void injectElements(TimestampedValue<InputT>... values) throws Exception {
  for (TimestampedValue<InputT> traced : values) {
    WindowTracing.trace("TriggerTester.injectElements: {}", traced);
  }

  ReduceFnRunner<String, InputT, OutputT, W> reduceFnRunner = createRunner();

  // Turns a timestamped value into a windowed value whose windows are chosen by windowFn.
  // Any failure during window assignment is rethrown unchecked, because apply() cannot
  // declare checked exceptions.
  Function<TimestampedValue<InputT>, WindowedValue<InputT>> assignToWindows =
      new Function<TimestampedValue<InputT>, WindowedValue<InputT>>() {
        @Override
        public WindowedValue<InputT> apply(TimestampedValue<InputT> element) {
          try {
            InputT payload = element.getValue();
            Instant eventTime = element.getTimestamp();
            Collection<W> assigned =
                windowFn.assignWindows(
                    new TestAssignContext<W>(
                        windowFn, payload, eventTime, GlobalWindow.INSTANCE));
            return WindowedValue.of(payload, eventTime, assigned, PaneInfo.NO_FIRING);
          } catch (Exception e) {
            throw new RuntimeException(e);
          }
        }
      };

  reduceFnRunner.processElements(Iterables.transform(Arrays.asList(values), assignToWindows));

  // Persist after each bundle.
  reduceFnRunner.persist();
}
use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class StreamingTransformTranslator method createFromQueue.
/**
 * Returns the evaluator for {@link CreateStream}: every batch of the transform is wrapped into
 * an RDD of globally-windowed values, the RDDs are exposed as a queue stream, the transform's
 * watermark times are registered for that stream, and the resulting dataset is stored in the
 * evaluation context.
 */
private static <T> TransformEvaluator<CreateStream<T>> createFromQueue() {
  return new TransformEvaluator<CreateStream<T>>() {
    @Override
    public void evaluate(CreateStream<T> transform, EvaluationContext context) {
      Coder<T> elementCoder = context.getOutput(transform).getCoder();
      JavaStreamingContext streamingContext = context.getStreamingContext();
      Queue<Iterable<TimestampedValue<T>>> batches = transform.getBatches();
      WindowedValue.FullWindowedValueCoder<T> windowedCoder =
          WindowedValue.FullWindowedValueCoder.of(elementCoder, GlobalWindow.Coder.INSTANCE);

      // Stateless conversion shared by all batches: keep value and timestamp, place the
      // element in the global window with the NO_FIRING pane.
      com.google.common.base.Function<TimestampedValue<T>, WindowedValue<T>> toGlobalWindow =
          new com.google.common.base.Function<TimestampedValue<T>, WindowedValue<T>>() {
            @Override
            public WindowedValue<T> apply(@Nonnull TimestampedValue<T> timestampedValue) {
              return WindowedValue.of(
                  timestampedValue.getValue(),
                  timestampedValue.getTimestamp(),
                  GlobalWindow.INSTANCE,
                  PaneInfo.NO_FIRING);
            }
          };

      // create the DStream from queue: one RDD per batch, in batch order.
      Queue<JavaRDD<WindowedValue<T>>> pendingRdds = new LinkedBlockingQueue<>();
      for (Iterable<TimestampedValue<T>> batch : batches) {
        Iterable<WindowedValue<T>> windowedBatch = Iterables.transform(batch, toGlobalWindow);
        // Elements are encoded to bytes before parallelize and decoded again in map().
        JavaRDD<WindowedValue<T>> batchRdd =
            streamingContext
                .sparkContext()
                .parallelize(CoderHelpers.toByteArrays(windowedBatch, windowedCoder))
                .map(CoderHelpers.fromByteFunction(windowedCoder));
        pendingRdds.offer(batchRdd);
      }

      JavaInputDStream<WindowedValue<T>> queueStream =
          streamingContext.queueStream(pendingRdds, true);
      UnboundedDataset<T> dataset =
          new UnboundedDataset<T>(
              queueStream, Collections.singletonList(queueStream.inputDStream().id()));

      // add pre-baked Watermarks for the pre-baked batches.
      Queue<GlobalWatermarkHolder.SparkWatermarks> watermarkTimes = transform.getTimes();
      GlobalWatermarkHolder.addAll(
          ImmutableMap.of(dataset.getStreamSources().get(0), watermarkTimes));

      context.putDataset(transform, dataset);
    }

    @Override
    public String toNativeString() {
      return "streamingContext.queueStream(...)";
    }
  };
}
use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class SplittableDoFnTest method testPairWithIndexWindowedTimestamped.
/**
 * Checks that the output of {@code PairStringWithIndexToLength} keeps the input's window
 * function and that, in each of four sliding windows, it contains exactly the expected
 * timestamped key/index pairs.
 */
@Test
@Category({ ValidatesRunner.class, UsesSplittableParDo.class })
public void testPairWithIndexWindowedTimestamped() {
  // Tests that Splittable DoFn correctly propagates windowing strategy, windows and timestamps
  // of elements in the input collection.

  // Truncate "now" to a whole second.
  MutableDateTime truncated = Instant.now().toMutableDateTime();
  truncated.setMillisOfSecond(0);
  Instant now = truncated.toInstant();
  Instant nowP1 = now.plus(Duration.standardSeconds(1));
  Instant nowP2 = now.plus(Duration.standardSeconds(2));

  // 5-second windows sliding every second.
  SlidingWindows windowFn =
      SlidingWindows.of(Duration.standardSeconds(5)).every(Duration.standardSeconds(1));

  PCollection<KV<String, Integer>> res =
      p.apply(
              Create.timestamped(
                  TimestampedValue.of("a", now),
                  TimestampedValue.of("bb", nowP1),
                  TimestampedValue.of("ccccc", nowP2)))
          .apply(Window.<String>into(windowFn))
          .apply(ParDo.of(new PairStringWithIndexToLength()))
          .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));
  assertEquals(windowFn, res.getWindowingStrategy().getWindowFn());

  PCollection<TimestampedValue<KV<String, Integer>>> timestamped =
      res.apply("Reify timestamps", ParDo.of(new ReifyTimestampsFn<KV<String, Integer>>()));

  // Every expected output regardless of window; the same for each window, so built once.
  List<TimestampedValue<KV<String, Integer>>> allExpected =
      Arrays.asList(
          TimestampedValue.of(KV.of("a", 0), now),
          TimestampedValue.of(KV.of("bb", 0), nowP1),
          TimestampedValue.of(KV.of("bb", 1), nowP1),
          TimestampedValue.of(KV.of("ccccc", 0), nowP2),
          TimestampedValue.of(KV.of("ccccc", 1), nowP2),
          TimestampedValue.of(KV.of("ccccc", 2), nowP2),
          TimestampedValue.of(KV.of("ccccc", 3), nowP2),
          TimestampedValue.of(KV.of("ccccc", 4), nowP2));

  // Windows starting at now, now-1s, now-2s and now-3s.
  for (int secondsBack = 0; secondsBack < 4; ++secondsBack) {
    Instant windowStart = now.minus(Duration.standardSeconds(secondsBack));
    IntervalWindow window =
        new IntervalWindow(windowStart, windowStart.plus(Duration.standardSeconds(5)));

    // Keep only the outputs whose timestamp lies within [start, maxTimestamp] of the window.
    List<TimestampedValue<KV<String, Integer>>> expectedInWindow = new ArrayList<>();
    for (TimestampedValue<KV<String, Integer>> candidate : allExpected) {
      Instant ts = candidate.getTimestamp();
      if (window.start().isAfter(ts) || ts.isAfter(window.maxTimestamp())) {
        continue;
      }
      expectedInWindow.add(candidate);
    }

    assertFalse(expectedInWindow.isEmpty());
    PAssert.that(timestamped).inWindow(window).containsInAnyOrder(expectedInWindow);
  }

  p.run();
}
use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class CreateStream method nextBatch.
/**
 * Enqueue next micro-batch elements.
 * This is backed by a {@link Queue} so stream input order would keep the population order (FIFO).
 *
 * <p>Before the batch is enqueued, every element's timestamp is checked to be strictly before
 * {@link BoundedWindow#TIMESTAMP_MAX_VALUE}. If any element fails the check, nothing is
 * enqueued.
 *
 * @param batchElements the timestamped elements that form the next micro-batch
 * @return this {@code CreateStream}, so calls can be chained
 */
@SafeVarargs
public final CreateStream<T> nextBatch(TimestampedValue<T>... batchElements) {
  // validate timestamps if timestamped elements.
  for (TimestampedValue<T> element : batchElements) {
    // The element is already statically typed, so no raw-type cast is needed to read its
    // timestamp.
    checkArgument(
        element.getTimestamp().isBefore(BoundedWindow.TIMESTAMP_MAX_VALUE),
        "Elements must have timestamps before %s. Got: %s",
        BoundedWindow.TIMESTAMP_MAX_VALUE,
        element.getTimestamp());
  }
  batches.offer(Arrays.asList(batchElements));
  return this;
}
Aggregations