use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Stopwatch in project beam by apache.
the class WatermarkSyncedDStream method compute.
/**
 * Produces the RDD for the batch at {@code validTime}.
 *
 * <p>Before generating the RDD this calls {@code awaitWatermarkSyncWith} with the batch time and
 * logs how long that call took, together with the watermarks reported by {@link
 * GlobalWatermarkHolder} afterwards.
 *
 * @param validTime the time of the batch being computed.
 * @return the generated RDD, wrapped in a {@link scala.Option}.
 */
@Override
public scala.Option<RDD<WindowedValue<T>>> compute(final Time validTime) {
  final long currentBatchMillis = validTime.milliseconds();

  LOG.trace(
      "BEFORE waiting for watermark sync, LastWatermarkedBatchTime: {}, current batch time: {}",
      GlobalWatermarkHolder.getLastWatermarkedBatchTime(),
      currentBatchMillis);

  // Time only the call that waits for the watermark sync.
  final Stopwatch syncTimer = Stopwatch.createStarted();
  awaitWatermarkSyncWith(currentBatchMillis);
  final long waitedMillis = syncTimer.stop().elapsed(TimeUnit.MILLISECONDS);

  LOG.info(
      "Waited {} millis for watermarks to sync up with the current batch ({})",
      waitedMillis,
      currentBatchMillis);
  LOG.info("Watermarks are now: {}", GlobalWatermarkHolder.get(batchDuration));
  LOG.trace(
      "AFTER waiting for watermark sync, LastWatermarkedBatchTime: {}, current batch time: {}",
      GlobalWatermarkHolder.getLastWatermarkedBatchTime(),
      currentBatchMillis);

  final RDD<WindowedValue<T>> batchRdd = generateRdd();
  // The flag is cleared only after the RDD has been generated.
  isFirst = false;
  return scala.Option.apply(batchRdd);
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Stopwatch in project beam by apache.
the class StateSpecFunctions method mapSourceFunction.
/**
 * A {@link org.apache.spark.streaming.StateSpec} function to support reading from an {@link
 * UnboundedSource}.
 *
 * <p>This StateSpec function expects the following:
 *
 * <ul>
 *   <li>Key: The (partitioned) Source to read from.
 *   <li>Value: An optional {@link UnboundedSource.CheckpointMark} to start from.
 *   <li>State: A byte representation of the (previously) persisted CheckpointMark, paired with
 *       the watermark reached by the previous read.
 * </ul>
 *
 * And returns a tuple of all read values (serialized, for the micro-batch) and the read's {@link
 * Metadata}.
 *
 * <p>This stateful operation could be described as a flatMap over a single-element stream, which
 * outputs all the elements read from the {@link UnboundedSource} for this micro-batch. Since
 * micro-batches are bounded, the provided UnboundedSource is wrapped by a {@link
 * MicrobatchSource} that applies bounds in the form of duration and max records (per
 * micro-batch).
 *
 * <p>In order to avoid using Spark Guava's classes which pollute the classpath, we use the {@link
 * StateSpec#function(scala.Function3)} signature which employs scala's native {@link
 * scala.Option}, instead of the {@link
 * StateSpec#function(org.apache.spark.api.java.function.Function3)} signature, which employs
 * Guava's {@link Optional}.
 *
 * <p>See also <a href="https://issues.apache.org/jira/browse/SPARK-4819">SPARK-4819</a>.
 *
 * @param options A serializable {@link SerializablePipelineOptions}.
 * @param stepName The step name used to look up the metrics container that is in scope while
 *     reading.
 * @param <T> The type of the input stream elements.
 * @param <CheckpointMarkT> The type of the {@link UnboundedSource.CheckpointMark}.
 * @return The appropriate {@link org.apache.spark.streaming.StateSpec} function.
 */
public static <T, CheckpointMarkT extends UnboundedSource.CheckpointMark>
    scala.Function3<
            Source<T>,
            Option<CheckpointMarkT>,
            State<Tuple2<byte[], Instant>>,
            Tuple2<Iterable<byte[]>, Metadata>>
        mapSourceFunction(final SerializablePipelineOptions options, final String stepName) {
  return new SerializableFunction3<
      Source<T>,
      Option<CheckpointMarkT>,
      State<Tuple2<byte[], Instant>>,
      Tuple2<Iterable<byte[]>, Metadata>>() {
    @Override
    public Tuple2<Iterable<byte[]>, Metadata> apply(
        Source<T> source,
        Option<CheckpointMarkT> startCheckpointMark,
        State<Tuple2<byte[], Instant>> state) {
      MetricsContainerStepMap metricsContainers = new MetricsContainerStepMap();
      MetricsContainer metricsContainer = metricsContainers.getContainer(stepName);
      // Put the metrics container in scope for the whole read, since the reader methods
      // invoked below may report metrics.
      try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metricsContainer)) {
        // source as MicrobatchSource
        MicrobatchSource<T, CheckpointMarkT> microbatchSource =
            (MicrobatchSource<T, CheckpointMarkT>) source;
        // Initial high/low watermarks.
        Instant lowWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
        final Instant highWatermark;
        // if state exists, use it, otherwise it's first time so use the startCheckpointMark.
        // startCheckpointMark may be EmptyCheckpointMark (the Spark Java API tries to apply
        // Optional(null)), which is handled by the UnboundedSource implementation.
        Coder<CheckpointMarkT> checkpointCoder = microbatchSource.getCheckpointMarkCoder();
        CheckpointMarkT checkpointMark;
        if (state.exists()) {
          // previous (output) watermark is now the low watermark.
          lowWatermark = state.get()._2();
          checkpointMark = CoderHelpers.fromByteArray(state.get()._1(), checkpointCoder);
          LOG.info("Continue reading from an existing CheckpointMark.");
        } else if (startCheckpointMark.isDefined()
            && !startCheckpointMark.get().equals(EmptyCheckpointMark.get())) {
          checkpointMark = startCheckpointMark.get();
          LOG.info("Start reading from a provided CheckpointMark.");
        } else {
          checkpointMark = null;
          LOG.info("No CheckpointMark provided, start reading from default.");
        }
        // create reader; the stopwatch is started here, so the reported read duration
        // (per-partition) includes obtaining the reader.
        final MicrobatchSource.Reader /*<T>*/ microbatchReader;
        final Stopwatch stopwatch = Stopwatch.createStarted();
        long readDurationMillis = 0;
        try {
          microbatchReader =
              (MicrobatchSource.Reader)
                  microbatchSource.getOrCreateReader(options.get(), checkpointMark);
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
        // read microbatch as a serialized collection.
        final List<byte[]> readValues = new ArrayList<>();
        WindowedValue.FullWindowedValueCoder<T> coder =
            WindowedValue.FullWindowedValueCoder.of(
                source.getOutputCoder(), GlobalWindow.Coder.INSTANCE);
        try {
          boolean finished = !microbatchReader.start();
          while (!finished) {
            final WindowedValue<T> wv =
                WindowedValue.of(
                    (T) microbatchReader.getCurrent(),
                    microbatchReader.getCurrentTimestamp(),
                    GlobalWindow.INSTANCE,
                    PaneInfo.NO_FIRING);
            readValues.add(CoderHelpers.toByteArray(wv, coder));
            finished = !microbatchReader.advance();
          }
          // end-of-read watermark is the high watermark, but don't allow decrease.
          final Instant sourceWatermark = microbatchReader.getWatermark();
          highWatermark = sourceWatermark.isAfter(lowWatermark) ? sourceWatermark : lowWatermark;
          readDurationMillis = stopwatch.stop().elapsed(TimeUnit.MILLISECONDS);
          LOG.info(
              "Source id {} spent {} millis on reading.",
              microbatchSource.getId(),
              readDurationMillis);
          // If the reader does not supply a CheckpointMark, do not hand null to the coder:
          // persist an empty checkpoint payload so that the watermark is still recorded.
          // NOTE(review): an empty payload is decoded with the checkpoint coder on the next
          // batch (see the state.exists() branch above) - confirm the coder tolerates it.
          @SuppressWarnings("unchecked")
          final CheckpointMarkT finishedReadCheckpointMark =
              (CheckpointMarkT) microbatchReader.getCheckpointMark();
          final byte[] codedCheckpoint;
          if (finishedReadCheckpointMark != null) {
            codedCheckpoint =
                CoderHelpers.toByteArray(finishedReadCheckpointMark, checkpointCoder);
          } else {
            codedCheckpoint = new byte[0];
            LOG.info("Skipping checkpoint marking because the reader failed to supply one.");
          }
          // persist the end-of-read (high) watermark for following read, where it will become
          // the next low watermark.
          state.update(new Tuple2<>(codedCheckpoint, highWatermark));
        } catch (IOException e) {
          throw new RuntimeException("Failed to read from reader.", e);
        }
        final ArrayList<byte[]> payload =
            Lists.newArrayList(Iterators.unmodifiableIterator(readValues.iterator()));
        return new Tuple2<>(
            payload,
            new Metadata(
                readValues.size(),
                lowWatermark,
                highWatermark,
                readDurationMillis,
                metricsContainers));
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  };
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Stopwatch in project beam by apache.
the class SyntheticDelay method cpuDelay.
/**
 * Spends {@code delayMillis} of measured time computing hashes, to keep the cpu busy.
 */
private static void cpuDelay(long delayMillis) {
  // The budget is enforced in terms of walltime, so this thread may not actually keep the CPU
  // busy if it gets preempted by other threads. That is more likely in a streaming pipeline,
  // where many threads could be running this. The stopwatch only runs during each hashing
  // round, so that these effects are somewhat minimized.
  final long budgetMicros = delayMillis * 1000;
  final Stopwatch busyTime = Stopwatch.createUnstarted();
  while (busyTime.elapsed(TimeUnit.MICROSECONDS) < budgetMicros) {
    busyTime.start();
    // One round: starting at INIT_PLAINTEXT, find a long which hashes to HASH in the lowest
    // MASK bits. Values chosen to roughly take 1ms on typical workstation.
    long candidate = INIT_PLAINTEXT;
    while ((Hashing.murmur3_128().hashLong(candidate).asLong() & MASK) != (HASH & MASK)) {
      candidate++;
    }
    busyTime.stop();
  }
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Stopwatch in project beam by apache.
the class ShardReadersPoolTest method shouldInterruptPuttingRecordsToQueueAndStopShortly.
/**
 * Starts a pool whose first iterator returns a batch of three records while the per-shard
 * capacity is stubbed to two, then checks that stopping the pool takes less than {@code
 * TIMEOUT_IN_MILLIS}.
 */
@Test
public void shouldInterruptPuttingRecordsToQueueAndStopShortly() throws Exception {
  when(read.getMaxCapacityPerShard()).thenReturn(2);
  when(firstIterator.readNextBatch()).thenReturn(ImmutableList.of(a, b, c));

  ShardReadersPool pool =
      new ShardReadersPool(
          read,
          kinesis,
          new KinesisReaderCheckpoint(ImmutableList.of(firstCheckpoint, secondCheckpoint)));
  pool.start();

  // Only the stop() call is timed.
  Stopwatch stopTimer = Stopwatch.createStarted();
  pool.stop();
  long stopMillis = stopTimer.elapsed(TimeUnit.MILLISECONDS);

  assertThat(stopMillis).isLessThan(TIMEOUT_IN_MILLIS);
}
use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Stopwatch in project beam by apache.
the class DockerCommandTest method killContainer.
/**
 * Runs a container that sleeps for 60 seconds, kills it, and checks that it stopped running in
 * less time than the sleep itself would have taken.
 */
@Test
public void killContainer() throws Exception {
  DockerCommand docker = DockerCommand.getDefault();
  String containerId =
      docker.runImage(
          "debian", ImmutableList.of(), ImmutableList.of("/bin/bash", "-c", "sleep 60"));

  // The timer starts once runImage has returned and covers the running check and the kill.
  Stopwatch killTimer = Stopwatch.createStarted();
  boolean runningBeforeKill = docker.isContainerRunning(containerId);
  assertThat("Container should be running.", runningBeforeKill, is(true));

  docker.killContainer(containerId);
  long secondsToKill = killTimer.elapsed(TimeUnit.SECONDS);
  assertThat(
      "Container termination should complete before image self-exits",
      secondsToKill,
      is(lessThan(60L)));

  boolean runningAfterKill = docker.isContainerRunning(containerId);
  assertThat("Container should be terminated.", runningAfterKill, is(false));
}
Aggregations