Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
The class KafkaIOTest, method mkMockConsumer.
// Update mock consumer with records distributed among the given topics, each with the given
// number of partitions. Records are assigned in round-robin order among the partitions.
private static MockConsumer<byte[], byte[]> mkMockConsumer(
    List<String> topics,
    int partitionsPerTopic,
    int numElements,
    OffsetResetStrategy offsetResetStrategy,
    Map<String, Object> config,
    SerializableFunction<Integer, byte[]> keyFunction,
    SerializableFunction<Integer, byte[]> valueFunction) {
  final List<TopicPartition> partitions = new ArrayList<>();
  final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> records = new HashMap<>();
  Map<String, List<PartitionInfo>> partitionMap = new HashMap<>();
  for (String topic : topics) {
    List<PartitionInfo> partIds = new ArrayList<>(partitionsPerTopic);
    for (int i = 0; i < partitionsPerTopic; i++) {
      TopicPartition tp = new TopicPartition(topic, i);
      partitions.add(tp);
      partIds.add(new PartitionInfo(topic, i, null, null, null));
      records.put(tp, new ArrayList<>());
    }
    partitionMap.put(topic, partIds);
  }
  int numPartitions = partitions.size();
  final long[] offsets = new long[numPartitions];
  long timestampStartMillis =
      (Long) config.getOrDefault(TIMESTAMP_START_MILLIS_CONFIG, LOG_APPEND_START_TIME.getMillis());
  TimestampType timestampType =
      TimestampType.forName(
          (String) config.getOrDefault(TIMESTAMP_TYPE_CONFIG, TimestampType.LOG_APPEND_TIME.toString()));
  for (int i = 0; i < numElements; i++) {
    int pIdx = i % numPartitions;
    TopicPartition tp = partitions.get(pIdx);
    byte[] key = keyFunction.apply(i);
    byte[] value = valueFunction.apply(i);
    records
        .get(tp)
        .add(
            new ConsumerRecord<>(
                tp.topic(),
                tp.partition(),
                offsets[pIdx]++,
                timestampStartMillis + Duration.standardSeconds(i).getMillis(),
                timestampType,
                0,
                key.length,
                value.length,
                key,
                value));
  }
  // This is updated when the reader assigns partitions.
  final AtomicReference<List<TopicPartition>> assignedPartitions =
      new AtomicReference<>(Collections.<TopicPartition>emptyList());
  final MockConsumer<byte[], byte[]> consumer =
      new MockConsumer<byte[], byte[]>(offsetResetStrategy) {
        @Override
        public synchronized void assign(final Collection<TopicPartition> assigned) {
          super.assign(assigned);
          assignedPartitions.set(ImmutableList.copyOf(assigned));
          for (TopicPartition tp : assigned) {
            updateBeginningOffsets(ImmutableMap.of(tp, 0L));
            updateEndOffsets(ImmutableMap.of(tp, (long) records.get(tp).size()));
          }
        }

        // Override offsetsForTimes() in order to look up the offsets by timestamp.
        @Override
        public synchronized Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes(
            Map<TopicPartition, Long> timestampsToSearch) {
          return timestampsToSearch.entrySet().stream()
              .map(
                  e -> {
                    // In test scope, timestamp == offset.
                    long maxOffset = offsets[partitions.indexOf(e.getKey())];
                    long offset = e.getValue();
                    OffsetAndTimestamp value =
                        (offset >= maxOffset) ? null : new OffsetAndTimestamp(offset, offset);
                    return new SimpleEntry<>(e.getKey(), value);
                  })
              .collect(Collectors.toMap(SimpleEntry::getKey, SimpleEntry::getValue));
        }
      };
  for (String topic : topics) {
    consumer.updatePartitions(topic, partitionMap.get(topic));
  }
  // MockConsumer does not maintain any relationship between the partition seek position and the
  // records added. E.g. if we add 10 records to a partition and then seek to the end of the
  // partition, MockConsumer will still return the 10 records in the next poll. It is our
  // responsibility to keep the currently enqueued records in sync with the partition offsets.
  // The following task is run inside each invocation of MockConsumer.poll().
  // We enqueue only the records with an offset >= the partition's current position.
  Runnable recordEnqueueTask =
      new Runnable() {
        @Override
        public void run() {
          // Add all the records with offset >= the current partition position.
          int recordsAdded = 0;
          for (TopicPartition tp : assignedPartitions.get()) {
            long curPos = consumer.position(tp);
            for (ConsumerRecord<byte[], byte[]> r : records.get(tp)) {
              if (r.offset() >= curPos) {
                consumer.addRecord(r);
                recordsAdded++;
              }
            }
          }
          if (recordsAdded == 0) {
            if (config.get("inject.error.at.eof") != null) {
              consumer.setException(new KafkaException("Injected error in consumer.poll()"));
            }
            // MockConsumer.poll(timeout) does not actually wait even when there aren't any
            // records. Add a small wait here in order to avoid busy looping in the reader.
            Uninterruptibles.sleepUninterruptibly(10, TimeUnit.MILLISECONDS);
            // TODO: BEAM-4086: testUnboundedSourceWithoutBoundedWrapper() occasionally hangs
            // without this wait. Need to look into it.
          }
          consumer.schedulePollTask(this);
        }
      };
  consumer.schedulePollTask(recordEnqueueTask);
  return consumer;
}
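For context, a mock consumer built this way is typically handed to the transform under test through KafkaIO's consumer factory hook rather than used directly. The following is a minimal, hypothetical sketch of that wiring using the standard KafkaIO.Read builder; topics, keyFn, and valueFn are assumed test fixtures, and the partition/element counts and bootstrap address are placeholders, not values taken from the test above.

// Hedged sketch: plug the mock consumer into KafkaIO via withConsumerFactoryFn.
// topics (List<String>), keyFn and valueFn (SerializableFunction<Integer, byte[]>) are
// assumed to be defined elsewhere in the test.
KafkaIO.Read<byte[], byte[]> read =
    KafkaIO.<byte[], byte[]>read()
        .withBootstrapServers("myServer1:9092") // placeholder; the mock never connects
        .withTopics(topics)
        .withKeyDeserializer(ByteArrayDeserializer.class)
        .withValueDeserializer(ByteArrayDeserializer.class)
        .withConsumerFactoryFn(
            config ->
                mkMockConsumer(
                    topics,
                    10,    // partitionsPerTopic (assumed)
                    1000,  // numElements (assumed)
                    OffsetResetStrategy.EARLIEST,
                    config,
                    keyFn,
                    valueFn))
        .withMaxNumRecords(1000);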
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
The class WatchKafkaTopicPartitionDoFnTest, method testProcessElementWithStoppingReadingTopicPartition.
@Test
public void testProcessElementWithStoppingReadingTopicPartition() throws Exception {
  Instant startReadTime = Instant.ofEpochMilli(1L);
  SerializableFunction<TopicPartition, Boolean> checkStopReadingFn =
      new SerializableFunction<TopicPartition, Boolean>() {
        @Override
        public Boolean apply(TopicPartition input) {
          if (input.equals(new TopicPartition("topic1", 1))) {
            return true;
          }
          return false;
        }
      };
  WatchKafkaTopicPartitionDoFn dofnInstance =
      new WatchKafkaTopicPartitionDoFn(
          Duration.millis(600L), consumerFn, checkStopReadingFn, ImmutableMap.of(),
          startReadTime, null, null);
  MockOutputReceiver outputReceiver = new MockOutputReceiver();
  when(mockConsumer.listTopics())
      .thenReturn(
          ImmutableMap.of(
              "topic1",
              ImmutableList.of(
                  new PartitionInfo("topic1", 0, null, null, null),
                  new PartitionInfo("topic1", 1, null, null, null)),
              "topic2",
              ImmutableList.of(
                  new PartitionInfo("topic2", 0, null, null, null),
                  new PartitionInfo("topic2", 1, null, null, null))));
  MockBagState bagState = new MockBagState(ImmutableList.of());
  when(timer.offset(Duration.millis(600L))).thenReturn(timer);
  dofnInstance.processElement(timer, bagState, outputReceiver);
  verify(timer, times(1)).setRelative();
  Set<TopicPartition> expectedOutputTopicPartitions =
      ImmutableSet.of(
          new TopicPartition("topic1", 0),
          new TopicPartition("topic2", 0),
          new TopicPartition("topic2", 1));
  Set<KafkaSourceDescriptor> expectedOutputDescriptor =
      generateDescriptorsFromTopicPartitions(expectedOutputTopicPartitions, startReadTime);
  assertEquals(expectedOutputDescriptor, new HashSet<>(outputReceiver.getOutputs()));
  assertEquals(expectedOutputTopicPartitions, bagState.getCurrentStates());
}
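Because SerializableFunction is a functional interface, the anonymous checkStopReadingFn class used above can also be written as a lambda. This is a behaviorally equivalent alternative, not a change to the test itself.

// Equivalent lambda form of the predicate above: stop reading partition 1 of topic1.
SerializableFunction<TopicPartition, Boolean> checkStopReadingFn =
    input -> input.equals(new TopicPartition("topic1", 1));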
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
The class WatchKafkaTopicPartitionDoFnTest, method testOnTimerWithStoppedTopicPartitions.
@Test
public void testOnTimerWithStoppedTopicPartitions() throws Exception {
  Instant startReadTime = Instant.ofEpochMilli(1L);
  SerializableFunction<TopicPartition, Boolean> checkStopReadingFn =
      new SerializableFunction<TopicPartition, Boolean>() {
        @Override
        public Boolean apply(TopicPartition input) {
          if (input.equals(new TopicPartition("topic1", 1))) {
            return true;
          }
          return false;
        }
      };
  WatchKafkaTopicPartitionDoFn dofnInstance =
      new WatchKafkaTopicPartitionDoFn(
          Duration.millis(600L), consumerFn, checkStopReadingFn, ImmutableMap.of(),
          startReadTime, null, null);
  MockOutputReceiver outputReceiver = new MockOutputReceiver();
  when(mockConsumer.listTopics())
      .thenReturn(
          ImmutableMap.of(
              "topic1",
              ImmutableList.of(
                  new PartitionInfo("topic1", 0, null, null, null),
                  new PartitionInfo("topic1", 1, null, null, null)),
              "topic2",
              ImmutableList.of(
                  new PartitionInfo("topic2", 0, null, null, null),
                  new PartitionInfo("topic2", 1, null, null, null))));
  MockBagState bagState =
      new MockBagState(
          ImmutableList.of(
              new TopicPartition("topic1", 0),
              new TopicPartition("topic2", 0),
              new TopicPartition("topic2", 1)));
  Instant now = Instant.EPOCH;
  mockStatic(Instant.class);
  when(Instant.now()).thenReturn(now);
  dofnInstance.onTimer(timer, bagState, outputReceiver);
  Set<TopicPartition> expectedCurrentTopicPartitions =
      ImmutableSet.of(
          new TopicPartition("topic1", 0),
          new TopicPartition("topic2", 0),
          new TopicPartition("topic2", 1));
  verify(timer, times(1)).set(now.plus(Duration.millis(600L)));
  assertTrue(outputReceiver.getOutputs().isEmpty());
  assertEquals(expectedCurrentTopicPartitions, bagState.getCurrentStates());
}
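Outside of these unit tests, a predicate of this shape is what KafkaIO accepts for dynamically draining individual partitions. The following is a hedged sketch, assuming a Beam release whose KafkaIO.Read exposes withCheckStopReadingFn for a SerializableFunction<TopicPartition, Boolean>; the broker address and topic names are placeholders.

// Hedged sketch: stop reading "topic1" partition 1 while the pipeline keeps running.
SerializableFunction<TopicPartition, Boolean> stopTopic1Partition1 =
    tp -> tp.equals(new TopicPartition("topic1", 1));

KafkaIO.Read<byte[], byte[]> read =
    KafkaIO.<byte[], byte[]>read()
        .withBootstrapServers("broker:9092") // placeholder address
        .withTopics(ImmutableList.of("topic1", "topic2"))
        .withKeyDeserializer(ByteArrayDeserializer.class)
        .withValueDeserializer(ByteArrayDeserializer.class)
        .withCheckStopReadingFn(stopTopic1Partition1);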
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
The class WatermarkPolicyTest, method shouldAdvanceWatermarkWithCustomTimePolicy.
@Test
public void shouldAdvanceWatermarkWithCustomTimePolicy() {
  SerializableFunction<KinesisRecord, Instant> timestampFn =
      (record) -> record.getApproximateArrivalTimestamp().plus(Duration.standardMinutes(1));
  WatermarkPolicy policy =
      WatermarkPolicyFactory.withCustomWatermarkPolicy(
              WatermarkParameters.create().withTimestampFn(timestampFn))
          .createWatermarkPolicy();
  KinesisRecord a = mock(KinesisRecord.class);
  KinesisRecord b = mock(KinesisRecord.class);
  Instant time1 = NOW.minus(Duration.standardSeconds(30L));
  Instant time2 = NOW.minus(Duration.standardSeconds(20L));
  when(a.getApproximateArrivalTimestamp()).thenReturn(time1);
  when(b.getApproximateArrivalTimestamp()).thenReturn(time2);
  policy.update(a);
  assertThat(policy.getWatermark()).isEqualTo(time1.plus(Duration.standardMinutes(1)));
  policy.update(b);
  assertThat(policy.getWatermark()).isEqualTo(time2.plus(Duration.standardMinutes(1)));
}
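In a pipeline, the same timestamp function is normally supplied when the Kinesis source itself is built. This is a minimal sketch, assuming the KinesisIO.Read builder methods withStreamName, withInitialPositionInStream, and withCustomWatermarkPolicy; the stream name is a placeholder.

// Hedged sketch: a Kinesis read whose watermark runs one minute ahead of arrival time.
KinesisIO.Read read =
    KinesisIO.read()
        .withStreamName("my-stream") // placeholder stream name
        .withInitialPositionInStream(InitialPositionInStream.LATEST)
        .withCustomWatermarkPolicy(
            WatermarkParameters.create()
                .withTimestampFn(
                    record ->
                        record.getApproximateArrivalTimestamp().plus(Duration.standardMinutes(1))));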
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
The class CreateStreamTest, method testInStreamingModeCountByKey.
@Test
public void testInStreamingModeCountByKey() throws Exception {
  Instant instant = new Instant(0);
  CreateStream<KV<Integer, Long>> kvSource =
      CreateStream.of(KvCoder.of(VarIntCoder.of(), VarLongCoder.of()), batchDuration())
          .emptyBatch()
          .advanceWatermarkForNextBatch(instant)
          .nextBatch(
              TimestampedValue.of(KV.of(1, 100L), instant.plus(Duration.standardSeconds(3L))),
              TimestampedValue.of(KV.of(1, 300L), instant.plus(Duration.standardSeconds(4L))))
          .advanceWatermarkForNextBatch(instant.plus(Duration.standardSeconds(7L)))
          .nextBatch(TimestampedValue.of(KV.of(1, 400L), instant.plus(Duration.standardSeconds(8L))))
          .advanceNextBatchWatermarkToInfinity();
  PCollection<KV<Integer, Long>> output =
      p.apply("create kv Source", kvSource)
          .apply(
              "window input",
              Window.<KV<Integer, Long>>into(FixedWindows.of(Duration.standardSeconds(3L)))
                  .withAllowedLateness(Duration.ZERO))
          .apply(Count.perKey());
  PAssert.that("Wrong count value ", output)
      .satisfies(
          (SerializableFunction<Iterable<KV<Integer, Long>>, Void>)
              input -> {
                for (KV<Integer, Long> element : input) {
                  if (element.getKey() == 1) {
                    Long countValue = element.getValue();
                    assertNotEquals("Count Value is 0 !!!", 0L, countValue.longValue());
                  } else {
                    fail("Unknown key in the output PCollection");
                  }
                }
                return null;
              });
  p.run();
}
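The cast inside satisfies(...) above is only there to give the lambda its SerializableFunction<Iterable<KV<Integer, Long>>, Void> target type. An equivalent, slightly more readable variant extracts the checker into a named variable first; the assertions are unchanged.

// Equivalent variant: name the checker function, then pass it to PAssert.
SerializableFunction<Iterable<KV<Integer, Long>>, Void> checkCounts =
    input -> {
      for (KV<Integer, Long> element : input) {
        if (element.getKey() == 1) {
          assertNotEquals("Count Value is 0 !!!", 0L, element.getValue().longValue());
        } else {
          fail("Unknown key in the output PCollection");
        }
      }
      return null;
    };
PAssert.that("Wrong count value ", output).satisfies(checkCounts);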