Example use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in the Apache Beam project.
From the class PubsubUnboundedSourceTest, method readManyMessages.
/**
 * Pushes 10000 messages, whose timestamps are slightly out of order, through a single reader and
 * checks that every message is delivered exactly once with its timestamp and record id intact.
 *
 * <p>Every 1000 messages the reported watermark is compared against the oldest message not yet
 * read, and a checkpoint is taken; only every second checkpoint is finalized.
 */
@Test
public void readManyMessages() throws Exception {
  final int blockSize = 97;
  final int messageCount = 10000;
  // Payload -> message number for every message that has not been read back yet.
  Map<String, Integer> pendingByPayload = new HashMap<>();
  List<IncomingMessage> batch = new ArrayList<>();
  for (int index = 0; index < messageCount; index++) {
    // Make the messages timestamps slightly out of order: numbering is reversed within each
    // run of blockSize consecutive indices.
    int blockStart = (index / blockSize) * blockSize;
    int messageNum = blockStart + (blockSize - 1) - (index % blockSize);
    String payload = String.format("data_%d", messageNum);
    pendingByPayload.put(payload, messageNum);
    String recordId = String.format("recordid_%d", messageNum);
    String ackId = String.format("ackid_%d", messageNum);
    com.google.pubsub.v1.PubsubMessage protoMessage =
        com.google.pubsub.v1.PubsubMessage.newBuilder()
            .setData(ByteString.copyFromUtf8(payload))
            .build();
    batch.add(
        IncomingMessage.of(protoMessage, messageNumToTimestamp(messageNum), 0, ackId, recordId));
  }
  setupOneMessage(batch);

  PubsubReader reader = primSource.createReader(p.getOptions(), null);
  PubsubTestClient pubsubClient = (PubsubTestClient) reader.getPubsubClient();

  for (int index = 0; index < messageCount; index++) {
    // The very first read goes through start(); all later reads go through advance().
    boolean gotMessage = (index == 0) ? reader.start() : reader.advance();
    assertTrue(gotMessage);

    // We'll checkpoint and ack within the 2min limit.
    now.addAndGet(30);
    pubsubClient.advance();

    String payload =
        data(
            reader.getCurrent(),
            !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId());
    Integer messageNum = pendingByPayload.remove(payload);
    // A null here means the payload was already consumed, i.e. a duplicate delivery.
    assertNotNull(messageNum);
    // The timestamp must survive the round trip.
    assertEquals(new Instant(messageNumToTimestamp(messageNum)), reader.getCurrentTimestamp());
    // So must the record id.
    String expectedRecordId = String.format("recordid_%d", messageNum);
    assertArrayEquals(
        expectedRecordId.getBytes(StandardCharsets.UTF_8), reader.getCurrentRecordId());

    if (index % 1000 != 999) {
      continue;
    }

    // Estimated watermark can never get ahead of actual outstanding messages.
    long watermark = reader.getWatermark().getMillis();
    long oldestOutstanding = Long.MAX_VALUE;
    for (Integer outstandingNum : pendingByPayload.values()) {
      long candidate = messageNumToTimestamp(outstandingNum);
      if (candidate < oldestOutstanding) {
        oldestOutstanding = candidate;
      }
    }
    assertThat(watermark, lessThanOrEqualTo(oldestOutstanding));

    // Take a checkpoint every time, but ack (finalize) only every other one.
    PubsubCheckpoint checkpoint = reader.getCheckpointMark();
    if (index % 2000 == 1999) {
      checkpoint.finalizeCheckpoint();
    }
  }

  // Nothing further should be available.
  assertFalse(reader.advance());
  // Every message was seen exactly once, so nothing is left pending.
  assertTrue(pendingByPayload.isEmpty());
  reader.close();
}
Example use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in the Apache Beam project.
From the class PubsubUnboundedSourceTest, method timeoutAckAndRereadOneMessage.
/**
 * Reads one message, moves the test clock forward 65 seconds so that its ACK deadline expires,
 * and verifies that the same payload is delivered a second time before it is finally ACKed.
 */
@Test
public void timeoutAckAndRereadOneMessage() throws Exception {
  setupOneMessage();
  PubsubReader reader = primSource.createReader(p.getOptions(), null);
  PubsubTestClient pubsubClient = (PubsubTestClient) reader.getPubsubClient();

  // First delivery.
  assertTrue(reader.start());
  String firstDelivery =
      data(
          reader.getCurrent(),
          !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId());
  assertEquals(DATA, firstDelivery);

  // Let the ACK deadline for the above expire.
  now.addAndGet(65 * 1000);
  pubsubClient.advance();

  // The very same message is handed out again.
  assertTrue(reader.advance());
  String secondDelivery =
      data(
          reader.getCurrent(),
          !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId());
  assertEquals(DATA, secondDelivery);
  assertFalse(reader.advance());

  // Now ACK the message by finalizing a checkpoint.
  PubsubCheckpoint checkpoint = reader.getCheckpointMark();
  checkpoint.finalizeCheckpoint();
  reader.close();
}
Example use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in the Apache Beam project.
From the class PubsubUnboundedSourceTest, method extendAck.
/**
 * Reads one message without checkpointing it, then advances the test clock twice (55 s and
 * 25 s) and checks after each step that no further message is delivered, before finally ACKing.
 */
@Test
public void extendAck() throws Exception {
  setupOneMessage();
  PubsubReader reader = primSource.createReader(p.getOptions(), null);
  PubsubTestClient pubsubClient = (PubsubTestClient) reader.getPubsubClient();

  // Pull the first message but don't take a checkpoint for it.
  assertTrue(reader.start());
  String delivered =
      data(
          reader.getCurrent(),
          !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId());
  assertEquals(DATA, delivered);

  // First ack extension: 55 seconds later nothing new must show up.
  now.addAndGet(55 * 1000);
  pubsubClient.advance();
  assertFalse(reader.advance());

  // Second ack extension: another 25 seconds later, still nothing new.
  now.addAndGet(25 * 1000);
  pubsubClient.advance();
  assertFalse(reader.advance());

  // Now ACK the message by finalizing a checkpoint.
  PubsubCheckpoint checkpoint = reader.getCheckpointMark();
  checkpoint.finalizeCheckpoint();
  reader.close();
}
Example use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in the Apache Beam project.
From the class PubsubUnboundedSourceTest, method closeWithActiveCheckpoints.
/**
 * Tests that checkpoints finalized after the reader is closed succeed.
 *
 * <p>The reader is first required to read the message prepared by {@code setupOneMessage()}, so
 * that the test cannot pass without a message having been read before the checkpoint is taken.
 */
@Test
public void closeWithActiveCheckpoints() throws Exception {
  setupOneMessage();
  PubsubReader reader = primSource.createReader(p.getOptions(), null);
  // Assert the read succeeded, as the other single-message tests do, instead of silently
  // discarding the result of start().
  assertTrue(reader.start());
  PubsubCheckpoint checkpoint = reader.getCheckpointMark();
  // Close first: finalizing an already-taken checkpoint after close() is the behavior under test.
  reader.close();
  checkpoint.finalizeCheckpoint();
}
Example use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in the Apache Beam project.
From the class PubsubUnboundedSourceTest, method multipleReaders.
/**
 * Sets up two messages, reads and checkpoints the first with one reader, round-trips that
 * checkpoint through the checkpoint coder, and verifies that a second reader created from the
 * decoded checkpoint delivers the second message again.
 */
@Test
public void multipleReaders() throws Exception {
  List<IncomingMessage> batch = new ArrayList<>();
  for (int index = 0; index < 2; index++) {
    String payload = String.format("data_%d", index);
    String ackId = String.format("ackid_%d", index);
    com.google.pubsub.v1.PubsubMessage protoMessage =
        com.google.pubsub.v1.PubsubMessage.newBuilder()
            .setData(ByteString.copyFromUtf8(payload))
            .build();
    batch.add(IncomingMessage.of(protoMessage, TIMESTAMP, 0, ackId, RECORD_ID));
  }
  setupOneMessage(batch);

  PubsubReader firstReader = primSource.createReader(p.getOptions(), null);
  // Consume two messages, only read one.
  assertTrue(firstReader.start());
  String firstPayload =
      data(
          firstReader.getCurrent(),
          !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId());
  assertEquals("data_0", firstPayload);

  // Grab and finalize a checkpoint; the second message is recorded as not yet read.
  PubsubCheckpoint firstCheckpoint = firstReader.getCheckpointMark();
  firstCheckpoint.finalizeCheckpoint();
  assertEquals(1, firstCheckpoint.notYetReadIds.size());
  assertEquals("ackid_1", firstCheckpoint.notYetReadIds.get(0));

  // Read second message with the original reader.
  assertTrue(firstReader.advance());
  String secondPayload =
      data(
          firstReader.getCurrent(),
          !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId());
  assertEquals("data_1", secondPayload);

  // Restore from checkpoint: encode and decode it through the source's checkpoint coder.
  byte[] encodedCheckpoint =
      CoderUtils.encodeToByteArray(primSource.getCheckpointMarkCoder(), firstCheckpoint);
  PubsubCheckpoint restoredCheckpoint =
      CoderUtils.decodeFromByteArray(primSource.getCheckpointMarkCoder(), encodedCheckpoint);
  assertEquals(1, restoredCheckpoint.notYetReadIds.size());
  assertEquals("ackid_1", restoredCheckpoint.notYetReadIds.get(0));

  // Re-read second message through a fresh reader built from the restored checkpoint.
  PubsubReader restoredReader = primSource.createReader(p.getOptions(), restoredCheckpoint);
  assertTrue(restoredReader.start());
  String rereadPayload =
      data(
          restoredReader.getCurrent(),
          !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId());
  assertEquals("data_1", rereadPayload);
  // We are done.
  assertFalse(restoredReader.advance());

  // ACK final message.
  PubsubCheckpoint finalCheckpoint = restoredReader.getCheckpointMark();
  finalCheckpoint.finalizeCheckpoint();
  restoredReader.close();
}
Aggregations