Search in sources :

Example 1 with PubsubCheckpoint

use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in project beam by apache.

the class PubsubUnboundedSourceTest method readManyMessages.

/**
 * Feeds 10000 messages to the source and reads every one of them back through a single reader.
 *
 * <p>Message numbers are reversed inside each run of 97, which (per the original author's note)
 * makes the message timestamps arrive slightly out of order. For every message read the test
 * checks that it has not been seen before and that its timestamp and record id match what was
 * published. After every 1000th message it checks the reader's watermark against the earliest
 * timestamp still outstanding and takes a checkpoint; only every second checkpoint is finalized.
 */
@Test
public void readManyMessages() throws Exception {
    final int total = 10000;
    final int stride = 97;
    // Payload -> message number for everything published but not yet read back.
    Map<String, Integer> pendingByData = new HashMap<>();
    List<IncomingMessage> published = new ArrayList<>();
    for (int i = 0; i < total; i++) {
        // Reverse the numbering within each run of `stride` messages.
        int runStart = (i / stride) * stride;
        int messageNum = runStart + (stride - 1) - (i % stride);
        String data = String.format("data_%d", messageNum);
        String recordId = String.format("recordid_%d", messageNum);
        String ackId = String.format("ackid_%d", messageNum);
        pendingByData.put(data, messageNum);
        com.google.pubsub.v1.PubsubMessage proto =
                com.google.pubsub.v1.PubsubMessage.newBuilder()
                        .setData(ByteString.copyFromUtf8(data))
                        .build();
        published.add(IncomingMessage.of(proto, messageNumToTimestamp(messageNum), 0, ackId, recordId));
    }
    setupOneMessage(published);

    PubsubReader pubsubReader = primSource.createReader(p.getOptions(), null);
    PubsubTestClient testClient = (PubsubTestClient) pubsubReader.getPubsubClient();
    for (int i = 0; i < total; i++) {
        // The first message comes from start(), every later one from advance().
        boolean gotMessage = (i == 0) ? pubsubReader.start() : pubsubReader.advance();
        assertTrue(gotMessage);

        // Move the test clock forward a little; checkpoint and ack stay within the 2min limit.
        now.addAndGet(30);
        testClient.advance();

        boolean payloadOnly =
                !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId();
        String data = data(pubsubReader.getCurrent(), payloadOnly);

        // A null here means the payload was already consumed, i.e. a duplicate delivery.
        Integer messageNum = pendingByData.remove(data);
        assertNotNull(messageNum);

        // The timestamp must survive the round trip.
        Instant expectedTimestamp = new Instant(messageNumToTimestamp(messageNum));
        assertEquals(expectedTimestamp, pubsubReader.getCurrentTimestamp());

        // The record id must survive the round trip.
        byte[] expectedRecordId =
                String.format("recordid_%d", messageNum).getBytes(StandardCharsets.UTF_8);
        assertArrayEquals(expectedRecordId, pubsubReader.getCurrentRecordId());

        if (i % 1000 != 999) {
            continue;
        }

        // The estimated watermark can never get ahead of the messages still outstanding.
        long watermark = pubsubReader.getWatermark().getMillis();
        long earliestOutstanding = Long.MAX_VALUE;
        for (Integer outstandingNum : pendingByData.values()) {
            long candidate = messageNumToTimestamp(outstandingNum);
            if (candidate < earliestOutstanding) {
                earliestOutstanding = candidate;
            }
        }
        assertThat(watermark, lessThanOrEqualTo(earliestOutstanding));

        // Take a checkpoint every time, but ack (finalize) only every other one.
        PubsubCheckpoint mark = pubsubReader.getCheckpointMark();
        boolean finalizeThisOne = (i % 2000 == 1999);
        if (finalizeThisOne) {
            mark.finalizeCheckpoint();
        }
    }

    // Nothing further to read.
    assertFalse(pubsubReader.advance());
    // Every published message was read exactly once.
    assertTrue(pendingByData.isEmpty());
    pubsubReader.close();
}
Also used : HashMap(java.util.HashMap) Instant(org.joda.time.Instant) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) PubsubCheckpoint(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint) IncomingMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.IncomingMessage) PubsubReader(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubReader) Test(org.junit.Test)

Example 2 with PubsubCheckpoint

use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in project beam by apache.

the class PubsubUnboundedSourceTest method timeoutAckAndRereadOneMessage.

/**
 * Reads one message, lets the test clock run 65 * 1000 units past the read so that (per the
 * original author's note) its ACK deadline expires, and checks that the same payload is
 * delivered a second time before the checkpoint is finalized.
 */
@Test
public void timeoutAckAndRereadOneMessage() throws Exception {
    setupOneMessage();
    PubsubReader pubsubReader = primSource.createReader(p.getOptions(), null);
    PubsubTestClient testClient = (PubsubTestClient) pubsubReader.getPubsubClient();

    // First delivery.
    assertTrue(pubsubReader.start());
    // Computed once; the source's needs-attributes / needs-message-id settings are assumed
    // not to change while the test runs.
    boolean payloadOnly =
            !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId();
    assertEquals(DATA, data(pubsubReader.getCurrent(), payloadOnly));

    // Let the ACK deadline for the message above expire.
    now.addAndGet(65 * 1000);
    testClient.advance();

    // Second delivery of the very same message, and nothing after it.
    assertTrue(pubsubReader.advance());
    assertEquals(DATA, data(pubsubReader.getCurrent(), payloadOnly));
    assertFalse(pubsubReader.advance());

    // Finally ACK the message by finalizing a checkpoint.
    pubsubReader.getCheckpointMark().finalizeCheckpoint();
    pubsubReader.close();
}
Also used : PubsubCheckpoint(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint) PubsubReader(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubReader) Test(org.junit.Test)

Example 3 with PubsubCheckpoint

use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in project beam by apache.

the class PubsubUnboundedSourceTest method extendAck.

/**
 * Reads one message without checkpointing it, advances the test clock twice (by 55 * 1000 and
 * then 25 * 1000 units) and checks after each step that no further message is delivered. The
 * original author's notes describe each step as extending the ack. The message is ACKed at the
 * end by finalizing a checkpoint.
 */
@Test
public void extendAck() throws Exception {
    setupOneMessage();
    PubsubReader pubsubReader = primSource.createReader(p.getOptions(), null);
    PubsubTestClient testClient = (PubsubTestClient) pubsubReader.getPubsubClient();

    // Pull the first message; deliberately no checkpoint is taken for it yet.
    assertTrue(pubsubReader.start());
    boolean payloadOnly =
            !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId();
    assertEquals(DATA, data(pubsubReader.getCurrent(), payloadOnly));

    // First extension: the message must not come back.
    now.addAndGet(55 * 1000);
    testClient.advance();
    assertFalse(pubsubReader.advance());

    // Second extension: still nothing new to read.
    now.addAndGet(25 * 1000);
    testClient.advance();
    assertFalse(pubsubReader.advance());

    // Now ACK the message.
    pubsubReader.getCheckpointMark().finalizeCheckpoint();
    pubsubReader.close();
}
Also used : PubsubCheckpoint(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint) PubsubReader(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubReader) Test(org.junit.Test)

Example 4 with PubsubCheckpoint

use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in project beam by apache.

the class PubsubUnboundedSourceTest method closeWithActiveCheckpoints.

/**
 * Tests that checkpoints finalized after the reader is closed succeed.
 *
 * <p>The reader is started, a checkpoint is taken, the reader is closed, and only then is the
 * checkpoint finalized. The test passes when none of these steps throws.
 *
 * @throws Exception if creating, starting or closing the reader, or finalizing the checkpoint,
 *     fails
 */
@Test
public void closeWithActiveCheckpoints() throws Exception {
    setupOneMessage();
    PubsubReader reader = primSource.createReader(p.getOptions(), null);
    // Fail fast if nothing was read: otherwise the checkpoint below would be taken without any
    // message having been read, and the scenario named by this test would not be exercised.
    if (!reader.start()) {
        throw new AssertionError(
                "reader.start() returned false; expected the message from setupOneMessage()");
    }
    PubsubCheckpoint checkpoint = reader.getCheckpointMark();
    // Close first, finalize second: this ordering is the point of the test.
    reader.close();
    checkpoint.finalizeCheckpoint();
}
Also used : PubsubCheckpoint(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint) PubsubReader(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubReader) Test(org.junit.Test)

Example 5 with PubsubCheckpoint

use of org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint in project beam by apache.

the class PubsubUnboundedSourceTest method multipleReaders.

/**
 * Publishes two messages, reads the first with one reader and checkpoints, then restores a
 * second reader from an encoded-and-decoded copy of that checkpoint and checks that the second
 * message is read again through it.
 */
@Test
public void multipleReaders() throws Exception {
    List<IncomingMessage> published = new ArrayList<>();
    for (int i = 0; i < 2; i++) {
        String data = String.format("data_%d", i);
        String ackId = String.format("ackid_%d", i);
        com.google.pubsub.v1.PubsubMessage proto =
                com.google.pubsub.v1.PubsubMessage.newBuilder()
                        .setData(ByteString.copyFromUtf8(data))
                        .build();
        published.add(IncomingMessage.of(proto, TIMESTAMP, 0, ackId, RECORD_ID));
    }
    setupOneMessage(published);

    // Two messages are available, but only the first is read before checkpointing.
    PubsubReader firstReader = primSource.createReader(p.getOptions(), null);
    assertTrue(firstReader.start());
    boolean payloadOnly =
            !primSource.outer.getNeedsAttributes() && !primSource.outer.getNeedsMessageId();
    assertEquals("data_0", data(firstReader.getCurrent(), payloadOnly));

    // The checkpoint lists the second message as not yet read.
    PubsubCheckpoint firstMark = firstReader.getCheckpointMark();
    firstMark.finalizeCheckpoint();
    assertEquals(1, firstMark.notYetReadIds.size());
    assertEquals("ackid_1", firstMark.notYetReadIds.get(0));

    // The first reader goes on to read the second message.
    assertTrue(firstReader.advance());
    assertEquals("data_1", data(firstReader.getCurrent(), payloadOnly));

    // Round-trip the checkpoint through its coder; the not-yet-read id must survive.
    byte[] encodedMark = CoderUtils.encodeToByteArray(primSource.getCheckpointMarkCoder(), firstMark);
    PubsubCheckpoint restoredMark =
            CoderUtils.decodeFromByteArray(primSource.getCheckpointMarkCoder(), encodedMark);
    assertEquals(1, restoredMark.notYetReadIds.size());
    assertEquals("ackid_1", restoredMark.notYetReadIds.get(0));

    // A reader restored from that checkpoint reads the second message again.
    PubsubReader secondReader = primSource.createReader(p.getOptions(), restoredMark);
    assertTrue(secondReader.start());
    assertEquals("data_1", data(secondReader.getCurrent(), payloadOnly));

    // Nothing further to read.
    assertFalse(secondReader.advance());

    // ACK the final message.
    PubsubCheckpoint finalMark = secondReader.getCheckpointMark();
    finalMark.finalizeCheckpoint();
    secondReader.close();
}
Also used : IncomingMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.IncomingMessage) ArrayList(java.util.ArrayList) ByteString(com.google.protobuf.ByteString) PubsubCheckpoint(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint) PubsubReader(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubReader) Test(org.junit.Test)

Aggregations

PubsubCheckpoint (org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubCheckpoint): 8, PubsubReader (org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubReader): 8, Test (org.junit.Test): 8, ByteString (com.google.protobuf.ByteString): 2, ArrayList (java.util.ArrayList): 2, IncomingMessage (org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.IncomingMessage): 2, HashMap (java.util.HashMap): 1, SubscriptionPath (org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.SubscriptionPath): 1, TopicPath (org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.TopicPath): 1, PubsubSource (org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSource.PubsubSource): 1, PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 1, Instant (org.joda.time.Instant): 1