Search in sources :

Example 1 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class FineGrainedWatermarkTrackerTest method testConcurrentWatermarkTracker.

/**
 * A concurrent test: watermarks are tracked from a single thread, but acks arrive from multiple
 * threads and out of order.
 *
 * <p>Each of the 100 rounds tracks between 1 and 1000 watermarks. Every watermark is randomly either
 * left un-acked (its index is recorded as a "hole") or acked from the scheduler pool after a random
 * delay of under 100 ms. Once every scheduled ack has run, the tracker is checked with
 * {@code verifyCommitables} (defined elsewhere in the test class).
 *
 * @throws IOException declared by the original signature for the tracker calls made here
 * @throws InterruptedException if the thread is interrupted while waiting for outstanding acks
 */
@Test
public static void testConcurrentWatermarkTracker() throws IOException, InterruptedException {
    Random random = new Random();
    ScheduledExecutorService ackingService = new ScheduledThreadPoolExecutor(100, ExecutorsUtils.defaultThreadFactory());
    try {
        for (int j = 0; j < 100; ++j) {
            FineGrainedWatermarkTracker tracker = new FineGrainedWatermarkTracker(ConfigFactory.empty());
            tracker.start();
            try {
                int numWatermarks = 1 + random.nextInt(1000);
                SortedSet<Integer> holes = new TreeSet<>();
                // Number of scheduled acks that have not finished yet.
                final AtomicInteger numAcks = new AtomicInteger(0);
                for (int i = 0; i < numWatermarks; ++i) {
                    CheckpointableWatermark checkpointableWatermark = new DefaultCheckpointableWatermark("default", new LongWatermark(i));
                    final AcknowledgableWatermark ackable = new AcknowledgableWatermark(checkpointableWatermark);
                    tracker.track(ackable);
                    // ack or not
                    boolean ack = random.nextBoolean();
                    if (ack) {
                        // Count the ack before scheduling it so the wait loop below cannot miss it.
                        numAcks.incrementAndGet();
                        long sleepTime = random.nextInt(100);
                        ackingService.schedule(new Callable<Object>() {

                            @Override
                            public Object call() throws Exception {
                                try {
                                    ackable.ack();
                                } finally {
                                    // Always mark the attempt as finished; otherwise a failing ack
                                    // would leave the wait loop below spinning forever.
                                    numAcks.decrementAndGet();
                                }
                                return null;
                            }
                        }, sleepTime, TimeUnit.MILLISECONDS);
                    } else {
                        holes.add(i);
                    }
                }
                while (numAcks.get() != 0) {
                    log.info("Waiting for " + numAcks.get() + " acks");
                    Thread.sleep(100);
                }
                verifyCommitables(tracker, holes, numWatermarks - 1);
            } finally {
                // Release the tracker even when verification fails.
                tracker.close();
            }
        }
    } finally {
        // Stop the pool's worker threads and drop any still-pending delayed acks so they do not
        // outlive the test.
        ackingService.shutdownNow();
    }
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Random(java.util.Random) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TreeSet(java.util.TreeSet) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 2 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class FineGrainedWatermarkTrackerTest method testSweep.

/**
 * Checks that {@code sweep()} reports the expected number of swept entries.
 *
 * <p>Each of the 1000 rounds tracks a random number of watermarks (1 to 1000) on a tracker with
 * auto-start disabled, leaves a random set of indices un-acked ("holes"), acks the rest, and then
 * compares the value returned by {@code sweep()} against the count derived from the first hole.
 * {@code verifyCommitables} (defined elsewhere in the test class) is run both before and after
 * the sweep.
 */
@Test
public static void testSweep() {
    final Random rng = new Random();
    for (int round = 0; round < 1000; ++round) {
        final FineGrainedWatermarkTracker tracker = new FineGrainedWatermarkTracker(ConfigFactory.empty());
        tracker.setAutoStart(false);

        // Track watermarks 0 .. total-1, remembering each one so it can be acked later.
        final int total = 1 + rng.nextInt(1000);
        final AcknowledgableWatermark[] tracked = new AcknowledgableWatermark[total];
        for (int idx = 0; idx < total; ++idx) {
            final CheckpointableWatermark payload = new DefaultCheckpointableWatermark("default", new LongWatermark(idx));
            tracked[idx] = new AcknowledgableWatermark(payload);
            tracker.track(tracked[idx]);
        }

        // Pick the indices that stay un-acked; duplicate picks collapse in the set.
        final SortedSet<Integer> unacked = new TreeSet<>();
        for (int remaining = rng.nextInt(total); remaining > 0; --remaining) {
            unacked.add(rng.nextInt(total));
        }

        // Ack everything that is not a hole.
        for (int idx = 0; idx < total; ++idx) {
            if (unacked.contains(idx)) {
                continue;
            }
            tracked[idx].ack();
        }

        verifyCommitables(tracker, unacked, total - 1);

        final int swept = tracker.sweep();
        final int expectedSwept;
        if (unacked.isEmpty()) {
            expectedSwept = total - 1;
        } else if (unacked.contains(0)) {
            expectedSwept = 0;
        } else {
            expectedSwept = unacked.first() - 1;
        }
        Assert.assertEquals(swept, expectedSwept);

        verifyCommitables(tracker, unacked, total - 1);
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Random(java.util.Random) TreeSet(java.util.TreeSet) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 3 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class MultiWriterWatermarkManagerTest method testFlakyWatermarkStorage.

/**
 * Test that, with watermark commits alternately succeeding and failing, the manager keeps
 * retrieving watermarks from its writer.
 *
 * <p>The stub storage throws an {@link IOException} on every second commit call. The stub writer
 * hands out a strictly increasing watermark on every retrieval. The manager is run for two
 * seconds and then the retrieval status is checked: an attempt and a success must have been
 * recorded, and no retrieval failure.
 *
 * @throws IOException declared for the manager calls made here
 * @throws InterruptedException if the thread is interrupted while sleeping
 */
@Test
public void testFlakyWatermarkStorage() throws IOException, InterruptedException {
    final int failurePeriod = 2;

    // Storage stub: every failurePeriod-th commit fails, all others record the committed batch.
    WatermarkStorage flakyStorage = new WatermarkStorage() {

        private int commitCalls = 0;

        private List<CheckpointableWatermark> lastCommitted = new ArrayList<>();

        @Override
        public void commitWatermarks(Iterable<CheckpointableWatermark> watermarks) throws IOException {
            commitCalls += 1;
            if (commitCalls % failurePeriod == 0) {
                throw new IOException("Failed to write");
            }
            lastCommitted.clear();
            for (CheckpointableWatermark committed : watermarks) {
                lastCommitted.add(committed);
            }
        }

        @Override
        public Map<String, CheckpointableWatermark> getCommittedWatermarks(Class<? extends CheckpointableWatermark> watermarkClass, Iterable<String> sourcePartitions) throws IOException {
            return null;
        }
    };

    // Writer stub: only the watermark accessors are meaningful; data-path methods are unsupported.
    WatermarkAwareWriter countingWriter = new WatermarkAwareWriter() {

        private long counter = 0;

        @Override
        public boolean isWatermarkCapable() {
            return true;
        }

        @Override
        public void writeEnvelope(RecordEnvelope recordEnvelope) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public Map<String, CheckpointableWatermark> getCommittableWatermark() {
            // Each retrieval advances the watermark by one.
            counter += 1;
            CheckpointableWatermark current = new DefaultCheckpointableWatermark("default", new LongWatermark(counter));
            return Collections.singletonMap("default", current);
        }

        @Override
        public Map<String, CheckpointableWatermark> getUnacknowledgedWatermark() {
            return null;
        }

        @Override
        public void write(Object record) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public void commit() throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public void cleanup() throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public long recordsWritten() {
            return 0;
        }

        @Override
        public long bytesWritten() throws IOException {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };

    MultiWriterWatermarkManager manager = new MultiWriterWatermarkManager(flakyStorage, 1000, Optional.<Logger>absent());
    manager.registerWriter(countingWriter);
    try {
        manager.start();
    } catch (Exception startFailure) {
        Assert.fail("Should not throw exception", startFailure);
    }

    // Give the manager time to run before shutting it down.
    Thread.sleep(2000);
    manager.close();

    MultiWriterWatermarkManager.CommitStatus commitStatus = manager.getCommitStatus();
    System.out.println(commitStatus);

    MultiWriterWatermarkManager.RetrievalStatus retrievalStatus = manager.getRetrievalStatus();
    boolean retrievalAttempted = retrievalStatus.getLastWatermarkRetrievalAttemptTimestampMillis() > 0;
    Assert.assertTrue(retrievalAttempted);
    boolean retrievalSucceeded = retrievalStatus.getLastWatermarkRetrievalSuccessTimestampMillis() > 0;
    Assert.assertTrue(retrievalSucceeded);
    boolean retrievalNeverFailed = retrievalStatus.getLastWatermarkRetrievalFailureTimestampMillis() == 0;
    Assert.assertTrue(retrievalNeverFailed);
    System.out.println(retrievalStatus);
}
Also used : RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) IOException(java.io.IOException) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) IOException(java.io.IOException) ArrayList(java.util.ArrayList) List(java.util.List) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 4 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class PartitionedWriterTest method testWatermarkComputation.

/**
 * Checks the committable watermark reported by a {@code PartitionedDataWriter} that wraps a
 * single mocked {@code WatermarkAwareWriter}.
 *
 * <p>The mocked writer reports {@code committed} as its committable watermark and
 * {@code unacknowledged} as its unacknowledged watermark, both for the source "default". One
 * record envelope is written through the partitioned writer before its committable watermark is
 * read back.
 *
 * <p>NOTE(review): no {@code @Test} annotation is visible on this method here; presumably it is
 * fed by a data provider declared elsewhere - confirm.
 *
 * @param committed value of the committable watermark reported by the mocked writer
 * @param unacknowledged value of the unacknowledged watermark reported by the mocked writer
 * @param expected expected committable watermark value, or {@code null} when the partitioned
 *        writer is expected to report no committable watermark
 * @throws IOException declared for the writer calls made here
 */
public void testWatermarkComputation(Long committed, Long unacknowledged, Long expected) throws IOException {
    State state = new State();
    state.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TestPartitioner.class.getCanonicalName());
    String defaultSource = "default";
    WatermarkAwareWriter mockDataWriter = mock(WatermarkAwareWriter.class);
    when(mockDataWriter.isWatermarkCapable()).thenReturn(true);
    when(mockDataWriter.getCommittableWatermark()).thenReturn(Collections.singletonMap(defaultSource, new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(committed))));
    when(mockDataWriter.getUnacknowledgedWatermark()).thenReturn(Collections.singletonMap(defaultSource, new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(unacknowledged))));
    // Builder mock: every fluent call returns the builder itself and build() yields the mocked writer.
    PartitionAwareDataWriterBuilder builder = mock(PartitionAwareDataWriterBuilder.class);
    when(builder.validatePartitionSchema(any(Schema.class))).thenReturn(true);
    when(builder.forPartition(any(GenericRecord.class))).thenReturn(builder);
    when(builder.withWriterId(any(String.class))).thenReturn(builder);
    when(builder.build()).thenReturn(mockDataWriter);
    PartitionedDataWriter writer = new PartitionedDataWriter<String, String>(builder, state);
    RecordEnvelope<String> recordEnvelope = new RecordEnvelope<String>("0");
    recordEnvelope.addCallBack(new AcknowledgableWatermark(new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(0))));
    writer.writeEnvelope(recordEnvelope);
    Map<String, CheckpointableWatermark> watermark = writer.getCommittableWatermark();
    System.out.println(watermark.toString());
    if (expected == null) {
        Assert.assertTrue(watermark.isEmpty(), "Expected watermark to be absent");
    } else {
        // TestNG's argument order is (actual, expected); keeping to it makes failure messages
        // report the two values the right way round.
        Assert.assertEquals(watermark.size(), 1);
        Assert.assertEquals(((LongWatermark) watermark.values().iterator().next().getWatermark()).getValue(), (long) expected);
    }
}
Also used : RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) TestPartitioner(org.apache.gobblin.writer.test.TestPartitioner) Schema(org.apache.avro.Schema) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) State(org.apache.gobblin.configuration.State) GenericRecord(org.apache.avro.generic.GenericRecord) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark)

Example 5 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class KafkaSimpleStreamingTest method testExtractor.

/**
 * testExtractor checks that the extractor code does the right thing. First it creates a topic and obtains a streaming
 * extractor for it. Then it writes a record to this topic and reads back from the extractor to verify the right record
 * is returned. A second record is then written and read back through the same extractor to verify poll works as
 * expected. Finally the watermark of the second record is placed into the map served by the mocked watermark storage,
 * a third record is written, and a new extractor is started against that storage to ensure data is fetched from after
 * the committed watermark.
 *
 * <p>The producer is closed in a {@code finally} block so its resources are released even when an assertion fails.
 *
 * @throws IOException declared for the extractor and watermark-storage calls
 * @throws InterruptedException declared by the original signature
 * @throws DataRecordException declared for reading records from the extractor
 */
@Test(timeOut = 10000)
public void testExtractor() throws IOException, InterruptedException, DataRecordException {
    final String topic = "testSimpleStreamingExtractor";
    _kafkaTestHelper.provisionTopic(topic);
    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:" + _kafkaTestHelper.getKafkaServerPort());
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
    Producer<String, byte[]> producer = new KafkaProducer<>(props);
    try {
        final byte[] record_1 = { 0, 1, 3 };
        final byte[] record_2 = { 2, 4, 6 };
        final byte[] record_3 = { 5, 7, 9 };
        // Write a sample record to the topic
        producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_1));
        producer.flush();
        KafkaSimpleStreamingExtractor<String, byte[]> kSSE = getStreamingExtractor(topic);
        // NOTE(review): tP, kwm, reuse and oldRecord are not read by any assertion below; they are
        // retained from the original test unchanged.
        TopicPartition tP = new TopicPartition(topic, 0);
        KafkaSimpleStreamingExtractor.KafkaWatermark kwm = new KafkaSimpleStreamingExtractor.KafkaWatermark(tP, new LongWatermark(0));
        byte[] reuse = new byte[1];
        RecordEnvelope<byte[]> oldRecord = new RecordEnvelope<>(reuse, kwm);
        // The mocked storage serves this map, so entries added later are visible to extractors started afterwards.
        Map<String, CheckpointableWatermark> committedWatermarks = new HashMap<>();
        WatermarkStorage mockWatermarkStorage = mock(WatermarkStorage.class);
        when(mockWatermarkStorage.getCommittedWatermarks(any(Class.class), any(Iterable.class))).thenReturn(committedWatermarks);
        kSSE.start(mockWatermarkStorage);
        // read and verify the record matches we just wrote
        RecordEnvelope<byte[]> record = kSSE.readRecordEnvelope();
        Assert.assertEquals(record.getRecord(), record_1);
        // write a second record.
        producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_2));
        producer.flush();
        // read the second record using same extractor to verify it matches whats expected
        record = kSSE.readRecordEnvelope();
        Assert.assertEquals(record.getRecord(), record_2);
        // Commit the watermark
        committedWatermarks.put(record.getWatermark().getSource(), record.getWatermark());
        // write a third record.
        producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_3));
        producer.flush();
        // recreate extractor to force a seek.
        kSSE = getStreamingExtractor(topic);
        kSSE.start(mockWatermarkStorage);
        record = kSSE.readRecordEnvelope();
        // check it matches the data written
        Assert.assertEquals(record.getRecord(), record_3);
    } finally {
        // Release the producer's resources; the original test never closed it.
        producer.close();
    }
}
Also used : KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) WatermarkStorage(org.apache.gobblin.writer.WatermarkStorage) RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) HashMap(java.util.HashMap) Properties(java.util.Properties) KafkaSimpleStreamingExtractor(org.apache.gobblin.source.extractor.extract.kafka.KafkaSimpleStreamingExtractor) TopicPartition(org.apache.kafka.common.TopicPartition) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Aggregations

CheckpointableWatermark (org.apache.gobblin.source.extractor.CheckpointableWatermark)15 DefaultCheckpointableWatermark (org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark)11 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)9 Test (org.testng.annotations.Test)9 RecordEnvelope (org.apache.gobblin.stream.RecordEnvelope)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 Properties (java.util.Properties)3 Random (java.util.Random)3 TreeSet (java.util.TreeSet)3 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)3 ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 HashMap (java.util.HashMap)2 WatermarkStorage (org.apache.gobblin.writer.WatermarkStorage)2 Config (com.typesafe.config.Config)1 List (java.util.List)1 Schema (org.apache.avro.Schema)1 GenericRecord (org.apache.avro.generic.GenericRecord)1 State (org.apache.gobblin.configuration.State)1