Use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.
Class FineGrainedWatermarkTrackerTest, method testConcurrentWatermarkTracker.
/**
 * A concurrency test: watermark attempts are fired from a single thread, while acks arrive
 * from multiple threads, out of order.
 */
@Test
public static void testConcurrentWatermarkTracker() throws IOException, InterruptedException {
  Random random = new Random();
  ScheduledExecutorService ackingService =
      new ScheduledThreadPoolExecutor(100, ExecutorsUtils.defaultThreadFactory());
  for (int j = 0; j < 100; ++j) {
    FineGrainedWatermarkTracker tracker = new FineGrainedWatermarkTracker(ConfigFactory.empty());
    tracker.start();
    int numWatermarks = 1 + random.nextInt(1000);
    AcknowledgableWatermark[] acknowledgableWatermarks = new AcknowledgableWatermark[numWatermarks];
    SortedSet<Integer> holes = new TreeSet<>();
    final AtomicInteger numAcks = new AtomicInteger(0);
    for (int i = 0; i < numWatermarks; ++i) {
      CheckpointableWatermark checkpointableWatermark =
          new DefaultCheckpointableWatermark("default", new LongWatermark(i));
      final AcknowledgableWatermark ackable = new AcknowledgableWatermark(checkpointableWatermark);
      tracker.track(ackable);
      acknowledgableWatermarks[i] = ackable;
      // ack or not
      boolean ack = random.nextBoolean();
      if (ack) {
        numAcks.incrementAndGet();
        long sleepTime = random.nextInt(100);
        ackingService.schedule(new Callable<Object>() {
          @Override
          public Object call() throws Exception {
            ackable.ack();
            numAcks.decrementAndGet();
            return null;
          }
        }, sleepTime, TimeUnit.MILLISECONDS);
      } else {
        holes.add(i);
      }
    }
    while (numAcks.get() != 0) {
      log.info("Waiting for " + numAcks.get() + " acks");
      Thread.sleep(100);
    }
    verifyCommitables(tracker, holes, numWatermarks - 1);
    tracker.close();
  }
}
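The test calls a verifyCommitables helper that is not part of this snippet. Below is a minimal sketch of what it might check; the accessor name getCommittableWatermarks() is an assumption, not confirmed by the snippet, and the real helper may differ.

  // Hypothetical sketch of the verifyCommitables helper used above. The intent is that the
  // committable watermark is the highest index with no unacknowledged watermark at or below it.
  private static void verifyCommitables(FineGrainedWatermarkTracker tracker,
      SortedSet<Integer> holes, long lastIndex) {
    Map<String, CheckpointableWatermark> committable = tracker.getCommittableWatermarks();
    if (holes.isEmpty()) {
      // Everything was acked: the committable watermark is the last tracked index.
      Assert.assertEquals(
          ((LongWatermark) committable.get("default").getWatermark()).getValue(), lastIndex);
    } else if (holes.first() == 0) {
      // The very first watermark was never acked, so nothing is committable yet.
      Assert.assertTrue(!committable.containsKey("default"));
    } else {
      // Committable watermark is the last contiguously acked index before the first hole.
      Assert.assertEquals(
          ((LongWatermark) committable.get("default").getWatermark()).getValue(),
          (long) (holes.first() - 1));
    }
  }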
Use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.
Class FineGrainedWatermarkTrackerTest, method testSweep.
/**
 * Tests that sweep() removes the correct number of entries.
 */
@Test
public static void testSweep() {
  Random random = new Random();
  for (int j = 0; j < 1000; ++j) {
    FineGrainedWatermarkTracker tracker = new FineGrainedWatermarkTracker(ConfigFactory.empty());
    tracker.setAutoStart(false);
    int numWatermarks = 1 + random.nextInt(1000);
    AcknowledgableWatermark[] acknowledgableWatermarks = new AcknowledgableWatermark[numWatermarks];
    for (int i = 0; i < numWatermarks; ++i) {
      CheckpointableWatermark checkpointableWatermark =
          new DefaultCheckpointableWatermark("default", new LongWatermark(i));
      AcknowledgableWatermark ackable = new AcknowledgableWatermark(checkpointableWatermark);
      acknowledgableWatermarks[i] = ackable;
      tracker.track(ackable);
    }
    int numMissingAcks = random.nextInt(numWatermarks);
    SortedSet<Integer> holes = new TreeSet<>();
    for (int i = 0; i < numMissingAcks; ++i) {
      holes.add(random.nextInt(numWatermarks));
    }
    for (int i = 0; i < numWatermarks; ++i) {
      if (!holes.contains(i)) {
        acknowledgableWatermarks[i].ack();
      }
    }
    verifyCommitables(tracker, holes, numWatermarks - 1);
    int swept = tracker.sweep();
    if (holes.isEmpty()) {
      Assert.assertEquals(swept, numWatermarks - 1);
    } else {
      if (holes.contains(0)) {
        Assert.assertEquals(swept, 0);
      } else {
        Assert.assertEquals(swept, holes.first() - 1);
      }
    }
    verifyCommitables(tracker, holes, numWatermarks - 1);
  }
}
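To make the sweep assertions concrete, here is a minimal worked instance of the same contract, built only from calls that already appear in the test above (illustrative, not part of the original test class):

  FineGrainedWatermarkTracker tracker = new FineGrainedWatermarkTracker(ConfigFactory.empty());
  tracker.setAutoStart(false);
  AcknowledgableWatermark[] watermarks = new AcknowledgableWatermark[5];
  for (int i = 0; i < 5; i++) {
    watermarks[i] = new AcknowledgableWatermark(
        new DefaultCheckpointableWatermark("default", new LongWatermark(i)));
    tracker.track(watermarks[i]);
  }
  // Ack everything except index 3, leaving a single hole.
  for (int i = 0; i < 5; i++) {
    if (i != 3) {
      watermarks[i].ack();
    }
  }
  // Indices 0 and 1 can be swept; index 2 is kept as the committable frontier,
  // and indices 3 and 4 remain because 3 is still unacknowledged.
  Assert.assertEquals(tracker.sweep(), 2);  // holes.first() - 1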
Use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.
Class MultiWriterWatermarkManagerTest, method testFlakyWatermarkStorage.
/**
 * Tests that, in the presence of intermittent commit successes and failures, we continue to make progress.
 */
@Test
public void testFlakyWatermarkStorage() throws IOException, InterruptedException {
  final int failEvery = 2;
  WatermarkStorage mockWatermarkStorage = new WatermarkStorage() {

    private int watermarkInstance = 0;
    private List<CheckpointableWatermark> checkpointed = new ArrayList<>();

    @Override
    public void commitWatermarks(Iterable<CheckpointableWatermark> watermarks) throws IOException {
      ++watermarkInstance;
      if (watermarkInstance % failEvery == 0) {
        throw new IOException("Failed to write");
      } else {
        checkpointed.clear();
        for (CheckpointableWatermark watermark : watermarks) {
          checkpointed.add(watermark);
        }
      }
    }

    @Override
    public Map<String, CheckpointableWatermark> getCommittedWatermarks(
        Class<? extends CheckpointableWatermark> watermarkClass,
        Iterable<String> sourcePartitions) throws IOException {
      return null;
    }
  };

  WatermarkAwareWriter mockWatermarkWriter = new WatermarkAwareWriter() {

    private long watermark = 0;

    @Override
    public boolean isWatermarkCapable() {
      return true;
    }

    @Override
    public void writeEnvelope(RecordEnvelope recordEnvelope) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public Map<String, CheckpointableWatermark> getCommittableWatermark() {
      watermark++;
      return Collections.singletonMap("default",
          (CheckpointableWatermark) new DefaultCheckpointableWatermark("default", new LongWatermark(watermark)));
    }

    @Override
    public Map<String, CheckpointableWatermark> getUnacknowledgedWatermark() {
      return null;
    }

    @Override
    public void write(Object record) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public void commit() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public void cleanup() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public long recordsWritten() {
      return 0;
    }

    @Override
    public long bytesWritten() throws IOException {
      return 0;
    }

    @Override
    public void close() throws IOException {
    }
  };

  MultiWriterWatermarkManager watermarkManager =
      new MultiWriterWatermarkManager(mockWatermarkStorage, 1000, Optional.<Logger>absent());
  watermarkManager.registerWriter(mockWatermarkWriter);
  try {
    watermarkManager.start();
  } catch (Exception e) {
    Assert.fail("Should not throw exception", e);
  }
  Thread.sleep(2000);
  watermarkManager.close();

  MultiWriterWatermarkManager.CommitStatus commitStatus = watermarkManager.getCommitStatus();
  System.out.println(commitStatus);
  MultiWriterWatermarkManager.RetrievalStatus retrievalStatus = watermarkManager.getRetrievalStatus();
  Assert.assertTrue(retrievalStatus.getLastWatermarkRetrievalAttemptTimestampMillis() > 0);
  Assert.assertTrue(retrievalStatus.getLastWatermarkRetrievalSuccessTimestampMillis() > 0);
  Assert.assertTrue(retrievalStatus.getLastWatermarkRetrievalFailureTimestampMillis() == 0);
  System.out.println(retrievalStatus);
}
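For contrast with the flaky storage above, an always-succeeding in-memory WatermarkStorage could be sketched against the same two interface methods shown in the snippet (an illustration only; the real interface may declare more methods):

  // Sketch of an in-memory WatermarkStorage that never fails, keyed by watermark source.
  // Only the two methods visible in the snippet above are implemented.
  WatermarkStorage inMemoryStorage = new WatermarkStorage() {

    private final Map<String, CheckpointableWatermark> committed = new HashMap<>();

    @Override
    public void commitWatermarks(Iterable<CheckpointableWatermark> watermarks) throws IOException {
      for (CheckpointableWatermark watermark : watermarks) {
        committed.put(watermark.getSource(), watermark);
      }
    }

    @Override
    public Map<String, CheckpointableWatermark> getCommittedWatermarks(
        Class<? extends CheckpointableWatermark> watermarkClass,
        Iterable<String> sourcePartitions) throws IOException {
      return new HashMap<>(committed);
    }
  };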
Use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.
Class PartitionedWriterTest, method testWatermarkComputation.
public void testWatermarkComputation(Long committed, Long unacknowledged, Long expected) throws IOException {
  State state = new State();
  state.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TestPartitioner.class.getCanonicalName());
  String defaultSource = "default";

  WatermarkAwareWriter mockDataWriter = mock(WatermarkAwareWriter.class);
  when(mockDataWriter.isWatermarkCapable()).thenReturn(true);
  when(mockDataWriter.getCommittableWatermark()).thenReturn(Collections.singletonMap(defaultSource,
      new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(committed))));
  when(mockDataWriter.getUnacknowledgedWatermark()).thenReturn(Collections.singletonMap(defaultSource,
      new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(unacknowledged))));

  PartitionAwareDataWriterBuilder builder = mock(PartitionAwareDataWriterBuilder.class);
  when(builder.validatePartitionSchema(any(Schema.class))).thenReturn(true);
  when(builder.forPartition(any(GenericRecord.class))).thenReturn(builder);
  when(builder.withWriterId(any(String.class))).thenReturn(builder);
  when(builder.build()).thenReturn(mockDataWriter);

  PartitionedDataWriter writer = new PartitionedDataWriter<String, String>(builder, state);
  RecordEnvelope<String> recordEnvelope = new RecordEnvelope<String>("0");
  recordEnvelope.addCallBack(new AcknowledgableWatermark(
      new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(0))));
  writer.writeEnvelope(recordEnvelope);

  Map<String, CheckpointableWatermark> watermark = writer.getCommittableWatermark();
  System.out.println(watermark.toString());
  if (expected == null) {
    Assert.assertTrue(watermark.isEmpty(), "Expected watermark to be absent");
  } else {
    Assert.assertTrue(watermark.size() == 1);
    Assert.assertEquals((long) expected,
        ((LongWatermark) watermark.values().iterator().next().getWatermark()).getValue());
  }
}
Use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.
Class KafkaSimpleStreamingTest, method testExtractor.
/**
 * testExtractor checks that the extractor behaves correctly. It first creates a topic and sets up a source
 * pointing to it; work units are generated from the source (only a single work unit should be returned).
 * It then writes a record to the topic and reads it back through the extractor to verify the right record
 * is returned. A second record is written and read back through the same extractor to verify that poll works
 * as expected. Finally, the commit API is tested by forcing a commit and starting a new extractor to ensure
 * data is fetched from after the commit. The commit is also verified in Kafka directly.
 * @throws IOException
 * @throws InterruptedException
 * @throws DataRecordException
 */
@Test(timeOut = 10000)
public void testExtractor() throws IOException, InterruptedException, DataRecordException {
  final String topic = "testSimpleStreamingExtractor";
  _kafkaTestHelper.provisionTopic(topic);

  Properties props = new Properties();
  props.put("bootstrap.servers", "localhost:" + _kafkaTestHelper.getKafkaServerPort());
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
  props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
  Producer<String, byte[]> producer = new KafkaProducer<>(props);

  final byte[] record_1 = { 0, 1, 3 };
  final byte[] record_2 = { 2, 4, 6 };
  final byte[] record_3 = { 5, 7, 9 };

  // Write a sample record to the topic
  producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_1));
  producer.flush();

  KafkaSimpleStreamingExtractor<String, byte[]> kSSE = getStreamingExtractor(topic);
  TopicPartition tP = new TopicPartition(topic, 0);
  KafkaSimpleStreamingExtractor.KafkaWatermark kwm =
      new KafkaSimpleStreamingExtractor.KafkaWatermark(tP, new LongWatermark(0));
  byte[] reuse = new byte[1];
  RecordEnvelope<byte[]> oldRecord = new RecordEnvelope<>(reuse, kwm);
  Map<String, CheckpointableWatermark> committedWatermarks = new HashMap<>();
  WatermarkStorage mockWatermarkStorage = mock(WatermarkStorage.class);
  when(mockWatermarkStorage.getCommittedWatermarks(any(Class.class), any(Iterable.class)))
      .thenReturn(committedWatermarks);
  kSSE.start(mockWatermarkStorage);

  // read and verify the record matches what we just wrote
  RecordEnvelope<byte[]> record = kSSE.readRecordEnvelope();
  Assert.assertEquals(record.getRecord(), record_1);

  // write a second record.
  producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_2));
  producer.flush();
  // read the second record using the same extractor to verify it matches what is expected
  record = kSSE.readRecordEnvelope();
  Assert.assertEquals(record.getRecord(), record_2);

  // Commit the watermark
  committedWatermarks.put(record.getWatermark().getSource(), record.getWatermark());

  // write a third record.
  producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_3));
  producer.flush();

  // recreate the extractor to force a seek.
  kSSE = getStreamingExtractor(topic);
  kSSE.start(mockWatermarkStorage);
  record = kSSE.readRecordEnvelope();
  // check it matches the data written
  Assert.assertEquals(record.getRecord(), record_3);
}
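The javadoc notes that the commit is also verified directly in Kafka, but the snippet ends before that step. A sketch of how such a check could look with the plain Kafka consumer API follows; the consumer group id and whether a committed offset exists at all are assumptions that depend on how the extractor commits, which is not shown here.

  // Hypothetical check of the committed offset straight from Kafka (uses
  // org.apache.kafka.clients.consumer.KafkaConsumer and OffsetAndMetadata).
  Properties consumerProps = new Properties();
  consumerProps.put("bootstrap.servers", "localhost:" + _kafkaTestHelper.getKafkaServerPort());
  consumerProps.put("group.id", topic);  // assumption: the extractor's consumer group id
  consumerProps.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
  consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
  try (KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(consumerProps)) {
    // committed() returns the group's last committed offset for the partition, or null if
    // nothing was committed; the exact expected value depends on the extractor's commit logic.
    OffsetAndMetadata committedOffset = consumer.committed(new TopicPartition(topic, 0));
    System.out.println("Committed offset in Kafka: "
        + (committedOffset == null ? "none" : committedOffset.offset()));
  }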