use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class KafkaSimpleStreamingTest method testThreadedExtractor.
/**
 * Verifies that it is safe to call {@code close} on the extractor from a different thread while another
 * thread is inside {@code readRecordEnvelope}.
 *
 * <p>A side thread asks the extractor for a record (this test never writes one to the topic). The test
 * thread then closes the extractor and joins the side thread. The side thread may finish normally, or with a
 * {@link WakeupException} or a {@link ClosedChannelException}; any other exception is handed back to the test
 * thread and fails the test. Asserting directly on the side thread would only terminate that thread and
 * leave the test green, so the outcome is checked here instead.
 *
 * <p>NOTE(review): the extractor is never started in this test. If {@code readRecordEnvelope} rejects an
 * extractor that has not been started, that exception is now reported as a failure rather than being lost
 * on the side thread - confirm whether a {@code start} call is needed before the read.
 */
@Test(timeOut = 10000)
public void testThreadedExtractor() {
  final String topic = "testThreadedExtractor";
  final KafkaSimpleStreamingExtractor<String, byte[]> kSSE = getStreamingExtractor(topic);
  // Single-slot holder for an unexpected failure raised on the side thread. It is only read after join()
  // returns, at which point everything the side thread wrote is visible to this thread.
  final Throwable[] unexpectedFailure = new Throwable[1];
  Thread waitingThread = new Thread() {
    @Override
    public void run() {
      try {
        kSSE.readRecordEnvelope();
      } catch (Exception e) {
        boolean expected = (e instanceof WakeupException) || (e instanceof ClosedChannelException);
        if (!expected) {
          unexpectedFailure[0] = e;
        }
      }
    }
  };
  waitingThread.start();
  try {
    kSSE.close();
    waitingThread.join();
  } catch (InterruptedException e) {
    // Preserve the interrupt status for whoever is running this test.
    Thread.currentThread().interrupt();
    throw new AssertionError("Interrupted while waiting for the reader thread to exit", e);
  } catch (Exception e) {
    // should never come here
    throw new AssertionError("Closing the extractor failed", e);
  }
  if (unexpectedFailure[0] != null) {
    throw new AssertionError("Reader thread failed with an unexpected exception", unexpectedFailure[0]);
  }
}
use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class KafkaSimpleStreamingTest method testExtractor.
/**
 * Checks that the streaming extractor returns the records written to a topic and honours committed
 * watermarks.
 *
 * <p>The test provisions a topic, writes one record and reads it back through an extractor. A second record
 * is then written and read back through the same extractor. The watermark of that second record is placed
 * in the map returned by the mocked {@link WatermarkStorage}, a third record is written, and a freshly
 * created extractor started against the same storage is expected to return the third record.
 *
 * <p>The producer and both extractors are closed when the test finishes, whether it passes or fails, so
 * that they do not outlive the test.
 *
 * @throws IOException declared by the extractor calls used here
 * @throws InterruptedException declared for callers; not raised directly by the visible code
 * @throws DataRecordException declared by the extractor calls used here
 */
@Test(timeOut = 10000)
public void testExtractor() throws IOException, InterruptedException, DataRecordException {
  final String topic = "testSimpleStreamingExtractor";
  _kafkaTestHelper.provisionTopic(topic);
  Properties props = new Properties();
  props.put("bootstrap.servers", "localhost:" + _kafkaTestHelper.getKafkaServerPort());
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
  props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
  final byte[] record_1 = { 0, 1, 3 };
  final byte[] record_2 = { 2, 4, 6 };
  final byte[] record_3 = { 5, 7, 9 };
  Producer<String, byte[]> producer = new KafkaProducer<>(props);
  try {
    // Write a sample record to the topic
    producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_1));
    producer.flush();
    KafkaSimpleStreamingExtractor<String, byte[]> kSSE = getStreamingExtractor(topic);
    try {
      // The mocked storage hands out this map, so entries added later act as committed watermarks
      Map<String, CheckpointableWatermark> committedWatermarks = new HashMap<>();
      WatermarkStorage mockWatermarkStorage = mock(WatermarkStorage.class);
      when(mockWatermarkStorage.getCommittedWatermarks(any(Class.class), any(Iterable.class))).thenReturn(committedWatermarks);
      kSSE.start(mockWatermarkStorage);
      // read and verify the record matches what we just wrote
      RecordEnvelope<byte[]> record = kSSE.readRecordEnvelope();
      Assert.assertEquals(record.getRecord(), record_1);
      // write a second record.
      producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_2));
      producer.flush();
      // read the second record using the same extractor to verify it matches what is expected
      record = kSSE.readRecordEnvelope();
      Assert.assertEquals(record.getRecord(), record_2);
      // Commit the watermark
      committedWatermarks.put(record.getWatermark().getSource(), record.getWatermark());
      // write a third record.
      producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_3));
      producer.flush();
      // create a second extractor to force a seek.
      KafkaSimpleStreamingExtractor<String, byte[]> restartedExtractor = getStreamingExtractor(topic);
      try {
        restartedExtractor.start(mockWatermarkStorage);
        record = restartedExtractor.readRecordEnvelope();
        // check it matches the data written
        Assert.assertEquals(record.getRecord(), record_3);
      } finally {
        restartedExtractor.close();
      }
    } finally {
      // The first extractor stays open while the second one runs, as before; it is only released here.
      kSSE.close();
    }
  } finally {
    producer.close();
  }
}
use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testNoPreviousWatermarkWorkunits.
/**
 * Builds a {@link PartitionLevelWatermarker} from two previous work unit states - one flagged through
 * {@code IS_WATERMARK_WORKUNIT_KEY}, one not - and asserts that exactly one previous watermark entry is
 * reported, stored under "test_dataset_urn" with the partition map that was set on the flagged state.
 */
@Test
public void testNoPreviousWatermarkWorkunits() throws Exception {
  // Previous state flagged as a watermark workunit, carrying a multi-key watermark
  WorkUnitState flaggedState = new WorkUnitState();
  flaggedState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn");
  flaggedState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  flaggedState.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L)));

  // Previous state without the flag, carrying a plain long watermark
  WorkUnitState unflaggedState = new WorkUnitState();
  unflaggedState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn2");
  unflaggedState.setActualHighWatermark(new LongWatermark(101L));

  PartitionLevelWatermarker watermarker =
      new PartitionLevelWatermarker(new SourceState(new State(), Lists.newArrayList(flaggedState, unflaggedState)));

  Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 1);
  Assert.assertEquals(watermarker.getPreviousWatermarks().get("test_dataset_urn"), ImmutableMap.of("2015", 100L));
}
use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class TableLevelWatermarkerTest method testPartitionWatermarks.
/**
 * Checks that the table itself and each of its partitions report the same previous high watermark,
 * namely the one recorded on the table's previous work unit state.
 */
@Test
public void testPartitionWatermarks() throws Exception {
  WorkUnitState tableState = new WorkUnitState();
  tableState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
  tableState.setActualHighWatermark(new LongWatermark(100L));

  TableLevelWatermarker watermarker =
      new TableLevelWatermarker(new SourceState(new State(), Lists.newArrayList(tableState)));

  Table table = mockTable("test_table");
  // Separate instance from the one stored above, so the assertions rely on value equality
  LongWatermark expected = new LongWatermark(100L);

  Assert.assertEquals(watermarker.getPreviousHighWatermark(table), expected);
  Assert.assertEquals(watermarker.getPreviousHighWatermark(mockPartition(table, ImmutableList.of("2015"))), expected);
  Assert.assertEquals(watermarker.getPreviousHighWatermark(mockPartition(table, ImmutableList.of("2016"))), expected);
}
use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class BackfillHiveSourceTest method testWhitelist.
/**
 * Configures a {@link BackfillHiveSource} with a two-entry partition whitelist and asserts that
 * {@code shouldCreateWorkunit} accepts the two partitions whose complete names are listed and rejects a
 * partition whose complete name is not.
 */
@Test
public void testWhitelist() throws Exception {
  SourceState sourceState = new SourceState();
  sourceState.setProp(BackfillHiveSource.BACKFILL_SOURCE_PARTITION_WHITELIST_KEY, "service@logEvent@datepartition=2016-08-04-00,service@logEvent@datepartition=2016-08-05-00");
  BackfillHiveSource source = new BackfillHiveSource();
  source.initBackfillHiveSource(sourceState);

  // Two partitions named exactly as in the whitelist
  Partition listedFirst = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(listedFirst.getCompleteName()).thenReturn("service@logEvent@datepartition=2016-08-04-00");
  Partition listedSecond = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(listedSecond.getCompleteName()).thenReturn("service@logEvent@datepartition=2016-08-05-00");

  // One partition whose name does not appear in the whitelist
  Partition unlisted = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(unlisted.getCompleteName()).thenReturn("service@logEvent@datepartition=2016-08-06-00");

  Assert.assertTrue(source.shouldCreateWorkunit(listedFirst, new LongWatermark(0)));
  Assert.assertTrue(source.shouldCreateWorkunit(listedSecond, new LongWatermark(0)));
  Assert.assertFalse(source.shouldCreateWorkunit(unlisted, new LongWatermark(0)));
}
Aggregations