use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class KafkaSimpleStreamingTest method getWorkUnits.
/**
 * Asks a {@link KafkaSimpleStreamingSource} for the work units of a single topic,
 * using the Kafka server exposed by the test helper on localhost.
 *
 * @param topic topic name; it is used both as the topic whitelist and as the job name
 * @return whatever {@code getWorkunits} of the streaming source returns for the assembled state
 */
private List<WorkUnit> getWorkUnits(String topic) {
  SourceState sourceState = new SourceState();

  // Point the source at the locally running test broker.
  String brokerAddress = "localhost:" + _kafkaTestHelper.getKafkaServerPort();
  sourceState.setProp(ConfigurationKeys.KAFKA_BROKERS, brokerAddress);

  // Restrict the source to the requested topic and name the job after it.
  sourceState.setProp(KafkaSimpleStreamingSource.TOPIC_WHITELIST, topic);
  sourceState.setProp(ConfigurationKeys.JOB_NAME_KEY, topic);

  // Keys are read as strings, values as raw bytes.
  sourceState.setProp(
      KafkaSimpleStreamingSource.TOPIC_KEY_DESERIALIZER,
      "org.apache.kafka.common.serialization.StringDeserializer");
  sourceState.setProp(
      KafkaSimpleStreamingSource.TOPIC_VALUE_DESERIALIZER,
      "org.apache.kafka.common.serialization.ByteArrayDeserializer");

  return new KafkaSimpleStreamingSource<String, byte[]>().getWorkunits(sourceState);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class Kafka09JsonIntegrationTest method testHappyPath.
/**
 * Writes one JSON record to a freshly provisioned topic and reads it back through
 * {@link Kafka09JsonSource}.
 *
 * <p>The test checks, in order: that the source sets the Gson value deserializer on the
 * state, that exactly one work unit is produced and carries the topic as its extract table
 * name, and that the extractor returns the configured schema and the record that was written.
 *
 * @throws IOException if building, flushing or closing the writer, or the extractor, fails
 * @throws DataRecordException if the extractor cannot read the record
 */
@Test
public void testHappyPath() throws IOException, DataRecordException {
  String topic = "testKafka09JsonSource";
  kafkaTestHelper.provisionTopic(topic);
  SourceState state = createSourceState(topic);

  // Produce a record
  state.setProp(KAFKA_PRODUCER_CONFIG_PREFIX + "bootstrap.servers", "localhost:" + kafkaTestHelper.getKafkaServerPort());
  state.setProp(KAFKA_TOPIC, topic);
  Destination destination = Destination.of(Destination.DestinationType.KAFKA, state);
  Kafka09JsonObjectWriterBuilder writerBuilder = new Kafka09JsonObjectWriterBuilder();
  writerBuilder.writeTo(destination);
  final String json = "{\"number\":27}";
  JsonObject record = gson.fromJson(json, JsonObject.class);
  // try-with-resources guarantees the writer is closed even when write() or flush() throws,
  // so a failing test run does not leave the writer open.
  try (DataWriter<JsonObject> writer = writerBuilder.build()) {
    writer.write(record);
    writer.flush();
  }

  Kafka09JsonSource source = new Kafka09JsonSource();
  List<WorkUnit> workUnitList = source.getWorkunits(state);

  // Test the right value serializer is set
  Assert.assertEquals(state.getProp(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY), Kafka09JsonSource.KafkaGsonDeserializer.class.getName());

  // Test there is only one non-empty work unit
  MultiWorkUnitUnpackingIterator iterator = new MultiWorkUnitUnpackingIterator(workUnitList.iterator());
  Assert.assertTrue(iterator.hasNext());
  WorkUnit workUnit = iterator.next();
  Assert.assertEquals(workUnit.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY), topic);
  Assert.assertFalse(iterator.hasNext());

  // Test extractor
  WorkUnitState workUnitState = new WorkUnitState(workUnit, state);
  final String jsonSchema = "[{\"columnName\":\"number\",\"comment\":\"\",\"isNullable\":\"false\",\"dataType\":{\"type\":\"int\"}}]";
  workUnitState.setProp("source.kafka.json.schema", jsonSchema);
  // NOTE(review): the extractor is never closed here; confirm whether it should be
  // closed at the end of the test as well.
  Extractor<JsonArray, JsonObject> extractor = source.getExtractor(workUnitState);
  Assert.assertEquals(extractor.getSchema().toString(), jsonSchema);
  Assert.assertEquals(extractor.readRecord(null).toString(), json);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testNoPreviousWatermarkWorkunits.
/**
 * Feeds the watermarker two previous work unit states, only one of which has
 * {@code IS_WATERMARK_WORKUNIT_KEY} set, and expects a single previous watermark entry:
 * the one keyed by the flagged state's dataset URN.
 */
@Test
public void testNoPreviousWatermarkWorkunits() throws Exception {
  // Previous state explicitly flagged as a watermark workunit.
  WorkUnitState flaggedState = new WorkUnitState();
  flaggedState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn");
  flaggedState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  flaggedState.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L)));

  // Previous state that leaves the watermark-workunit flag unset.
  WorkUnitState unflaggedState = new WorkUnitState();
  unflaggedState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn2");
  unflaggedState.setActualHighWatermark(new LongWatermark(101L));

  SourceState sourceState =
      new SourceState(new State(), Lists.newArrayList(flaggedState, unflaggedState));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(sourceState);

  // Only the flagged state is expected to show up.
  Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 1);
  Assert.assertEquals(
      watermarker.getPreviousWatermarks().get("test_dataset_urn"),
      ImmutableMap.of("2015", 100L));
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testStateStoreReadWrite.
/**
 * Round-trips a partition watermark between two watermarkers.
 *
 * <p>The first watermarker processes one table and one partition, emits its work units and
 * records the actual high watermark on a state built from the first of them. A second
 * watermarker created from that state is then expected to report the partition's watermark
 * under the {@code <db>@table1} key.
 */
@Test
public void testStateStoreReadWrite() throws Exception {
  String dbName = "testStateStoreReadWrite";
  LocalHiveMetastoreTestUtils.getInstance().dropDatabaseIfExists(dbName);

  // First pass: a watermarker with no prior state processes one table and one partition.
  PartitionLevelWatermarker writerSide = new PartitionLevelWatermarker(new SourceState());
  Table table = localTestTable(dbName, "table1", true);
  writerSide.onTableProcessBegin(table, 0L);
  long now = new DateTime().getMillis();
  writerSide.onPartitionProcessBegin(localTestPartition(table, ImmutableList.of("2016")), 0, now);

  List<WorkUnit> emitted = Lists.newArrayList();
  writerSide.onGetWorkunitsEnd(emitted);

  // Wrap the first emitted work unit in a state and let the watermarker stamp it.
  @SuppressWarnings("deprecation")
  WorkUnitState carriedState = new WorkUnitState(emitted.get(0));
  writerSide.setActualHighWatermark(carriedState);

  // Second pass: a new watermarker is built from the state produced above.
  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(carriedState));
  PartitionLevelWatermarker readerSide = new PartitionLevelWatermarker(sourceState);

  Assert.assertEquals(readerSide.getPreviousWatermarks().size(), 1);
  Assert.assertEquals(
      readerSide.getPreviousWatermarks().get(dbName + "@table1"),
      ImmutableMap.of("2016", now));
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testDroppedPartitions.
/**
 * Starts from a previous watermark holding partitions {@code 2015-01} and {@code 2015-02},
 * then processes a partition {@code 2015} whose parameters declare that it replaces both.
 * The expected high watermarks for the dataset must then contain only {@code 2015}.
 */
@Test
public void testDroppedPartitions() throws Exception {
  // Previous watermark state with the two monthly partitions.
  WorkUnitState priorState = new WorkUnitState();
  priorState.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  priorState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  priorState.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2015-01", 100L, "2015-02", 101L)));

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(priorState));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(sourceState);

  // Mocked table with a single string partition key named "year".
  Table table = mockTable("test_dataset_urn");
  List<FieldSchema> partitionKeys = ImmutableList.of(new FieldSchema("year", "string", ""));
  Mockito.when(table.getPartitionKeys()).thenReturn(partitionKeys);

  // partition 2015 replaces 2015-01 and 2015-02
  Partition yearlyPartition = mockPartition(table, ImmutableList.of("2015"));
  Map<String, String> partitionParameters =
      ImmutableMap.of(AbstractAvroToOrcConverter.REPLACED_PARTITIONS_HIVE_METASTORE_KEY, "2015-01|2015-02");
  Mockito.when(yearlyPartition.getParameters()).thenReturn(partitionParameters);

  watermarker.onPartitionProcessBegin(yearlyPartition, 0L, 0L);

  Assert.assertEquals(
      watermarker.getExpectedHighWatermarks().get("db@test_dataset_urn"),
      ImmutableMap.of("2015", 0L));
}
Aggregations