Example usage of org.apache.gobblin.configuration.WorkUnitState in the Apache incubator-gobblin project: class KafkaDeserializerExtractorTest, method testBuiltInConfluentAvroDeserializer.
@Test
public void testBuiltInConfluentAvroDeserializer() throws ReflectiveOperationException {
  // Select the built-in CONFLUENT_AVRO deserializer via the work-unit property.
  WorkUnitState state = getMockWorkUnitState();
  state.setProp(KafkaDeserializerExtractor.KAFKA_DESERIALIZER_TYPE,
      KafkaDeserializerExtractor.Deserializers.CONFLUENT_AVRO.name());

  // Override getSchema() so constructing the extractor needs no live schema registry.
  KafkaDeserializerExtractor extractor = new KafkaDeserializerExtractor(state) {
    @Override
    public Object getSchema() {
      return SchemaBuilder.record(TEST_RECORD_NAME)
          .namespace(TEST_NAMESPACE)
          .fields()
          .name(TEST_FIELD_NAME).type().stringType().noDefault()
          .endRecord();
    }
  };

  // The extractor must resolve both the deserializer and the schema-registry
  // classes declared by the CONFLUENT_AVRO enum entry.
  Assert.assertEquals(extractor.getKafkaDeserializer().getClass(),
      KafkaDeserializerExtractor.Deserializers.CONFLUENT_AVRO.getDeserializerClass());
  Assert.assertEquals(extractor.getKafkaSchemaRegistry().getClass(),
      KafkaDeserializerExtractor.Deserializers.CONFLUENT_AVRO.getSchemaRegistryClass());
}
Example usage of org.apache.gobblin.configuration.WorkUnitState in the Apache incubator-gobblin project: class KafkaDeserializerExtractorTest, method testConfluentJsonDeserializer.
@Test
public void testConfluentJsonDeserializer() throws IOException {
  WorkUnitState state = getMockWorkUnitState();
  // Tell the Confluent JSON (de)serializer which POJO type to bind to.
  state.setProp("json.value.type", KafkaRecord.class.getName());

  // Round-trip a record: serialize it the way a producer would...
  KafkaRecord expected = new KafkaRecord("Hello World");
  Serializer<KafkaRecord> jsonSerializer = new KafkaJsonSerializer<>();
  jsonSerializer.configure(PropertiesUtils.propsToStringKeyMap(state.getProperties()), false);
  ByteBuffer payload = ByteBuffer.wrap(jsonSerializer.serialize(TEST_TOPIC_NAME, expected));

  // ...then hand a matching deserializer to the extractor.
  Deserializer<KafkaRecord> jsonDeserializer = new KafkaJsonDeserializer<>();
  jsonDeserializer.configure(PropertiesUtils.propsToStringKeyMap(state.getProperties()), false);
  KafkaSchemaRegistry<?, ?> registry = mock(KafkaSchemaRegistry.class);
  KafkaDeserializerExtractor extractor = new KafkaDeserializerExtractor(
      state, Optional.fromNullable(Deserializers.CONFLUENT_JSON), jsonDeserializer, registry);

  // Decoding the serialized bytes must reproduce the original record.
  ByteArrayBasedKafkaRecord message = getMockMessageAndOffset(payload);
  Assert.assertEquals(extractor.decodeRecord(message), expected);
}
Example usage of org.apache.gobblin.configuration.WorkUnitState in the Apache incubator-gobblin project: class Kafka09JsonIntegrationTest, method testHappyPath.
@Test
public void testHappyPath() throws IOException, DataRecordException {
  String topic = "testKafka09JsonSource";
  kafkaTestHelper.provisionTopic(topic);
  SourceState state = createSourceState(topic);

  // Produce a single JSON record to the freshly provisioned topic.
  state.setProp(KAFKA_PRODUCER_CONFIG_PREFIX + "bootstrap.servers",
      "localhost:" + kafkaTestHelper.getKafkaServerPort());
  state.setProp(KAFKA_TOPIC, topic);
  Destination destination = Destination.of(Destination.DestinationType.KAFKA, state);
  Kafka09JsonObjectWriterBuilder writerBuilder = new Kafka09JsonObjectWriterBuilder();
  writerBuilder.writeTo(destination);
  final String json = "{\"number\":27}";
  JsonObject record = gson.fromJson(json, JsonObject.class);
  DataWriter<JsonObject> writer = writerBuilder.build();
  try {
    writer.write(record);
    writer.flush();
  } finally {
    // Always release the underlying Kafka producer, even if write/flush fails;
    // the original code leaked it on any exception before close().
    writer.close();
  }

  Kafka09JsonSource source = new Kafka09JsonSource();
  List<WorkUnit> workUnitList = source.getWorkunits(state);

  // The source must have installed its Gson-based value deserializer.
  Assert.assertEquals(state.getProp(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY),
      Kafka09JsonSource.KafkaGsonDeserializer.class.getName());

  // Exactly one non-empty work unit, targeting our topic.
  MultiWorkUnitUnpackingIterator iterator = new MultiWorkUnitUnpackingIterator(workUnitList.iterator());
  Assert.assertTrue(iterator.hasNext());
  WorkUnit workUnit = iterator.next();
  Assert.assertEquals(workUnit.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY), topic);
  Assert.assertFalse(iterator.hasNext());

  // The extractor must report the configured schema and read back the record we produced.
  WorkUnitState workUnitState = new WorkUnitState(workUnit, state);
  final String jsonSchema =
      "[{\"columnName\":\"number\",\"comment\":\"\",\"isNullable\":\"false\",\"dataType\":{\"type\":\"int\"}}]";
  workUnitState.setProp("source.kafka.json.schema", jsonSchema);
  Extractor<JsonArray, JsonObject> extractor = source.getExtractor(workUnitState);
  Assert.assertEquals(extractor.getSchema().toString(), jsonSchema);
  Assert.assertEquals(extractor.readRecord(null).toString(), json);
}
Example usage of org.apache.gobblin.configuration.WorkUnitState in the Apache incubator-gobblin project: class GoogleWebmasterDataFetcherImplTest, method testGetAllPagesWhenDataSizeLessThan5000AndRequestAll.
@Test
public void testGetAllPagesWhenDataSizeLessThan5000AndRequestAll() throws Exception {
  GoogleWebmasterClient webmasterClient = Mockito.mock(GoogleWebmasterClient.class);

  // Ten fake page identifiers, fewer than the 5000-row request limit.
  List<String> expectedPages = new ArrayList<>();
  for (int i = 0; i < 10; ++i) {
    expectedPages.add(Integer.toString(i));
  }
  Mockito.when(webmasterClient.getPages(eq(_property), any(String.class), any(String.class),
      eq("ALL"), any(Integer.class), any(List.class), any(List.class), eq(0)))
      .thenReturn(expectedPages);

  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setProp(GoogleWebMasterSource.KEY_PROPERTY, _property);
  GoogleWebmasterDataFetcher fetcher =
      new GoogleWebmasterDataFetcherImpl(_property, webmasterClient, workUnitState);

  // Ask for everything; since the data fits under the limit, all pages come back.
  Collection<ProducerJob> jobs = fetcher.getAllPages(null, null, "ALL", 5000);
  List<String> actualPages = new ArrayList<>();
  for (ProducerJob job : jobs) {
    actualPages.add(job.getPage());
  }
  Assert.assertTrue(CollectionUtils.isEqualCollection(actualPages, expectedPages));

  // The fetcher probes the API twice for this code path.
  Mockito.verify(webmasterClient, Mockito.times(2)).getPages(eq(_property), any(String.class),
      any(String.class), eq("ALL"), any(Integer.class), any(List.class), any(List.class), eq(0));
}
Example usage of org.apache.gobblin.configuration.WorkUnitState in the Apache incubator-gobblin project: class GoogleWebmasterExtractorIteratorTest, method testIterator.
/**
 * Test the GoogleWebmasterExtractorIterator to make sure that it first gets all pages based on the filters
 * and then for each page, it asks for the queries.
 * @throws IOException
 */
@Test
public void testIterator() throws IOException {
  GoogleWebmasterDataFetcher client = Mockito.mock(GoogleWebmasterDataFetcher.class);
  String country = "USA";
  String date = "2016-11-01";
  // Program to the interface; these are passed straight through to the iterator.
  List<GoogleWebmasterFilter.Dimension> requestedDimensions = new ArrayList<>();
  List<GoogleWebmasterDataFetcher.Metric> requestedMetrics = new ArrayList<>();

  // Two producer jobs, one per page, returned by the "get all pages" phase.
  ArrayDeque<ProducerJob> allJobs = new ArrayDeque<>();
  String page1 = siteProperty + "a/1";
  String page2 = siteProperty + "b/1";
  allJobs.add(new SimpleProducerJob(page1, date, date));
  allJobs.add(new SimpleProducerJob(page2, date, date));
  // Consistently use the named API_ROW_LIMIT constant instead of mixing it with
  // the magic literal 5000 (the original interleaved both; they must be equal
  // for the stubs and verifications to line up).
  Mockito.when(client.getAllPages(eq(date), eq(date), eq(country),
      eq(GoogleWebmasterClient.API_ROW_LIMIT))).thenReturn(allJobs);

  // Stub the per-page query for page1.
  String[] a1 = { "r1-c1", "r1-c2" };
  List<String[]> results1 = new ArrayList<>();
  results1.add(a1);
  List<ApiDimensionFilter> filters1 = new ArrayList<>();
  filters1.add(GoogleWebmasterFilter.countryEqFilter(country));
  filters1.add(GoogleWebmasterFilter.pageFilter(GoogleWebmasterFilter.FilterOperator.EQUALS, page1));
  Mockito.when(client.performSearchAnalyticsQuery(eq(date), eq(date),
      eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics),
      argThat(new CollectionEquals(filters1)))).thenReturn(results1);

  // Stub the per-page query for page2.
  String[] a2 = { "r2-c1", "r2-c2" };
  List<String[]> results2 = new ArrayList<>();
  results2.add(a2);
  List<ApiDimensionFilter> filters2 = new ArrayList<>();
  filters2.add(GoogleWebmasterFilter.countryEqFilter(country));
  filters2.add(GoogleWebmasterFilter.pageFilter(GoogleWebmasterFilter.FilterOperator.EQUALS, page2));
  Mockito.when(client.performSearchAnalyticsQuery(eq(date), eq(date),
      eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics),
      argThat(new CollectionEquals(filters2)))).thenReturn(results2);

  Map<GoogleWebmasterFilter.Dimension, ApiDimensionFilter> map = new HashMap<>();
  map.put(GoogleWebmasterFilter.Dimension.COUNTRY, GoogleWebmasterFilter.countryEqFilter(country));
  WorkUnitState defaultState = GoogleWebmasterExtractorTest.getWorkUnitState1();
  defaultState.setProp(GoogleWebMasterSource.KEY_QUERIES_TUNING_BATCH_SIZE, 1);
  GoogleWebmasterExtractorIterator iterator = new GoogleWebmasterExtractorIterator(
      client, date, date, requestedDimensions, requestedMetrics, map, defaultState);

  // Exactly two rows come out — one per page — in either order.
  List<String[]> response = new ArrayList<>();
  response.add(iterator.next());
  response.add(iterator.next());
  Assert.assertFalse(iterator.hasNext());
  Assert.assertTrue(response.contains(a1));
  Assert.assertTrue(response.contains(a2));

  // Pages are fetched once, then each page is queried exactly once.
  Mockito.verify(client, Mockito.times(1)).getAllPages(eq(date), eq(date), eq(country),
      eq(GoogleWebmasterClient.API_ROW_LIMIT));
  Mockito.verify(client, Mockito.times(1)).performSearchAnalyticsQuery(eq(date), eq(date),
      eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics),
      argThat(new CollectionEquals(filters1)));
  Mockito.verify(client, Mockito.times(1)).performSearchAnalyticsQuery(eq(date), eq(date),
      eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics),
      argThat(new CollectionEquals(filters2)));
}
Aggregations