Search in sources :

Example 26 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.

In the class KafkaDeserializerExtractorTest, method testBuiltInConfluentAvroDeserializer:

/**
 * Selects the built-in {@code CONFLUENT_AVRO} deserializer type through the work unit state and
 * asserts that the extractor's Kafka deserializer and schema registry have exactly the classes
 * declared by that enum constant.
 */
@Test
public void testBuiltInConfluentAvroDeserializer() throws ReflectiveOperationException {
    KafkaDeserializerExtractor.Deserializers confluentAvro = KafkaDeserializerExtractor.Deserializers.CONFLUENT_AVRO;

    WorkUnitState state = getMockWorkUnitState();
    state.setProp(KafkaDeserializerExtractor.KAFKA_DESERIALIZER_TYPE, confluentAvro.name());

    // Anonymous subclass whose schema is a record with a single string field.
    KafkaDeserializerExtractor extractor = new KafkaDeserializerExtractor(state) {

        @Override
        public Object getSchema() {
            return SchemaBuilder.record(TEST_RECORD_NAME)
                .namespace(TEST_NAMESPACE)
                .fields()
                .name(TEST_FIELD_NAME).type().stringType().noDefault()
                .endRecord();
        }
    };

    Class<?> deserializerClass = extractor.getKafkaDeserializer().getClass();
    Assert.assertEquals(deserializerClass, confluentAvro.getDeserializerClass());

    Class<?> schemaRegistryClass = extractor.getKafkaSchemaRegistry().getClass();
    Assert.assertEquals(schemaRegistryClass, confluentAvro.getSchemaRegistryClass());
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)

Example 27 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.

In the class KafkaDeserializerExtractorTest, method testConfluentJsonDeserializer:

/**
 * Serializes a {@code KafkaRecord} with {@code KafkaJsonSerializer}, feeds the bytes to a
 * {@code KafkaDeserializerExtractor} built around a {@code KafkaJsonDeserializer}, and asserts
 * that the decoded record equals the record that was serialized.
 */
@Test
public void testConfluentJsonDeserializer() throws IOException {
    WorkUnitState state = getMockWorkUnitState();
    // The serializer and the deserializer below are both configured from these properties.
    state.setProp("json.value.type", KafkaRecord.class.getName());

    KafkaRecord expectedRecord = new KafkaRecord("Hello World");

    Serializer<KafkaRecord> jsonSerializer = new KafkaJsonSerializer<>();
    jsonSerializer.configure(PropertiesUtils.propsToStringKeyMap(state.getProperties()), false);

    Deserializer<KafkaRecord> jsonDeserializer = new KafkaJsonDeserializer<>();
    jsonDeserializer.configure(PropertiesUtils.propsToStringKeyMap(state.getProperties()), false);

    byte[] serializedRecord = jsonSerializer.serialize(TEST_TOPIC_NAME, expectedRecord);
    ByteBuffer payload = ByteBuffer.wrap(serializedRecord);

    // The schema registry is only a mock; the extractor is handed the JSON deserializer directly.
    KafkaSchemaRegistry<?, ?> schemaRegistry = mock(KafkaSchemaRegistry.class);
    KafkaDeserializerExtractor extractor = new KafkaDeserializerExtractor(
        state, Optional.fromNullable(Deserializers.CONFLUENT_JSON), jsonDeserializer, schemaRegistry);

    ByteArrayBasedKafkaRecord kafkaMessage = getMockMessageAndOffset(payload);
    Assert.assertEquals(extractor.decodeRecord(kafkaMessage), expectedRecord);
}
Also used : KafkaJsonDeserializer(io.confluent.kafka.serializers.KafkaJsonDeserializer) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) ByteArrayBasedKafkaRecord(org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord) KafkaJsonSerializer(io.confluent.kafka.serializers.KafkaJsonSerializer) ByteBuffer(java.nio.ByteBuffer) ByteArrayBasedKafkaRecord(org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord) Test(org.testng.annotations.Test)

Example 28 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.

In the class Kafka09JsonIntegrationTest, method testHappyPath:

/**
 * Writes one JSON record to a freshly provisioned Kafka topic, then reads it back through
 * {@code Kafka09JsonSource}: checks the value deserializer configured on the state, that exactly
 * one work unit is produced for the topic, and that the extractor reports the configured schema
 * and returns the record that was written.
 *
 * @throws IOException if writing to Kafka, closing the writer, or reading from the extractor fails
 * @throws DataRecordException if the extractor cannot read the record
 */
@Test
public void testHappyPath() throws IOException, DataRecordException {
    String topic = "testKafka09JsonSource";
    kafkaTestHelper.provisionTopic(topic);
    SourceState state = createSourceState(topic);
    // Produce a record
    state.setProp(KAFKA_PRODUCER_CONFIG_PREFIX + "bootstrap.servers", "localhost:" + kafkaTestHelper.getKafkaServerPort());
    state.setProp(KAFKA_TOPIC, topic);
    Destination destination = Destination.of(Destination.DestinationType.KAFKA, state);
    Kafka09JsonObjectWriterBuilder writerBuilder = new Kafka09JsonObjectWriterBuilder();
    writerBuilder.writeTo(destination);
    final String json = "{\"number\":27}";
    // try-with-resources guarantees the writer is closed even if write() or flush() throws;
    // on the success path the call order (write, flush, close) is the same as before.
    try (DataWriter<JsonObject> writer = writerBuilder.build()) {
        JsonObject record = gson.fromJson(json, JsonObject.class);
        writer.write(record);
        writer.flush();
    }
    Kafka09JsonSource source = new Kafka09JsonSource();
    List<WorkUnit> workUnitList = source.getWorkunits(state);
    // Test the right value serializer is set
    Assert.assertEquals(state.getProp(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY), Kafka09JsonSource.KafkaGsonDeserializer.class.getName());
    // Test there is only one non-empty work unit
    MultiWorkUnitUnpackingIterator iterator = new MultiWorkUnitUnpackingIterator(workUnitList.iterator());
    Assert.assertTrue(iterator.hasNext());
    WorkUnit workUnit = iterator.next();
    Assert.assertEquals(workUnit.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY), topic);
    Assert.assertFalse(iterator.hasNext());
    // Test extractor
    WorkUnitState workUnitState = new WorkUnitState(workUnit, state);
    final String jsonSchema = "[{\"columnName\":\"number\",\"comment\":\"\",\"isNullable\":\"false\",\"dataType\":{\"type\":\"int\"}}]";
    workUnitState.setProp("source.kafka.json.schema", jsonSchema);
    // NOTE(review): the extractor is never closed here; confirm whether it holds resources that need releasing.
    Extractor<JsonArray, JsonObject> extractor = source.getExtractor(workUnitState);
    Assert.assertEquals(extractor.getSchema().toString(), jsonSchema);
    Assert.assertEquals(extractor.readRecord(null).toString(), json);
}
Also used : Destination(org.apache.gobblin.writer.Destination) SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) JsonObject(com.google.gson.JsonObject) JsonArray(com.google.gson.JsonArray) Kafka09JsonSource(org.apache.gobblin.source.extractor.extract.kafka.Kafka09JsonSource) MultiWorkUnitUnpackingIterator(org.apache.gobblin.runtime.util.MultiWorkUnitUnpackingIterator) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Kafka09JsonObjectWriterBuilder(org.apache.gobblin.kafka.writer.Kafka09JsonObjectWriterBuilder) Test(org.testng.annotations.Test)

Example 29 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.

In the class GoogleWebmasterDataFetcherImplTest, method testGetAllPagesWhenDataSizeLessThan5000AndRequestAll:

/**
 * Stubs the webmaster client to return ten pages for country {@code "ALL"}, asks the data fetcher
 * for all pages with a row limit of 5000, and asserts that the returned jobs carry exactly those
 * pages and that {@code getPages} was invoked twice on the client.
 */
@Test
public void testGetAllPagesWhenDataSizeLessThan5000AndRequestAll() throws Exception {
    // Page names "0" through "9" that the mocked client hands back.
    List<String> expectedPages = new ArrayList<>();
    int pageNumber = 0;
    while (pageNumber < 10) {
        expectedPages.add(String.valueOf(pageNumber));
        pageNumber++;
    }

    GoogleWebmasterClient webmasterClient = Mockito.mock(GoogleWebmasterClient.class);
    Mockito.when(webmasterClient.getPages(eq(_property), any(String.class), any(String.class), eq("ALL"),
        any(Integer.class), any(List.class), any(List.class), eq(0))).thenReturn(expectedPages);

    WorkUnitState state = new WorkUnitState();
    state.setProp(GoogleWebMasterSource.KEY_PROPERTY, _property);
    GoogleWebmasterDataFetcher fetcher = new GoogleWebmasterDataFetcherImpl(_property, webmasterClient, state);

    List<String> actualPages = new ArrayList<>();
    for (ProducerJob job : fetcher.getAllPages(null, null, "ALL", 5000)) {
        actualPages.add(job.getPage());
    }

    // Order-insensitive comparison of the fetched pages against the stubbed ones.
    Assert.assertTrue(CollectionUtils.isEqualCollection(actualPages, expectedPages));
    Mockito.verify(webmasterClient, Mockito.times(2)).getPages(eq(_property), any(String.class), any(String.class),
        eq("ALL"), any(Integer.class), any(List.class), any(List.class), eq(0));
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.testng.annotations.Test)

Example 30 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.

In the class GoogleWebmasterExtractorIteratorTest, method testIterator:

/**
 * Tests the GoogleWebmasterExtractorIterator to make sure that it first gets all pages based on the
 * filters and then, for each page, asks for the queries.
 *
 * <p>The data fetcher is a Mockito mock: {@code getAllPages} is stubbed to return two page jobs, and
 * {@code performSearchAnalyticsQuery} is stubbed once per page (matched by its country + page filter
 * list) to return one distinct row each. The test then drains the iterator and verifies that every
 * stubbed call happened exactly once.
 *
 * @throws IOException propagated from the calls made in this test; the data fetcher itself is a mock
 */
@Test
public void testIterator() throws IOException {
    GoogleWebmasterDataFetcher client = Mockito.mock(GoogleWebmasterDataFetcher.class);
    String country = "USA";
    String date = "2016-11-01";
    ArrayList<GoogleWebmasterFilter.Dimension> requestedDimensions = new ArrayList<>();
    ArrayList<GoogleWebmasterDataFetcher.Metric> requestedMetrics = new ArrayList<>();
    ArrayDeque<ProducerJob> allJobs = new ArrayDeque<>();
    String page1 = siteProperty + "a/1";
    String page2 = siteProperty + "b/1";
    allJobs.add(new SimpleProducerJob(page1, date, date));
    allJobs.add(new SimpleProducerJob(page2, date, date));
    Mockito.when(client.getAllPages(eq(date), eq(date), eq(country), eq(GoogleWebmasterClient.API_ROW_LIMIT))).thenReturn(allJobs);
    // Set performSearchAnalyticsQuery Mock1
    String[] a1 = { "r1-c1", "r1-c2" };
    List<String[]> results1 = new ArrayList<>();
    results1.add(a1);
    List<ApiDimensionFilter> filters1 = new ArrayList<>();
    filters1.add(GoogleWebmasterFilter.countryEqFilter(country));
    filters1.add(GoogleWebmasterFilter.pageFilter(GoogleWebmasterFilter.FilterOperator.EQUALS, page1));
    Mockito.when(client.performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters1)))).thenReturn(results1);
    // Set performSearchAnalyticsQuery Mock2
    String[] a2 = { "r2-c1", "r2-c2" };
    List<String[]> results2 = new ArrayList<>();
    results2.add(a2);
    List<ApiDimensionFilter> filters2 = new ArrayList<>();
    filters2.add(GoogleWebmasterFilter.countryEqFilter(country));
    filters2.add(GoogleWebmasterFilter.pageFilter(GoogleWebmasterFilter.FilterOperator.EQUALS, page2));
    // Same row-limit constant as Mock1, so both stubs and all verifications agree on one value.
    Mockito.when(client.performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters2)))).thenReturn(results2);
    Map<GoogleWebmasterFilter.Dimension, ApiDimensionFilter> map = new HashMap<>();
    map.put(GoogleWebmasterFilter.Dimension.COUNTRY, GoogleWebmasterFilter.countryEqFilter(country));
    WorkUnitState defaultState = GoogleWebmasterExtractorTest.getWorkUnitState1();
    defaultState.setProp(GoogleWebMasterSource.KEY_QUERIES_TUNING_BATCH_SIZE, 1);
    GoogleWebmasterExtractorIterator iterator = new GoogleWebmasterExtractorIterator(client, date, date, requestedDimensions, requestedMetrics, map, defaultState);
    List<String[]> response = new ArrayList<>();
    response.add(iterator.next());
    response.add(iterator.next());
    Assert.assertFalse(iterator.hasNext());
    // List.contains compares arrays by reference, so these pass only when the iterator hands back
    // the very String[] instances stubbed above; the order of the two rows is not checked.
    Assert.assertTrue(response.contains(a1));
    Assert.assertTrue(response.contains(a2));
    Mockito.verify(client, Mockito.times(1)).getAllPages(eq(date), eq(date), eq(country), eq(GoogleWebmasterClient.API_ROW_LIMIT));
    Mockito.verify(client, Mockito.times(1)).performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters1)));
    Mockito.verify(client, Mockito.times(1)).performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters2)));
}
Also used : ApiDimensionFilter(com.google.api.services.webmasters.model.ApiDimensionFilter) HashMap(java.util.HashMap) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) ArrayList(java.util.ArrayList) ArrayDeque(java.util.ArrayDeque) Test(org.testng.annotations.Test)

Aggregations

WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 222; Test (org.testng.annotations.Test): 143; State (org.apache.gobblin.configuration.State): 48; SourceState (org.apache.gobblin.configuration.SourceState): 39; WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 39; Schema (org.apache.avro.Schema): 29; Path (org.apache.hadoop.fs.Path): 26; GenericRecord (org.apache.avro.generic.GenericRecord): 19; JsonObject (com.google.gson.JsonObject): 17; ArrayList (java.util.ArrayList): 16; File (java.io.File): 14; TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState): 12; List (java.util.List): 11; Configuration (org.apache.hadoop.conf.Configuration): 11; IOException (java.io.IOException): 10; LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark): 10; Extract (org.apache.gobblin.source.workunit.Extract): 10; FileSystem (org.apache.hadoop.fs.FileSystem): 10; Closer (com.google.common.io.Closer): 8; JsonParser (com.google.gson.JsonParser): 8