Search in sources :

Example 1 with MultiWorkUnitUnpackingIterator

Use of org.apache.gobblin.runtime.util.MultiWorkUnitUnpackingIterator in the Apache incubator-gobblin project.

Class Kafka09JsonIntegrationTest, method testHappyPath:

/**
 * Round-trips one JSON record through Kafka: writes it with a
 * {@code Kafka09JsonObjectWriterBuilder}-built writer, then reads it back through
 * {@code Kafka09JsonSource} and checks the deserializer setting, the single
 * resulting work unit, the schema and the record contents.
 *
 * @throws IOException if writing to, or reading from, Kafka fails
 * @throws DataRecordException if the extractor fails to read the record
 */
@Test
public void testHappyPath() throws IOException, DataRecordException {
    String topic = "testKafka09JsonSource";
    kafkaTestHelper.provisionTopic(topic);
    SourceState state = createSourceState(topic);
    // Point the producer at the test broker and the target topic
    state.setProp(KAFKA_PRODUCER_CONFIG_PREFIX + "bootstrap.servers", "localhost:" + kafkaTestHelper.getKafkaServerPort());
    state.setProp(KAFKA_TOPIC, topic);

    // Produce a record
    Destination destination = Destination.of(Destination.DestinationType.KAFKA, state);
    Kafka09JsonObjectWriterBuilder writerBuilder = new Kafka09JsonObjectWriterBuilder();
    writerBuilder.writeTo(destination);
    final String json = "{\"number\":27}";
    // try-with-resources so the writer is closed even if write/flush throws
    try (DataWriter<JsonObject> writer = writerBuilder.build()) {
        JsonObject record = gson.fromJson(json, JsonObject.class);
        writer.write(record);
        writer.flush();
    }

    Kafka09JsonSource source = new Kafka09JsonSource();
    List<WorkUnit> workUnitList = source.getWorkunits(state);
    // Test the right value serializer is set
    Assert.assertEquals(state.getProp(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY), Kafka09JsonSource.KafkaGsonDeserializer.class.getName());

    // Test there is only one non-empty work unit
    MultiWorkUnitUnpackingIterator iterator = new MultiWorkUnitUnpackingIterator(workUnitList.iterator());
    Assert.assertTrue(iterator.hasNext());
    WorkUnit workUnit = iterator.next();
    Assert.assertEquals(workUnit.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY), topic);
    Assert.assertFalse(iterator.hasNext());

    // Test extractor
    WorkUnitState workUnitState = new WorkUnitState(workUnit, state);
    final String jsonSchema = "[{\"columnName\":\"number\",\"comment\":\"\",\"isNullable\":\"false\",\"dataType\":{\"type\":\"int\"}}]";
    workUnitState.setProp("source.kafka.json.schema", jsonSchema);
    // NOTE(review): relies on Extractor being Closeable so it is released after the
    // assertions; the original never closed it.
    try (Extractor<JsonArray, JsonObject> extractor = source.getExtractor(workUnitState)) {
        Assert.assertEquals(extractor.getSchema().toString(), jsonSchema);
        Assert.assertEquals(extractor.readRecord(null).toString(), json);
    }
}
Also used : Destination(org.apache.gobblin.writer.Destination) SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) JsonObject(com.google.gson.JsonObject) JsonArray(com.google.gson.JsonArray) Kafka09JsonSource(org.apache.gobblin.source.extractor.extract.kafka.Kafka09JsonSource) MultiWorkUnitUnpackingIterator(org.apache.gobblin.runtime.util.MultiWorkUnitUnpackingIterator) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Kafka09JsonObjectWriterBuilder(org.apache.gobblin.kafka.writer.Kafka09JsonObjectWriterBuilder) Test(org.testng.annotations.Test)

Example 2 with MultiWorkUnitUnpackingIterator

Use of org.apache.gobblin.runtime.util.MultiWorkUnitUnpackingIterator in the Apache incubator-gobblin project.

Class LocalJobLauncher, method runWorkUnitStream:

/**
 * Runs every work unit of the given stream through
 * {@code GobblinMultiTaskAttempt.runWorkUnits} with the IMMEDIATE commit policy.
 *
 * <p>Returns immediately (with a warning) when the stream has no work units.
 * Otherwise multi-work-units are unpacked, each work unit is given the job state's
 * properties via {@code addAllIfNotExist}, and the units are run. If the job is
 * still RUNNING afterwards it is marked SUCCESSFUL.
 *
 * @param workUnitStream the stream whose work units should be run
 * @throws Exception propagated from running the work units
 */
@Override
protected void runWorkUnitStream(WorkUnitStream workUnitStream) throws Exception {
    String jobId = this.jobContext.getJobId();
    final JobState jobState = this.jobContext.getJobState();
    Iterator<WorkUnit> workUnitIterator = workUnitStream.getWorkUnits();
    if (!workUnitIterator.hasNext()) {
        LOG.warn("No work units to run");
        return;
    }
    TimingEvent workUnitsRunTimer = this.eventSubmitter.getTimingEvent(TimingEvent.RunJobTimings.WORK_UNITS_RUN);
    // Reuse the iterator that was just checked for emptiness instead of asking the
    // stream for its work units a second time, so the check and the run are
    // guaranteed to operate on the same iterator.
    Iterator<WorkUnit> flattenedWorkUnits = new MultiWorkUnitUnpackingIterator(workUnitIterator);
    Iterator<WorkUnit> workUnitsWithJobState = Iterators.transform(flattenedWorkUnits, new Function<WorkUnit, WorkUnit>() {

        @Override
        public WorkUnit apply(WorkUnit workUnit) {
            // Fill in job-level properties the work unit does not already define
            workUnit.addAllIfNotExist(jobState);
            return workUnit;
        }
    });
    GobblinMultiTaskAttempt.runWorkUnits(this.jobContext, workUnitsWithJobState, this.taskStateTracker, this.taskExecutor, GobblinMultiTaskAttempt.CommitPolicy.IMMEDIATE);
    if (this.cancellationRequested) {
        // Read the "cancellation executed" flag while holding the cancellationExecution
        // monitor; if cancellation already ran, skip the timer stop and success marking.
        synchronized (this.cancellationExecution) {
            if (this.cancellationExecuted) {
                return;
            }
        }
    }
    workUnitsRunTimer.stop();
    LOG.info(String.format("All tasks of job %s have completed", jobId));
    if (jobState.getState() == JobState.RunningState.RUNNING) {
        jobState.setState(JobState.RunningState.SUCCESSFUL);
    }
}
Also used : MultiWorkUnitUnpackingIterator(org.apache.gobblin.runtime.util.MultiWorkUnitUnpackingIterator) JobState(org.apache.gobblin.runtime.JobState) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) TimingEvent(org.apache.gobblin.metrics.event.TimingEvent)

Aggregations

MultiWorkUnitUnpackingIterator (org.apache.gobblin.runtime.util.MultiWorkUnitUnpackingIterator): 2, WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 2, JsonArray (com.google.gson.JsonArray): 1, JsonObject (com.google.gson.JsonObject): 1, SourceState (org.apache.gobblin.configuration.SourceState): 1, WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 1, Kafka09JsonObjectWriterBuilder (org.apache.gobblin.kafka.writer.Kafka09JsonObjectWriterBuilder): 1, TimingEvent (org.apache.gobblin.metrics.event.TimingEvent): 1, JobState (org.apache.gobblin.runtime.JobState): 1, Kafka09JsonSource (org.apache.gobblin.source.extractor.extract.kafka.Kafka09JsonSource): 1, Destination (org.apache.gobblin.writer.Destination): 1, Test (org.testng.annotations.Test): 1