Search in sources :

Example 41 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class GoogleWebMasterSource method getExtractor.

/**
 * Creates the extractor for the given work unit state.
 *
 * <p>
 *   The schema is read from the work unit property {@link ConfigurationKeys#SOURCE_SCHEMA} and parsed
 *   as a JSON array. Each entry's upper-cased {@code columnName} is mapped to its index in that array.
 *   When the include-source-property flag is set, one extra string column is appended to the schema
 *   after the position map has been built, so that column has no entry in the map.
 * </p>
 *
 * @param state the work unit state carrying the schema, filters and requested dimensions/metrics
 * @return the extractor built by {@code createExtractor}
 * @throws IOException declared by the overridden method
 */
@Override
public Extractor<String, String[]> getExtractor(WorkUnitState state) throws IOException {
    List<GoogleWebmasterFilter.Dimension> dimensions = getRequestedDimensions(state);
    List<GoogleWebmasterDataFetcher.Metric> metrics = getRequestedMetrics(state);

    WorkUnit workunit = state.getWorkunit();
    JsonArray schemaColumns = new JsonParser().parse(workunit.getProp(ConfigurationKeys.SOURCE_SCHEMA)).getAsJsonArray();

    // Record where each declared column sits in the schema, keyed by upper-cased column name.
    Map<String, Integer> positionsByColumn = new HashMap<>();
    int position = 0;
    for (JsonElement column : schemaColumns) {
        String upperCasedName = column.getAsJsonObject().get("columnName").getAsString().toUpperCase();
        positionsByColumn.put(upperCasedName, position);
        position++;
    }

    // The optional source-property column is appended only after the position map is complete.
    boolean includeSourceProperty = workunit.getPropAsBoolean(GoogleWebMasterSource.KEY_INCLUDE_SOURCE_PROPERTY, DEFAULT_INCLUDE_SOURCE_PROPERTY);
    if (includeSourceProperty) {
        String sourcePropertyColumn = workunit.getProp(KEY_SOURCE_PROPERTY_COLUMN_NAME, DEFAULT_SOURCE_PROPERTY_COLUMN_NAME);
        schemaColumns.add(SchemaUtil.createColumnJson(sourcePropertyColumn, false, JsonElementConversionFactory.Type.STRING));
    }

    validateFilters(state.getProp(GoogleWebMasterSource.KEY_REQUEST_FILTERS));
    validateRequests(positionsByColumn, dimensions, metrics);
    return createExtractor(state, positionsByColumn, dimensions, metrics, schemaColumns);
}
Also used : HashMap(java.util.HashMap) JsonArray(com.google.gson.JsonArray) JsonElement(com.google.gson.JsonElement) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) JsonParser(com.google.gson.JsonParser)

Example 42 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class GobblinMultiTaskAttempt method runWorkUnits.

/**
 * Submits the {@link WorkUnit}s of a job for execution as {@link Task}s.
 *
 * <p>
 *   This method assumes that the {@link WorkUnit}s have already been flattened and that each
 *   {@link WorkUnit} contains the task ID in the property {@link ConfigurationKeys#TASK_ID_KEY}.
 *   Work units whose task succeeded in a prior attempt are skipped. After all work units have been
 *   consumed, a {@code TASKS_SUBMITTED} event carrying the latch's registered-party count is emitted.
 * </p>
 *
 * @param countDownLatch a {@link CountUpAndDownLatch} waited on for job completion; it is counted up
 *                       once for every task that is submitted here
 * @return a list of the {@link Task}s created from the {@link WorkUnit}s and submitted to the executor
 */
private List<Task> runWorkUnits(CountUpAndDownLatch countDownLatch) {
    List<Task> submittedTasks = Lists.newArrayList();
    while (this.workUnits.hasNext()) {
        WorkUnit currentWorkUnit = this.workUnits.next();
        String taskId = currentWorkUnit.getProp(ConfigurationKeys.TASK_ID_KEY);
        // Only work units whose task did not already succeed in a prior attempt are run.
        if (!taskSuccessfulInPriorAttempt(taskId)) {
            countDownLatch.countUp();

            TaskScopeInstance taskScope = new TaskScopeInstance(taskId);
            SubscopedBrokerBuilder<GobblinScopeTypes, ?> taskBrokerBuilder = this.jobBroker.newSubscopedBuilder(taskScope);

            WorkUnitState taskWorkUnitState = new WorkUnitState(currentWorkUnit, this.jobState, taskBrokerBuilder);
            taskWorkUnitState.setId(taskId);
            taskWorkUnitState.setProp(ConfigurationKeys.JOB_ID_KEY, this.jobId);
            taskWorkUnitState.setProp(ConfigurationKeys.TASK_ID_KEY, taskId);
            if (this.containerIdOptional.isPresent()) {
                taskWorkUnitState.setProp(ConfigurationKeys.TASK_ATTEMPT_ID_KEY, this.containerIdOptional.get());
            }

            // Build the task, register it with the tracker, then hand it to the executor.
            Task newTask = createTaskRunnable(taskWorkUnitState, countDownLatch);
            this.taskStateTracker.registerNewTask(newTask);
            newTask.setTaskFuture(this.taskExecutor.submit(newTask));
            submittedTasks.add(newTask);
        }
    }

    EventSubmitter eventSubmitter = new EventSubmitter.Builder(JobMetrics.get(this.jobId).getMetricContext(), "gobblin.runtime").build();
    String registeredTaskCount = Long.toString(countDownLatch.getRegisteredParties());
    eventSubmitter.submit(JobEvent.TASKS_SUBMITTED, "tasksCount", registeredTaskCount);
    return submittedTasks;
}
Also used : GobblinScopeTypes(org.apache.gobblin.broker.gobblin_scopes.GobblinScopeTypes) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) SubscopedBrokerBuilder(org.apache.gobblin.broker.iface.SubscopedBrokerBuilder) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) TaskScopeInstance(org.apache.gobblin.broker.gobblin_scopes.TaskScopeInstance)

Example 43 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class LimiterStopEventTest method testGetLimiterStopMetadataCase0.

/**
 * Checks the metadata returned by the private {@code getLimiterStopMetadata} method of
 * {@link LimitingExtractorDecorator} when the work unit properties contain exactly the keys named
 * in the limiter report key list.
 *
 * <p>
 *   {@code "topic"} and {@code "partition.id"} are both listed and set, so they must be present in
 *   the metadata. {@code "others"} is neither listed nor set, so it must be absent.
 * </p>
 *
 * @throws InterruptedException declared because the stubbed {@code limiter.acquirePermits} call requires it
 */
@Test
public void testGetLimiterStopMetadataCase0() throws InterruptedException {
    Properties properties = new Properties();
    String key1 = "topic";
    String key2 = "partition.id";
    String key3 = "others";
    String keyList = Joiner.on(',').join(key1, key2);
    properties.setProperty(LimiterConfigurationKeys.LIMITER_REPORT_KEY_LIST, keyList);
    properties.setProperty(key1, "1111");
    properties.setProperty(key2, "1111");
    Extractor extractor = mock(Extractor.class);
    Limiter limiter = mock(Limiter.class);
    TaskState taskState = mock(TaskState.class);
    WorkUnit workUnit = mock(WorkUnit.class);
    Mockito.when(taskState.getWorkunit()).thenReturn(workUnit);
    Mockito.when(taskState.getJobId()).thenReturn("123");
    Mockito.when(taskState.getTaskAttemptId()).thenReturn(Optional.of("555"));
    Mockito.when(taskState.getTaskId()).thenReturn("888");
    Mockito.when(limiter.acquirePermits(1)).thenReturn(null);
    Mockito.when(taskState.getProp(ConfigurationKeys.DATASET_URN_KEY, ConfigurationKeys.DEFAULT_DATASET_URN)).thenReturn("file://xyz");
    Mockito.when(workUnit.getProperties()).thenReturn(properties);
    LimitingExtractorDecorator<String, String> decorator = new LimitingExtractorDecorator<>(extractor, limiter, taskState);
    try {
        // The method under test is private, so it is invoked reflectively.
        Method method = LimitingExtractorDecorator.class.getDeclaredMethod("getLimiterStopMetadata");
        method.setAccessible(true);
        ImmutableMap<String, String> metaData = (ImmutableMap<String, String>) method.invoke(decorator);
        Assert.assertTrue(metaData.containsKey(key1));
        Assert.assertTrue(metaData.containsKey(key2));
        Assert.assertFalse(metaData.containsKey(key3));
    } catch (Exception e) {
        // Keep the underlying exception attached so a reflective failure can be diagnosed from the report.
        Assert.fail("Failed to invoke LimitingExtractorDecorator#getLimiterStopMetadata", e);
    }
}
Also used : Extractor(org.apache.gobblin.source.extractor.Extractor) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Method(java.lang.reflect.Method) Properties(java.util.Properties) ImmutableMap(com.google.common.collect.ImmutableMap) Limiter(org.apache.gobblin.util.limiter.Limiter) Test(org.testng.annotations.Test)

Example 44 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class LimiterStopEventTest method testGetLimiterStopMetadataCase3.

/**
 * Checks the metadata returned by the private {@code getLimiterStopMetadata} method of
 * {@link LimitingExtractorDecorator} when the work unit properties also contain keys that start
 * with a key named in the limiter report key list.
 *
 * <p>
 *   Only {@code "topic"} and {@code "partition.id"} are in the report key list. Four further
 *   properties are set whose names are {@code "partition.id"} followed by an arbitrary suffix
 *   (the last suffix is empty, so that key equals {@code "partition.id"} itself). The test expects
 *   every one of these keys to be present in the metadata.
 * </p>
 *
 * @throws InterruptedException declared because the stubbed {@code limiter.acquirePermits} call requires it
 */
@Test
public void testGetLimiterStopMetadataCase3() throws InterruptedException {
    Properties properties = new Properties();
    String key1 = "topic";
    String key2 = "partition.id";
    String keyList = Joiner.on(',').join(key1, key2);
    String subKey1 = key2 + "....";
    String subKey2 = key2 + "##fjpaierbng;";
    String subKey3 = key2 + "x[n  sdf";
    String subKey4 = key2 + "";
    properties.setProperty(LimiterConfigurationKeys.LIMITER_REPORT_KEY_LIST, keyList);
    properties.setProperty(subKey1, "1111");
    properties.setProperty(subKey2, "1111");
    properties.setProperty(subKey3, "1111");
    properties.setProperty(subKey4, "1111");
    properties.setProperty(key1, "1111");
    properties.setProperty(key2, "1111");
    Extractor extractor = mock(Extractor.class);
    Limiter limiter = mock(Limiter.class);
    TaskState taskState = mock(TaskState.class);
    WorkUnit workUnit = mock(WorkUnit.class);
    Mockito.when(taskState.getWorkunit()).thenReturn(workUnit);
    Mockito.when(taskState.getJobId()).thenReturn("123");
    Mockito.when(taskState.getTaskAttemptId()).thenReturn(Optional.of("555"));
    Mockito.when(taskState.getTaskId()).thenReturn("888");
    Mockito.when(limiter.acquirePermits(1)).thenReturn(null);
    Mockito.when(taskState.getProp(ConfigurationKeys.DATASET_URN_KEY, ConfigurationKeys.DEFAULT_DATASET_URN)).thenReturn("file://xyz");
    Mockito.when(workUnit.getProperties()).thenReturn(properties);
    LimitingExtractorDecorator<String, String> decorator = new LimitingExtractorDecorator<>(extractor, limiter, taskState);
    try {
        // The method under test is private, so it is invoked reflectively.
        Method method = LimitingExtractorDecorator.class.getDeclaredMethod("getLimiterStopMetadata");
        method.setAccessible(true);
        ImmutableMap<String, String> metaData = (ImmutableMap<String, String>) method.invoke(decorator);
        Assert.assertTrue(metaData.containsKey(key1));
        Assert.assertTrue(metaData.containsKey(key2));
        Assert.assertTrue(metaData.containsKey(subKey1));
        Assert.assertTrue(metaData.containsKey(subKey2));
        Assert.assertTrue(metaData.containsKey(subKey3));
        Assert.assertTrue(metaData.containsKey(subKey4));
    } catch (Exception e) {
        // Keep the underlying exception attached so a reflective failure can be diagnosed from the report.
        Assert.fail("Failed to invoke LimitingExtractorDecorator#getLimiterStopMetadata", e);
    }
}
Also used : Extractor(org.apache.gobblin.source.extractor.Extractor) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Method(java.lang.reflect.Method) Properties(java.util.Properties) ImmutableMap(com.google.common.collect.ImmutableMap) Limiter(org.apache.gobblin.util.limiter.Limiter) Test(org.testng.annotations.Test)

Example 45 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class TaskContextTest method setUp.

/**
 * Builds the {@link TaskContext} used by the tests in this class.
 *
 * <p>
 *   An empty {@link WorkUnit} is populated with the properties parsed from {@code TEST_JOB_CONFIG},
 *   then given a newly generated job ID and the task ID derived from that job ID with sequence 0.
 * </p>
 *
 * @throws Exception if the test job configuration cannot be loaded
 */
@BeforeClass
public void setUp() throws Exception {
    Properties jobProperties = new Properties();
    jobProperties.load(new StringReader(TEST_JOB_CONFIG));

    WorkUnit workUnit = WorkUnit.createEmpty();
    workUnit.addAll(jobProperties);

    // The task ID is derived from the same job ID that is stored on the work unit.
    String jobId = JobLauncherUtils.newJobId("GobblinTest1");
    workUnit.setProp(ConfigurationKeys.JOB_ID_KEY, jobId);
    workUnit.setProp(ConfigurationKeys.TASK_ID_KEY, JobLauncherUtils.newTaskId(jobId, 0));

    WorkUnitState workUnitState = new WorkUnitState(workUnit);
    this.taskContext = new TaskContext(workUnitState);
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) StringReader(java.io.StringReader) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Properties(java.util.Properties) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)133 Test (org.testng.annotations.Test)59 SourceState (org.apache.gobblin.configuration.SourceState)40 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)40 MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit)35 Extract (org.apache.gobblin.source.workunit.Extract)24 Path (org.apache.hadoop.fs.Path)19 State (org.apache.gobblin.configuration.State)13 IOException (java.io.IOException)11 ArrayList (java.util.ArrayList)10 Closer (com.google.common.io.Closer)9 Properties (java.util.Properties)9 WatermarkInterval (org.apache.gobblin.source.extractor.WatermarkInterval)8 List (java.util.List)7 Table (org.apache.hadoop.hive.ql.metadata.Table)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 Config (com.typesafe.config.Config)6 File (java.io.File)6 IterableDatasetFinder (org.apache.gobblin.dataset.IterableDatasetFinder)6 WorkUnitStream (org.apache.gobblin.source.workunit.WorkUnitStream)6