Search in sources :

Example 46 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

From the class GobblinOutputCommitterTest, the method createAndSetWorkUnit:

/**
 * Builds an empty {@link WorkUnit} for the given name, records its writer staging and output directories on it
 * (for branch 0 of 1), and creates both directories on the local fs.
 *
 * <p>Both directories are also appended to {@code stagingDirs}, staging directory first.
 *
 * @param workUnitName the name of the {@link WorkUnit}; used as a path component below the job directory
 * @return the newly created {@link WorkUnit}
 * @throws IOException if one of the directories cannot be created
 */
private WorkUnit createAndSetWorkUnit(String workUnitName) throws IOException {
    WorkUnit workUnit = WorkUnit.createEmpty();

    // The nano-time reading serves as the task id value for this work unit.
    String taskIdKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.TASK_ID_KEY, 1, 0);
    workUnit.setProp(taskIdKey, System.nanoTime());

    // Both directories live under <OUTPUT_PATH>/<JOB_NAME>/<workUnitName>/.
    String workUnitRoot = JOB_NAME + Path.SEPARATOR + workUnitName + Path.SEPARATOR;

    Path stagingPath = new Path(OUTPUT_PATH, workUnitRoot + STAGING_DIR_NAME);
    String stagingKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, 1, 0);
    workUnit.setProp(stagingKey, stagingPath.toString());
    this.fs.mkdirs(stagingPath);
    this.stagingDirs.add(stagingPath);

    Path outputPath = new Path(OUTPUT_PATH, workUnitRoot + OUTPUT_DIR_NAME);
    String outputKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 1, 0);
    workUnit.setProp(outputKey, outputPath.toString());
    this.fs.mkdirs(outputPath);
    // The output directory is tracked in the same collection as the staging directory.
    this.stagingDirs.add(outputPath);

    return workUnit;
}
Also used : Path(org.apache.hadoop.fs.Path) MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit)

Example 47 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

From the class MultiWorkUnitUnpackingIteratorTest, the method createWorkUnit:

/**
 * Creates a work unit for the given names.
 *
 * <p>Exactly one name yields a plain {@link WorkUnit} carrying that name under {@code WORK_UNIT_NAME}. Any other
 * number of names (including none) yields a {@link MultiWorkUnit} holding one single-name work unit per name, in
 * the order given.
 *
 * @param names the work unit names
 * @return a plain {@link WorkUnit} for one name, otherwise a {@link MultiWorkUnit}
 */
private WorkUnit createWorkUnit(String... names) {
    if (names.length != 1) {
        MultiWorkUnit container = new MultiWorkUnit();
        for (int i = 0; i < names.length; i++) {
            // Each recursive call receives a single name and therefore takes the plain-WorkUnit path below.
            container.addWorkUnit(createWorkUnit(names[i]));
        }
        return container;
    }

    WorkUnit single = new WorkUnit();
    single.setProp(WORK_UNIT_NAME, names[0]);
    return single;
}
Also used : MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit)

Example 48 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

From the class TaskMetricsTest, the method testTaskGetMetrics:

/**
 * Checks that a {@link TaskMetrics} created from a task state carrying the job id and task id exposes a non-null
 * metric context whose parent is present and equal to the metric context of the job's {@link JobMetrics}.
 */
@Test
public void testTaskGetMetrics() {
    final String jobName = "jobName";
    final String jobId = "job_456";
    final String taskId = "task_456";

    // Obtain the job-level metrics first; the task-level context is compared against it below.
    JobMetrics expectedParent = JobMetrics.get(new JobState(jobName, jobId));

    State identifiers = new State();
    identifiers.setProp(ConfigurationKeys.JOB_ID_KEY, jobId);
    identifiers.setProp(ConfigurationKeys.TASK_ID_KEY, taskId);

    // Wrap the identifiers step by step: SourceState -> WorkUnit -> WorkUnitState -> TaskState.
    WorkUnit workUnit = new WorkUnit(new SourceState(identifiers, new ArrayList<WorkUnitState>()), null);
    TaskState taskState = new TaskState(new WorkUnitState(workUnit));
    TaskMetrics taskMetrics = new TaskMetrics(taskState);

    Assert.assertNotNull(taskMetrics.getMetricContext());
    Assert.assertTrue(taskMetrics.getMetricContext().getParent().isPresent());
    Assert.assertEquals(taskMetrics.getMetricContext().getParent().get(), expectedParent.getMetricContext());
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) JobState(org.apache.gobblin.runtime.JobState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) SourceState(org.apache.gobblin.configuration.SourceState) State(org.apache.gobblin.configuration.State) TaskState(org.apache.gobblin.runtime.TaskState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) JobState(org.apache.gobblin.runtime.JobState) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) TaskState(org.apache.gobblin.runtime.TaskState) Test(org.testng.annotations.Test)

Example 49 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

From the class TestSource, the method getWorkunits:

/**
 * Creates one {@link WorkUnit} per entry of the source file list found in {@code state}.
 *
 * <p>Entries at even positions are assigned to the "TestTable1" extract, entries at odd positions to the
 * "TestTable2" extract. When the boolean property {@code use.multiworkunit} is set (default {@code false}), all
 * work units are packed into a single {@link MultiWorkUnit}, which then becomes the only element of the result.
 *
 * @param state the source state supplying the extract namespace, the source file list and the multi-work-unit flag
 * @return the list of created work units, or a one-element list holding the wrapping {@link MultiWorkUnit}
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
    String namespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY);
    Extract evenExtract = createExtract(TableType.SNAPSHOT_ONLY, namespace, "TestTable1");
    Extract oddExtract = createExtract(TableType.SNAPSHOT_ONLY, namespace, "TestTable2");

    List<String> sourceFiles = SPLITTER.splitToList(state.getProp(SOURCE_FILE_LIST_KEY));
    List<WorkUnit> result = Lists.newArrayList();

    int position = 0;
    for (String sourceFile : sourceFiles) {
        // Alternate between the two extracts based on the entry's position in the list.
        Extract extract;
        if (position % 2 == 0) {
            extract = evenExtract;
        } else {
            extract = oddExtract;
        }
        WorkUnit unit = WorkUnit.create(extract);
        unit.setProp(SOURCE_FILE_KEY, sourceFile);
        result.add(unit);
        position++;
    }

    if (!state.getPropAsBoolean("use.multiworkunit", false)) {
        return result;
    }

    // Replace the individual work units with a single MultiWorkUnit that contains them all.
    MultiWorkUnit wrapper = MultiWorkUnit.createEmpty();
    wrapper.addWorkUnits(result);
    result.clear();
    result.add(wrapper);
    return result;
}
Also used : MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) Extract(org.apache.gobblin.source.workunit.Extract) MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit)

Example 50 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

From the class TestSkipWorkUnitsSource, the method getWorkunits:

/**
 * Produces the work units for this test source.
 *
 * <p>If {@code state} contains {@code TEST_WORKUNIT_PERSISTENCE}, {@code testSkipWorkUnitPersistence} is invoked
 * and an empty list is returned. Otherwise {@code NUMBER_OF_WORKUNITS} empty work units are created, and
 * {@link WorkUnit#skip()} is called on those whose index is below {@code NUMBER_OF_SKIP_WORKUNITS}.
 *
 * @param state the source state to inspect
 * @return the created work units, or an empty list in the persistence-test case
 */
public List<WorkUnit> getWorkunits(SourceState state) {
    List<WorkUnit> result = new ArrayList<>();

    if (!state.contains(TEST_WORKUNIT_PERSISTENCE)) {
        for (int index = 0; index < NUMBER_OF_WORKUNITS; index++) {
            WorkUnit unit = WorkUnit.createEmpty();
            boolean amongSkipped = index < NUMBER_OF_SKIP_WORKUNITS;
            if (amongSkipped) {
                unit.skip();
            }
            result.add(unit);
        }
        return result;
    }

    // Persistence-test mode: delegate to the helper and hand back no work units.
    testSkipWorkUnitPersistence(state);
    return result;
}
Also used : ArrayList(java.util.ArrayList) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit)

Aggregations

WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)133 Test (org.testng.annotations.Test)59 SourceState (org.apache.gobblin.configuration.SourceState)40 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)40 MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit)35 Extract (org.apache.gobblin.source.workunit.Extract)24 Path (org.apache.hadoop.fs.Path)19 State (org.apache.gobblin.configuration.State)13 IOException (java.io.IOException)11 ArrayList (java.util.ArrayList)10 Closer (com.google.common.io.Closer)9 Properties (java.util.Properties)9 WatermarkInterval (org.apache.gobblin.source.extractor.WatermarkInterval)8 List (java.util.List)7 Table (org.apache.hadoop.hive.ql.metadata.Table)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 Config (com.typesafe.config.Config)6 File (java.io.File)6 IterableDatasetFinder (org.apache.gobblin.dataset.IterableDatasetFinder)6 WorkUnitStream (org.apache.gobblin.source.workunit.WorkUnitStream)6