Use of org.apache.gobblin.source.workunit.MultiWorkUnit in project incubator-gobblin by Apache.
The class GobblinHelixJobLauncher, method persistWorkUnit.
* Persist a single {@link WorkUnit} (flattened) to a file.
private String persistWorkUnit(final Path workUnitFileDir, final WorkUnit workUnit, ParallelRunner stateSerDeRunner) throws IOException {
final StateStore stateStore;
String workUnitFileName = workUnit.getId();
if (workUnit instanceof MultiWorkUnit) {
stateStore = stateStores.getMwuStateStore();
} else {
stateStore = stateStores.getWuStateStore();
Path workUnitFile = new Path(workUnitFileDir, workUnitFileName);
final String fileName = workUnitFile.getName();
final String storeName = workUnitFile.getParent().getName();
stateSerDeRunner.submitCallable(new Callable<Void>() {
public Void call() throws Exception {
stateStore.put(storeName, fileName, workUnit);
return null;
}, "Serialize state to store " + storeName + " file " + fileName);
return workUnitFile.toString();
Use of org.apache.gobblin.source.workunit.MultiWorkUnit in project incubator-gobblin by Apache.
The class GobblinOutputCommitter, method abortJob.
/**
 * Handles a job abort by cleaning task staging data for the serialized work units found in the
 * job input directory, then calling {@code cleanUpWorkingDirectory} on the MR job directory and
 * delegating to {@code super.abortJob}.
 *
 * <p>NOTE(review): this excerpt has lost closing braces and a few tokens (flagged inline below),
 * so the exact nesting of the try/finally blocks cannot be confirmed from here.
 *
 * @param jobContext context of the job being aborted; its configuration supplies the file system
 *        URI, the MR job root directory and the job name
 * @param state the job state, passed through to {@code super.abortJob}
 * @throws IOException declared by the signature for callers to handle
 */
public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException {"Aborting Job: " + jobContext.getJobID() + " with state: " + state);
// NOTE(review): the string expression on the line above has no enclosing call in this excerpt;
// presumably a logging call was lost in extraction — confirm against the upstream file.
Configuration conf = jobContext.getConfiguration();
// The file system URI is read from the job configuration, with LOCAL_FS_URI passed as the fallback value.
URI fsUri = URI.create(conf.get(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI));
FileSystem fs = FileSystem.get(fsUri, conf);
// MR job directory = <MR job root dir>/<job name>; the input directory sits beneath it.
Path mrJobDir = new Path(conf.get(ConfigurationKeys.MR_JOB_ROOT_DIR_KEY), conf.get(ConfigurationKeys.JOB_NAME_KEY));
Path jobInputDir = new Path(mrJobDir, MRJobLauncher.INPUT_DIR_NAME);
// NOTE(review): the warning says there is no data to clean up, but no early exit is visible after
// it in this excerpt — confirm whether a return and closing brace were lost.
if (!fs.exists(jobInputDir) || !fs.isDirectory(jobInputDir)) {
LOG.warn(String.format("%s either does not exist or is not a directory. No data to cleanup.", jobInputDir));
// Iterate through all files in the jobInputDir, each file should correspond to a serialized wu or mwu
try {
for (FileStatus status : fs.listStatus(jobInputDir, new WorkUnitFilter())) {
// A fresh Closer is created for each listed file; streams opened below are registered with it.
Closer workUnitFileCloser = Closer.create();
// If the file ends with ".wu" de-serialize it into a WorkUnit
if (status.getPath().getName().endsWith(AbstractJobLauncher.WORK_UNIT_FILE_EXTENSION)) {
WorkUnit wu = WorkUnit.createEmpty();
try {
// NOTE(review): the DataInputStream constructor argument is missing in this excerpt;
// presumably it wrapped a stream opened on status.getPath() — confirm against upstream.
wu.readFields(workUnitFileCloser.register(new DataInputStream(;
} finally {
// NOTE(review): the body of this finally block is not clearly delimited here (braces lost).
JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(wu), LOG);
// If the file ends with ".mwu" de-serialize it into a MultiWorkUnit
if (status.getPath().getName().endsWith(AbstractJobLauncher.MULTI_WORK_UNIT_FILE_EXTENSION)) {
MultiWorkUnit mwu = MultiWorkUnit.createEmpty();
try {
// NOTE(review): same truncated DataInputStream argument as in the ".wu" branch above.
mwu.readFields(workUnitFileCloser.register(new DataInputStream(;
} finally {
// Staging data is cleaned for each work unit contained in the multi work unit.
for (WorkUnit wu : mwu.getWorkUnits()) {
JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(wu), LOG);
} finally {
// The working-directory cleanup and the superclass abort both sit in finally blocks.
try {
cleanUpWorkingDirectory(mrJobDir, fs);
} finally {
super.abortJob(jobContext, state);
Use of org.apache.gobblin.source.workunit.MultiWorkUnit in project incubator-gobblin by Apache.
The class MRJobLauncher, method prepareJobInput.
* Prepare the job input.
* @throws IOException
private void prepareJobInput(List<WorkUnit> workUnits) throws IOException {
Closer closer = Closer.create();
try {
ParallelRunner parallelRunner = closer.register(new ParallelRunner(this.parallelRunnerThreads, this.fs));
int multiTaskIdSequence = 0;
// Serialize each work unit into a file named after the task ID
for (WorkUnit workUnit : workUnits) {
String workUnitFileName;
if (workUnit instanceof MultiWorkUnit) {
workUnitFileName = JobLauncherUtils.newMultiTaskId(this.jobContext.getJobId(), multiTaskIdSequence++) + MULTI_WORK_UNIT_FILE_EXTENSION;
} else {
workUnitFileName = workUnit.getProp(ConfigurationKeys.TASK_ID_KEY) + WORK_UNIT_FILE_EXTENSION;
Path workUnitFile = new Path(this.jobInputPath, workUnitFileName);
LOG.debug("Writing work unit file " + workUnitFileName);
parallelRunner.serializeToFile(workUnit, workUnitFile);
// Append the work unit file path to the job input file
} catch (Throwable t) {
throw closer.rethrow(t);
} finally {