use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.
the class AbstractJobLauncher method cleanupStagingDataPerTask.
/**
 * Cleans the staging data of every {@link TaskState} held by the given job state.
 *
 * <p>A failure to clean one task is logged and the loop moves on to the next task. The
 * {@link Closer} handed to each cleanup call is closed after the loop finishes (or is aborted),
 * and a failure while closing it is logged rather than propagated.</p>
 *
 * @param jobState the job state whose task states are visited
 */
private static void cleanupStagingDataPerTask(JobState jobState) {
  Map<String, ParallelRunner> runners = Maps.newHashMap();
  Closer resources = Closer.create();
  try {
    for (TaskState task : jobState.getTaskStates()) {
      try {
        JobLauncherUtils.cleanTaskStagingData(task, LOG, resources, runners);
      } catch (IOException cleanupFailure) {
        // Best effort: report this task and keep going with the others.
        String message =
            String.format("Failed to clean staging data for task %s: %s", task.getTaskId(), cleanupFailure);
        LOG.error(message, cleanupFailure);
      }
    }
  } finally {
    try {
      resources.close();
    } catch (IOException closeFailure) {
      LOG.error("Failed to clean staging data", closeFailure);
    }
  }
}
use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.
the class AbstractJobLauncher method cleanLeftoverStagingData.
/**
 * Removes staging data that an earlier run of this job may have left behind, for example
 * because that run failed before cleaning up after itself.
 *
 * <p>Nothing is done here when {@link ConfigurationKeys#CLEANUP_STAGING_DATA_BY_INITIALIZER} is
 * set, or when the job still has unfinished {@link CommitSequence}s. Otherwise the staging data
 * is cleaned either per task or for the whole job at once; property
 * {@link ConfigurationKeys#CLEANUP_STAGING_DATA_PER_TASK} controls which of the two is used.</p>
 *
 * <p>Any failure during the actual cleanup is logged and swallowed so that it cannot affect the
 * current run.</p>
 *
 * @param workUnits the work units of the current run; must be safe to materialize for per-task cleanup
 * @param jobState the state of the job being launched
 * @throws JobException if checking for unfinished commit sequences fails
 */
private void cleanLeftoverStagingData(WorkUnitStream workUnits, JobState jobState) throws JobException {
  // Clean up will be done by initializer.
  if (jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_STAGING_DATA_BY_INITIALIZER, false)) {
    return;
  }

  boolean cleanable;
  try {
    cleanable = canCleanStagingData(jobState);
  } catch (IOException e) {
    throw new JobException("Failed to check unfinished commit sequences", e);
  }
  if (!cleanable) {
    LOG.error("Job " + jobState.getJobName() + " has unfinished commit sequences. Will not clean up staging data.");
    return;
  }

  try {
    if (!this.jobContext.shouldCleanupStagingDataPerTask()) {
      // Whole-job cleanup, optionally preceded by removal of data from old jobs.
      if (jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_OLD_JOBS_DATA, ConfigurationKeys.DEFAULT_CLEANUP_OLD_JOBS_DATA)) {
        JobLauncherUtils.cleanUpOldJobData(jobState, LOG, jobContext.getStagingDirProvided(), jobContext.getOutputDirProvided());
      }
      JobLauncherUtils.cleanJobStagingData(jobState, LOG);
    } else if (!workUnits.isSafeToMaterialize()) {
      throw new RuntimeException("Work unit streams do not support cleaning staging data per task.");
    } else {
      Map<String, ParallelRunner> runners = Maps.newHashMap();
      Closer resources = Closer.create();
      try {
        for (WorkUnit unit : JobLauncherUtils.flattenWorkUnits(workUnits.getMaterializedWorkUnitCollection())) {
          JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(unit, jobState), LOG, resources, runners);
        }
      } catch (Throwable failure) {
        throw resources.rethrow(failure);
      } finally {
        resources.close();
      }
    }
  } catch (Throwable t) {
    // Catch Throwable instead of just IOException to make sure failure of this won't affect the current run
    LOG.error("Failed to clean leftover staging data", t);
  }
}
use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.
the class BaseDataPublisher method publishData.
/**
 * Publishes the writer output of one branch of a work unit into the publisher output directory.
 *
 * <p>If the writer output directory does not exist, a warning is logged and nothing is published.
 * With {@code publishSingleTaskData} set, the final directory is created and the task's output is
 * added to it. Otherwise the whole writer output directory is handled once: it is either added to
 * an existing final directory, or moved into place after the existing final directory has been
 * deleted (when replacement is configured) or its parent has been created.</p>
 *
 * @param state the work unit state whose output is published
 * @param branchId the branch whose output is published
 * @param publishSingleTaskData whether to publish only this task's data into the final directory
 * @param writerOutputPathsMoved writer output directories already handled; consulted and updated
 *        only when {@code publishSingleTaskData} is {@code false}
 * @throws IOException if one of the underlying file system or publishing operations fails
 */
protected void publishData(WorkUnitState state, int branchId, boolean publishSingleTaskData, Set<Path> writerOutputPathsMoved) throws IOException {
  // Runner used to move files in parallel.
  ParallelRunner runner = this.getParallelRunner(this.writerFileSystemByBranches.get(branchId));

  // Where the work unit wrote its output for this branch.
  Path sourceDir = WriterUtils.getWriterOutputDir(state, this.numBranches, branchId);
  if (!this.writerFileSystemByBranches.get(branchId).exists(sourceDir)) {
    LOG.warn(String.format("Branch %d of WorkUnit %s produced no data", branchId, state.getId()));
    return;
  }

  // Final output location: a combination of DATA_PUBLISHER_FINAL_DIR and WRITER_FILE_PATH.
  Path targetDir = getPublisherOutputDir(state, branchId);

  if (publishSingleTaskData) {
    // Single-task mode never replaces the final directory and ignores writerOutputPathsMoved.
    WriterUtils.mkdirsWithRecursivePermissionWithRetry(this.publisherFileSystemByBranches.get(branchId), targetDir, this.permissions.get(branchId), retrierConfig);
    addSingleTaskWriterOutputToExistingDir(sourceDir, targetDir, state, branchId, runner);
    return;
  }

  if (writerOutputPathsMoved.contains(sourceDir)) {
    // This writer output directory has already been handled.
    return;
  }

  if (!this.publisherFileSystemByBranches.get(branchId).exists(targetDir)) {
    // Make sure the parent of the final output directory exists before moving into it.
    WriterUtils.mkdirsWithRecursivePermissionWithRetry(this.publisherFileSystemByBranches.get(branchId), targetDir.getParent(), this.permissions.get(branchId), retrierConfig);
  } else {
    // The final output directory already exists; check whether the job wants it replaced.
    boolean replaceExisting = this.getState().getPropAsBoolean(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.DATA_PUBLISHER_REPLACE_FINAL_DIR, this.numBranches, branchId));
    if (!replaceExisting) {
      // Not replacing: merge the new data into the existing directory.
      addWriterOutputToExistingDir(sourceDir, targetDir, state, branchId, runner);
      writerOutputPathsMoved.add(sourceDir);
      return;
    }
    LOG.info("Deleting publisher output dir " + targetDir);
    this.publisherFileSystemByBranches.get(branchId).delete(targetDir, true);
  }

  movePath(runner, state, sourceDir, targetDir, branchId);
  writerOutputPathsMoved.add(sourceDir);
}
use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.
the class TaskStateCollectorService method collectOutputTaskStates.
/**
 * Collect output {@link TaskState}s of tasks of the job launched.
 *
 * <p>
 *   This method collects all available output {@link TaskState} files at the time it is called. It
 *   uses a {@link ParallelRunner} to deserialize the {@link TaskState}s. Each {@link TaskState}
 *   file gets deleted after the {@link TaskState} it stores is successfully collected.
 * </p>
 *
 * <p>
 *   An {@link IOException} raised while running or closing the {@link ParallelRunner} is logged
 *   (with its cause) and does not abort the collection; whatever {@link TaskState}s were read are
 *   still added to the job state, handed to the optional collector handler, and announced through
 *   a {@link NewTaskCompletionEvent}.
 * </p>
 *
 * @throws IOException if listing the output {@link TaskState} tables fails
 * @throws RuntimeException if the collector handler fails and the job is not configured to
 *         proceed on such a failure
 */
private void collectOutputTaskStates() throws IOException {
  List<String> taskStateNames = taskStateStore.getTableNames(outputTaskStateDir.getName(), new Predicate<String>() {
    @Override
    public boolean apply(String input) {
      // Only finished task state tables; skip files that are still being written.
      return input.endsWith(AbstractJobLauncher.TASK_STATE_STORE_TABLE_SUFFIX) && !input.startsWith(FsStateStore.TMP_FILE_PREFIX);
    }
  });

  if (taskStateNames == null || taskStateNames.isEmpty()) {
    LOGGER.debug("No output task state files found in " + this.outputTaskStateDir);
    return;
  }

  // Filled concurrently by the callables submitted below.
  final Queue<TaskState> taskStateQueue = Queues.newConcurrentLinkedQueue();
  try (ParallelRunner stateSerDeRunner = new ParallelRunner(this.stateSerDeRunnerThreads, null)) {
    for (final String taskStateName : taskStateNames) {
      LOGGER.debug("Found output task state file " + taskStateName);
      // Deserialize the TaskState and delete the file
      stateSerDeRunner.submitCallable(new Callable<Void>() {
        @Override
        public Void call() throws Exception {
          TaskState taskState = taskStateStore.getAll(outputTaskStateDir.getName(), taskStateName).get(0);
          taskStateQueue.add(taskState);
          taskStateStore.delete(outputTaskStateDir.getName(), taskStateName);
          return null;
        }
      }, "Deserialize state for " + taskStateName);
    }
  } catch (IOException ioe) {
    // Best effort: keep whatever was collected, but do not lose the reason for the failure.
    LOGGER.warn("Could not read all task state files.", ioe);
  }

  LOGGER.info(String.format("Collected task state of %d completed tasks", taskStateQueue.size()));

  // Add the TaskStates of completed tasks to the JobState so when the control
  // returns to the launcher, it sees the TaskStates of all completed tasks.
  for (TaskState taskState : taskStateQueue) {
    taskState.setJobState(this.jobState);
    this.jobState.addTaskState(taskState);
  }

  // Currently implemented handler for Hive registration only.
  if (optionalTaskCollectorHandler.isPresent()) {
    LOGGER.info("Execute Pipelined TaskStateCollectorService Handler for " + taskStateQueue.size() + " tasks");
    try {
      optionalTaskCollectorHandler.get().handle(taskStateQueue);
    } catch (Throwable t) {
      if (isJobProceedOnCollectorServiceFailure) {
        // Record the failure on the task states and let the job continue.
        LOGGER.error("Failed to commit dataset while job proceeds", t);
        SafeDatasetCommit.setTaskFailureException(taskStateQueue, t);
      } else {
        throw new RuntimeException("Hive Registration as the TaskStateCollectorServiceHandler failed.", t);
      }
    }
  }

  // Notify the listeners for the completion of the tasks
  this.eventBus.post(new NewTaskCompletionEvent(ImmutableList.copyOf(taskStateQueue)));
}
use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.
the class GobblinHelixJobLauncher method createJob.
/**
 * Builds the {@link JobConfig.Builder} for a batch of {@link WorkUnit}s.
 *
 * <p>Every {@link MultiWorkUnit} in the batch is given a newly generated multi-task id, each work
 * unit is registered through {@code addWorkUnit}, and the job state is written out: through the
 * job state store when one is available, otherwise serialized directly to a file. The returned
 * builder carries the collected task configs together with the retry, failure-threshold and
 * concurrency settings, and has running-task rebalancing enabled for the streaming execution
 * model.</p>
 *
 * @param workUnits the work units that make up the job
 * @return the configured job config builder
 * @throws IOException if adding the work units or writing the job state fails
 */
private JobConfig.Builder createJob(List<WorkUnit> workUnits) throws IOException {
  Map<String, TaskConfig> taskConfigs = Maps.newHashMap();

  try (ParallelRunner runner = new ParallelRunner(this.stateSerDeRunnerThreads, this.fs)) {
    int nextMultiTaskId = 0;
    for (WorkUnit unit : workUnits) {
      if (unit instanceof MultiWorkUnit) {
        unit.setId(JobLauncherUtils.newMultiTaskId(this.jobContext.getJobId(), nextMultiTaskId));
        nextMultiTaskId++;
      }
      addWorkUnit(unit, runner, taskConfigs);
    }

    // write the job.state using the state store if present, otherwise serialize directly to the file
    boolean viaStateStore = this.stateStores.haveJobStateStore();
    Path stateFile = GobblinClusterUtils.getJobStateFilePath(viaStateStore, this.appWorkDir, this.jobContext.getJobId());
    if (viaStateStore) {
      this.stateStores.getJobStateStore().put(stateFile.getParent().getName(), stateFile.getName(), this.jobContext.getJobState());
    } else {
      SerializationUtils.serializeState(this.fs, stateFile, this.jobContext.getJobState());
    }

    LOGGER.debug("GobblinHelixJobLauncher.createJob: jobStateFilePath {}, jobState {} jobProperties {}", stateFile, this.jobContext.getJobState().toString(), this.jobContext.getJobState().getProperties());
  }

  JobConfig.Builder builder = new JobConfig.Builder();
  builder.setMaxAttemptsPerTask(this.jobContext.getJobState().getPropAsInt(ConfigurationKeys.MAX_TASK_RETRIES_KEY, ConfigurationKeys.DEFAULT_MAX_TASK_RETRIES));
  builder.setFailureThreshold(workUnits.size());
  builder.addTaskConfigMap(taskConfigs).setCommand(GobblinTaskRunner.GOBBLIN_TASK_FACTORY_NAME);
  builder.setNumConcurrentTasksPerInstance(ConfigUtils.getInt(jobConfig, GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY, GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY_DEFAULT));

  boolean streaming = Task.getExecutionModel(ConfigUtils.configToState(jobConfig)).equals(ExecutionModel.STREAMING);
  if (streaming) {
    builder.setRebalanceRunningTask(true);
  }
  return builder;
}
Aggregations