Search in sources:

Example 1 with ParallelRunner

use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.

the class AbstractJobLauncher method cleanupStagingDataPerTask.

/**
 * Cleans staging data task by task for every {@link TaskState} held by the given job state.
 *
 * <p>This is a best-effort operation: an {@link IOException} raised while cleaning one task is
 * logged and the loop moves on to the next task. The {@link Closer} handed to
 * {@code JobLauncherUtils.cleanTaskStagingData} is always closed afterwards, and a failure to
 * close it is logged rather than propagated.</p>
 *
 * @param jobState the job state whose task states are iterated
 */
private static void cleanupStagingDataPerTask(JobState jobState) {
    final Closer resources = Closer.create();
    // Shared across all tasks so the helper can reuse runners between calls.
    final Map<String, ParallelRunner> sharedRunners = Maps.newHashMap();
    try {
        for (TaskState task : jobState.getTaskStates()) {
            try {
                JobLauncherUtils.cleanTaskStagingData(task, LOG, resources, sharedRunners);
            } catch (IOException cleanFailure) {
                // Keep going: one task's failure must not block cleanup of the others.
                String message =
                    String.format("Failed to clean staging data for task %s: %s", task.getTaskId(), cleanFailure);
                LOG.error(message, cleanFailure);
            }
        }
    } finally {
        try {
            resources.close();
        } catch (IOException closeFailure) {
            LOG.error("Failed to clean staging data", closeFailure);
        }
    }
}
Also used : Closer(com.google.common.io.Closer) IOException(java.io.IOException) ParallelRunner(org.apache.gobblin.util.ParallelRunner)

Example 2 with ParallelRunner

use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.

the class AbstractJobLauncher method cleanLeftoverStagingData.

/**
 * Removes staging data that an earlier, possibly failed, run of this job may have left behind.
 *
 * <p>Nothing is done when {@link ConfigurationKeys#CLEANUP_STAGING_DATA_BY_INITIALIZER} is set
 * (the initializer is expected to clean up instead), or when the job still has unfinished
 * {@link CommitSequence}s.</p>
 *
 * <p>Otherwise the job context decides the granularity (see
 * {@link ConfigurationKeys#CLEANUP_STAGING_DATA_PER_TASK}): either each work unit's staging data
 * is cleaned individually, or the whole job's staging data is cleaned at once. Per-task cleanup
 * requires a work unit stream that is safe to materialize.</p>
 *
 * <p>Any failure during the actual cleanup is logged and swallowed so it cannot affect the
 * current run.</p>
 *
 * @param workUnits the work units of the current run
 * @param jobState the state of the current job
 * @throws JobException if checking for unfinished commit sequences fails
 */
private void cleanLeftoverStagingData(WorkUnitStream workUnits, JobState jobState) throws JobException {
    boolean cleanedByInitializer =
        jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_STAGING_DATA_BY_INITIALIZER, false);
    if (cleanedByInitializer) {
        // Clean up will be done by initializer.
        return;
    }

    boolean safeToClean;
    try {
        safeToClean = canCleanStagingData(jobState);
    } catch (IOException e) {
        throw new JobException("Failed to check unfinished commit sequences", e);
    }
    if (!safeToClean) {
        LOG.error("Job " + jobState.getJobName() + " has unfinished commit sequences. Will not clean up staging data.");
        return;
    }

    try {
        if (!this.jobContext.shouldCleanupStagingDataPerTask()) {
            // Job-level cleanup: optionally purge data of old jobs first, then this job's staging data.
            boolean purgeOldJobs = jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_OLD_JOBS_DATA,
                ConfigurationKeys.DEFAULT_CLEANUP_OLD_JOBS_DATA);
            if (purgeOldJobs) {
                JobLauncherUtils.cleanUpOldJobData(jobState, LOG, jobContext.getStagingDirProvided(),
                    jobContext.getOutputDirProvided());
            }
            JobLauncherUtils.cleanJobStagingData(jobState, LOG);
            return;
        }

        if (!workUnits.isSafeToMaterialize()) {
            // Thrown inside the try on purpose: it is reported by the catch-all below.
            throw new RuntimeException("Work unit streams do not support cleaning staging data per task.");
        }

        // Task-level cleanup over the flattened, materialized work units.
        Closer resources = Closer.create();
        Map<String, ParallelRunner> sharedRunners = Maps.newHashMap();
        try {
            for (WorkUnit unit : JobLauncherUtils.flattenWorkUnits(workUnits.getMaterializedWorkUnitCollection())) {
                WorkUnitState unitState = new WorkUnitState(unit, jobState);
                JobLauncherUtils.cleanTaskStagingData(unitState, LOG, resources, sharedRunners);
            }
        } catch (Throwable failure) {
            throw resources.rethrow(failure);
        } finally {
            resources.close();
        }
    } catch (Throwable t) {
        // Catch Throwable instead of just IOException to make sure failure of this won't affect the current run
        LOG.error("Failed to clean leftover staging data", t);
    }
}
Also used : Closer(com.google.common.io.Closer) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) IOException(java.io.IOException) MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) ParallelRunner(org.apache.gobblin.util.ParallelRunner)

Example 3 with ParallelRunner

use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.

the class BaseDataPublisher method publishData.

/**
 * Publishes the writer output of one branch of a work unit to the publisher output directory.
 *
 * <p>If the writer output directory does not exist, a warning is logged and nothing is
 * published. With {@code publishSingleTaskData} set, the final directory is created and the
 * task's output is added to it; {@code writerOutputPathsMoved} is not consulted in that mode and
 * the final directory is never replaced. Otherwise the writer output directory is processed at
 * most once (tracked through {@code writerOutputPathsMoved}): it is either merged into an
 * existing final directory, or moved into place after the existing final directory has been
 * deleted (when replacement is configured) or its parent has been created.</p>
 *
 * @param state the work unit state whose output is published
 * @param branchId the branch whose output is published
 * @param publishSingleTaskData whether only this single task's data is being published
 * @param writerOutputPathsMoved writer output directories already handled; updated by this method
 * @throws IOException if a file system operation fails
 */
protected void publishData(WorkUnitState state, int branchId, boolean publishSingleTaskData, Set<Path> writerOutputPathsMoved) throws IOException {
    // Get a ParallelRunner instance for moving files in parallel
    ParallelRunner parallelRunner = this.getParallelRunner(this.writerFileSystemByBranches.get(branchId));

    // The directory where the workUnitState wrote its output data.
    Path writerOutputDir = WriterUtils.getWriterOutputDir(state, this.numBranches, branchId);
    boolean writerProducedOutput = this.writerFileSystemByBranches.get(branchId).exists(writerOutputDir);
    if (!writerProducedOutput) {
        LOG.warn(String.format("Branch %d of WorkUnit %s produced no data", branchId, state.getId()));
        return;
    }

    // The directory where the final output directory for this job will be placed.
    // It is a combination of DATA_PUBLISHER_FINAL_DIR and WRITER_FILE_PATH.
    Path publisherOutputDir = getPublisherOutputDir(state, branchId);

    if (publishSingleTaskData) {
        // Create final output directory, then add this task's files to it.
        WriterUtils.mkdirsWithRecursivePermissionWithRetry(this.publisherFileSystemByBranches.get(branchId),
            publisherOutputDir, this.permissions.get(branchId), retrierConfig);
        addSingleTaskWriterOutputToExistingDir(writerOutputDir, publisherOutputDir, state, branchId, parallelRunner);
        return;
    }

    // From here on publishSingleTaskData is false, so already-moved writer directories are skipped.
    if (writerOutputPathsMoved.contains(writerOutputDir)) {
        return;
    }

    boolean finalDirExists = this.publisherFileSystemByBranches.get(branchId).exists(publisherOutputDir);
    if (!finalDirExists) {
        // Create the parent directory of the final output directory if it does not exist
        WriterUtils.mkdirsWithRecursivePermissionWithRetry(this.publisherFileSystemByBranches.get(branchId),
            publisherOutputDir.getParent(), this.permissions.get(branchId), retrierConfig);
    } else {
        // The final output directory already exists, check if the job is configured to replace it.
        boolean replaceFinalOutputDir = this.getState().getPropAsBoolean(
            ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.DATA_PUBLISHER_REPLACE_FINAL_DIR,
                this.numBranches, branchId));
        if (!replaceFinalOutputDir) {
            // Not configured to be replaced: put new data to the existing directory.
            addWriterOutputToExistingDir(writerOutputDir, publisherOutputDir, state, branchId, parallelRunner);
            writerOutputPathsMoved.add(writerOutputDir);
            return;
        }
        // Delete the final output directory if it is configured to be replaced
        LOG.info("Deleting publisher output dir " + publisherOutputDir);
        this.publisherFileSystemByBranches.get(branchId).delete(publisherOutputDir, true);
    }

    movePath(parallelRunner, state, writerOutputDir, publisherOutputDir, branchId);
    writerOutputPathsMoved.add(writerOutputDir);
}
Also used : Path(org.apache.hadoop.fs.Path) ParallelRunner(org.apache.gobblin.util.ParallelRunner)

Example 4 with ParallelRunner

use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.

the class TaskStateCollectorService method collectOutputTaskStates.

/**
 * Collect output {@link TaskState}s of tasks of the job launched.
 *
 * <p>
 *   This method collects all available output {@link TaskState} files at the time it is called. It
 *   uses a {@link ParallelRunner} to deserialize the {@link TaskState}s. Each {@link TaskState}
 *   file gets deleted after the {@link TaskState} it stores is successfully collected.
 * </p>
 *
 * <p>
 *   An {@link IOException} raised by the {@link ParallelRunner} block is logged, together with its
 *   cause, as a warning; the method then continues with whatever {@link TaskState}s were collected.
 *   The collected states are added to the job state, passed to the optional collector handler and
 *   finally posted on the event bus as a {@link NewTaskCompletionEvent}.
 * </p>
 *
 * @throws IOException if listing the output {@link TaskState} tables fails
 * @throws RuntimeException if the collector handler fails and the job is not configured to
 *         proceed on such a failure
 */
private void collectOutputTaskStates() throws IOException {
    List<String> taskStateNames = taskStateStore.getTableNames(outputTaskStateDir.getName(), new Predicate<String>() {

        @Override
        public boolean apply(String input) {
            // Only finished task state tables; skip files still being written under the tmp prefix.
            return input.endsWith(AbstractJobLauncher.TASK_STATE_STORE_TABLE_SUFFIX) && !input.startsWith(FsStateStore.TMP_FILE_PREFIX);
        }
    });
    if (taskStateNames == null || taskStateNames.isEmpty()) {
        LOGGER.debug("No output task state files found in " + this.outputTaskStateDir);
        return;
    }
    // Filled concurrently by the callables submitted below.
    final Queue<TaskState> taskStateQueue = Queues.newConcurrentLinkedQueue();
    try (ParallelRunner stateSerDeRunner = new ParallelRunner(this.stateSerDeRunnerThreads, null)) {
        for (final String taskStateName : taskStateNames) {
            LOGGER.debug("Found output task state file " + taskStateName);
            // Deserialize the TaskState and delete the file
            stateSerDeRunner.submitCallable(new Callable<Void>() {

                @Override
                public Void call() throws Exception {
                    TaskState taskState = taskStateStore.getAll(outputTaskStateDir.getName(), taskStateName).get(0);
                    taskStateQueue.add(taskState);
                    taskStateStore.delete(outputTaskStateDir.getName(), taskStateName);
                    return null;
                }
            }, "Deserialize state for " + taskStateName);
        }
    } catch (IOException ioe) {
        // Best effort: keep going with the states collected so far, but keep the cause in the log.
        LOGGER.warn("Could not read all task state files.", ioe);
    }
    LOGGER.info(String.format("Collected task state of %d completed tasks", taskStateQueue.size()));
    // Add the TaskStates of completed tasks to the JobState so when the control
    // returns to the launcher, it sees the TaskStates of all completed tasks.
    for (TaskState taskState : taskStateQueue) {
        taskState.setJobState(this.jobState);
        this.jobState.addTaskState(taskState);
    }
    // Currently implemented handler for Hive registration only.
    if (optionalTaskCollectorHandler.isPresent()) {
        LOGGER.info("Execute Pipelined TaskStateCollectorService Handler for " + taskStateQueue.size() + " tasks");
        try {
            optionalTaskCollectorHandler.get().handle(taskStateQueue);
        } catch (Throwable t) {
            if (isJobProceedOnCollectorServiceFailure) {
                // The job is allowed to continue: record the failure on the collected task states.
                log.error("Failed to commit dataset while job proceeds", t);
                SafeDatasetCommit.setTaskFailureException(taskStateQueue, t);
            } else {
                throw new RuntimeException("Hive Registration as the TaskStateCollectorServiceHandler failed.", t);
            }
        }
    }
    // Notify the listeners for the completion of the tasks
    this.eventBus.post(new NewTaskCompletionEvent(ImmutableList.copyOf(taskStateQueue)));
}
Also used : IOException(java.io.IOException) ParallelRunner(org.apache.gobblin.util.ParallelRunner) IOException(java.io.IOException)

Example 5 with ParallelRunner

use of org.apache.gobblin.util.ParallelRunner in project incubator-gobblin by apache.

the class GobblinHelixJobLauncher method createJob.

/**
 * Builds a Helix {@link JobConfig.Builder} for the given batch of {@link WorkUnit}s.
 *
 * <p>Every work unit is registered through {@code addWorkUnit}; {@link MultiWorkUnit}s are first
 * given a freshly generated multi-task id. The job state is then written either through the job
 * state store, when one is available, or serialized directly to the job state file. Finally the
 * builder is configured with retry, failure-threshold, task and concurrency settings, and with
 * rebalancing of running tasks when the execution model is streaming.</p>
 *
 * @param workUnits the work units to turn into tasks of the job
 * @return the configured job config builder
 * @throws IOException if writing work unit or job state fails
 */
private JobConfig.Builder createJob(List<WorkUnit> workUnits) throws IOException {
    Map<String, TaskConfig> taskConfigs = Maps.newHashMap();

    try (ParallelRunner serDeRunner = new ParallelRunner(this.stateSerDeRunnerThreads, this.fs)) {
        int multiTaskSeq = 0;
        for (WorkUnit unit : workUnits) {
            if (unit instanceof MultiWorkUnit) {
                unit.setId(JobLauncherUtils.newMultiTaskId(this.jobContext.getJobId(), multiTaskSeq));
                multiTaskSeq++;
            }
            addWorkUnit(unit, serDeRunner, taskConfigs);
        }

        // write the job.state using the state store if present, otherwise serialize directly to the file
        boolean viaStateStore = this.stateStores.haveJobStateStore();
        Path jobStateFilePath =
            GobblinClusterUtils.getJobStateFilePath(viaStateStore, this.appWorkDir, this.jobContext.getJobId());
        if (viaStateStore) {
            this.stateStores.getJobStateStore().put(jobStateFilePath.getParent().getName(),
                jobStateFilePath.getName(), this.jobContext.getJobState());
        } else {
            SerializationUtils.serializeState(this.fs, jobStateFilePath, this.jobContext.getJobState());
        }
        LOGGER.debug("GobblinHelixJobLauncher.createJob: jobStateFilePath {}, jobState {} jobProperties {}",
            jobStateFilePath, this.jobContext.getJobState().toString(), this.jobContext.getJobState().getProperties());
    }

    JobConfig.Builder builder = new JobConfig.Builder();

    int maxAttempts = this.jobContext.getJobState().getPropAsInt(ConfigurationKeys.MAX_TASK_RETRIES_KEY,
        ConfigurationKeys.DEFAULT_MAX_TASK_RETRIES);
    builder.setMaxAttemptsPerTask(maxAttempts);
    builder.setFailureThreshold(workUnits.size());
    builder.addTaskConfigMap(taskConfigs).setCommand(GobblinTaskRunner.GOBBLIN_TASK_FACTORY_NAME);

    int tasksPerInstance = ConfigUtils.getInt(jobConfig,
        GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY,
        GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY_DEFAULT);
    builder.setNumConcurrentTasksPerInstance(tasksPerInstance);

    boolean streaming = Task.getExecutionModel(ConfigUtils.configToState(jobConfig)).equals(ExecutionModel.STREAMING);
    if (streaming) {
        builder.setRebalanceRunningTask(true);
    }
    return builder;
}
Also used : Path(org.apache.hadoop.fs.Path) MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) TaskConfig(org.apache.helix.task.TaskConfig) MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) ParallelRunner(org.apache.gobblin.util.ParallelRunner) JobConfig(org.apache.helix.task.JobConfig)

Aggregations

ParallelRunner (org.apache.gobblin.util.ParallelRunner): 6, Closer (com.google.common.io.Closer): 3, IOException (java.io.IOException): 3, MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit): 3, WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 3, Path (org.apache.hadoop.fs.Path): 3, WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 1, JobConfig (org.apache.helix.task.JobConfig): 1, TaskConfig (org.apache.helix.task.TaskConfig): 1