Use of org.apache.helix.task.WorkflowConfig in project helix by apache.
The class TestRetrieveWorkflows, method testGetAllWorkflows.
@Test
public void testGetAllWorkflows() throws Exception {
  List<Workflow> workflowList = new ArrayList<Workflow>();
  // Build and start two default repeated-job workflows.
  for (int i = 0; i < 2; i++) {
    Workflow workflow = WorkflowGenerator
        .generateDefaultRepeatedJobWorkflowBuilder(TestHelper.getTestMethodName() + i).build();
    _driver.start(workflow);
    workflowList.add(workflow);
  }
  // Wait for both workflows to complete.
  for (Workflow workflow : workflowList) {
    _driver.pollForWorkflowState(workflow.getName(), TaskState.COMPLETED);
  }
  // Every started workflow should be returned, each with a context, and with a config
  // and context for every job in its DAG.
  Map<String, WorkflowConfig> workflowConfigMap = _driver.getWorkflows();
  Assert.assertEquals(workflowConfigMap.size(), workflowList.size());
  for (Map.Entry<String, WorkflowConfig> workflow : workflowConfigMap.entrySet()) {
    WorkflowConfig workflowConfig = workflow.getValue();
    WorkflowContext workflowContext = _driver.getWorkflowContext(workflow.getKey());
    Assert.assertNotNull(workflowContext);
    for (String job : workflowConfig.getJobDag().getAllNodes()) {
      JobConfig jobConfig = _driver.getJobConfig(job);
      JobContext jobContext = _driver.getJobContext(job);
      Assert.assertNotNull(jobConfig);
      Assert.assertNotNull(jobContext);
    }
  }
}
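For reference, a minimal sketch of the same lookup pattern outside the test harness. The listWorkflows helper and its manager argument are assumptions, not part of the Helix test above; only the TaskDriver calls mirror the example.

import java.util.Map;
import org.apache.helix.HelixManager;
import org.apache.helix.task.JobConfig;
import org.apache.helix.task.TaskDriver;
import org.apache.helix.task.WorkflowConfig;
import org.apache.helix.task.WorkflowContext;

// Hypothetical helper: list every workflow known to the cluster together with its jobs.
public static void listWorkflows(HelixManager manager) {
  TaskDriver driver = new TaskDriver(manager);
  Map<String, WorkflowConfig> workflows = driver.getWorkflows();
  for (Map.Entry<String, WorkflowConfig> entry : workflows.entrySet()) {
    String workflowName = entry.getKey();
    WorkflowContext context = driver.getWorkflowContext(workflowName);
    System.out.println(workflowName + " -> " + (context == null ? "no context" : context.getWorkflowState()));
    // The job DAG is stored on the WorkflowConfig; job names are namespaced with the workflow name.
    for (String job : entry.getValue().getJobDag().getAllNodes()) {
      JobConfig jobConfig = driver.getJobConfig(job);
      System.out.println("  job " + job + (jobConfig == null ? " (no config)" : " command=" + jobConfig.getCommand()));
    }
  }
}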
Use of org.apache.helix.task.WorkflowConfig in project helix by apache.
The class TestJobFailureDependence, method testWorkflowFailureJobThreshold.
@Test
public void testWorkflowFailureJobThreshold() throws Exception {
  String queueName = TestHelper.getTestMethodName();
  // Create a queue
  LOG.info("Starting job-queue: " + queueName);
  JobQueue.Builder queueBuilder = TaskTestUtil.buildJobQueue(queueName, 0, 3);
  // Create and enqueue jobs
  List<String> currentJobNames = new ArrayList<String>();
  for (int i = 0; i < _numDbs; i++) {
    JobConfig.Builder jobConfig = new JobConfig.Builder().setCommand(MockTask.TASK_COMMAND)
        .setTargetResource(_testDbs.get(i)).setTargetPartitionStates(Sets.newHashSet("SLAVE"))
        .setIgnoreDependentJobFailure(true);
    String jobName = "job" + _testDbs.get(i);
    queueBuilder.enqueueJob(jobName, jobConfig);
    currentJobNames.add(jobName);
  }
  _driver.start(queueBuilder.build());
  _setupTool.dropResourceFromCluster(CLUSTER_NAME, _testDbs.get(1));
  String namedSpaceJob1 = String.format("%s_%s", queueName, currentJobNames.get(1));
  _driver.pollForJobState(queueName, namedSpaceJob1, TaskState.FAILED);
  String lastJob = String.format("%s_%s", queueName, currentJobNames.get(currentJobNames.size() - 1));
  _driver.pollForJobState(queueName, lastJob, TaskState.COMPLETED);
  _driver.flushQueue(queueName);
  WorkflowConfig currentWorkflowConfig = _driver.getWorkflowConfig(queueName);
  WorkflowConfig.Builder configBuilder = new WorkflowConfig.Builder(currentWorkflowConfig);
  configBuilder.setFailureThreshold(0);
  _driver.updateWorkflow(queueName, configBuilder.build());
  _driver.stop(queueName);
  for (int i = 0; i < _numDbs; i++) {
    JobConfig.Builder jobConfig = new JobConfig.Builder().setCommand(MockTask.TASK_COMMAND)
        .setTargetResource(_testDbs.get(i)).setTargetPartitionStates(Sets.newHashSet("SLAVE"))
        .setIgnoreDependentJobFailure(true);
    String jobName = "job" + _testDbs.get(i);
    queueBuilder.enqueueJob(jobName, jobConfig);
    _driver.enqueueJob(queueName, jobName, jobConfig);
  }
  _driver.resume(queueName);
  namedSpaceJob1 = String.format("%s_%s", queueName, currentJobNames.get(1));
  _driver.pollForJobState(queueName, namedSpaceJob1, TaskState.FAILED);
}
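The key WorkflowConfig move in this test is the copy-and-update pattern: read the live config, copy it into a builder, change one field, and push it back. A minimal sketch, assuming _driver and queueName are in scope as above:

// Copy the current queue config, tighten the failure threshold, and write it back.
WorkflowConfig currentConfig = _driver.getWorkflowConfig(queueName);
WorkflowConfig.Builder updated = new WorkflowConfig.Builder(currentConfig);
updated.setFailureThreshold(0); // any job failure now fails the whole queue
_driver.updateWorkflow(queueName, updated.build());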
Use of org.apache.helix.task.WorkflowConfig in project helix by apache.
The class TestJobQueueCleanUp, method testJobQueueAutoCleanUp.
@Test
public void testJobQueueAutoCleanUp() throws InterruptedException {
  int capacity = 10;
  String queueName = TestHelper.getTestMethodName();
  JobQueue.Builder builder = TaskTestUtil.buildJobQueue(queueName, capacity);
  WorkflowConfig.Builder cfgBuilder = new WorkflowConfig.Builder(builder.getWorkflowConfig());
  cfgBuilder.setJobPurgeInterval(1000);
  builder.setWorkflowConfig(cfgBuilder.build());
  // The first capacity/2 jobs succeed and expire quickly; the rest fail and are kept.
  JobConfig.Builder jobBuilder =
      new JobConfig.Builder().setTargetResource(WorkflowGenerator.DEFAULT_TGT_DB)
          .setCommand(MockTask.TASK_COMMAND).setMaxAttemptsPerTask(2)
          .setJobCommandConfigMap(
              ImmutableMap.of(MockTask.SUCCESS_COUNT_BEFORE_FAIL, String.valueOf(capacity / 2)))
          .setExpiry(200L);
  Set<String> deletedJobs = new HashSet<String>();
  Set<String> remainJobs = new HashSet<String>();
  for (int i = 0; i < capacity; i++) {
    builder.enqueueJob("JOB" + i, jobBuilder);
    if (i < capacity / 2) {
      deletedJobs.add("JOB" + i);
    } else {
      remainJobs.add(TaskUtil.getNamespacedJobName(queueName, "JOB" + i));
    }
  }
  _driver.start(builder.build());
  _driver.pollForJobState(queueName, TaskUtil.getNamespacedJobName(queueName, "JOB" + (capacity - 1)),
      TaskState.FAILED);
  // Give the purge interval a chance to fire.
  Thread.sleep(2000);
  // Only the failed (non-expired) jobs should remain in the queue's DAG and context.
  WorkflowConfig config = _driver.getWorkflowConfig(queueName);
  Assert.assertEquals(config.getJobDag().getAllNodes(), remainJobs);
  WorkflowContext context = _driver.getWorkflowContext(queueName);
  Assert.assertEquals(context.getJobStates().keySet(), remainJobs);
  Assert.assertTrue(remainJobs.containsAll(context.getJobStartTimes().keySet()));
  // Purged jobs should have no config or context left.
  for (String job : deletedJobs) {
    JobConfig cfg = _driver.getJobConfig(job);
    JobContext ctx = _driver.getJobContext(job);
    Assert.assertNull(cfg);
    Assert.assertNull(ctx);
  }
}
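Auto clean-up is driven by two settings: a short expiry on each job and a short purge interval on the queue's WorkflowConfig. For an already-running queue, the purge interval can be adjusted with the same copy-and-update pattern shown earlier, and purged jobs can be looked up by their namespaced names. A minimal sketch, assuming _driver and queueName as in the test; the 1000 ms value is illustrative:

// Adjust how often the controller purges expired jobs on a live queue.
WorkflowConfig current = _driver.getWorkflowConfig(queueName);
WorkflowConfig.Builder cfg = new WorkflowConfig.Builder(current);
cfg.setJobPurgeInterval(1000L);
_driver.updateWorkflow(queueName, cfg.build());
// Jobs live under namespaced names of the form "<queueName>_<jobName>".
String namespacedJob = TaskUtil.getNamespacedJobName(queueName, "JOB0");
JobConfig purged = _driver.getJobConfig(namespacedJob); // null once JOB0 has expired and been purged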
Use of org.apache.helix.task.WorkflowConfig in project incubator-gobblin by apache.
The class GobblinHelixJobLauncherTest, method testJobCleanup.
public void testJobCleanup() throws Exception {
  final ConcurrentHashMap<String, Boolean> runningMap = new ConcurrentHashMap<>();
  final Properties properties = generateJobProperties(this.baseConfig, "3", "_1504201348473");
  final GobblinHelixJobLauncher gobblinHelixJobLauncher =
      new GobblinHelixJobLauncher(properties, this.helixManager, this.appWorkDir, ImmutableList.<Tag<?>>of(), runningMap);
  final Properties properties2 = generateJobProperties(this.baseConfig, "33", "_1504201348474");
  final GobblinHelixJobLauncher gobblinHelixJobLauncher2 =
      new GobblinHelixJobLauncher(properties2, this.helixManager, this.appWorkDir, ImmutableList.<Tag<?>>of(), runningMap);
  gobblinHelixJobLauncher.launchJob(null);
  gobblinHelixJobLauncher2.launchJob(null);
  final TaskDriver taskDriver = new TaskDriver(this.helixManager);
  final String jobName = properties.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  final String jobIdKey = properties.getProperty(ConfigurationKeys.JOB_ID_KEY);
  final String jobContextName = jobName + "_" + jobIdKey;
  final String jobName2 = properties2.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  org.apache.helix.task.JobContext jobContext = taskDriver.getJobContext(jobContextName);
  // job context should be present until close
  Assert.assertNotNull(jobContext);
  gobblinHelixJobLauncher.close();
  // job queue deleted asynchronously after close
  waitForQueueCleanup(taskDriver, jobName);
  jobContext = taskDriver.getJobContext(jobContextName);
  // job context should have been deleted
  Assert.assertNull(jobContext);
  // job queue should have been deleted
  WorkflowConfig workflowConfig = taskDriver.getWorkflowConfig(jobName);
  Assert.assertNull(workflowConfig);
  WorkflowContext workflowContext = taskDriver.getWorkflowContext(jobName);
  Assert.assertNull(workflowContext);
  // second job queue with shared prefix should not be deleted when the first job queue is cleaned up
  workflowConfig = taskDriver.getWorkflowConfig(jobName2);
  Assert.assertNotNull(workflowConfig);
  gobblinHelixJobLauncher2.close();
  // job queue deleted asynchronously after close
  waitForQueueCleanup(taskDriver, jobName2);
  workflowConfig = taskDriver.getWorkflowConfig(jobName2);
  Assert.assertNull(workflowConfig);
  // check that workunit and taskstate directory for the job are cleaned up
  final File workunitsDir = new File(this.appWorkDir + File.separator
      + GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME + File.separator + jobIdKey);
  final File taskstatesDir = new File(this.appWorkDir + File.separator
      + GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME + File.separator + jobIdKey);
  Assert.assertFalse(workunitsDir.exists());
  Assert.assertFalse(taskstatesDir.exists());
  // check that job.state file is cleaned up
  final File jobStateFile = new File(GobblinClusterUtils.getJobStateFilePath(true, this.appWorkDir, jobIdKey).toString());
  Assert.assertFalse(jobStateFile.exists());
}
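waitForQueueCleanup is a helper of the test class and is not shown on this page. A hypothetical equivalent, sketched here only to make the flow readable, could simply poll the TaskDriver until the workflow config disappears or a timeout is reached:

// Hypothetical stand-in for the test's waitForQueueCleanup helper (not the actual Gobblin code):
// poll until the workflow's config has been removed, giving up after roughly 30 seconds.
private void waitForQueueCleanup(TaskDriver taskDriver, String queueName) throws InterruptedException {
  long deadline = System.currentTimeMillis() + 30_000L;
  while (System.currentTimeMillis() < deadline
      && taskDriver.getWorkflowConfig(queueName) != null) {
    Thread.sleep(100L);
  }
}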
Use of org.apache.helix.task.WorkflowConfig in project incubator-gobblin by apache.
The class GobblinClusterManager, method handleLeadershipChange.
/**
 * Handle leadership change.
 * The applicationLauncher is only started on the leader.
 * The leader cleans up existing jobs before starting the applicationLauncher.
 * @param changeContext notification context
 */
@VisibleForTesting
void handleLeadershipChange(NotificationContext changeContext) {
  this.metrics.clusterLeadershipChange.update(1);
  if (this.helixManager.isLeader()) {
    // can get multiple notifications on a leadership change, so only start the application launcher
    // the first time the notification is received
    LOGGER.info("Leader notification for {} isLeader {} HM.isLeader {}", this.helixManager.getInstanceName(),
        isLeader, this.helixManager.isLeader());
    if (!isLeader) {
      LOGGER.info("New Helix Controller leader {}", this.helixManager.getInstanceName());
      // Clean up existing jobs
      TaskDriver taskDriver = new TaskDriver(this.helixManager);
      Map<String, WorkflowConfig> workflows = taskDriver.getWorkflows();
      for (Map.Entry<String, WorkflowConfig> entry : workflows.entrySet()) {
        String queueName = entry.getKey();
        WorkflowConfig workflowConfig = entry.getValue();
        // request delete if not already requested
        if (workflowConfig.getTargetState() != TargetState.DELETE) {
          taskDriver.delete(queueName);
          LOGGER.info("Requested delete of queue {}", queueName);
        }
      }
      startAppLauncherAndServices();
      isLeader = true;
    }
  } else {
    // no longer the leader: stop and reinitialize the services so they are ready to start
    // again if this cluster manager becomes the leader later
    if (isLeader) {
      isLeader = false;
      stopAppLauncherAndServices();
      try {
        initializeAppLauncherAndServices();
      } catch (Exception e) {
        throw new RuntimeException("Exception reinitializing app launcher services ", e);
      }
    }
  }
}
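A minimal sketch of the idempotent-delete guard used in the loop above, isolated from the leadership logic; driver and workflowName are assumed to be in scope:

// Only request a delete if one is not already recorded in the workflow's target state;
// delete is asynchronous, so repeated notifications would otherwise re-issue it.
WorkflowConfig workflowConfig = driver.getWorkflowConfig(workflowName);
if (workflowConfig != null && workflowConfig.getTargetState() != TargetState.DELETE) {
  driver.delete(workflowName);
}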