use of org.apache.helix.task.Workflow in project ambry by linkedin.
the class HelixClusterWideAggregationTool method main.
/**
 * @param args takes in three mandatory arguments: the ZK layout, the cluster name and the workflow name. An optional
 *             argument creates the workflow as a recurrent workflow and specifies the recurrence interval in minutes.
 *             The ZK layout has to be of the following form:
 *             {
 *               "zkInfo" : [
 *                 {
 *                   "datacenter":"dc1",
 *                   "id" : "1",
 *                   "zkConnectStr":"abc.example.com:2199",
 *                 },
 *                 {
 *                   "datacenter":"dc2",
 *                   "id" : "2",
 *                   "zkConnectStr":"def.example.com:2300",
 *                 }
 *               ]
 *             }
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
  OptionParser parser = new OptionParser();
  ArgumentAcceptingOptionSpec<String> zkLayoutPathOpt = parser.accepts("zkLayoutPath",
      "The path to the json file containing zookeeper connect info. This should be of the following form: \n{\n"
          + "  \"zkInfo\" : [\n"
          + "    {\n"
          + "      \"datacenter\":\"dc1\",\n"
          + "      \"id\":\"1\",\n"
          + "      \"zkConnectStr\":\"abc.example.com:2199\",\n"
          + "    },\n"
          + "    {\n"
          + "      \"datacenter\":\"dc2\",\n"
          + "      \"id\":\"2\",\n"
          + "      \"zkConnectStr\":\"def.example.com:2300\",\n"
          + "    },\n"
          + "    {\n"
          + "      \"datacenter\":\"dc3\",\n"
          + "      \"id\":\"3\",\n"
          + "      \"zkConnectStr\":\"ghi.example.com:2400\",\n"
          + "    }\n"
          + "  ]\n"
          + "}")
      .withRequiredArg()
      .describedAs("zk_connect_info_path")
      .ofType(String.class);
  ArgumentAcceptingOptionSpec<String> clusterNameOpt = parser.accepts("clusterName", "The cluster name in helix")
      .withRequiredArg()
      .describedAs("cluster_name")
      .ofType(String.class);
  ArgumentAcceptingOptionSpec<String> workflowNameOpt = parser.accepts("workflowName", "The name of the one-time workflow")
      .withRequiredArg()
      .describedAs("workflow_name")
      .ofType(String.class);
  ArgumentAcceptingOptionSpec<Long> recurrentIntervalInMinutesOpt =
      parser.accepts("recurrentIntervalInMinutes", "The frequency for the recurrent workflow")
          .withOptionalArg()
          .describedAs("recurrent_interval_in_minutes")
          .ofType(Long.class)
          .defaultsTo(Utils.Infinite_Time);
  parser.accepts("delete", "Flag to remove the given workflow from the cluster(s) instead of creating one");
  OptionSet options = parser.parse(args);
  Boolean isDelete = options.has("delete");
  String zkLayoutPath = options.valueOf(zkLayoutPathOpt);
  String clusterName = options.valueOf(clusterNameOpt);
  String workflowName = options.valueOf(workflowNameOpt);
  Long recurrentIntervalInMinutes = options.valueOf(recurrentIntervalInMinutesOpt);
  Map<String, ClusterMapUtils.DcZkInfo> dataCenterToZKAddress =
      ClusterMapUtils.parseDcJsonAndPopulateDcInfo(Utils.readStringFromFile(zkLayoutPath));
  for (ClusterMapUtils.DcZkInfo zkInfo : dataCenterToZKAddress.values()) {
    String zkAddress = zkInfo.getZkConnectStr();
    ZkClient zkClient = new ZkClient(zkAddress, SESSION_TIMEOUT, CONNECTION_TIMEOUT, new ZNRecordSerializer());
    TaskDriver taskDriver = new TaskDriver(zkClient, clusterName);
    if (isDelete) {
      try {
        taskDriver.stop(workflowName);
        taskDriver.delete(workflowName);
      } catch (Exception | Error e) {
        System.out.println(
            String.format("Failed to delete %s. Workflow not found in cluster %s at %s", workflowName, clusterName,
                zkAddress));
      }
    } else {
      try {
        Workflow.Builder workflowBuilder = new Workflow.Builder(workflowName);
        String jobId = ONE_TIME_JOB_ID;
        if (recurrentIntervalInMinutes != Utils.Infinite_Time) {
          jobId = RECURRENT_JOB_ID;
          workflowBuilder.setScheduleConfig(ScheduleConfig.recurringFromNow(TimeUnit.MINUTES, recurrentIntervalInMinutes));
          workflowBuilder.setExpiry(TimeUnit.MINUTES.toMillis(recurrentIntervalInMinutes));
        }
        JobConfig.Builder jobConfigBuilder = new JobConfig.Builder();
        List<TaskConfig> taskConfigs = new ArrayList<>();
        taskConfigs.add(new TaskConfig.Builder().setTaskId(TASK_ID)
            .setCommand(String.format("%s_%s", HelixHealthReportAggregatorTask.TASK_COMMAND_PREFIX, REPORT_NAME))
            .build());
        jobConfigBuilder.addTaskConfigs(taskConfigs);
        workflowBuilder.addJob(jobId, jobConfigBuilder);
        Workflow workflow = workflowBuilder.build();
        taskDriver.start(workflow);
        System.out.println(String.format("%s_%s started successfully", workflowName, jobId));
      } catch (Exception | Error e) {
        System.out.println(String.format("Failed to start %s in cluster %s at %s", workflowName, clusterName, zkAddress));
      }
    }
  }
}
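The tool is driven entirely by the options parsed above. As a minimal sketch of an invocation, the following wrapper schedules the aggregation workflow in every datacenter listed in the ZK layout file; only the option names come from the parser definitions above, while the file path, cluster name and workflow name are hypothetical.
public class AggregationToolInvocationExample {
  public static void main(String[] args) throws Exception {
    // Re-runs the workflow every 60 minutes; omit recurrentIntervalInMinutes for a one-time workflow.
    HelixClusterWideAggregationTool.main(new String[]{
        "--zkLayoutPath", "/tmp/zk_layout.json",      // hypothetical path to the JSON layout shown in the javadoc
        "--clusterName", "AmbryCluster",              // hypothetical Helix cluster name
        "--workflowName", "aggregation_workflow",     // hypothetical workflow name
        "--recurrentIntervalInMinutes", "60"
    });
  }
}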
use of org.apache.helix.task.Workflow in project helix by apache.
the class TestTaskRebalancer method testExpiry.
@Test
public void testExpiry() throws Exception {
  String jobName = "Expiry";
  long expiry = 1000;
  Map<String, String> commandConfig = ImmutableMap.of(TIMEOUT_CONFIG, String.valueOf(100));
  JobConfig.Builder jobBuilder = JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
  jobBuilder.setJobCommandConfigMap(commandConfig);
  Workflow flow = WorkflowGenerator.generateSingleJobWorkflowBuilder(jobName, jobBuilder).setExpiry(expiry).build();
  _driver.start(flow);
  _driver.pollForWorkflowState(jobName, TaskState.IN_PROGRESS);
  // Running workflow should have config and context viewable through accessor
  HelixDataAccessor accessor = _manager.getHelixDataAccessor();
  PropertyKey workflowCfgKey = accessor.keyBuilder().resourceConfig(jobName);
  String workflowPropStoreKey = Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT, jobName);
  // Ensure context and config exist
  Assert.assertTrue(_manager.getHelixPropertyStore().exists(workflowPropStoreKey, AccessOption.PERSISTENT));
  Assert.assertNotSame(accessor.getProperty(workflowCfgKey), null);
  // Wait for job to finish and expire
  _driver.pollForWorkflowState(jobName, TaskState.COMPLETED);
  Thread.sleep(expiry + 100);
  // Ensure workflow config and context were cleaned up by now
  Assert.assertFalse(_manager.getHelixPropertyStore().exists(workflowPropStoreKey, AccessOption.PERSISTENT));
  Assert.assertEquals(accessor.getProperty(workflowCfgKey), null);
}
use of org.apache.helix.task.Workflow in project helix by apache.
the class TestTaskRebalancer method partitionSet.
@Test
public void partitionSet() throws Exception {
  final String jobResource = "partitionSet";
  ImmutableList<String> targetPartitions =
      ImmutableList.of("TestDB_1", "TestDB_2", "TestDB_3", "TestDB_5", "TestDB_8", "TestDB_13");
  // construct and submit our basic workflow
  Map<String, String> commandConfig = ImmutableMap.of(TIMEOUT_CONFIG, String.valueOf(100));
  JobConfig.Builder jobBuilder = JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
  jobBuilder.setJobCommandConfigMap(commandConfig).setMaxAttemptsPerTask(1).setTargetPartitions(targetPartitions);
  Workflow flow = WorkflowGenerator.generateSingleJobWorkflowBuilder(jobResource, jobBuilder).build();
  _driver.start(flow);
  // wait for job completeness/timeout
  _driver.pollForWorkflowState(jobResource, TaskState.COMPLETED);
  // see if resulting context completed successfully for our partition set
  String namespacedName = TaskUtil.getNamespacedJobName(jobResource);
  JobContext ctx = _driver.getJobContext(namespacedName);
  WorkflowContext workflowContext = _driver.getWorkflowContext(jobResource);
  Assert.assertNotNull(ctx);
  Assert.assertNotNull(workflowContext);
  Assert.assertEquals(workflowContext.getJobState(namespacedName), TaskState.COMPLETED);
  for (String pName : targetPartitions) {
    int i = ctx.getPartitionsByTarget().get(pName).get(0);
    Assert.assertEquals(ctx.getPartitionState(i), TaskPartitionState.COMPLETED);
    Assert.assertEquals(ctx.getPartitionNumAttempts(i), 1);
  }
}
use of org.apache.helix.task.Workflow in project helix by apache.
the class TestTaskRebalancer method timeouts.
@Test
public void timeouts() throws Exception {
  final String jobResource = "timeouts";
  JobConfig.Builder jobBuilder = JobConfig.Builder.fromMap(WorkflowGenerator.DEFAULT_JOB_CONFIG);
  jobBuilder.setJobCommandConfigMap(WorkflowGenerator.DEFAULT_COMMAND_CONFIG).setMaxAttemptsPerTask(2).setTimeoutPerTask(100);
  Workflow flow = WorkflowGenerator.generateSingleJobWorkflowBuilder(jobResource, jobBuilder).build();
  _driver.start(flow);
  // Wait until the job reports failure.
  _driver.pollForWorkflowState(jobResource, TaskState.FAILED);
  // Check that all partitions timed out up to maxAttempts
  JobContext ctx = _driver.getJobContext(TaskUtil.getNamespacedJobName(jobResource));
  int maxAttempts = 0;
  boolean sawTimedoutTask = false;
  for (int i = 0; i < _numParitions; i++) {
    TaskPartitionState state = ctx.getPartitionState(i);
    if (state != null) {
      if (state == TaskPartitionState.TIMED_OUT) {
        sawTimedoutTask = true;
      }
      // At least one task timed out; others might be aborted due to the job failure.
      Assert.assertTrue(state == TaskPartitionState.TIMED_OUT || state == TaskPartitionState.TASK_ABORTED);
      maxAttempts = Math.max(maxAttempts, ctx.getPartitionNumAttempts(i));
    }
  }
  Assert.assertTrue(sawTimedoutTask);
  Assert.assertEquals(maxAttempts, 2);
}
use of org.apache.helix.task.Workflow in project helix by apache.
the class TestTaskRebalancer method testRepeatedWorkflow.
@Test
public void testRepeatedWorkflow() throws Exception {
  String workflowName = "SomeWorkflow";
  Workflow flow = WorkflowGenerator.generateDefaultRepeatedJobWorkflowBuilder(workflowName).build();
  new TaskDriver(_manager).start(flow);
  // Wait until the workflow completes
  _driver.pollForWorkflowState(workflowName, TaskState.COMPLETED);
  // Assert completion for all tasks within two minutes
  for (String task : flow.getJobConfigs().keySet()) {
    _driver.pollForJobState(workflowName, task, TaskState.COMPLETED);
  }
}
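The tests above build single-job or repeated-job workflows through WorkflowGenerator. A minimal sketch of wiring an explicit dependency between two jobs with the same Workflow.Builder API is shown below; the job names and the "Backup"/"Cleanup" task commands are assumptions for illustration, not taken from the tests.
// Sketch: a two-job workflow where "cleanup" only runs after "backup" finishes.
Workflow.Builder builder = new Workflow.Builder("BackupThenCleanup");
JobConfig.Builder backupJob = new JobConfig.Builder().setCommand("Backup");   // assumed task command
JobConfig.Builder cleanupJob = new JobConfig.Builder().setCommand("Cleanup"); // assumed task command
builder.addJob("backup", backupJob);
builder.addJob("cleanup", cleanupJob);
builder.addParentChildDependency("backup", "cleanup"); // "backup" is the parent of "cleanup"
_driver.start(builder.build()); // started through a TaskDriver, as in the tests above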