use of org.apache.helix.task.TaskDriver in project ambry by linkedin.
the class HelixClusterWideAggregationTool method main.
/**
* @param args takes in three mandatory arguments: the ZK layout, the cluster name and the workflow name. An optional
* argument creates the workflow as a recurrent workflow and specifies the recurrence interval.
* The ZK layout has to be of the following form:
* {
*   "zkInfo" : [
*     {
*       "datacenter":"dc1",
*       "id" : "1",
*       "zkConnectStr":"abc.example.com:2199",
*     },
*     {
*       "datacenter":"dc2",
*       "id" : "2",
*       "zkConnectStr":"def.example.com:2300",
*     }
*   ]
* }
* @throws Exception
*/
public static void main(String[] args) throws Exception {
  OptionParser parser = new OptionParser();
  ArgumentAcceptingOptionSpec<String> zkLayoutPathOpt = parser.accepts("zkLayoutPath", "The path to the json file containing zookeeper connect info. This should be of the following form: \n{\n" + " \"zkInfo\" : [\n" + " {\n" + " \"datacenter\":\"dc1\",\n" + " \"id\":\"1\",\n" + " \"zkConnectStr\":\"abc.example.com:2199\",\n" + " },\n" + " {\n" + " \"datacenter\":\"dc2\",\n" + " \"id\":\"2\",\n" + " \"zkConnectStr\":\"def.example.com:2300\",\n" + " },\n" + " {\n" + " \"datacenter\":\"dc3\",\n" + " \"id\":\"3\",\n" + " \"zkConnectStr\":\"ghi.example.com:2400\",\n" + " }\n" + " ]\n" + "}").withRequiredArg().describedAs("zk_connect_info_path").ofType(String.class);
  ArgumentAcceptingOptionSpec<String> clusterNameOpt = parser.accepts("clusterName", "The cluster name in helix").withRequiredArg().describedAs("cluster_name").ofType(String.class);
  ArgumentAcceptingOptionSpec<String> workflowNameOpt = parser.accepts("workflowName", "The name of the one-time workflow").withRequiredArg().describedAs("workflow_name").ofType(String.class);
  ArgumentAcceptingOptionSpec<Long> recurrentIntervalInMinutesOpt = parser.accepts("recurrentIntervalInMinutes", "The frequency for the recurrent workflow").withOptionalArg().describedAs("recurrent_interval_in_minutes").ofType(Long.class).defaultsTo(Utils.Infinite_Time);
  parser.accepts("delete", "Flag to remove the given workflow from the cluster(s) instead of creating one");
  OptionSet options = parser.parse(args);
  Boolean isDelete = options.has("delete");
  String zkLayoutPath = options.valueOf(zkLayoutPathOpt);
  String clusterName = options.valueOf(clusterNameOpt);
  String workflowName = options.valueOf(workflowNameOpt);
  Long recurrentIntervalInMinutes = options.valueOf(recurrentIntervalInMinutesOpt);
  Map<String, ClusterMapUtils.DcZkInfo> dataCenterToZKAddress = ClusterMapUtils.parseDcJsonAndPopulateDcInfo(Utils.readStringFromFile(zkLayoutPath));
  for (ClusterMapUtils.DcZkInfo zkInfo : dataCenterToZKAddress.values()) {
    String zkAddress = zkInfo.getZkConnectStr();
    ZkClient zkClient = new ZkClient(zkAddress, SESSION_TIMEOUT, CONNECTION_TIMEOUT, new ZNRecordSerializer());
    TaskDriver taskDriver = new TaskDriver(zkClient, clusterName);
    if (isDelete) {
      try {
        taskDriver.stop(workflowName);
        taskDriver.delete(workflowName);
      } catch (Exception | Error e) {
        System.out.println(String.format("Failed to delete %s. Workflow not found in cluster %s at %s", workflowName, clusterName, zkAddress));
      }
    } else {
      try {
        Workflow.Builder workflowBuilder = new Workflow.Builder(workflowName);
        String jobId = ONE_TIME_JOB_ID;
        if (recurrentIntervalInMinutes != Utils.Infinite_Time) {
          jobId = RECURRENT_JOB_ID;
          workflowBuilder.setScheduleConfig(ScheduleConfig.recurringFromNow(TimeUnit.MINUTES, recurrentIntervalInMinutes));
          workflowBuilder.setExpiry(TimeUnit.MINUTES.toMillis(recurrentIntervalInMinutes));
        }
        JobConfig.Builder jobConfigBuilder = new JobConfig.Builder();
        List<TaskConfig> taskConfigs = new ArrayList<>();
        taskConfigs.add(new TaskConfig.Builder().setTaskId(TASK_ID).setCommand(String.format("%s_%s", HelixHealthReportAggregatorTask.TASK_COMMAND_PREFIX, REPORT_NAME)).build());
        jobConfigBuilder.addTaskConfigs(taskConfigs);
        workflowBuilder.addJob(jobId, jobConfigBuilder);
        Workflow workflow = workflowBuilder.build();
        taskDriver.start(workflow);
        System.out.println(String.format("%s_%s started successfully", workflowName, jobId));
      } catch (Exception | Error e) {
        System.out.println(String.format("Failed to start %s in cluster %s at %s", workflowName, clusterName, zkAddress));
      }
    }
  }
}
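The tool starts the workflow and exits without waiting for it to finish. As a rough sketch of how a caller could verify the result, the fragment below could follow taskDriver.start(workflow) inside the try block; the pollTimeoutMs value, the broad catch, and the availability of the timed pollForWorkflowState overload are assumptions for illustration and are not part of the ambry tool.
// Hypothetical follow-up, not in the original tool: block until the workflow reaches a terminal state.
long pollTimeoutMs = TimeUnit.MINUTES.toMillis(10);
try {
  TaskState finalState = taskDriver.pollForWorkflowState(workflowName, pollTimeoutMs, TaskState.COMPLETED, TaskState.FAILED);
  System.out.println(String.format("Workflow %s finished in state %s", workflowName, finalState));
} catch (Exception e) {
  System.out.println(String.format("Workflow %s did not reach a terminal state within %d ms", workflowName, pollTimeoutMs));
}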
use of org.apache.helix.task.TaskDriver in project incubator-gobblin by apache.
the class GobblinHelixJobLauncherTest method testJobCleanup.
public void testJobCleanup() throws Exception {
  final ConcurrentHashMap<String, Boolean> runningMap = new ConcurrentHashMap<>();
  final Properties properties = generateJobProperties(this.baseConfig, "3", "_1504201348473");
  final GobblinHelixJobLauncher gobblinHelixJobLauncher = new GobblinHelixJobLauncher(properties, this.helixManager, this.appWorkDir, ImmutableList.<Tag<?>>of(), runningMap);
  final Properties properties2 = generateJobProperties(this.baseConfig, "33", "_1504201348474");
  final GobblinHelixJobLauncher gobblinHelixJobLauncher2 = new GobblinHelixJobLauncher(properties2, this.helixManager, this.appWorkDir, ImmutableList.<Tag<?>>of(), runningMap);
  gobblinHelixJobLauncher.launchJob(null);
  gobblinHelixJobLauncher2.launchJob(null);
  final TaskDriver taskDriver = new TaskDriver(this.helixManager);
  final String jobName = properties.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  final String jobIdKey = properties.getProperty(ConfigurationKeys.JOB_ID_KEY);
  final String jobContextName = jobName + "_" + jobIdKey;
  final String jobName2 = properties2.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  org.apache.helix.task.JobContext jobContext = taskDriver.getJobContext(jobContextName);
  // job context should be present until close
  Assert.assertNotNull(jobContext);
  gobblinHelixJobLauncher.close();
  // job queue deleted asynchronously after close
  waitForQueueCleanup(taskDriver, jobName);
  jobContext = taskDriver.getJobContext(jobContextName);
  // job context should have been deleted
  Assert.assertNull(jobContext);
  // job queue should have been deleted
  WorkflowConfig workflowConfig = taskDriver.getWorkflowConfig(jobName);
  Assert.assertNull(workflowConfig);
  WorkflowContext workflowContext = taskDriver.getWorkflowContext(jobName);
  Assert.assertNull(workflowContext);
  // second job queue with shared prefix should not be deleted when the first job queue is cleaned up
  workflowConfig = taskDriver.getWorkflowConfig(jobName2);
  Assert.assertNotNull(workflowConfig);
  gobblinHelixJobLauncher2.close();
  // job queue deleted asynchronously after close
  waitForQueueCleanup(taskDriver, jobName2);
  workflowConfig = taskDriver.getWorkflowConfig(jobName2);
  Assert.assertNull(workflowConfig);
  // check that workunit and taskstate directory for the job are cleaned up
  final File workunitsDir = new File(this.appWorkDir + File.separator + GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME + File.separator + jobIdKey);
  final File taskstatesDir = new File(this.appWorkDir + File.separator + GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME + File.separator + jobIdKey);
  Assert.assertFalse(workunitsDir.exists());
  Assert.assertFalse(taskstatesDir.exists());
  // check that job.state file is cleaned up
  final File jobStateFile = new File(GobblinClusterUtils.getJobStateFilePath(true, this.appWorkDir, jobIdKey).toString());
  Assert.assertFalse(jobStateFile.exists());
}
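The waitForQueueCleanup helper called above is defined elsewhere in the test class and its body is not shown here. A minimal sketch of what such a helper might look like, assuming it simply polls TaskDriver.getWorkflowConfig until the workflow disappears or a deadline passes, is:
// Hypothetical helper, not the actual Gobblin implementation: poll until the Helix
// workflow config for the given queue is gone or a fixed deadline expires.
private void waitForQueueCleanup(TaskDriver taskDriver, String queueName) throws InterruptedException {
  long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(2);
  while (taskDriver.getWorkflowConfig(queueName) != null && System.currentTimeMillis() < deadline) {
    Thread.sleep(1000);
  }
}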
use of org.apache.helix.task.TaskDriver in project incubator-gobblin by apache.
the class GobblinClusterManager method handleLeadershipChange.
/**
* Handle leadership change.
* The applicationLauncher is only started on the leader.
* The leader cleans up existing jobs before starting the applicationLauncher.
* @param changeContext notification context
*/
@VisibleForTesting
void handleLeadershipChange(NotificationContext changeContext) {
  this.metrics.clusterLeadershipChange.update(1);
  if (this.helixManager.isLeader()) {
    // can get multiple notifications on a leadership change, so only start the application launcher the first time
    // the notification is received
    LOGGER.info("Leader notification for {} isLeader {} HM.isLeader {}", this.helixManager.getInstanceName(), isLeader, this.helixManager.isLeader());
    if (!isLeader) {
      LOGGER.info("New Helix Controller leader {}", this.helixManager.getInstanceName());
      // Clean up existing jobs
      TaskDriver taskDriver = new TaskDriver(this.helixManager);
      Map<String, WorkflowConfig> workflows = taskDriver.getWorkflows();
      for (Map.Entry<String, WorkflowConfig> entry : workflows.entrySet()) {
        String queueName = entry.getKey();
        WorkflowConfig workflowConfig = entry.getValue();
        // request delete if not already requested
        if (workflowConfig.getTargetState() != TargetState.DELETE) {
          taskDriver.delete(queueName);
          LOGGER.info("Requested delete of queue {}", queueName);
        }
      }
      startAppLauncherAndServices();
      isLeader = true;
    }
  } else {
    // no longer the leader: stop and reinitialize the app launcher and services;
    // this prepares them to start when this cluster manager becomes a leader again
    if (isLeader) {
      isLeader = false;
      stopAppLauncherAndServices();
      try {
        initializeAppLauncherAndServices();
      } catch (Exception e) {
        throw new RuntimeException("Exception reinitializing app launcher services ", e);
      }
    }
  }
}
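A handler like this is typically driven by Helix controller-change callbacks. The sketch below shows one way it might be wired up; the listener registration shown is an assumption for illustration and is not necessarily how GobblinClusterManager registers itself.
// Hypothetical wiring: invoke the handler whenever Helix reports a controller change.
// The actual registration in GobblinClusterManager may differ.
helixManager.addControllerListener(new ControllerChangeListener() {
  @Override
  public void onControllerChange(NotificationContext changeContext) {
    handleLeadershipChange(changeContext);
  }
});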
use of org.apache.helix.task.TaskDriver in project helix by apache.
the class TestTaskRebalancer method testRepeatedWorkflow.
@Test
public void testRepeatedWorkflow() throws Exception {
  String workflowName = "SomeWorkflow";
  Workflow flow = WorkflowGenerator.generateDefaultRepeatedJobWorkflowBuilder(workflowName).build();
  new TaskDriver(_manager).start(flow);
  // Wait until the workflow completes
  _driver.pollForWorkflowState(workflowName, TaskState.COMPLETED);
  // Assert completion for all tasks within two minutes
  for (String task : flow.getJobConfigs().keySet()) {
    _driver.pollForJobState(workflowName, task, TaskState.COMPLETED);
  }
}
use of org.apache.helix.task.TaskDriver in project helix by apache.
the class TestGenericTaskAssignmentCalculator method beforeClass.
@BeforeClass
public void beforeClass() throws Exception {
  _participants = new MockParticipantManager[_numNodes];
  String namespace = "/" + CLUSTER_NAME;
  if (_gZkClient.exists(namespace)) {
    _gZkClient.deleteRecursively(namespace);
  }
  // Setup cluster and instances
  ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
  setupTool.addCluster(CLUSTER_NAME, true);
  for (int i = 0; i < _numNodes; i++) {
    String storageNodeName = PARTICIPANT_PREFIX + "_" + (_startPort + i);
    setupTool.addInstanceToCluster(CLUSTER_NAME, storageNodeName);
  }
  // start dummy participants
  for (int i = 0; i < _numNodes; i++) {
    final String instanceName = PARTICIPANT_PREFIX + "_" + (_startPort + i);
    // Set task callbacks
    Map<String, TaskFactory> taskFactoryReg = new HashMap<String, TaskFactory>();
    taskFactoryReg.put("TaskOne", new TaskFactory() {

      @Override
      public Task createNewTask(TaskCallbackContext context) {
        return new TaskOne(context, instanceName);
      }
    });
    _participants[i] = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, instanceName);
    // Register a Task state model factory.
    StateMachineEngine stateMachine = _participants[i].getStateMachineEngine();
    stateMachine.registerStateModelFactory("Task", new TaskStateModelFactory(_participants[i], taskFactoryReg));
    _participants[i].syncStart();
  }
  // Start controller
  String controllerName = CONTROLLER_PREFIX + "_0";
  _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, controllerName);
  _controller.syncStart();
  // Start an admin connection
  _manager = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, "Admin", InstanceType.ADMINISTRATOR, ZK_ADDR);
  _manager.connect();
  _driver = new TaskDriver(_manager);
  Map<String, String> taskConfigMap = Maps.newHashMap();
  _taskConfig = new TaskConfig("TaskOne", taskConfigMap);
  _jobCommandMap = Maps.newHashMap();
}
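The TaskOne class returned by the factory above is defined elsewhere in the test class. As a rough sketch of the shape such a mock task usually takes, the body below is an assumption for illustration and is not the actual TaskOne implementation:
// Hypothetical mock task, for illustration only: it completes immediately and reports
// the instance it ran on. The real TaskOne in this test class may behave differently.
private static class TaskOne implements Task {
  private final String _instanceName;

  TaskOne(TaskCallbackContext context, String instanceName) {
    _instanceName = instanceName;
  }

  @Override
  public TaskResult run() {
    return new TaskResult(TaskResult.Status.COMPLETED, "Ran on " + _instanceName);
  }

  @Override
  public void cancel() {
  }
}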