Example 11 with TaskDriver

use of org.apache.helix.task.TaskDriver in project ambry by linkedin.

the class HelixClusterWideAggregationTool method main.

/**
 * @param args takes in three mandatory arguments: the ZK layout, the cluster name, and the workflow name. An
 *             optional argument creates the workflow as a recurrent workflow and specifies the recurrence
 *             interval. The ZK layout has to be of the following form:
 *             {
 *               "zkInfo" : [
 *                 {
 *                   "datacenter":"dc1",
 *                   "id" : "1",
 *                   "zkConnectStr":"abc.example.com:2199"
 *                 },
 *                 {
 *                   "datacenter":"dc2",
 *                   "id" : "2",
 *                   "zkConnectStr":"def.example.com:2300"
 *                 }
 *               ]
 *             }
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    OptionParser parser = new OptionParser();
    ArgumentAcceptingOptionSpec<String> zkLayoutPathOpt = parser.accepts("zkLayoutPath",
        "The path to the json file containing zookeeper connect info. This should be of the following form: \n{\n"
            + "  \"zkInfo\" : [\n"
            + "     {\n"
            + "       \"datacenter\":\"dc1\",\n"
            + "       \"id\":\"1\",\n"
            + "       \"zkConnectStr\":\"abc.example.com:2199\"\n"
            + "     },\n"
            + "     {\n"
            + "       \"datacenter\":\"dc2\",\n"
            + "       \"id\":\"2\",\n"
            + "       \"zkConnectStr\":\"def.example.com:2300\"\n"
            + "     },\n"
            + "     {\n"
            + "       \"datacenter\":\"dc3\",\n"
            + "       \"id\":\"3\",\n"
            + "       \"zkConnectStr\":\"ghi.example.com:2400\"\n"
            + "     }\n"
            + "  ]\n"
            + "}").withRequiredArg().describedAs("zk_connect_info_path").ofType(String.class);
    ArgumentAcceptingOptionSpec<String> clusterNameOpt = parser.accepts("clusterName", "The cluster name in helix").withRequiredArg().describedAs("cluster_name").ofType(String.class);
    ArgumentAcceptingOptionSpec<String> workflowNameOpt = parser.accepts("workflowName", "The name of the one-time workflow").withRequiredArg().describedAs("workflow_name").ofType(String.class);
    ArgumentAcceptingOptionSpec<Long> recurrentIntervalInMinutesOpt = parser.accepts("recurrentIntervalInMinutes", "The frequency for the recurrent workflow").withOptionalArg().describedAs("recurrent_interval_in_minutes").ofType(Long.class).defaultsTo(Utils.Infinite_Time);
    parser.accepts("delete", "Flag to remove the given workflow from the cluster(s) instead of creating one");
    OptionSet options = parser.parse(args);
    Boolean isDelete = options.has("delete");
    String zkLayoutPath = options.valueOf(zkLayoutPathOpt);
    String clusterName = options.valueOf(clusterNameOpt);
    String workflowName = options.valueOf(workflowNameOpt);
    Long recurrentIntervalInMinutes = options.valueOf(recurrentIntervalInMinutesOpt);
    Map<String, ClusterMapUtils.DcZkInfo> dataCenterToZKAddress = ClusterMapUtils.parseDcJsonAndPopulateDcInfo(Utils.readStringFromFile(zkLayoutPath));
    for (ClusterMapUtils.DcZkInfo zkInfo : dataCenterToZKAddress.values()) {
        String zkAddress = zkInfo.getZkConnectStr();
        ZkClient zkClient = new ZkClient(zkAddress, SESSION_TIMEOUT, CONNECTION_TIMEOUT, new ZNRecordSerializer());
        TaskDriver taskDriver = new TaskDriver(zkClient, clusterName);
        if (isDelete) {
            try {
                taskDriver.stop(workflowName);
                taskDriver.delete(workflowName);
            } catch (Exception | Error e) {
                System.out.println(String.format("Failed to delete %s. Workflow not found in cluster %s at %s", workflowName, clusterName, zkAddress));
            }
        } else {
            try {
                Workflow.Builder workflowBuilder = new Workflow.Builder(workflowName);
                String jobId = ONE_TIME_JOB_ID;
                if (recurrentIntervalInMinutes != Utils.Infinite_Time) {
                    jobId = RECURRENT_JOB_ID;
                    workflowBuilder.setScheduleConfig(ScheduleConfig.recurringFromNow(TimeUnit.MINUTES, recurrentIntervalInMinutes));
                    workflowBuilder.setExpiry(TimeUnit.MINUTES.toMillis(recurrentIntervalInMinutes));
                }
                JobConfig.Builder jobConfigBuilder = new JobConfig.Builder();
                List<TaskConfig> taskConfigs = new ArrayList<>();
                taskConfigs.add(new TaskConfig.Builder().setTaskId(TASK_ID).setCommand(String.format("%s_%s", HelixHealthReportAggregatorTask.TASK_COMMAND_PREFIX, REPORT_NAME)).build());
                jobConfigBuilder.addTaskConfigs(taskConfigs);
                workflowBuilder.addJob(jobId, jobConfigBuilder);
                Workflow workflow = workflowBuilder.build();
                taskDriver.start(workflow);
                System.out.println(String.format("%s_%s started successfully", workflowName, jobId));
            } catch (Exception | Error e) {
                System.out.println(String.format("Failed to start %s in cluster %s at %s", workflowName, clusterName, zkAddress));
            }
        }
    }
}
Also used : TaskDriver(org.apache.helix.task.TaskDriver) ArrayList(java.util.ArrayList) TaskConfig(org.apache.helix.task.TaskConfig) OptionParser(joptsimple.OptionParser) JobConfig(org.apache.helix.task.JobConfig) ZNRecordSerializer(org.apache.helix.manager.zk.ZNRecordSerializer) ZkClient(org.apache.helix.manager.zk.ZkClient) Workflow(org.apache.helix.task.Workflow) OptionSet(joptsimple.OptionSet)
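
A note on the pattern: taskDriver.start(workflow) returns as soon as the workflow is submitted, so the tool above does not wait for the aggregation to finish. A caller that needs to block can use the polling API that Example 14 below exercises; a minimal sketch, assuming the caller still holds the TaskDriver and workflow name (the helper name is hypothetical):

import org.apache.helix.task.TaskDriver;
import org.apache.helix.task.TaskState;

// Hypothetical helper, not part of the tool above: block until the workflow
// completes, using the same polling call shown in Example 14.
static void awaitWorkflow(TaskDriver taskDriver, String workflowName) throws InterruptedException {
    taskDriver.pollForWorkflowState(workflowName, TaskState.COMPLETED);
}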

Example 12 with TaskDriver

use of org.apache.helix.task.TaskDriver in project incubator-gobblin by apache.

the class GobblinHelixJobLauncherTest method testJobCleanup.

public void testJobCleanup() throws Exception {
    final ConcurrentHashMap<String, Boolean> runningMap = new ConcurrentHashMap<>();
    final Properties properties = generateJobProperties(this.baseConfig, "3", "_1504201348473");
    final GobblinHelixJobLauncher gobblinHelixJobLauncher = new GobblinHelixJobLauncher(properties, this.helixManager, this.appWorkDir, ImmutableList.<Tag<?>>of(), runningMap);
    final Properties properties2 = generateJobProperties(this.baseConfig, "33", "_1504201348474");
    final GobblinHelixJobLauncher gobblinHelixJobLauncher2 = new GobblinHelixJobLauncher(properties2, this.helixManager, this.appWorkDir, ImmutableList.<Tag<?>>of(), runningMap);
    gobblinHelixJobLauncher.launchJob(null);
    gobblinHelixJobLauncher2.launchJob(null);
    final TaskDriver taskDriver = new TaskDriver(this.helixManager);
    final String jobName = properties.getProperty(ConfigurationKeys.JOB_NAME_KEY);
    final String jobIdKey = properties.getProperty(ConfigurationKeys.JOB_ID_KEY);
    final String jobContextName = jobName + "_" + jobIdKey;
    final String jobName2 = properties2.getProperty(ConfigurationKeys.JOB_NAME_KEY);
    org.apache.helix.task.JobContext jobContext = taskDriver.getJobContext(jobContextName);
    // job context should be present until close
    Assert.assertNotNull(jobContext);
    gobblinHelixJobLauncher.close();
    // job queue deleted asynchronously after close
    waitForQueueCleanup(taskDriver, jobName);
    jobContext = taskDriver.getJobContext(jobContextName);
    // job context should have been deleted
    Assert.assertNull(jobContext);
    // job queue should have been deleted
    WorkflowConfig workflowConfig = taskDriver.getWorkflowConfig(jobName);
    Assert.assertNull(workflowConfig);
    WorkflowContext workflowContext = taskDriver.getWorkflowContext(jobName);
    Assert.assertNull(workflowContext);
    // second job queue with shared prefix should not be deleted when the first job queue is cleaned up
    workflowConfig = taskDriver.getWorkflowConfig(jobName2);
    Assert.assertNotNull(workflowConfig);
    gobblinHelixJobLauncher2.close();
    // job queue deleted asynchronously after close
    waitForQueueCleanup(taskDriver, jobName2);
    workflowConfig = taskDriver.getWorkflowConfig(jobName2);
    Assert.assertNull(workflowConfig);
    // check that workunit and taskstate directory for the job are cleaned up
    final File workunitsDir = new File(this.appWorkDir + File.separator + GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME + File.separator + jobIdKey);
    final File taskstatesDir = new File(this.appWorkDir + File.separator + GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME + File.separator + jobIdKey);
    Assert.assertFalse(workunitsDir.exists());
    Assert.assertFalse(taskstatesDir.exists());
    // check that job.state file is cleaned up
    final File jobStateFile = new File(GobblinClusterUtils.getJobStateFilePath(true, this.appWorkDir, jobIdKey).toString());
    Assert.assertFalse(jobStateFile.exists());
}
Also used : TaskDriver(org.apache.helix.task.TaskDriver) WorkflowContext(org.apache.helix.task.WorkflowContext) Properties(java.util.Properties) WorkflowConfig(org.apache.helix.task.WorkflowConfig) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) File(java.io.File)
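
The test depends on a waitForQueueCleanup helper whose body is not included in this snippet. Since the job queue is deleted asynchronously after close(), a plausible sketch is a bounded poll on TaskDriver.getWorkflowConfig until it returns null; the 60-second deadline below is an assumption, not the test's actual value:

import org.apache.helix.task.TaskDriver;

// Plausible reconstruction of the helper called above (not the actual Gobblin code):
// poll until the workflow config disappears or the assumed deadline passes.
private static void waitForQueueCleanup(TaskDriver taskDriver, String queueName) throws InterruptedException {
    long deadline = System.currentTimeMillis() + 60_000L;
    while (taskDriver.getWorkflowConfig(queueName) != null && System.currentTimeMillis() < deadline) {
        Thread.sleep(100L);
    }
}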

Example 13 with TaskDriver

use of org.apache.helix.task.TaskDriver in project incubator-gobblin by apache.

the class GobblinClusterManager method handleLeadershipChange.

/**
 * Handle leadership change.
 * The applicationLauncher is only started on the leader.
 * The leader cleans up existing jobs before starting the applicationLauncher.
 * @param changeContext notification context
 */
@VisibleForTesting
void handleLeadershipChange(NotificationContext changeContext) {
    this.metrics.clusterLeadershipChange.update(1);
    if (this.helixManager.isLeader()) {
        // can get multiple notifications on a leadership change, so only start the application launcher the first time
        // the notification is received
        LOGGER.info("Leader notification for {} isLeader {} HM.isLeader {}", this.helixManager.getInstanceName(), isLeader, this.helixManager.isLeader());
        if (!isLeader) {
            LOGGER.info("New Helix Controller leader {}", this.helixManager.getInstanceName());
            // Clean up existing jobs
            TaskDriver taskDriver = new TaskDriver(this.helixManager);
            Map<String, WorkflowConfig> workflows = taskDriver.getWorkflows();
            for (Map.Entry<String, WorkflowConfig> entry : workflows.entrySet()) {
                String queueName = entry.getKey();
                WorkflowConfig workflowConfig = entry.getValue();
                // request delete if not already requested
                if (workflowConfig.getTargetState() != TargetState.DELETE) {
                    taskDriver.delete(queueName);
                    LOGGER.info("Requested delete of queue {}", queueName);
                }
            }
            startAppLauncherAndServices();
            isLeader = true;
        }
    } else {
        // stop and reinitialize the application launcher and its services;
        // this prepares them to start when this cluster manager becomes a leader again
        if (isLeader) {
            isLeader = false;
            stopAppLauncherAndServices();
            try {
                initializeAppLauncherAndServices();
            } catch (Exception e) {
                throw new RuntimeException("Exception reinitializing app launcher services ", e);
            }
        }
    }
}
Also used : WorkflowConfig(org.apache.helix.task.WorkflowConfig) TaskDriver(org.apache.helix.task.TaskDriver) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ApplicationException(org.apache.gobblin.runtime.app.ApplicationException) InvocationTargetException(java.lang.reflect.InvocationTargetException) ParseException(org.apache.commons.cli.ParseException) IOException(java.io.IOException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
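
The cleanup loop is the heart of the leader hand-off: every workflow left over from the previous leader is marked for deletion exactly once, and workflows already marked DELETE are skipped. The same step in isolation, as a minimal sketch using only the TaskDriver calls already shown (the method name is hypothetical):

import java.util.Map;
import org.apache.helix.HelixManager;
import org.apache.helix.task.TargetState;
import org.apache.helix.task.TaskDriver;
import org.apache.helix.task.WorkflowConfig;

// Hypothetical standalone version of the cleanup step above: request deletion of
// every workflow not already marked DELETE, avoiding redundant delete requests.
static void deleteAllWorkflows(HelixManager helixManager) {
    TaskDriver taskDriver = new TaskDriver(helixManager);
    for (Map.Entry<String, WorkflowConfig> entry : taskDriver.getWorkflows().entrySet()) {
        if (entry.getValue().getTargetState() != TargetState.DELETE) {
            taskDriver.delete(entry.getKey());
        }
    }
}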

Example 14 with TaskDriver

use of org.apache.helix.task.TaskDriver in project helix by apache.

the class TestTaskRebalancer method testRepeatedWorkflow.

@Test
public void testRepeatedWorkflow() throws Exception {
    String workflowName = "SomeWorkflow";
    Workflow flow = WorkflowGenerator.generateDefaultRepeatedJobWorkflowBuilder(workflowName).build();
    new TaskDriver(_manager).start(flow);
    // Wait until the workflow completes
    _driver.pollForWorkflowState(workflowName, TaskState.COMPLETED);
    // Assert completion for all tasks within two minutes
    for (String task : flow.getJobConfigs().keySet()) {
        _driver.pollForJobState(workflowName, task, TaskState.COMPLETED);
    }
}
Also used : TaskDriver(org.apache.helix.task.TaskDriver) Workflow(org.apache.helix.task.Workflow) Test(org.testng.annotations.Test)
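
WorkflowGenerator is a Helix test utility, so the workflow built here is not visible in the snippet. A rough sketch of what a repeated-job builder might assemble, reusing the Workflow.Builder and JobConfig.Builder APIs from Example 11 (the command string and job names are placeholders, not the generator's real defaults):

import org.apache.helix.task.JobConfig;
import org.apache.helix.task.Workflow;

// Hypothetical stand-in for WorkflowGenerator.generateDefaultRepeatedJobWorkflowBuilder:
// the same job config added under several job names in one workflow.
static Workflow.Builder repeatedJobWorkflowBuilder(String workflowName, int jobCount) {
    Workflow.Builder workflowBuilder = new Workflow.Builder(workflowName);
    JobConfig.Builder jobConfigBuilder = new JobConfig.Builder().setCommand("Reindex");
    for (int i = 0; i < jobCount; i++) {
        workflowBuilder.addJob("job_" + i, jobConfigBuilder);
    }
    return workflowBuilder;
}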

Example 15 with TaskDriver

use of org.apache.helix.task.TaskDriver in project helix by apache.

the class TestGenericTaskAssignmentCalculator method beforeClass.

@BeforeClass
public void beforeClass() throws Exception {
    _participants = new MockParticipantManager[_numNodes];
    String namespace = "/" + CLUSTER_NAME;
    if (_gZkClient.exists(namespace)) {
        _gZkClient.deleteRecursively(namespace);
    }
    // Setup cluster and instances
    ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
    setupTool.addCluster(CLUSTER_NAME, true);
    for (int i = 0; i < _numNodes; i++) {
        String storageNodeName = PARTICIPANT_PREFIX + "_" + (_startPort + i);
        setupTool.addInstanceToCluster(CLUSTER_NAME, storageNodeName);
    }
    // start dummy participants
    for (int i = 0; i < _numNodes; i++) {
        final String instanceName = PARTICIPANT_PREFIX + "_" + (_startPort + i);
        // Set task callbacks
        Map<String, TaskFactory> taskFactoryReg = new HashMap<String, TaskFactory>();
        taskFactoryReg.put("TaskOne", new TaskFactory() {

            @Override
            public Task createNewTask(TaskCallbackContext context) {
                return new TaskOne(context, instanceName);
            }
        });
        _participants[i] = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, instanceName);
        // Register a Task state model factory.
        StateMachineEngine stateMachine = _participants[i].getStateMachineEngine();
        stateMachine.registerStateModelFactory("Task", new TaskStateModelFactory(_participants[i], taskFactoryReg));
        _participants[i].syncStart();
    }
    // Start controller
    String controllerName = CONTROLLER_PREFIX + "_0";
    _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, controllerName);
    _controller.syncStart();
    // Start an admin connection
    _manager = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, "Admin", InstanceType.ADMINISTRATOR, ZK_ADDR);
    _manager.connect();
    _driver = new TaskDriver(_manager);
    Map<String, String> taskConfigMap = Maps.newHashMap();
    _taskConfig = new TaskConfig("TaskOne", taskConfigMap);
    _jobCommandMap = Maps.newHashMap();
}
Also used : Task(org.apache.helix.task.Task) StateMachineEngine(org.apache.helix.participant.StateMachineEngine) MockParticipantManager(org.apache.helix.integration.manager.MockParticipantManager) HashMap(java.util.HashMap) TaskDriver(org.apache.helix.task.TaskDriver) TaskConfig(org.apache.helix.task.TaskConfig) ClusterSetup(org.apache.helix.tools.ClusterSetup) TaskCallbackContext(org.apache.helix.task.TaskCallbackContext) ClusterControllerManager(org.apache.helix.integration.manager.ClusterControllerManager) TaskFactory(org.apache.helix.task.TaskFactory) TaskStateModelFactory(org.apache.helix.task.TaskStateModelFactory) BeforeClass(org.testng.annotations.BeforeClass)
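
The TaskOne class constructed inside the factory is defined elsewhere in the test. As a bare-bones sketch, a Helix task implements run() and cancel() from org.apache.helix.task.Task; the body below is illustrative only, not the real test class:

import org.apache.helix.task.Task;
import org.apache.helix.task.TaskCallbackContext;
import org.apache.helix.task.TaskResult;

// Illustrative sketch of the TaskOne registered above: run() performs the
// partition's work and reports a status back to the Helix controller.
class TaskOne implements Task {
    private final String _instanceName;

    TaskOne(TaskCallbackContext context, String instanceName) {
        _instanceName = instanceName;
    }

    @Override
    public TaskResult run() {
        // a real task would do work here; this sketch just reports success
        return new TaskResult(TaskResult.Status.COMPLETED, "completed on " + _instanceName);
    }

    @Override
    public void cancel() {
        // nothing to interrupt in this sketch
    }
}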

Aggregations

TaskDriver (org.apache.helix.task.TaskDriver) 31
WorkflowConfig (org.apache.helix.task.WorkflowConfig) 11
Path (javax.ws.rs.Path) 9
GET (javax.ws.rs.GET) 8
IOException (java.io.IOException) 6
HashMap (java.util.HashMap) 6
HelixException (org.apache.helix.HelixException) 6
ZkClient (org.apache.helix.manager.zk.ZkClient) 6
JobConfig (org.apache.helix.task.JobConfig) 6
Workflow (org.apache.helix.task.Workflow) 6
Test (org.testng.annotations.Test) 6
ZNRecord (org.apache.helix.ZNRecord) 5
JobQueue (org.apache.helix.task.JobQueue) 4
WorkflowContext (org.apache.helix.task.WorkflowContext) 4
ObjectNode (org.codehaus.jackson.node.ObjectNode) 4
ArrayList (java.util.ArrayList) 3
Map (java.util.Map) 3
Entity (javax.ws.rs.client.Entity) 3
HelixManager (org.apache.helix.HelixManager) 3
ClusterControllerManager (org.apache.helix.integration.manager.ClusterControllerManager) 3