Search in sources :

Example 1 with DAGNode

use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.

the class AmbrosePigProgressNotificationListener method initialPlanNotification.

/**
 * Called after the job DAG has been created, but before any jobs are fired. Builds one
 * DAGNode per operator in the plan, links every node to its successors and hands the
 * resulting name-to-node map to AmbroseUtils.sendDagNodeNameMap.
 *
 * The job client, job graph and Pig properties exposed by pigConfig must all be non-null
 * when this method is invoked.
 *
 * @param scriptId the id of the script this plan belongs to; forwarded when the DAG is sent.
 * @param plan the operator plan that represents the DAG of operations. Every operator in it
 * is expected to be a MapReduceOper; each one will become a MapReduce job when it's launched.
 * If the plan contains any other kind of operator an error is logged and no DAG is built.
 * @throws RuntimeException if statsWriteService fails to initialize with an IOException.
 */
@Override
public void initialPlanNotification(String scriptId, OperatorPlan<?> plan) {
    log.info("initialPlanNotification - scriptId " + scriptId + " plan " + plan);
    // For ambrose to work, the following three must be non-null
    Preconditions.checkNotNull(pigConfig.getJobClient());
    Preconditions.checkNotNull(pigConfig.getJobGraph());
    Preconditions.checkNotNull(pigConfig.getPigProperties());
    try {
        statsWriteService.initWriteService(pigConfig.getPigProperties());
    } catch (IOException ioe) {
        throw new RuntimeException("Exception while initializing statsWriteService", ioe);
    }
    this.workflowVersion = pigConfig.getPigProperties().getProperty("pig.logical.plan.signature");
    // A cast to a parameterized type is erased at runtime and can never throw, so wrapping it
    // in try/catch does not detect a foreign plan. Inspect the operators themselves instead,
    // so that an unsupported plan is reported here rather than failing with a
    // ClassCastException in the middle of the first pass.
    for (Object operator : plan.getKeys().values()) {
        if (operator != null && !(operator instanceof MapReduceOper)) {
            log.error(String.format("Failed to cast OperatorPlan: %s", plan));
            return;
        }
    }
    @SuppressWarnings("unchecked")
    OperatorPlan<MapReduceOper> mrPlan = (OperatorPlan<MapReduceOper>) plan;
    Map<OperatorKey, MapReduceOper> planKeys = mrPlan.getKeys();
    Configuration flowConfig = new Configuration(false);
    boolean initialized = false;
    // first pass builds all nodes
    for (Map.Entry<OperatorKey, MapReduceOper> entry : planKeys.entrySet()) {
        String nodeName = entry.getKey().toString();
        MapReduceOper op = entry.getValue();
        MRScriptState scriptState = MRScriptState.get();
        String[] aliases = toArray(scriptState.getAlias(op).trim());
        String[] features = toArray(scriptState.getPigFeature(op).trim());
        if (!initialized) {
            // only the first operator's settings are merged into the shared Pig properties
            scriptState.addSettingsToConf(op, flowConfig);
            pigConfig.getPigProperties().putAll(ConfigurationUtil.toProperties(flowConfig));
            initialized = true;
        }
        PigJob job = new PigJob();
        job.setAliases(aliases);
        job.setFeatures(features);
        job.setConfiguration(pigConfig.getPigProperties());
        DAGNode<PigJob> node = new DAGNode<PigJob>(nodeName, job);
        this.dagNodeNameMap.put(node.getName(), node);
        // this shows how we can get the basic info about all nameless jobs before any execute.
        // we can traverse the plan to build a DAG of this info
        log.info("initialPlanNotification: aliases: " + Arrays.toString(aliases) + ", name: " + node.getName() + ", features: " + Arrays.toString(features));
    }
    // second pass connects the edges; it runs after the first pass so that every successor
    // can be looked up by name in dagNodeNameMap
    for (Map.Entry<OperatorKey, MapReduceOper> entry : planKeys.entrySet()) {
        DAGNode<? extends Job> node = this.dagNodeNameMap.get(entry.getKey().toString());
        List<DAGNode<? extends Job>> successorNodeList = Lists.newArrayList();
        List<MapReduceOper> successors = mrPlan.getSuccessors(entry.getValue());
        // a null successor list is treated the same as an empty one
        if (successors != null) {
            for (MapReduceOper successor : successors) {
                DAGNode<? extends Job> successorNode = this.dagNodeNameMap.get(successor.getOperatorKey().toString());
                successorNodeList.add(successorNode);
            }
        }
        node.setSuccessors(successorNodeList);
    }
    AmbroseUtils.sendDagNodeNameMap(statsWriteService, scriptId, dagNodeNameMap);
}
Also used : OperatorKey(org.apache.pig.impl.plan.OperatorKey) Configuration(org.apache.hadoop.conf.Configuration) MRScriptState(org.apache.pig.tools.pigstats.mapreduce.MRScriptState) IOException(java.io.IOException) DAGNode(com.twitter.ambrose.model.DAGNode) MapReduceOper(org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper) Job(com.twitter.ambrose.model.Job) Map(java.util.Map)

Example 2 with DAGNode

use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.

the class HRavenStatsReadService method getDagNodeNameMap.

/**
 * Looks up the DAG nodes recorded for the given workflow.
 *
 * @param workflowId the workflow id string, parsed with WorkflowId.parseString
 * @return a map of node ids to DAGNodes decoded from the flow's job graph JSON, or null when
 * no flow is found in the queue for this workflow
 * @throws IOException if one of the underlying calls fails with it
 */
@SuppressWarnings("rawtypes")
@Override
public Map<String, DAGNode> getDagNodeNameMap(String workflowId) throws IOException {
    WorkflowId parsedId = WorkflowId.parseString(workflowId);
    Flow queuedFlow = flowQueueService.getFlowFromQueue(parsedId.getCluster(), parsedId.getTimestamp(), parsedId.getFlowId());
    if (queuedFlow != null) {
        // TODO This may not work nicely with multiple type of jobs
        // See: https://github.com/twitter/ambrose/pull/131
        return JSONUtil.toObject(queuedFlow.getJobGraphJSON(), new TypeReference<Map<String, DAGNode>>() {
        });
    }
    // no such workflow
    return null;
}
Also used : WorkflowId(com.twitter.ambrose.model.WorkflowId) DAGNode(com.twitter.ambrose.model.DAGNode) Map(java.util.Map) Flow(com.twitter.hraven.Flow)

Example 3 with DAGNode

use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.

the class HiveDAGTransformer method asDAGNode.

/**
 * Builds the DAGNode representation of a single task.
 *
 * @param task the task to convert; its work object is cast to MapredWork
 * @return a new DAGNode wrapping a HiveJob built from the task's display aliases and
 * features, with an empty successor list
 */
private DAGNode<Job> asDAGNode(Task<? extends Serializable> task) {
    MapredWork work = (MapredWork) task.getWork();
    List<String> jobAliases = getAllJobAliases(getPathToAliases(work));
    String[] jobFeatures = getFeatures(work.getAllOperators(), task.getTaskTag());
    String[] shownAliases = getDisplayAliases(jobAliases);
    // DAGNode's name of a workflow is unique among all workflows
    DAGNode<Job> result = new DAGNode<Job>(
        AmbroseHiveUtil.getNodeIdFromNodeName(conf, task.getId()),
        new HiveJob(shownAliases, jobFeatures));
    // start out with no successors
    List<DAGNode<? extends Job>> noSuccessors = new ArrayList<DAGNode<? extends Job>>();
    result.setSuccessors(noSuccessors);
    return result;
}
Also used : MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) Job(com.twitter.ambrose.model.Job) DAGNode(com.twitter.ambrose.model.DAGNode)

Example 4 with DAGNode

use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.

the class PigJobTest method testFromJson.

/**
 * Parses a JOB_STARTED event from JSON and checks that the PigJob carried in its payload
 * exposes the id, aliases, features and metrics present in the JSON.
 */
@Test
public void testFromJson() throws IOException {
    // one fragment per line of the JSON document
    String json = "{\n"
        + "  \"type\" : \"JOB_STARTED\",\n"
        + "  \"payload\" : {\n"
        + "    \"name\" : \"scope-29\",\n"
        + "    \"job\" : {\n"
        + "      \"runtime\" : \"pig\",\n"
        + "      \"id\" : \"job_local_0001\",\n"
        + "      \"aliases\" : [ \"A\", \"AA\", \"B\", \"C\" ],\n"
        + "      \"features\" : [ \"GROUP_BY\", \"COMBINER\", \"MAP_PARTIALAGG\" ],\n"
        + "      \"metrics\" : {\n"
        + "        \"somemetric\": 123\n"
        + "      } \n"
        + "    },\n"
        + "    \"successorNames\" : [ ]\n"
        + "  },\n"
        + "  \"id\" : 1,\n"
        + "  \"timestamp\" : 1373560988033\n"
        + "}";
    Event event = Event.fromJson(json);
    DAGNode<PigJob> payload = (DAGNode<PigJob>) event.getPayload();
    PigJob job = payload.getJob();

    String[] expectedAliases = new String[] { "A", "AA", "B", "C" };
    String[] expectedFeatures = new String[] { "GROUP_BY", "COMBINER", "MAP_PARTIALAGG" };

    assertEquals("job_local_0001", job.getId());
    assertArrayEquals(expectedAliases, job.getAliases());
    assertArrayEquals(expectedFeatures, job.getFeatures());
    assertNotNull(job.getMetrics());
    assertEquals(123, job.getMetrics().get("somemetric"));
}
Also used : Event(com.twitter.ambrose.model.Event) DAGNode(com.twitter.ambrose.model.DAGNode) Test(org.junit.Test)

Example 5 with DAGNode

use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.

the class AmbroseCascadingGraphConverter method convert.

/**
 * Turns the flow steps in jobsGraph into DAGNodes, stored by step name in dagNamesMap, for
 * use in building the Ambrose graph. Runs in two passes: the first creates a node for every
 * vertex, the second assigns each node its successors.
 */
public void convert() {
    // every node contained in the jobs graph
    Set<?> graphVertices = jobsGraph.vertexSet();
    // pass 1: one ambrose node per vertex
    for (Object v : graphVertices) {
        BaseFlowStep flowStep = (BaseFlowStep) v;
        CascadingJob cascadingJob = new CascadingJob();
        cascadingJob.setFeatures(getNodeFeatures(flowStep));
        String stepName = flowStep.getName();
        dagNamesMap.put(stepName, new DAGNode<CascadingJob>(stepName, cascadingJob));
    }
    // pass 2: with all nodes in place, attach the successors of each one
    for (Object v : graphVertices) {
        String stepName = ((BaseFlowStep) v).getName();
        DAGNode<CascadingJob> dagNode = dagNamesMap.get(stepName);
        dagNode.setSuccessors(getNodeSuccessors(v));
    }
}
Also used : Set(java.util.Set) BaseFlowStep(cascading.flow.planner.BaseFlowStep) DAGNode(com.twitter.ambrose.model.DAGNode)

Aggregations

DAGNode (com.twitter.ambrose.model.DAGNode)12 Job (com.twitter.ambrose.model.Job)5 BaseFlowStep (cascading.flow.planner.BaseFlowStep)4 Event (com.twitter.ambrose.model.Event)3 Map (java.util.Map)3 EmbeddedAmbroseHiveProgressReporter (com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter)2 IOException (java.io.IOException)2 List (java.util.List)2 Set (java.util.Set)2 Configuration (org.apache.hadoop.conf.Configuration)2 WorkflowId (com.twitter.ambrose.model.WorkflowId)1 Flow (com.twitter.hraven.Flow)1 MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork)1 RunningJob (org.apache.hadoop.mapred.RunningJob)1 MapReduceOper (org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper)1 OperatorKey (org.apache.pig.impl.plan.OperatorKey)1 MRScriptState (org.apache.pig.tools.pigstats.mapreduce.MRScriptState)1 Test (org.junit.Test)1