use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.
the class AmbrosePigProgressNotificationListener method initialPlanNotification.
/**
 * Called after the job DAG has been created, but before any jobs are fired. Builds one
 * {@link DAGNode} per operator in the plan, links every node to its successors, and sends the
 * resulting name-to-node map to the stats write service.
 *
 * @param scriptId identifier of the script; logged and passed along when the DAG is sent.
 * @param plan the MROperPlan that represents the DAG of operations. Each operation will become
 * a MapReduce job when it's launched.
 * @throws RuntimeException if the stats write service fails to initialize.
 */
@Override
public void initialPlanNotification(String scriptId, OperatorPlan<?> plan) {
  log.info("initialPlanNotification - scriptId " + scriptId + " plan " + plan);

  // For ambrose to work the following 3 must be non-null
  Preconditions.checkNotNull(pigConfig.getJobClient());
  Preconditions.checkNotNull(pigConfig.getJobGraph());
  Preconditions.checkNotNull(pigConfig.getPigProperties());

  try {
    statsWriteService.initWriteService(pigConfig.getPigProperties());
  } catch (IOException ioe) {
    throw new RuntimeException("Exception while initializing statsWriteService", ioe);
  }

  this.workflowVersion = pigConfig.getPigProperties().getProperty("pig.logical.plan.signature");

  // The type argument is erased at runtime, so this cast can never throw here; wrapping it in
  // a try/catch (as was done previously) only added an unreachable error path.
  @SuppressWarnings("unchecked")
  OperatorPlan<MapReduceOper> mrPlan = (OperatorPlan<MapReduceOper>) plan;

  Map<OperatorKey, MapReduceOper> planKeys = mrPlan.getKeys();
  Configuration flowConfig = new Configuration(false);
  boolean initialized = false;

  // first pass builds all nodes
  for (Map.Entry<OperatorKey, MapReduceOper> entry : planKeys.entrySet()) {
    String nodeName = entry.getKey().toString();
    MapReduceOper op = entry.getValue();
    MRScriptState scriptState = MRScriptState.get();
    String[] aliases = toArray(scriptState.getAlias(op).trim());
    String[] features = toArray(scriptState.getPigFeature(op).trim());

    // Only the first operator visited contributes its settings to the pig properties.
    if (!initialized) {
      scriptState.addSettingsToConf(op, flowConfig);
      pigConfig.getPigProperties().putAll(ConfigurationUtil.toProperties(flowConfig));
      initialized = true;
    }

    PigJob job = new PigJob();
    job.setAliases(aliases);
    job.setFeatures(features);
    job.setConfiguration(pigConfig.getPigProperties());

    DAGNode<PigJob> node = new DAGNode<PigJob>(nodeName, job);
    this.dagNodeNameMap.put(node.getName(), node);

    // this shows how we can get the basic info about all nameless jobs before any execute.
    // we can traverse the plan to build a DAG of this info
    log.info("initialPlanNotification: aliases: " + Arrays.toString(aliases) + ", name: " + node.getName() + ", features: " + Arrays.toString(features));
  }

  // second pass connects the edges; it must run after every node exists so that successor
  // lookups by operator key resolve
  for (Map.Entry<OperatorKey, MapReduceOper> entry : planKeys.entrySet()) {
    DAGNode<? extends Job> node = this.dagNodeNameMap.get(entry.getKey().toString());
    List<DAGNode<? extends Job>> successorNodeList = Lists.newArrayList();
    List<MapReduceOper> successors = mrPlan.getSuccessors(entry.getValue());
    // a null successor list is treated the same as "no successors"
    if (successors != null) {
      for (MapReduceOper successor : successors) {
        DAGNode<? extends Job> successorNode = this.dagNodeNameMap.get(successor.getOperatorKey().toString());
        successorNodeList.add(successorNode);
      }
    }
    node.setSuccessors(successorNodeList);
  }

  AmbroseUtils.sendDagNodeNameMap(statsWriteService, scriptId, dagNodeNameMap);
}
use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.
the class HRavenStatsReadService method getDagNodeNameMap.
/**
 * Fetches the flow queued under the given workflow id and deserializes its job graph JSON into
 * a map of DAG nodes. Returns null if no such flow is found in the queue.
 *
 * @param workflowId the id of the workflow; parsed into cluster, timestamp and flow id
 * @return a map of nodeIds to DAGNodes, or null when the flow does not exist
 * @throws IOException
 */
@SuppressWarnings("rawtypes")
@Override
public Map<String, DAGNode> getDagNodeNameMap(String workflowId) throws IOException {
  WorkflowId parsedId = WorkflowId.parseString(workflowId);
  Flow queuedFlow = flowQueueService.getFlowFromQueue(
      parsedId.getCluster(), parsedId.getTimestamp(), parsedId.getFlowId());

  if (queuedFlow != null) {
    // TODO This may not work nicely with multiple type of jobs
    // See: https://github.com/twitter/ambrose/pull/131
    return JSONUtil.toObject(queuedFlow.getJobGraphJSON(), new TypeReference<Map<String, DAGNode>>() {
    });
  }

  // unknown workflow
  return null;
}
use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.
the class HiveDAGTransformer method asDAGNode.
/**
 * Builds a DAGNode for a task: collects the task's job aliases and features from its
 * MapredWork, wraps them in a HiveJob, and returns a node with an empty successor list.
 *
 * @param task the task to convert; its work is cast to MapredWork
 * @return a new DAGNode holding a HiveJob, with no successors set yet
 */
private DAGNode<Job> asDAGNode(Task<? extends Serializable> task) {
  MapredWork work = (MapredWork) task.getWork();

  List<String> jobAliases = getAllJobAliases(getPathToAliases(work));
  String[] jobFeatures = getFeatures(work.getAllOperators(), task.getTaskTag());
  String[] shownAliases = getDisplayAliases(jobAliases);

  // DAGNode's name of a workflow is unique among all workflows
  String nodeId = AmbroseHiveUtil.getNodeIdFromNodeName(conf, task.getId());
  HiveJob hiveJob = new HiveJob(shownAliases, jobFeatures);
  DAGNode<Job> result = new DAGNode<Job>(nodeId, hiveJob);

  // start out with no successors
  List<DAGNode<? extends Job>> noSuccessors = new ArrayList<DAGNode<? extends Job>>();
  result.setSuccessors(noSuccessors);
  return result;
}
use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.
the class PigJobTest method testFromJson.
/**
 * Parses a JOB_STARTED event from JSON and checks that the PigJob in its payload carries the
 * expected id, aliases, features and metrics.
 */
@Test
public void testFromJson() throws IOException {
  String json = new StringBuilder()
      .append("{\n")
      .append(" \"type\" : \"JOB_STARTED\",\n")
      .append(" \"payload\" : {\n")
      .append(" \"name\" : \"scope-29\",\n")
      .append(" \"job\" : {\n")
      .append(" \"runtime\" : \"pig\",\n")
      .append(" \"id\" : \"job_local_0001\",\n")
      .append(" \"aliases\" : [ \"A\", \"AA\", \"B\", \"C\" ],\n")
      .append(" \"features\" : [ \"GROUP_BY\", \"COMBINER\", \"MAP_PARTIALAGG\" ],\n")
      .append(" \"metrics\" : {\n")
      .append(" \"somemetric\": 123\n")
      .append(" } \n")
      .append(" },\n")
      .append(" \"successorNames\" : [ ]\n")
      .append(" },\n")
      .append(" \"id\" : 1,\n")
      .append(" \"timestamp\" : 1373560988033\n")
      .append("}")
      .toString();

  Event parsed = Event.fromJson(json);
  @SuppressWarnings("unchecked")
  DAGNode<PigJob> payloadNode = (DAGNode<PigJob>) parsed.getPayload();
  PigJob pigJob = payloadNode.getJob();

  String[] expectedAliases = new String[] { "A", "AA", "B", "C" };
  String[] expectedFeatures = new String[] { "GROUP_BY", "COMBINER", "MAP_PARTIALAGG" };

  assertEquals("job_local_0001", pigJob.getId());
  assertArrayEquals(expectedAliases, pigJob.getAliases());
  assertArrayEquals(expectedFeatures, pigJob.getFeatures());
  assertNotNull(pigJob.getMetrics());
  assertEquals(123, pigJob.getMetrics().get("somemetric"));
}
use of com.twitter.ambrose.model.DAGNode in project ambrose by twitter.
the class AmbroseCascadingGraphConverter method convert.
/**
 * Turns every vertex of the jobs graph into a DAGNode keyed by the step's name, then fills in
 * each node's successors. The resulting name-to-node map is what the Ambrose graph is built
 * from.
 */
public void convert() {
  // the set of nodes contained in this graph
  Set<?> graphVertices = jobsGraph.vertexSet();

  // pass 1: one ambrose node per flow step
  for (Object v : graphVertices) {
    BaseFlowStep flowStep = (BaseFlowStep) v;
    CascadingJob cascadingJob = new CascadingJob();
    cascadingJob.setFeatures(getNodeFeatures(flowStep));
    String stepName = flowStep.getName();
    dagNamesMap.put(stepName, new DAGNode<CascadingJob>(stepName, cascadingJob));
  }

  // pass 2: now that all nodes exist, attach the successors of each one
  for (Object v : graphVertices) {
    String stepName = ((BaseFlowStep) v).getName();
    DAGNode<CascadingJob> current = dagNamesMap.get(stepName);
    current.setSuccessors(getNodeSuccessors(v));
  }
}
Aggregations