Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
In class HadoopJobTracker, method processNodeLeft.
/**
* Processes node leave (or fail) event.
*
* @param evt Discovery event.
*/
@SuppressWarnings("ConstantConditions")
private void processNodeLeft(DiscoveryEvent evt) {
    if (log.isDebugEnabled())
        log.debug("Processing discovery event [locNodeId=" + ctx.localNodeId() + ", evt=" + evt + ']');

    // Check only if this node is responsible for job status updates.
    if (ctx.jobUpdateLeader()) {
        boolean checkSetup = evt.eventNode().order() < ctx.localNodeOrder();

        Iterable<IgniteCache.Entry<HadoopJobId, HadoopJobMetadata>> entries;

        try {
            entries = jobMetaCache().localEntries(OFFHEAP_PEEK_MODE);
        } catch (IgniteCheckedException e) {
            U.error(log, "Failed to get local entries", e);

            return;
        }

        // Iteration over all local entries is correct since the system cache is REPLICATED.
        for (IgniteCache.Entry<HadoopJobId, HadoopJobMetadata> entry : entries) {
            HadoopJobMetadata meta = entry.getValue();
            HadoopJobId jobId = meta.jobId();
            HadoopMapReducePlan plan = meta.mapReducePlan();
            HadoopJobPhase phase = meta.phase();

            try {
                if (checkSetup && phase == PHASE_SETUP && !activeJobs.containsKey(jobId)) {
                    // Failover setup task.
                    HadoopJobEx job = job(jobId, meta.jobInfo());

                    Collection<HadoopTaskInfo> setupTask = setupTask(jobId);

                    assert setupTask != null;

                    ctx.taskExecutor().run(job, setupTask);
                } else if (phase == PHASE_MAP || phase == PHASE_REDUCE) {
                    // Check all mapper and reducer nodes, not just the node from this event,
                    // since several nodes may have failed at once.
                    Collection<HadoopInputSplit> cancelSplits = null;

                    for (UUID nodeId : plan.mapperNodeIds()) {
                        if (ctx.kernalContext().discovery().node(nodeId) == null) {
                            // Node has left the grid.
                            Collection<HadoopInputSplit> mappers = plan.mappers(nodeId);

                            if (cancelSplits == null)
                                cancelSplits = new HashSet<>();

                            cancelSplits.addAll(mappers);
                        }
                    }

                    Collection<Integer> cancelReducers = null;

                    for (UUID nodeId : plan.reducerNodeIds()) {
                        if (ctx.kernalContext().discovery().node(nodeId) == null) {
                            // Node has left the grid.
                            int[] reducers = plan.reducers(nodeId);

                            if (cancelReducers == null)
                                cancelReducers = new HashSet<>();

                            for (int rdc : reducers)
                                cancelReducers.add(rdc);
                        }
                    }

                    if (cancelSplits != null || cancelReducers != null)
                        jobMetaCache().invoke(meta.jobId(), new CancelJobProcessor(null,
                            new IgniteCheckedException("One or more nodes participating in map-reduce job execution failed."),
                            cancelSplits, cancelReducers));
                }
            } catch (IgniteCheckedException e) {
                U.error(log, "Failed to cancel job: " + meta, e);
            }
        }
    }
}
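For context, the CancelJobProcessor above is applied through the cache invoke() call, i.e. the JCache EntryProcessor pattern, so the metadata update runs atomically against the entry. Below is a minimal sketch of that pattern on the public IgniteCache API; the cache name, keys, values, and the CancelProcessor class are hypothetical illustrations, not the Ignite internals.

import java.io.Serializable;

import javax.cache.processor.EntryProcessor;
import javax.cache.processor.MutableEntry;

import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.Ignition;

/** Hypothetical processor: marks a job record as cancelled in one atomic cache operation. */
class CancelProcessor implements EntryProcessor<String, String, Void>, Serializable {
    @Override public Void process(MutableEntry<String, String> entry, Object... args) {
        if (entry.exists())
            entry.setValue("CANCELLED: " + args[0]); // Applied atomically on the entry's owning node(s).

        return null;
    }
}

public class InvokeSketch {
    public static void main(String[] args) {
        try (Ignite ignite = Ignition.start()) {
            IgniteCache<String, String> jobs = ignite.getOrCreateCache("jobs");

            jobs.put("job-1", "PHASE_MAP");

            // Same shape as jobMetaCache().invoke(jobId, new CancelJobProcessor(...)) above.
            jobs.invoke("job-1", new CancelProcessor(), "node left the grid");

            System.out.println(jobs.get("job-1")); // Prints "CANCELLED: node left the grid".
        }
    }
}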
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
In class HadoopJobTracker, method jobMetaCache.
/**
* @return Job meta projection.
*/
@SuppressWarnings("NonPrivateFieldAccessedInSynchronizedContext")
private IgniteInternalCache<HadoopJobId, HadoopJobMetadata> jobMetaCache() {
    IgniteInternalCache<HadoopJobId, HadoopJobMetadata> prj = jobMetaPrj;

    if (prj == null) {
        synchronized (mux) {
            if ((prj = jobMetaPrj) == null) {
                GridCacheAdapter<HadoopJobId, HadoopJobMetadata> sysCache =
                    ctx.kernalContext().cache().internalCache(CU.SYS_CACHE_HADOOP_MR);

                assert sysCache != null;

                mrPlanner = ctx.planner();

                try {
                    ctx.kernalContext().resource().injectGeneric(mrPlanner);
                } catch (IgniteCheckedException e) {
                    // Must not happen.
                    U.error(log, "Failed to inject resources.", e);

                    throw new IllegalStateException(e);
                }

                jobMetaPrj = prj = sysCache;

                if (ctx.configuration().getFinishedJobInfoTtl() > 0) {
                    ExpiryPolicy finishedJobPlc = new ModifiedExpiryPolicy(
                        new Duration(MILLISECONDS, ctx.configuration().getFinishedJobInfoTtl()));

                    finishedJobMetaPrj = prj.withExpiryPolicy(finishedJobPlc);
                } else
                    finishedJobMetaPrj = jobMetaPrj;
            }
        }
    }

    return prj;
}
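The finishedJobMetaPrj branch above layers a ModifiedExpiryPolicy over the same underlying cache so that finished-job metadata ages out after the configured TTL. Below is a minimal sketch of that expiry-projection idea using the public IgniteCache API; the cache name, keys, values, and TTL are assumptions for illustration only.

import static java.util.concurrent.TimeUnit.MILLISECONDS;

import javax.cache.expiry.Duration;
import javax.cache.expiry.ModifiedExpiryPolicy;

import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.Ignition;

public class ExpirySketch {
    public static void main(String[] args) {
        try (Ignite ignite = Ignition.start()) {
            IgniteCache<String, String> jobMeta = ignite.getOrCreateCache("jobMeta");

            long finishedJobInfoTtl = 30_000L; // Assumed TTL in milliseconds.

            // Projection over the same cache: entries written through it expire some time
            // after their last modification, mirroring finishedJobMetaPrj above.
            IgniteCache<String, String> finishedJobMeta = jobMeta.withExpiryPolicy(
                new ModifiedExpiryPolicy(new Duration(MILLISECONDS, finishedJobInfoTtl)));

            jobMeta.put("job-1", "RUNNING");           // Not subject to the TTL.
            finishedJobMeta.put("job-2", "COMPLETED"); // Expires roughly 30 s after this write.
        }
    }
}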
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
In class HadoopJobTracker, method processJobMetadataUpdates.
/**
* @param updated Updated cache entries.
* @throws IgniteCheckedException If failed.
*/
private void processJobMetadataUpdates(Iterable<CacheEntryEvent<? extends HadoopJobId, ? extends HadoopJobMetadata>> updated) throws IgniteCheckedException {
    UUID locNodeId = ctx.localNodeId();

    for (CacheEntryEvent<? extends HadoopJobId, ? extends HadoopJobMetadata> entry : updated) {
        HadoopJobId jobId = entry.getKey();
        HadoopJobMetadata meta = entry.getValue();

        if (meta == null || !ctx.isParticipating(meta))
            continue;

        if (log.isDebugEnabled())
            log.debug("Processing job metadata update callback [locNodeId=" + locNodeId + ", meta=" + meta + ']');

        try {
            ctx.taskExecutor().onJobStateChanged(meta);
        } catch (IgniteCheckedException e) {
            U.error(log, "Failed to process job state changed callback (will fail the job) " +
                "[locNodeId=" + locNodeId + ", jobId=" + jobId + ", meta=" + meta + ']', e);

            transform(jobId, new CancelJobProcessor(null, e));

            continue;
        }

        processJobMetaUpdate(jobId, meta, locNodeId);
    }
}
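The updated iterable handed to processJobMetadataUpdates() has the shape delivered by a JCache CacheEntryUpdatedListener, which in Ignite is typically wired up through a continuous query on the metadata cache. A rough sketch of that wiring is shown below using a plain String cache; the cache name and values are placeholders.

import javax.cache.event.CacheEntryEvent;
import javax.cache.event.CacheEntryUpdatedListener;

import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.Ignition;
import org.apache.ignite.cache.query.ContinuousQuery;
import org.apache.ignite.cache.query.QueryCursor;

public class MetaUpdateListenerSketch {
    public static void main(String[] args) throws Exception {
        try (Ignite ignite = Ignition.start()) {
            IgniteCache<String, String> jobMeta = ignite.getOrCreateCache("jobMeta");

            ContinuousQuery<String, String> qry = new ContinuousQuery<>();

            // The listener receives the same Iterable<CacheEntryEvent<...>> shape that
            // processJobMetadataUpdates() consumes above.
            qry.setLocalListener(new CacheEntryUpdatedListener<String, String>() {
                @Override public void onUpdated(Iterable<CacheEntryEvent<? extends String, ? extends String>> evts) {
                    for (CacheEntryEvent<? extends String, ? extends String> evt : evts)
                        System.out.println("Meta update: " + evt.getKey() + " -> " + evt.getValue());
                }
            });

            // Keep the cursor open for as long as updates should be delivered.
            try (QueryCursor<?> cur = jobMeta.query(qry)) {
                jobMeta.put("job-1", "PHASE_MAP");

                Thread.sleep(1_000); // Give the asynchronous notification time to arrive.
            }
        }
    }
}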
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
In class HadoopExternalTaskExecutor, method startProcess.
/**
* Starts a process template that will be ready to execute Hadoop tasks.
*
* @param job Job instance.
* @param plan Map reduce plan.
*/
private HadoopProcess startProcess(final HadoopJobEx job, final HadoopMapReducePlan plan) {
    final UUID childProcId = UUID.randomUUID();

    HadoopJobId jobId = job.id();

    final HadoopProcessFuture fut = new HadoopProcessFuture(childProcId, jobId);
    final HadoopProcess proc = new HadoopProcess(jobId, fut, plan.reducers(ctx.localNodeId()));

    HadoopProcess old = runningProcsByJobId.put(jobId, proc);
    assert old == null;

    old = runningProcsByProcId.put(childProcId, proc);
    assert old == null;

    ctx.kernalContext().closure().runLocalSafe(new Runnable() {
        @Override public void run() {
            if (!busyLock.tryReadLock()) {
                fut.onDone(new IgniteCheckedException("Failed to start external process (grid is stopping)."));

                return;
            }

            try {
                HadoopExternalTaskMetadata startMeta = buildTaskMeta();

                if (log.isDebugEnabled())
                    log.debug("Created hadoop child process metadata for job [job=" + job +
                        ", childProcId=" + childProcId + ", taskMeta=" + startMeta + ']');

                Process proc = startJavaProcess(childProcId, startMeta, job,
                    ctx.kernalContext().config().getWorkDirectory());

                BufferedReader rdr = new BufferedReader(new InputStreamReader(proc.getInputStream()));

                String line;

                // Read up all the process output.
                while ((line = rdr.readLine()) != null) {
                    if (log.isDebugEnabled())
                        log.debug("Tracing process output: " + line);

                    if ("Started".equals(line)) {
                        // Process started successfully, it should not write anything more to the output stream.
                        if (log.isDebugEnabled())
                            log.debug("Successfully started child process [childProcId=" + childProcId +
                                ", meta=" + job + ']');

                        fut.onProcessStarted(proc);

                        break;
                    } else if ("Failed".equals(line)) {
                        StringBuilder sb = new StringBuilder("Failed to start child process: " + job + "\n");

                        while ((line = rdr.readLine()) != null)
                            sb.append(" ").append(line).append("\n");

                        // Cut last character.
                        sb.setLength(sb.length() - 1);

                        log.warning(sb.toString());

                        fut.onDone(new IgniteCheckedException(sb.toString()));

                        break;
                    }
                }
            } catch (Throwable e) {
                fut.onDone(new IgniteCheckedException("Failed to initialize child process: " + job, e));

                if (e instanceof Error)
                    throw (Error)e;
            } finally {
                busyLock.readUnlock();
            }
        }
    }, true);

    fut.listen(new CI1<IgniteInternalFuture<IgniteBiTuple<Process, HadoopProcessDescriptor>>>() {
        @Override public void apply(IgniteInternalFuture<IgniteBiTuple<Process, HadoopProcessDescriptor>> f) {
            try {
                // Make sure there were no exceptions.
                f.get();

                prepareForJob(proc, job, plan);
            } catch (IgniteCheckedException ignore) {
                // Exception is printed in future's onDone() method.
            }
        }
    });

    return proc;
}
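startProcess() delegates the actual fork to startJavaProcess() and then performs a line-based handshake on the child's stdout ("Started" / "Failed"). The sketch below shows that handshake idea with a plain ProcessBuilder; the main class name and classpath handling are hypothetical and unrelated to the real Ignite launcher.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

public class ChildProcessHandshakeSketch {
    /**
     * Launches a child JVM and blocks until it prints a "Started" or "Failed" line,
     * mirroring the stdout handshake used by startProcess() above. The main class
     * name and classpath are placeholders.
     */
    static Process startAndAwait(String classpath) throws IOException {
        Process proc = new ProcessBuilder("java", "-cp", classpath, "com.example.ChildMain")
            .redirectErrorStream(true)
            .start();

        // The reader is intentionally left open; after "Started" the child is expected
        // to write nothing more to stdout, matching the protocol above.
        BufferedReader rdr = new BufferedReader(new InputStreamReader(proc.getInputStream()));

        String line;

        while ((line = rdr.readLine()) != null) {
            if ("Started".equals(line))
                return proc;

            if ("Failed".equals(line)) {
                proc.destroy();

                throw new IOException("Child process reported a startup failure.");
            }
        }

        throw new IOException("Child process exited before completing the handshake.");
    }
}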
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
In class HadoopJobInfoUpdateRequest, method readExternal.
/** {@inheritDoc} */
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    jobId = new HadoopJobId();
    jobId.readExternal(in);

    jobPhase = (HadoopJobPhase) in.readObject();

    reducersAddrs = (HadoopProcessDescriptor[]) U.readArray(in);
}
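readExternal() must consume fields in exactly the order the matching writeExternal() produced them: the job ID first, then the phase, then the reducer addresses. A small self-contained Externalizable pair illustrating that symmetry is sketched below; the JobPhaseUpdate class is hypothetical and not part of Ignite.

import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;

/** Hypothetical message illustrating the write/read symmetry behind readExternal() above. */
public class JobPhaseUpdate implements Externalizable {
    private String jobId;
    private String phase;

    /** Public no-arg constructor required for Externalizable deserialization. */
    public JobPhaseUpdate() {
        // No-op.
    }

    public JobPhaseUpdate(String jobId, String phase) {
        this.jobId = jobId;
        this.phase = phase;
    }

    /** Fields must be written in exactly the order readExternal() reads them back. */
    @Override public void writeExternal(ObjectOutput out) throws IOException {
        out.writeUTF(jobId);
        out.writeObject(phase);
    }

    /** {@inheritDoc} */
    @Override public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
        jobId = in.readUTF();
        phase = (String) in.readObject();
    }
}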