Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
The class Fetcher, method copyMapOutput.
private TaskAttemptID[] copyMapOutput(MapHost host, DataInputStream input,
    Set<TaskAttemptID> remaining, boolean canRetry) throws IOException {
  MapOutput<K, V> mapOutput = null;
  TaskAttemptID mapId = null;
  long decompressedLength = -1;
  long compressedLength = -1;
  try {
    long startTime = Time.monotonicNow();
    int forReduce = -1;
    // Read the shuffle header
    try {
      ShuffleHeader header = new ShuffleHeader();
      header.readFields(input);
      mapId = TaskAttemptID.forName(header.mapId);
      compressedLength = header.compressedLength;
      decompressedLength = header.uncompressedLength;
      forReduce = header.forReduce;
    } catch (IllegalArgumentException e) {
      badIdErrs.increment(1);
      LOG.warn("Invalid map id ", e);
      // Don't know which one was bad, so consider all of them as bad
      return remaining.toArray(new TaskAttemptID[remaining.size()]);
    }
    InputStream is = input;
    is = CryptoUtils.wrapIfNecessary(jobConf, is, compressedLength);
    compressedLength -= CryptoUtils.cryptoPadding(jobConf);
    decompressedLength -= CryptoUtils.cryptoPadding(jobConf);
    // Do some basic sanity verification
    if (!verifySanity(compressedLength, decompressedLength, forReduce,
        remaining, mapId)) {
      return new TaskAttemptID[] { mapId };
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("header: " + mapId + ", len: " + compressedLength +
          ", decomp len: " + decompressedLength);
    }
    // Get the location for the map output - either in-memory or on-disk
    try {
      mapOutput = merger.reserve(mapId, decompressedLength, id);
    } catch (IOException ioe) {
      // kill this reduce attempt
      ioErrs.increment(1);
      scheduler.reportLocalError(ioe);
      return EMPTY_ATTEMPT_ID_ARRAY;
    }
    // Check if we can shuffle *now* ...
    if (mapOutput == null) {
      LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ...");
      // Not an error but wait to process data.
      return EMPTY_ATTEMPT_ID_ARRAY;
    }
    // Some codecs throw java.lang.InternalError on decompression failures;
    // catch it and re-throw as IOException to allow fetch failure logic
    // to be processed
    try {
      // Go!
      LOG.info("fetcher#" + id + " about to shuffle output of map " +
          mapOutput.getMapId() + " decomp: " + decompressedLength +
          " len: " + compressedLength + " to " + mapOutput.getDescription());
      mapOutput.shuffle(host, is, compressedLength, decompressedLength,
          metrics, reporter);
    } catch (java.lang.InternalError | Exception e) {
      LOG.warn("Failed to shuffle for fetcher#" + id, e);
      throw new IOException(e);
    }
    // Inform the shuffle scheduler
    long endTime = Time.monotonicNow();
    // Reset retryStartTime since the map task made progress if it was retried before.
    retryStartTime = 0;
    scheduler.copySucceeded(mapId, host, compressedLength, startTime, endTime,
        mapOutput);
    // Note successful shuffle
    remaining.remove(mapId);
    metrics.successFetch();
    return null;
  } catch (IOException ioe) {
    if (mapOutput != null) {
      mapOutput.abort();
    }
    if (canRetry) {
      checkTimeoutOrRetry(host, ioe);
    }
    ioErrs.increment(1);
    if (mapId == null || mapOutput == null) {
      LOG.warn("fetcher#" + id + " failed to read map header " + mapId +
          " decomp: " + decompressedLength + ", " + compressedLength, ioe);
      if (mapId == null) {
        return remaining.toArray(new TaskAttemptID[remaining.size()]);
      } else {
        return new TaskAttemptID[] { mapId };
      }
    }
    LOG.warn("Failed to shuffle output of " + mapId + " from " +
        host.getHostName(), ioe);
    // Inform the shuffle-scheduler
    metrics.failedFetch();
    return new TaskAttemptID[] { mapId };
  }
}
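
To make the header-parsing step above concrete, here is a minimal, self-contained sketch of TaskAttemptID.forName, the call copyMapOutput applies to header.mapId. The attempt strings and the class name are arbitrary examples for illustration, not values taken from the Hadoop source.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdParseDemo {
  public static void main(String[] args) {
    // A well-formed id: attempt_<cluster timestamp>_<job>_<m|r>_<task>_<attempt>
    TaskAttemptID mapId = TaskAttemptID.forName("attempt_201103121733_0001_m_000005_0");
    System.out.println(mapId.getJobID());                                 // job_201103121733_0001
    System.out.println(mapId.getTaskID().getTaskType() == TaskType.MAP); // true
    System.out.println(mapId.getTaskID().getId());                       // 5
    System.out.println(mapId.getId());                                   // 0 (attempt number)

    // A malformed id makes forName throw IllegalArgumentException, which is
    // why the snippet above then treats every remaining map as suspect.
    try {
      TaskAttemptID.forName("attempt_bogus");
    } catch (IllegalArgumentException e) {
      System.out.println("invalid map id: " + e.getMessage());
    }
  }
}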
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
The class Fetcher, method copyFromHost.
/**
 * The crux of the matter...
 *
 * @param host {@link MapHost} from which we need to
 *          shuffle available map-outputs.
 */
@VisibleForTesting
protected void copyFromHost(MapHost host) throws IOException {
  // reset retryStartTime for a new host
  retryStartTime = 0;
  // Get completed maps on 'host'
  List<TaskAttemptID> maps = scheduler.getMapsForHost(host);
  // Sanity check to catch hosts with only 'OBSOLETE' maps,
  // especially at the tail of large jobs
  if (maps.size() == 0) {
    return;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Fetcher " + id + " going to fetch from " + host + " for: " + maps);
  }
  // List of maps to be fetched yet
  Set<TaskAttemptID> remaining = new HashSet<TaskAttemptID>(maps);
  // Construct the url and connect
  URL url = getMapOutputURL(host, maps);
  DataInputStream input = openShuffleUrl(host, remaining, url);
  if (input == null) {
    return;
  }
  try {
    // Loop through available map-outputs and fetch them
    // On any error, failedTasks is not null and we exit
    // after putting back the remaining maps to the
    // yet_to_be_fetched list and marking the failed tasks.
    TaskAttemptID[] failedTasks = null;
    while (!remaining.isEmpty() && failedTasks == null) {
      try {
        failedTasks = copyMapOutput(host, input, remaining, fetchRetryEnabled);
      } catch (IOException e) {
        IOUtils.cleanup(LOG, input);
        // Setup connection again if disconnected by NM
        connection.disconnect();
        // Get map output from remaining tasks only.
        url = getMapOutputURL(host, remaining);
        input = openShuffleUrl(host, remaining, url);
        if (input == null) {
          return;
        }
      }
    }
    if (failedTasks != null && failedTasks.length > 0) {
      LOG.warn("copyMapOutput failed for tasks " + Arrays.toString(failedTasks));
      scheduler.hostFailed(host.getHostName());
      for (TaskAttemptID left : failedTasks) {
        scheduler.copyFailed(left, host, true, false);
      }
    }
    // Sanity check
    if (failedTasks == null && !remaining.isEmpty()) {
      throw new IOException("server didn't return all expected map outputs: " +
          remaining.size() + " left.");
    }
    input.close();
    input = null;
  } finally {
    if (input != null) {
      IOUtils.cleanup(LOG, input);
      input = null;
    }
    for (TaskAttemptID left : remaining) {
      scheduler.putBackKnownMapOutput(host, left);
    }
  }
}
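
The loop above rests on one bookkeeping invariant: every map id starts in the remaining set, a successful copy removes it, and whatever is still in the set when the method exits is handed back to the scheduler in the finally block. The following sketch isolates that pattern under assumed interfaces; Copier and PutBack are hypothetical stand-ins for copyMapOutput and scheduler.putBackKnownMapOutput, not Hadoop APIs.

import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.apache.hadoop.mapreduce.TaskAttemptID;

public class RemainingSetSketch {

  // Stand-in for copyMapOutput: returns true if the output was fetched.
  interface Copier {
    boolean copy(TaskAttemptID id);
  }

  // Stand-in for scheduler.putBackKnownMapOutput.
  interface PutBack {
    void putBack(TaskAttemptID id);
  }

  static void fetchAll(Set<TaskAttemptID> maps, Copier copier, PutBack putBack) {
    Set<TaskAttemptID> remaining = new HashSet<TaskAttemptID>(maps);
    try {
      for (Iterator<TaskAttemptID> it = remaining.iterator(); it.hasNext();) {
        TaskAttemptID id = it.next();
        if (copier.copy(id)) {
          it.remove();   // successful copy: drop it from the worklist
        } else {
          break;         // failure: stop and let the caller retry later
        }
      }
    } finally {
      // Anything not fetched goes back so another fetcher can pick it up.
      for (TaskAttemptID left : remaining) {
        putBack.putBack(left);
      }
    }
  }
}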
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
The class Fetcher, method getMapOutputURL.
/**
 * Create the map-output-url. This will contain all the map ids
 * separated by commas.
 * @param host the {@link MapHost} to fetch from
 * @param maps the map attempts whose output should be fetched
 * @return the map-output URL for the given host and maps
 * @throws MalformedURLException if the constructed URL is invalid
 */
private URL getMapOutputURL(MapHost host, Collection<TaskAttemptID> maps)
    throws MalformedURLException {
  // Get the base url
  StringBuffer url = new StringBuffer(host.getBaseUrl());
  boolean first = true;
  for (TaskAttemptID mapId : maps) {
    if (!first) {
      url.append(",");
    }
    url.append(mapId);
    first = false;
  }
  LOG.debug("MapOutput URL for " + host + " -> " + url.toString());
  return new URL(url.toString());
}
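
For illustration, the sketch below builds the same comma-separated id list against an assumed base URL. The host, port and query string are invented here; the real value comes from MapHost.getBaseUrl() and depends on the shuffle handler configuration.

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class MapOutputUrlDemo {
  public static void main(String[] args) throws MalformedURLException {
    // Assumed base URL, for illustration only.
    String baseUrl =
        "http://node1:13562/mapOutput?job=job_201103121733_0001&reduce=0&map=";

    List<TaskAttemptID> maps = Arrays.asList(
        new TaskAttemptID("201103121733", 1, TaskType.MAP, 0, 0),
        new TaskAttemptID("201103121733", 1, TaskType.MAP, 5, 1));

    StringBuilder url = new StringBuilder(baseUrl);
    boolean first = true;
    for (TaskAttemptID mapId : maps) {
      if (!first) {
        url.append(",");
      }
      url.append(mapId);   // appends e.g. attempt_201103121733_0001_m_000005_1
      first = false;
    }
    // Prints the base URL followed by
    // attempt_201103121733_0001_m_000000_0,attempt_201103121733_0001_m_000005_1
    System.out.println(new URL(url.toString()));
  }
}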
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
The class LocalFetcher, method doCopy.
/**
 * The crux of the matter...
 */
private void doCopy(Set<TaskAttemptID> maps) throws IOException {
  Iterator<TaskAttemptID> iter = maps.iterator();
  while (iter.hasNext()) {
    TaskAttemptID map = iter.next();
    LOG.debug("LocalFetcher " + id + " going to fetch: " + map);
    if (copyMapOutput(map)) {
      // Successful copy. Remove this from our worklist.
      iter.remove();
    } else {
      // Copy failed: go back to the shuffle scheduler
      // and block for InMemoryMerge.
      break;
    }
  }
}
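
One detail worth noting in doCopy: the id is removed through the Iterator rather than with maps.remove(map), because a HashSet cannot be safely mutated while a loop is iterating over it. A minimal illustration, with arbitrary example ids:

import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.apache.hadoop.mapreduce.TaskAttemptID;

public class IteratorRemoveDemo {
  public static void main(String[] args) {
    Set<TaskAttemptID> maps = new HashSet<TaskAttemptID>();
    maps.add(TaskAttemptID.forName("attempt_201103121733_0001_m_000000_0"));
    maps.add(TaskAttemptID.forName("attempt_201103121733_0001_m_000001_0"));

    // Safe: remove through the iterator while walking the set.
    for (Iterator<TaskAttemptID> it = maps.iterator(); it.hasNext();) {
      TaskAttemptID id = it.next();
      if (id.getTaskID().getId() == 0) {
        it.remove();
      }
    }
    System.out.println(maps.size()); // 1

    // Calling maps.remove(id) from inside a for-each loop would typically
    // fail with ConcurrentModificationException on the next iteration.
  }
}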
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
The class TestClientRedirect, method testRedirect.
@Test
public void testRedirect() throws Exception {
  Configuration conf = new YarnConfiguration();
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
  conf.set(YarnConfiguration.RM_ADDRESS, RMADDRESS);
  conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, HSHOSTADDRESS);
  // Start the RM.
  RMService rmService = new RMService("test");
  rmService.init(conf);
  rmService.start();
  // Start the AM.
  AMService amService = new AMService();
  amService.init(conf);
  amService.start(conf);
  // Start the HS.
  HistoryService historyService = new HistoryService();
  historyService.init(conf);
  historyService.start(conf);
  LOG.info("services started");
  Cluster cluster = new Cluster(conf);
  org.apache.hadoop.mapreduce.JobID jobID =
      new org.apache.hadoop.mapred.JobID("201103121733", 1);
  org.apache.hadoop.mapreduce.Counters counters =
      cluster.getJob(jobID).getCounters();
  validateCounters(counters);
  Assert.assertTrue(amContact);
  LOG.info("Sleeping for 5 seconds before stop for" +
      " the client socket to not get EOF immediately..");
  Thread.sleep(5000);
  // bring down the AM service
  amService.stop();
  LOG.info("Sleeping for 5 seconds after stop for" +
      " the server to exit cleanly..");
  Thread.sleep(5000);
  amRestarting = true;
  // Same client
  // results are returned from the fake (not started) job
  counters = cluster.getJob(jobID).getCounters();
  Assert.assertEquals(0, counters.countCounters());
  Job job = cluster.getJob(jobID);
  org.apache.hadoop.mapreduce.TaskID taskId =
      new org.apache.hadoop.mapreduce.TaskID(jobID, TaskType.MAP, 0);
  TaskAttemptID tId = new TaskAttemptID(taskId, 0);
  // invoke all methods to check that no exception is thrown
  job.killJob();
  job.killTask(tId);
  job.failTask(tId);
  job.getTaskCompletionEvents(0, 100);
  job.getStatus();
  job.getTaskDiagnostics(tId);
  job.getTaskReports(TaskType.MAP);
  job.getTrackingURL();
  amRestarting = false;
  amService = new AMService();
  amService.init(conf);
  amService.start(conf);
  // reset
  amContact = false;
  counters = cluster.getJob(jobID).getCounters();
  validateCounters(counters);
  Assert.assertTrue(amContact);
  // Stop the AM. It is not even restarting. So it should be treated as
  // completed.
  amService.stop();
  // Same client
  counters = cluster.getJob(jobID).getCounters();
  validateCounters(counters);
  Assert.assertTrue(hsContact);
  rmService.stop();
  historyService.stop();
}
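
As a closing illustration, the sketch below builds the same job/task/attempt hierarchy the test constructs and shows the string round trip through forName, which is the form the ids take in the shuffle header and map-output URL shown earlier on this page. The identifiers and class name are arbitrary examples.

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdBuildDemo {
  public static void main(String[] args) {
    // Build the id hierarchy the same way the test does: job -> task -> attempt.
    JobID jobId = new JobID("201103121733", 1);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);

    // toString() and forName(...) are symmetric, so an id survives a round
    // trip through its string form.
    String asText = attemptId.toString();   // attempt_201103121733_0001_m_000000_0
    System.out.println(asText);
    System.out.println(attemptId.equals(TaskAttemptID.forName(asText)));  // true
  }
}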