Use of java.io.DataInputStream in project hive by apache.
Class TokenStoreDelegationTokenSecretManager, method decodeWritable.
public static void decodeWritable(Writable w, String idStr) throws IOException {
  DataInputStream in = new DataInputStream(
      new ByteArrayInputStream(Base64.decodeBase64(idStr)));
  w.readFields(in);
}
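decodeWritable reverses a Base64-plus-Writable encoding: the string is Base64-decoded and the resulting bytes are handed to the Writable's readFields through a DataInputStream. A minimal round-trip sketch, with the decode logic copied locally and a plain Hadoop Text standing in for a real token identifier (the encodeWritable helper below is an illustrative assumption, not Hive's actual counterpart):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class DecodeWritableDemo {

  // Same logic as the Hive method above, copied locally so the demo only
  // needs hadoop-common and commons-codec on the classpath.
  static void decodeWritable(Writable w, String idStr) throws IOException {
    DataInputStream in = new DataInputStream(
        new ByteArrayInputStream(Base64.decodeBase64(idStr)));
    w.readFields(in);
  }

  // Assumed counterpart: serialize the Writable and Base64-encode the bytes.
  static String encodeWritable(Writable w) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(bos);
    w.write(dos);
    dos.flush();
    return Base64.encodeBase64String(bos.toByteArray());
  }

  public static void main(String[] args) throws IOException {
    Text original = new Text("token-identifier-bytes");
    String encoded = encodeWritable(original);
    Text restored = new Text();
    decodeWritable(restored, encoded);
    System.out.println(restored);  // prints: token-identifier-bytes
  }
}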
Use of java.io.DataInputStream in project hadoop by apache.
Class Fetcher, method copyMapOutput.
private TaskAttemptID[] copyMapOutput(MapHost host, DataInputStream input,
    Set<TaskAttemptID> remaining, boolean canRetry) throws IOException {
  MapOutput<K, V> mapOutput = null;
  TaskAttemptID mapId = null;
  long decompressedLength = -1;
  long compressedLength = -1;
  try {
    long startTime = Time.monotonicNow();
    int forReduce = -1;
    // Read the shuffle header
    try {
      ShuffleHeader header = new ShuffleHeader();
      header.readFields(input);
      mapId = TaskAttemptID.forName(header.mapId);
      compressedLength = header.compressedLength;
      decompressedLength = header.uncompressedLength;
      forReduce = header.forReduce;
    } catch (IllegalArgumentException e) {
      badIdErrs.increment(1);
      LOG.warn("Invalid map id ", e);
      // Don't know which one was bad, so consider all of them as bad
      return remaining.toArray(new TaskAttemptID[remaining.size()]);
    }
    InputStream is = input;
    is = CryptoUtils.wrapIfNecessary(jobConf, is, compressedLength);
    compressedLength -= CryptoUtils.cryptoPadding(jobConf);
    decompressedLength -= CryptoUtils.cryptoPadding(jobConf);
    // Do some basic sanity verification
    if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId)) {
      return new TaskAttemptID[] { mapId };
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("header: " + mapId + ", len: " + compressedLength
          + ", decomp len: " + decompressedLength);
    }
    // Get the location for the map output - either in-memory or on-disk
    try {
      mapOutput = merger.reserve(mapId, decompressedLength, id);
    } catch (IOException ioe) {
      // kill this reduce attempt
      ioErrs.increment(1);
      scheduler.reportLocalError(ioe);
      return EMPTY_ATTEMPT_ID_ARRAY;
    }
    // Check if we can shuffle *now* ...
    if (mapOutput == null) {
      LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ...");
      // Not an error but wait to process data.
      return EMPTY_ATTEMPT_ID_ARRAY;
    }
    // to allow fetch failure logic to be processed
    try {
      // Go!
      LOG.info("fetcher#" + id + " about to shuffle output of map "
          + mapOutput.getMapId() + " decomp: " + decompressedLength
          + " len: " + compressedLength + " to " + mapOutput.getDescription());
      mapOutput.shuffle(host, is, compressedLength, decompressedLength, metrics, reporter);
    } catch (java.lang.InternalError | Exception e) {
      LOG.warn("Failed to shuffle for fetcher#" + id, e);
      throw new IOException(e);
    }
    // Inform the shuffle scheduler
    long endTime = Time.monotonicNow();
    // Reset retryStartTime as the map task makes progress if retried before.
    retryStartTime = 0;
    scheduler.copySucceeded(mapId, host, compressedLength, startTime, endTime, mapOutput);
    // Note successful shuffle
    remaining.remove(mapId);
    metrics.successFetch();
    return null;
  } catch (IOException ioe) {
    if (mapOutput != null) {
      mapOutput.abort();
    }
    if (canRetry) {
      checkTimeoutOrRetry(host, ioe);
    }
    ioErrs.increment(1);
    if (mapId == null || mapOutput == null) {
      LOG.warn("fetcher#" + id + " failed to read map header " + mapId
          + " decomp: " + decompressedLength + ", " + compressedLength, ioe);
      if (mapId == null) {
        return remaining.toArray(new TaskAttemptID[remaining.size()]);
      } else {
        return new TaskAttemptID[] { mapId };
      }
    }
    LOG.warn("Failed to shuffle output of " + mapId + " from " + host.getHostName(), ioe);
    // Inform the shuffle-scheduler
    metrics.failedFetch();
    return new TaskAttemptID[] { mapId };
  }
}
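The DataInputStream usage here is concentrated in the header read at the top: a Writable-style header pulls its fields straight off the connection stream with readFields, and the rest of the method works from the lengths carried in that header. A minimal sketch of the same pattern, with a hypothetical FetchHeader class standing in for Hadoop's ShuffleHeader (it follows the same readFields discipline, not the real wire format):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;

public class HeaderReadDemo {

  // Hypothetical stand-in for ShuffleHeader.
  static class FetchHeader {
    String mapId;
    long compressedLength;
    long uncompressedLength;
    int forReduce;

    void write(DataOutput out) throws IOException {
      out.writeUTF(mapId);
      out.writeLong(compressedLength);
      out.writeLong(uncompressedLength);
      out.writeInt(forReduce);
    }

    void readFields(DataInput in) throws IOException {
      mapId = in.readUTF();
      compressedLength = in.readLong();
      uncompressedLength = in.readLong();
      forReduce = in.readInt();
    }
  }

  public static void main(String[] args) throws IOException {
    // Serialize a header, then read it back the way copyMapOutput does.
    FetchHeader sent = new FetchHeader();
    sent.mapId = "attempt_1_0001_m_000000_0";
    sent.compressedLength = 1024;
    sent.uncompressedLength = 4096;
    sent.forReduce = 0;

    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    sent.write(new DataOutputStream(bos));

    DataInputStream input = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
    FetchHeader received = new FetchHeader();
    received.readFields(input);  // mirrors header.readFields(input) above
    System.out.println(received.mapId + " len=" + received.compressedLength);
  }
}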
Use of java.io.DataInputStream in project hadoop by apache.
Class Fetcher, method copyFromHost.
/**
 * The crux of the matter...
 *
 * @param host {@link MapHost} from which we need to
 *             shuffle available map-outputs.
 */
@VisibleForTesting
protected void copyFromHost(MapHost host) throws IOException {
  // reset retryStartTime for a new host
  retryStartTime = 0;
  // Get completed maps on 'host'
  List<TaskAttemptID> maps = scheduler.getMapsForHost(host);
  // especially at the tail of large jobs
  if (maps.size() == 0) {
    return;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Fetcher " + id + " going to fetch from " + host + " for: " + maps);
  }
  // List of maps to be fetched yet
  Set<TaskAttemptID> remaining = new HashSet<TaskAttemptID>(maps);
  // Construct the url and connect
  URL url = getMapOutputURL(host, maps);
  DataInputStream input = openShuffleUrl(host, remaining, url);
  if (input == null) {
    return;
  }
  try {
    // Loop through available map-outputs and fetch them
    // On any error, failedTasks is not null and we exit
    // after putting back the remaining maps to the
    // yet_to_be_fetched list and marking the failed tasks.
    TaskAttemptID[] failedTasks = null;
    while (!remaining.isEmpty() && failedTasks == null) {
      try {
        failedTasks = copyMapOutput(host, input, remaining, fetchRetryEnabled);
      } catch (IOException e) {
        IOUtils.cleanup(LOG, input);
        // Setup connection again if disconnected by NM
        connection.disconnect();
        // Get map output from remaining tasks only.
        url = getMapOutputURL(host, remaining);
        input = openShuffleUrl(host, remaining, url);
        if (input == null) {
          return;
        }
      }
    }
    if (failedTasks != null && failedTasks.length > 0) {
      LOG.warn("copyMapOutput failed for tasks " + Arrays.toString(failedTasks));
      scheduler.hostFailed(host.getHostName());
      for (TaskAttemptID left : failedTasks) {
        scheduler.copyFailed(left, host, true, false);
      }
    }
    // Sanity check
    if (failedTasks == null && !remaining.isEmpty()) {
      throw new IOException("server didn't return all expected map outputs: "
          + remaining.size() + " left.");
    }
    input.close();
    input = null;
  } finally {
    if (input != null) {
      IOUtils.cleanup(LOG, input);
      input = null;
    }
    for (TaskAttemptID left : remaining) {
      scheduler.putBackKnownMapOutput(host, left);
    }
  }
}
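copyFromHost gets its DataInputStream from openShuffleUrl, which is not shown in this snippet. A rough sketch of what that wrapping step amounts to, using plain HttpURLConnection and omitting Hadoop's retry and secure-shuffle handling (the helper name, timeouts, and URL layout below are illustrative assumptions):

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;

public class ShuffleUrlDemo {

  // Hypothetical simplification of openShuffleUrl: connect and wrap the
  // HTTP response body in a DataInputStream so Writable headers can be read.
  static DataInputStream openStream(URL url, int connectTimeoutMs, int readTimeoutMs)
      throws IOException {
    HttpURLConnection connection = (HttpURLConnection) url.openConnection();
    connection.setConnectTimeout(connectTimeoutMs);
    connection.setReadTimeout(readTimeoutMs);
    connection.connect();
    if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
      throw new IOException("Unexpected response: " + connection.getResponseCode());
    }
    return new DataInputStream(new BufferedInputStream(connection.getInputStream()));
  }

  public static void main(String[] args) throws IOException {
    // Placeholder URL; this only succeeds if a shuffle handler is actually
    // listening locally. A real shuffle URL encodes the job, reduce id and map ids.
    URL url = new URL("http://localhost:13562/mapOutput?job=...&reduce=0&map=...");
    try (DataInputStream input = openStream(url, 30_000, 60_000)) {
      // header.readFields(input) would follow here, as in copyMapOutput.
    }
  }
}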
Use of java.io.DataInputStream in project hadoop by apache.
Class StreamBackedIterator, method reset.
public void reset() {
  if (null != outfbuf) {
    inbuf = new ReplayableByteInputStream(outbuf.toByteArray());
    infbuf = new DataInputStream(inbuf);
    outfbuf = null;
  }
  inbuf.resetStream();
}
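reset flips the iterator from write mode to replay mode: the bytes accumulated in outbuf become the backing of a fresh DataInputStream, and later resets just rewind that stream. The same buffer-and-replay idea with only JDK streams (ReplayableByteInputStream is Hadoop-internal, so a ByteArrayInputStream's built-in reset stands in for resetStream here):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class ReplayDemo {
  public static void main(String[] args) throws IOException {
    // Write phase: accumulate records into an in-memory buffer.
    ByteArrayOutputStream outbuf = new ByteArrayOutputStream();
    DataOutputStream outfbuf = new DataOutputStream(outbuf);
    outfbuf.writeInt(42);
    outfbuf.writeUTF("record");
    outfbuf.flush();

    // "reset": switch to read mode over the captured bytes.
    ByteArrayInputStream inbuf = new ByteArrayInputStream(outbuf.toByteArray());
    DataInputStream infbuf = new DataInputStream(inbuf);

    for (int pass = 0; pass < 2; pass++) {
      System.out.println(infbuf.readInt() + " " + infbuf.readUTF());
      inbuf.reset();  // rewind to the start so the data can be replayed
    }
  }
}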
Use of java.io.DataInputStream in project hadoop by apache.
Class NNBench, method analyzeResults.
/**
 * Analyze the results
 * @throws IOException on error
 */
private int analyzeResults() throws IOException {
  final FileSystem fs = FileSystem.get(getConf());
  Path reduceDir = new Path(baseDir, OUTPUT_DIR_NAME);
  long totalTimeAL1 = 0L;
  long totalTimeAL2 = 0L;
  long totalTimeTPmS = 0L;
  long lateMaps = 0L;
  long numOfExceptions = 0L;
  long successfulFileOps = 0L;
  long mapStartTimeTPmS = 0L;
  long mapEndTimeTPmS = 0L;
  FileStatus[] fss = fs.listStatus(reduceDir);
  for (FileStatus status : fss) {
    Path reduceFile = status.getPath();
    try (DataInputStream in = new DataInputStream(fs.open(reduceFile));
         BufferedReader lines = new BufferedReader(new InputStreamReader(in))) {
      String line;
      while ((line = lines.readLine()) != null) {
        StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
        String attr = tokens.nextToken();
        if (attr.endsWith(":totalTimeAL1")) {
          totalTimeAL1 = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":totalTimeAL2")) {
          totalTimeAL2 = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":totalTimeTPmS")) {
          totalTimeTPmS = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":latemaps")) {
          lateMaps = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":numOfExceptions")) {
          numOfExceptions = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":successfulFileOps")) {
          successfulFileOps = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":mapStartTimeTPmS")) {
          mapStartTimeTPmS = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":mapEndTimeTPmS")) {
          mapEndTimeTPmS = Long.parseLong(tokens.nextToken());
        }
      }
    }
  }
  // Average latency is the average time to perform 'n' number of
  // operations, n being the number of files
  double avgLatency1 = (double) totalTimeAL1 / successfulFileOps;
  double avgLatency2 = (double) totalTimeAL2 / successfulFileOps;
  // The time it takes for the longest running map is measured. Using that,
  // cluster transactions per second is calculated. It includes time to
  // retry any of the failed operations
  double longestMapTimeTPmS = (double) (mapEndTimeTPmS - mapStartTimeTPmS);
  double totalTimeTPS = (longestMapTimeTPmS == 0)
      ? (1000 * successfulFileOps)
      : (double) (1000 * successfulFileOps) / longestMapTimeTPmS;
  // The time it takes to perform 'n' operations is calculated (in ms),
  // n being the number of files. Using that time, the average execution
  // time is calculated. It includes time to retry any of the
  // failed operations
  double AverageExecutionTime = (totalTimeTPmS == 0)
      ? (double) successfulFileOps
      : (double) totalTimeTPmS / successfulFileOps;
  String resultTPSLine1 = null;
  String resultTPSLine2 = null;
  String resultALLine1 = null;
  String resultALLine2 = null;
  if (operation.equals(OP_CREATE_WRITE)) {
    // For create/write/close, it is treated as two transactions,
    // since a file create from a client perspective involves create and close
    resultTPSLine1 = " TPS: Create/Write/Close: " + (int) (totalTimeTPS * 2);
    resultTPSLine2 = "Avg exec time (ms): Create/Write/Close: " + AverageExecutionTime;
    resultALLine1 = " Avg Lat (ms): Create/Write: " + avgLatency1;
    resultALLine2 = " Avg Lat (ms): Close: " + avgLatency2;
  } else if (operation.equals(OP_OPEN_READ)) {
    resultTPSLine1 = " TPS: Open/Read: " + (int) totalTimeTPS;
    resultTPSLine2 = " Avg Exec time (ms): Open/Read: " + AverageExecutionTime;
    resultALLine1 = " Avg Lat (ms): Open: " + avgLatency1;
    if (readFileAfterOpen) {
      resultALLine2 = " Avg Lat (ms): Read: " + avgLatency2;
    }
  } else if (operation.equals(OP_RENAME)) {
    resultTPSLine1 = " TPS: Rename: " + (int) totalTimeTPS;
    resultTPSLine2 = " Avg Exec time (ms): Rename: " + AverageExecutionTime;
    resultALLine1 = " Avg Lat (ms): Rename: " + avgLatency1;
  } else if (operation.equals(OP_DELETE)) {
    resultTPSLine1 = " TPS: Delete: " + (int) totalTimeTPS;
    resultTPSLine2 = " Avg Exec time (ms): Delete: " + AverageExecutionTime;
    resultALLine1 = " Avg Lat (ms): Delete: " + avgLatency1;
  }
  String[] resultLines = {
      "-------------- NNBench -------------- : ",
      " Version: " + NNBENCH_VERSION,
      " Date & time: " + sdf.format(new Date(System.currentTimeMillis())),
      "",
      " Test Operation: " + operation,
      " Start time: " + sdf.format(new Date(startTime)),
      " Maps to run: " + numberOfMaps,
      " Reduces to run: " + numberOfReduces,
      " Block Size (bytes): " + blockSize,
      " Bytes to write: " + bytesToWrite,
      " Bytes per checksum: " + bytesPerChecksum,
      " Number of files: " + numberOfFiles,
      " Replication factor: " + replicationFactorPerFile,
      " Successful file operations: " + successfulFileOps,
      "",
      " # maps that missed the barrier: " + lateMaps,
      " # exceptions: " + numOfExceptions,
      "",
      resultTPSLine1,
      resultTPSLine2,
      resultALLine1,
      resultALLine2,
      "",
      " RAW DATA: AL Total #1: " + totalTimeAL1,
      " RAW DATA: AL Total #2: " + totalTimeAL2,
      " RAW DATA: TPS Total (ms): " + totalTimeTPmS,
      " RAW DATA: Longest Map Time (ms): " + longestMapTimeTPmS,
      " RAW DATA: Late maps: " + lateMaps,
      " RAW DATA: # of exceptions: " + numOfExceptions,
      ""
  };
  try (PrintStream res = new PrintStream(
      new FileOutputStream(new File(DEFAULT_RES_FILE_NAME), true))) {
    // Write to a file and also dump to log
    for (String resultLine : resultLines) {
      LOG.info(resultLine);
      res.println(resultLine);
    }
  }
  if (numOfExceptions >= MAX_OPERATION_EXCEPTIONS) {
    return -1;
  }
  return 0;
}
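The reducer output consumed here is plain text, one "name:attribute<TAB>value" pair per line. A minimal local-file sketch of the same read-and-tokenize loop, assuming that line layout and a placeholder file name:

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.StringTokenizer;

public class ParseReduceOutput {
  public static void main(String[] args) throws IOException {
    long successfulFileOps = 0L;
    long numOfExceptions = 0L;
    // "part-00000" is a placeholder for a reducer output file.
    try (DataInputStream in = new DataInputStream(new FileInputStream("part-00000"));
         BufferedReader lines = new BufferedReader(new InputStreamReader(in))) {
      String line;
      while ((line = lines.readLine()) != null) {
        StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
        if (!tokens.hasMoreTokens()) {
          continue;  // skip blank lines
        }
        String attr = tokens.nextToken();
        if (attr.endsWith(":successfulFileOps")) {
          successfulFileOps = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":numOfExceptions")) {
          numOfExceptions = Long.parseLong(tokens.nextToken());
        }
      }
    }
    System.out.println("ops=" + successfulFileOps + " exceptions=" + numOfExceptions);
  }
}

Note that the DataInputStream wrapper adds nothing in this read path, since every read goes through the BufferedReader; it is kept only to mirror the NNBench code above.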