Use of org.apache.hadoop.io.MD5Hash in project hive by apache.
The class ProxyLocalFileSystem, method getPFileChecksum.
private FileChecksum getPFileChecksum(Path f) throws IOException {
  MessageDigest md5Digest;
  try {
    md5Digest = MessageDigest.getInstance("MD5");
    MD5Hash md5Hash = new MD5Hash(getMD5Checksum(fs.open(f)));
    return new PFileChecksum(md5Hash, md5Digest.getAlgorithm());
  } catch (Exception e) {
    throw new IOException(e);
  }
}
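The snippet relies on a getMD5Checksum helper that is not shown. A minimal sketch of what such a helper might look like follows; it streams the file through a MessageDigest and returns the raw 16-byte digest that the MD5Hash(byte[]) constructor expects. The helper name and buffer size here are assumptions, not the actual Hive implementation.

import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

// Hypothetical helper sketch; not the actual Hive code.
static byte[] getMD5Checksum(InputStream in) throws IOException {
  try (InputStream is = in) {
    MessageDigest md5 = MessageDigest.getInstance("MD5");
    byte[] buf = new byte[8192];
    int n;
    // Feed every byte of the stream into the digest.
    while ((n = is.read(buf)) != -1) {
      md5.update(buf, 0, n);
    }
    // digest() returns the 16 raw bytes that new MD5Hash(byte[]) accepts.
    return md5.digest();
  } catch (NoSuchAlgorithmException e) {
    throw new IOException(e);
  }
}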
Use of org.apache.hadoop.io.MD5Hash in project hadoop by apache.
The class Util, method doGetUrl.
/**
 * Downloads the file at the specified url location into the destination
 * storage.
 */
public static MD5Hash doGetUrl(URL url, List<File> localPaths,
    Storage dstStorage, boolean getChecksum, int timeout,
    DataTransferThrottler throttler) throws IOException {
  HttpURLConnection connection;
  try {
    connection = (HttpURLConnection) connectionFactory.openConnection(
        url, isSpnegoEnabled);
  } catch (AuthenticationException e) {
    throw new IOException(e);
  }
  setTimeout(connection, timeout);

  if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
    throw new HttpGetFailedException("Image transfer servlet at " + url
        + " failed with status code " + connection.getResponseCode()
        + "\nResponse message:\n" + connection.getResponseMessage(),
        connection);
  }

  long advertisedSize;
  String contentLength = connection.getHeaderField(CONTENT_LENGTH);
  if (contentLength != null) {
    advertisedSize = Long.parseLong(contentLength);
  } else {
    throw new IOException(CONTENT_LENGTH + " header is not provided "
        + "by the namenode when trying to fetch " + url);
  }
  MD5Hash advertisedDigest = parseMD5Header(connection);
  String fsImageName = connection.getHeaderField(
      ImageServlet.HADOOP_IMAGE_EDITS_HEADER);
  InputStream stream = connection.getInputStream();

  return receiveFile(url.toExternalForm(), localPaths, dstStorage,
      getChecksum, advertisedSize, advertisedDigest, fsImageName, stream,
      throttler);
}
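The parseMD5Header call above is where the MD5Hash enters the picture. A plausible sketch of such a helper is shown below; it reads the digest header from the response and rebuilds an MD5Hash from its hex value. The header name "X-Image-MD5-Digest" is an assumption here, not something taken from the snippet.

import java.net.HttpURLConnection;
import org.apache.hadoop.io.MD5Hash;

// Sketch only; the real helper lives elsewhere in the Hadoop codebase.
private static MD5Hash parseMD5Header(HttpURLConnection connection) {
  String header = connection.getHeaderField("X-Image-MD5-Digest");
  // MD5Hash(String) parses a 32-character hex string into 16 digest bytes.
  return (header != null) ? new MD5Hash(header) : null;
}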
Use of org.apache.hadoop.io.MD5Hash in project hadoop by apache.
The class Util, method receiveFile.
/**
 * Receives the file at the given url location from the input stream and
 * writes it to the specified destination storage locations.
 */
public static MD5Hash receiveFile(String url, List<File> localPaths,
    Storage dstStorage, boolean getChecksum, long advertisedSize,
    MD5Hash advertisedDigest, String fsImageName, InputStream stream,
    DataTransferThrottler throttler) throws IOException {
  long startTime = Time.monotonicNow();
  Map<FileOutputStream, File> streamPathMap = new HashMap<>();
  StringBuilder xferStats = new StringBuilder();
  double xferCombined = 0;
  if (localPaths != null) {
    // If the local paths refer to directories, use the server-provided header
    // as the filename within that directory
    List<File> newLocalPaths = new ArrayList<>();
    for (File localPath : localPaths) {
      if (localPath.isDirectory()) {
        if (fsImageName == null) {
          throw new IOException("No filename header provided by server");
        }
        newLocalPaths.add(new File(localPath, fsImageName));
      } else {
        newLocalPaths.add(localPath);
      }
    }
    localPaths = newLocalPaths;
  }

  long received = 0;
  MessageDigest digester = null;
  if (getChecksum) {
    digester = MD5Hash.getDigester();
    stream = new DigestInputStream(stream, digester);
  }
  boolean finishedReceiving = false;

  List<FileOutputStream> outputStreams = Lists.newArrayList();
  try {
    if (localPaths != null) {
      for (File f : localPaths) {
        try {
          if (f.exists()) {
            LOG.warn("Overwriting existing file " + f
                + " with file downloaded from " + url);
          }
          FileOutputStream fos = new FileOutputStream(f);
          outputStreams.add(fos);
          streamPathMap.put(fos, f);
        } catch (IOException ioe) {
          LOG.warn("Unable to download file " + f, ioe);
          // dstStorage will be null if we are downloading the image to a file
          // outside of an NNStorage directory.
          if (dstStorage != null &&
              (dstStorage instanceof StorageErrorReporter)) {
            ((StorageErrorReporter) dstStorage).reportErrorOnFile(f);
          }
        }
      }

      if (outputStreams.isEmpty()) {
        throw new IOException("Unable to download to any storage directory");
      }
    }

    int num = 1;
    byte[] buf = new byte[IO_FILE_BUFFER_SIZE];
    while (num > 0) {
      num = stream.read(buf);
      if (num > 0) {
        received += num;
        for (FileOutputStream fos : outputStreams) {
          fos.write(buf, 0, num);
        }
        if (throttler != null) {
          throttler.throttle(num);
        }
      }
    }
    finishedReceiving = true;
    double xferSec = Math.max(
        ((float) (Time.monotonicNow() - startTime)) / 1000.0, 0.001);
    long xferKb = received / 1024;
    xferCombined += xferSec;
    xferStats.append(String.format(
        " The file download took %.2fs at %.2f KB/s.",
        xferSec, xferKb / xferSec));
  } finally {
    stream.close();
    for (FileOutputStream fos : outputStreams) {
      long flushStartTime = Time.monotonicNow();
      fos.getChannel().force(true);
      fos.close();
      double writeSec = Math.max(
          ((float) (Time.monotonicNow() - flushStartTime)) / 1000.0, 0.001);
      xferCombined += writeSec;
      xferStats.append(String.format(" Synchronous (fsync) write to disk of "
          + streamPathMap.get(fos).getAbsolutePath() + " took %.2fs.",
          writeSec));
    }

    // Remove the temporary files.
    if (!finishedReceiving) {
      deleteTmpFiles(localPaths);
    }

    if (finishedReceiving && received != advertisedSize) {
      // only throw this exception if we think we read all of it on our end
      // -- otherwise a client-side IOException would be masked by this
      // exception that makes it look like a server-side problem!
      deleteTmpFiles(localPaths);
      throw new IOException("File " + url + " received length " + received
          + " is not of the advertised size " + advertisedSize);
    }
  }
  xferStats.insert(0, String.format(
      "Combined time for file download and fsync to all disks took %.2fs.",
      xferCombined));
  LOG.info(xferStats.toString());

  if (digester != null) {
    MD5Hash computedDigest = new MD5Hash(digester.digest());
    if (advertisedDigest != null && !computedDigest.equals(advertisedDigest)) {
      deleteTmpFiles(localPaths);
      throw new IOException("File " + url + " computed digest "
          + computedDigest + " does not match advertised digest "
          + advertisedDigest);
    }
    return computedDigest;
  } else {
    return null;
  }
}
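Stripped of the multi-directory, throttling, and statistics logic, the checksum pattern in receiveFile reduces to a few lines. The sketch below is a simplified restatement of that pattern, not a replacement for the method above: wrap the source in a DigestInputStream backed by MD5Hash.getDigester(), copy the bytes, then wrap the finished digest in an MD5Hash.

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import org.apache.hadoop.io.MD5Hash;

// Simplified sketch of receiveFile's checksum pattern.
static MD5Hash copyAndDigest(InputStream in, OutputStream out)
    throws IOException {
  MessageDigest digester = MD5Hash.getDigester();
  // Every byte read through this stream also updates the digester.
  DigestInputStream dis = new DigestInputStream(in, digester);
  byte[] buf = new byte[64 * 1024];
  int n;
  while ((n = dis.read(buf)) > 0) {
    out.write(buf, 0, n);
  }
  return new MD5Hash(digester.digest());
}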
Use of org.apache.hadoop.io.MD5Hash in project hadoop by apache.
The class FSImage, method loadFSImageFile.
void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery,
    FSImageFile imageFile, StartupOption startupOption) throws IOException {
  LOG.info("Planning to load image: " + imageFile);
  StorageDirectory sdForProperties = imageFile.sd;
  storage.readProperties(sdForProperties, startupOption);
  if (NameNodeLayoutVersion.supports(
      LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
    // For txid-based layout, we should have a .md5 file
    // next to the image file
    boolean isRollingRollback = RollingUpgradeStartupOption.ROLLBACK
        .matches(startupOption);
    loadFSImage(imageFile.getFile(), target, recovery, isRollingRollback);
  } else if (NameNodeLayoutVersion.supports(
      LayoutVersion.Feature.FSIMAGE_CHECKSUM, getLayoutVersion())) {
    // In 0.22, we have the checksum stored in the VERSION file.
    String md5 = storage.getDeprecatedProperty(
        NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY);
    if (md5 == null) {
      throw new InconsistentFSStateException(sdForProperties.getRoot(),
          "Message digest property "
              + NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY
              + " not set for storage directory "
              + sdForProperties.getRoot());
    }
    loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery,
        false);
  } else {
    // We don't have any record of the md5sum
    loadFSImage(imageFile.getFile(), null, target, recovery, false);
  }
}
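The 0.22 code path above rebuilds an MD5Hash from the hex string stored in the VERSION file via new MD5Hash(md5). The following minimal, self-contained sketch (not Hadoop code) shows that hex round-trip in isolation: a digest rendered as hex can be parsed back and compared by value.

import org.apache.hadoop.io.MD5Hash;

public class Md5HexRoundTrip {
  public static void main(String[] args) {
    MD5Hash computed = MD5Hash.digest("some image bytes");
    String storedHex = computed.toString();        // what VERSION would store
    MD5Hash restored = new MD5Hash(storedHex);     // what loadFSImageFile does
    // equals() compares the underlying 16-byte digests
    System.out.println(computed.equals(restored)); // true
  }
}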
Use of org.apache.hadoop.io.MD5Hash in project hadoop by apache.
The class FSImage, method loadFSImage.
/**
 * Load the image namespace from the given image file, verifying
 * it against the MD5 sum stored in its associated .md5 file.
 */
private void loadFSImage(File imageFile, FSNamesystem target,
    MetaRecoveryContext recovery, boolean requireSameLayoutVersion)
    throws IOException {
  MD5Hash expectedMD5 = MD5FileUtils.readStoredMd5ForFile(imageFile);
  if (expectedMD5 == null) {
    throw new IOException(
        "No MD5 file found corresponding to image file " + imageFile);
  }
  loadFSImage(imageFile, expectedMD5, target, recovery,
      requireSameLayoutVersion);
}
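MD5FileUtils.readStoredMd5ForFile is the real API for reading the .md5 sidecar file. For illustration only, the sketch below shows one plausible way such a file could be parsed, assuming the conventional md5sum line format "<hex-digest> <filename>"; the on-disk format is an assumption here, not taken from the snippet.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.MD5Hash;

// Illustrative sketch; not the MD5FileUtils implementation.
static MD5Hash readStoredMd5(File md5File) throws IOException {
  try (BufferedReader reader = new BufferedReader(new InputStreamReader(
      new FileInputStream(md5File), StandardCharsets.UTF_8))) {
    String line = reader.readLine();
    if (line == null) {
      return null;
    }
    // First whitespace-separated token is the 32-character hex digest.
    String hex = line.trim().split("\\s+")[0];
    return new MD5Hash(hex);
  }
}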