Search in sources :

Example 11 with MD5Hash

use of org.apache.hadoop.io.MD5Hash in project hive by apache.

The following example shows the getPFileChecksum method of the ProxyLocalFileSystem class.

/**
 * Computes an MD5-based {@link FileChecksum} for the file at the given path
 * by digesting its contents through the wrapped filesystem.
 *
 * @param f path of the file to checksum
 * @return a {@code PFileChecksum} wrapping the MD5 digest of the file contents
 * @throws IOException if the file cannot be read or the MD5 algorithm is
 *         unavailable (any underlying exception is wrapped)
 */
private FileChecksum getPFileChecksum(Path f) throws IOException {
    // try-with-resources so the opened stream is always closed; the original
    // code leaked it on every call. Assumes getMD5Checksum accepts an
    // InputStream and does not close it itself — TODO confirm against helper.
    try (InputStream in = fs.open(f)) {
        MessageDigest md5Digest = MessageDigest.getInstance("MD5");
        MD5Hash md5Hash = new MD5Hash(getMD5Checksum(in));
        return new PFileChecksum(md5Hash, md5Digest.getAlgorithm());
    } catch (Exception e) {
        // Preserve the original contract: surface everything (including
        // NoSuchAlgorithmException) as an IOException with the cause attached.
        throw new IOException(e);
    }
}
Also used : MD5Hash(org.apache.hadoop.io.MD5Hash) IOException(java.io.IOException) MessageDigest(java.security.MessageDigest)

Example 12 with MD5Hash

use of org.apache.hadoop.io.MD5Hash in project hadoop by apache.

The following example shows the doGetUrl method of the Util class.

/**
   * Downloads the files at the specified url location into destination
   * storage.
   */
/**
 * Downloads the files at the specified url location into destination
 * storage.
 *
 * @param url source to fetch from
 * @param localPaths destination files or directories for the download
 * @param dstStorage storage receiving the downloaded data
 * @param getChecksum whether to compute an MD5 digest while receiving
 * @param timeout connection/read timeout applied to the HTTP connection
 * @param throttler optional bandwidth throttler
 * @return the computed MD5 digest, or null when no checksum was requested
 * @throws IOException on authentication failure, a non-200 response, a
 *         missing Content-Length header, or any transfer error
 */
public static MD5Hash doGetUrl(URL url, List<File> localPaths, Storage dstStorage, boolean getChecksum, int timeout, DataTransferThrottler throttler) throws IOException {
    final HttpURLConnection conn;
    try {
        conn = (HttpURLConnection) connectionFactory.openConnection(url, isSpnegoEnabled);
    } catch (AuthenticationException e) {
        // Surface SPNEGO/auth failures under the method's IOException contract.
        throw new IOException(e);
    }
    setTimeout(conn, timeout);
    if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
        throw new HttpGetFailedException("Image transfer servlet at " + url + " failed with status code " + conn.getResponseCode() + "\nResponse message:\n" + conn.getResponseMessage(), conn);
    }
    // The namenode must advertise the payload size up front; refuse to
    // proceed without it so truncated transfers can be detected later.
    String lengthHeader = conn.getHeaderField(CONTENT_LENGTH);
    if (lengthHeader == null) {
        throw new IOException(CONTENT_LENGTH + " header is not provided " + "by the namenode when trying to fetch " + url);
    }
    long expectedSize = Long.parseLong(lengthHeader);
    MD5Hash expectedDigest = parseMD5Header(conn);
    String imageName = conn.getHeaderField(ImageServlet.HADOOP_IMAGE_EDITS_HEADER);
    InputStream body = conn.getInputStream();
    return receiveFile(url.toExternalForm(), localPaths, dstStorage, getChecksum, expectedSize, expectedDigest, imageName, body, throttler);
}
Also used : HttpURLConnection(java.net.HttpURLConnection) AuthenticationException(org.apache.hadoop.security.authentication.client.AuthenticationException) DigestInputStream(java.security.DigestInputStream) InputStream(java.io.InputStream) MD5Hash(org.apache.hadoop.io.MD5Hash) IOException(java.io.IOException)

Example 13 with MD5Hash

use of org.apache.hadoop.io.MD5Hash in project hadoop by apache.

The following example shows the receiveFile method of the Util class.

/**
   * Receives file at the url location from the input stream and puts them in
   * the specified destination storage location.
   */
/**
 * Receives file at the url location from the input stream and puts them in
 * the specified destination storage location.
 *
 * @param url source url, used only for log and error messages
 * @param localPaths destination files or directories; when an entry is a
 *        directory, {@code fsImageName} supplies the file name within it.
 *        May be null when only the stream checksum is wanted.
 * @param dstStorage used to report per-file write errors; may be null
 * @param getChecksum whether to compute an MD5 digest while receiving
 * @param advertisedSize byte count advertised by the sender; the transfer
 *        fails if the received length differs
 * @param advertisedDigest expected MD5 digest, or null to skip verification
 * @param fsImageName server-provided file name for directory destinations
 * @param stream source of the file contents; always closed by this method
 * @param throttler optional bandwidth throttler; may be null
 * @return the computed MD5 digest when {@code getChecksum} is true,
 *         otherwise null
 * @throws IOException if no destination is writable, the received length
 *         does not match {@code advertisedSize}, or the digest mismatches
 */
public static MD5Hash receiveFile(String url, List<File> localPaths, Storage dstStorage, boolean getChecksum, long advertisedSize, MD5Hash advertisedDigest, String fsImageName, InputStream stream, DataTransferThrottler throttler) throws IOException {
    long startTime = Time.monotonicNow();
    Map<FileOutputStream, File> streamPathMap = new HashMap<>();
    StringBuilder xferStats = new StringBuilder();
    double xferCombined = 0;
    if (localPaths != null) {
        // If the local paths refer to directories, use the server-provided header
        // as the filename within that directory
        List<File> newLocalPaths = new ArrayList<>();
        for (File localPath : localPaths) {
            if (localPath.isDirectory()) {
                if (fsImageName == null) {
                    throw new IOException("No filename header provided by server");
                }
                newLocalPaths.add(new File(localPath, fsImageName));
            } else {
                newLocalPaths.add(localPath);
            }
        }
        localPaths = newLocalPaths;
    }
    long received = 0;
    MessageDigest digester = null;
    if (getChecksum) {
        // Digest transparently as bytes flow through the stream.
        digester = MD5Hash.getDigester();
        stream = new DigestInputStream(stream, digester);
    }
    boolean finishedReceiving = false;
    List<FileOutputStream> outputStreams = Lists.newArrayList();
    try {
        if (localPaths != null) {
            for (File f : localPaths) {
                try {
                    if (f.exists()) {
                        LOG.warn("Overwriting existing file " + f + " with file downloaded from " + url);
                    }
                    FileOutputStream fos = new FileOutputStream(f);
                    outputStreams.add(fos);
                    streamPathMap.put(fos, f);
                } catch (IOException ioe) {
                    // A single unwritable destination is tolerated as long as at
                    // least one stream opens; report it to storage if possible.
                    LOG.warn("Unable to download file " + f, ioe);
                    // NOTE(review): original comment was truncated — presumably
                    // "null if downloading outside of an NNStorage directory".
                    if (dstStorage != null && (dstStorage instanceof StorageErrorReporter)) {
                        ((StorageErrorReporter) dstStorage).reportErrorOnFile(f);
                    }
                }
            }
            if (outputStreams.isEmpty()) {
                throw new IOException("Unable to download to any storage directory");
            }
        }
        int num = 1;
        byte[] buf = new byte[IO_FILE_BUFFER_SIZE];
        while (num > 0) {
            num = stream.read(buf);
            if (num > 0) {
                received += num;
                // Fan the chunk out to every destination.
                for (FileOutputStream fos : outputStreams) {
                    fos.write(buf, 0, num);
                }
                if (throttler != null) {
                    throttler.throttle(num);
                }
            }
        }
        finishedReceiving = true;
        // Clamp to 1ms so the KB/s division below never divides by zero.
        double xferSec = Math.max(((float) (Time.monotonicNow() - startTime)) / 1000.0, 0.001);
        long xferKb = received / 1024;
        xferCombined += xferSec;
        xferStats.append(String.format(" The file download took %.2fs at %.2f KB/s.", xferSec, xferKb / xferSec));
    } finally {
        stream.close();
        for (FileOutputStream fos : outputStreams) {
            long flushStartTime = Time.monotonicNow();
            fos.getChannel().force(true);
            fos.close();
            // BUG FIX: elapsed time was computed as flushStartTime - now,
            // which is negative and was always clamped to 0.001s.
            double writeSec = Math.max(((float) (Time.monotonicNow() - flushStartTime)) / 1000.0, 0.001);
            xferCombined += writeSec;
            // Pass the path as a format argument rather than concatenating it
            // into the format string, which broke on paths containing '%'.
            xferStats.append(String.format(" Synchronous (fsync) write to disk of %s took %.2fs.", streamPathMap.get(fos).getAbsolutePath(), writeSec));
        }
        // Remove the temporary files.
        if (!finishedReceiving) {
            deleteTmpFiles(localPaths);
        }
        if (finishedReceiving && received != advertisedSize) {
            // only throw this exception if we think we read all of it on our end
            // -- otherwise a client-side IOException would be masked by this
            // exception that makes it look like a server-side problem!
            deleteTmpFiles(localPaths);
            throw new IOException("File " + url + " received length " + received + " is not of the advertised size " + advertisedSize);
        }
    }
    xferStats.insert(0, String.format("Combined time for file download and" + " fsync to all disks took %.2fs.", xferCombined));
    LOG.info(xferStats.toString());
    if (digester != null) {
        MD5Hash computedDigest = new MD5Hash(digester.digest());
        if (advertisedDigest != null && !computedDigest.equals(advertisedDigest)) {
            deleteTmpFiles(localPaths);
            throw new IOException("File " + url + " computed digest " + computedDigest + " does not match advertised digest " + advertisedDigest);
        }
        return computedDigest;
    } else {
        return null;
    }
}
Also used : DigestInputStream(java.security.DigestInputStream) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) MD5Hash(org.apache.hadoop.io.MD5Hash) MessageDigest(java.security.MessageDigest) File(java.io.File)

Example 14 with MD5Hash

use of org.apache.hadoop.io.MD5Hash in project hadoop by apache.

The following example shows the loadFSImageFile method of the FSImage class.

/**
 * Loads the given image file into the namesystem, choosing the checksum
 * source appropriate for the storage layout version: a sidecar .md5 file
 * for txid-based layouts, the deprecated VERSION-file digest for 0.22-era
 * layouts, or no checksum at all for anything older.
 */
void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery, FSImageFile imageFile, StartupOption startupOption) throws IOException {
    LOG.info("Planning to load image: " + imageFile);
    StorageDirectory sd = imageFile.sd;
    storage.readProperties(sd, startupOption);
    int layoutVersion = getLayoutVersion();
    if (NameNodeLayoutVersion.supports(LayoutVersion.Feature.TXID_BASED_LAYOUT, layoutVersion)) {
        // Txid-based layout: the expected MD5 lives in a .md5 file next to
        // the image itself, so the overload without an explicit hash is used.
        boolean rollingRollback = RollingUpgradeStartupOption.ROLLBACK.matches(startupOption);
        loadFSImage(imageFile.getFile(), target, recovery, rollingRollback);
    } else if (NameNodeLayoutVersion.supports(LayoutVersion.Feature.FSIMAGE_CHECKSUM, layoutVersion)) {
        // 0.22-era layout: the checksum was stored in the VERSION file.
        String storedMd5 = storage.getDeprecatedProperty(NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY);
        if (storedMd5 == null) {
            throw new InconsistentFSStateException(sd.getRoot(), "Message digest property " + NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY + " not set for storage directory " + sd.getRoot());
        }
        loadFSImage(imageFile.getFile(), new MD5Hash(storedMd5), target, recovery, false);
    } else {
        // Pre-checksum layout: no record of the md5sum exists anywhere.
        loadFSImage(imageFile.getFile(), null, target, recovery, false);
    }
}
Also used : MD5Hash(org.apache.hadoop.io.MD5Hash) StorageDirectory(org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory) InconsistentFSStateException(org.apache.hadoop.hdfs.server.common.InconsistentFSStateException)

Example 15 with MD5Hash

use of org.apache.hadoop.io.MD5Hash in project hadoop by apache.

The following example shows the loadFSImage method of the FSImage class.

/**
   * Load the image namespace from the given image file, verifying
   * it against the MD5 sum stored in its associated .md5 file.
   */
/**
 * Load the image namespace from the given image file, verifying
 * it against the MD5 sum stored in its associated .md5 file.
 * Fails fast if no .md5 sidecar file exists for the image.
 */
private void loadFSImage(File imageFile, FSNamesystem target, MetaRecoveryContext recovery, boolean requireSameLayoutVersion) throws IOException {
    final MD5Hash storedMd5 = MD5FileUtils.readStoredMd5ForFile(imageFile);
    if (storedMd5 == null) {
        throw new IOException("No MD5 file found corresponding to image file " + imageFile);
    }
    // Delegate to the overload that performs the actual load + verification.
    loadFSImage(imageFile, storedMd5, target, recovery, requireSameLayoutVersion);
}
Also used : MD5Hash(org.apache.hadoop.io.MD5Hash) IOException(java.io.IOException)

Aggregations

MD5Hash (org.apache.hadoop.io.MD5Hash)21 IOException (java.io.IOException)11 File (java.io.File)8 MessageDigest (java.security.MessageDigest)5 NameNodeFile (org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile)5 DigestInputStream (java.security.DigestInputStream)3 FileInputStream (java.io.FileInputStream)2 FileOutputStream (java.io.FileOutputStream)2 InputStream (java.io.InputStream)2 CountingOutputStream (com.google.common.io.CountingOutputStream)1 BufferedOutputStream (java.io.BufferedOutputStream)1 InputStreamReader (java.io.InputStreamReader)1 HttpURLConnection (java.net.HttpURLConnection)1 URI (java.net.URI)1 DigestOutputStream (java.security.DigestOutputStream)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Matcher (java.util.regex.Matcher)1 ServletContext (javax.servlet.ServletContext)1