Example 1 with CheckSumMetadata

Use of voldemort.store.readonly.checksum.CheckSumMetadata in project voldemort by voldemort.

From class HadoopStoreWriter, the close() method:

@Override
public void close() throws IOException {
    for (int chunkId : chunksHandled) {
        this.indexFileStream[chunkId].close();
        this.valueFileStream[chunkId].close();
    }
    if (this.nodeId == -1 || this.partitionId == -1) {
        // No data was read in the reduce phase, so do not create any output
        return;
    }
    // If the replica type read was not valid, shout out
    if (getSaveKeys() && this.replicaType == -1) {
        throw new RuntimeException("Could not read the replica type correctly for node " + nodeId + " ( partition - " + this.partitionId + " )");
    }
    String fileNamePrefix;
    if (getSaveKeys()) {
        fileNamePrefix = this.partitionId + "_" + this.replicaType + "_";
    } else {
        fileNamePrefix = this.partitionId + "_";
    }
    // Initialize the output directory
    String outputDirName;
    if (getBuildPrimaryReplicasOnly()) {
        outputDirName = "partition-" + this.partitionId;
    } else {
        outputDirName = "node-" + this.nodeId;
    }
    Path outputDir = new Path(this.outputDir, outputDirName);
    // Create output directory, if it doesn't exist
    FileSystem outputFs = outputDir.getFileSystem(this.conf);
    outputFs.mkdirs(outputDir);
    outputFs.setPermission(outputDir, new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
    logger.info("Setting permission to 755 for " + outputDir);
    // Write the checksum and output files
    for (int chunkId : chunksHandled) {
        String chunkFileName = fileNamePrefix + Integer.toString(chunkId);
        CheckSumMetadata indexCheckSum = new CheckSumMetadata();
        CheckSumMetadata valueCheckSum = new CheckSumMetadata();
        if (this.checkSumType != CheckSumType.NONE) {
            if (this.checkSumDigestIndex[chunkId] != null && this.checkSumDigestValue[chunkId] != null) {
                indexCheckSum.add(ReadOnlyStorageMetadata.CHECKSUM, new String(Hex.encodeHex(this.checkSumDigestIndex[chunkId].getCheckSum())));
                valueCheckSum.add(ReadOnlyStorageMetadata.CHECKSUM, new String(Hex.encodeHex(this.checkSumDigestValue[chunkId].getCheckSum())));
            } else {
                throw new RuntimeException("Failed to open checksum digest for node " + nodeId + " ( partition - " + this.partitionId + ", chunk - " + chunkId + " )");
            }
        }
        Path checkSumIndexFile = new Path(outputDir, chunkFileName + INDEX_FILE_EXTENSION + CHECKSUM_FILE_EXTENSION);
        Path checkSumValueFile = new Path(outputDir, chunkFileName + DATA_FILE_EXTENSION + CHECKSUM_FILE_EXTENSION);
        if (outputFs.exists(checkSumIndexFile)) {
            outputFs.delete(checkSumIndexFile, true);
        }
        FSDataOutputStream output = outputFs.create(checkSumIndexFile);
        outputFs.setPermission(checkSumIndexFile, new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
        indexCheckSum.add(CheckSumMetadata.INDEX_FILE_SIZE_IN_BYTES, Long.toString(this.indexFileSizeInBytes[chunkId]));
        output.write(indexCheckSum.toJsonString().getBytes());
        output.close();
        if (outputFs.exists(checkSumValueFile)) {
            outputFs.delete(checkSumValueFile, true);
        }
        output = outputFs.create(checkSumValueFile);
        outputFs.setPermission(checkSumValueFile, new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
        valueCheckSum.add(CheckSumMetadata.DATA_FILE_SIZE_IN_BYTES, Long.toString(this.valueFileSizeInBytes[chunkId]));
        output.write(valueCheckSum.toJsonString().getBytes());
        output.close();
        // Generate the final chunk files and add file size information
        Path indexFile = new Path(outputDir, chunkFileName + INDEX_FILE_EXTENSION + fileExtension);
        Path valueFile = new Path(outputDir, chunkFileName + DATA_FILE_EXTENSION + fileExtension);
        logger.info("Moving " + this.taskIndexFileName[chunkId] + " to " + indexFile);
        if (outputFs.exists(indexFile)) {
            outputFs.delete(indexFile, true);
        }
        fs.rename(taskIndexFileName[chunkId], indexFile);
        logger.info("Moving " + this.taskValueFileName[chunkId] + " to " + valueFile);
        if (outputFs.exists(valueFile)) {
            outputFs.delete(valueFile, true);
        }
        fs.rename(this.taskValueFileName[chunkId], valueFile);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), FsPermission (org.apache.hadoop.fs.permission.FsPermission), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), CheckSumMetadata (voldemort.store.readonly.checksum.CheckSumMetadata)
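
The fileNamePrefix logic in close() encodes partition, replica type, and chunk identity directly into the file names. The following standalone sketch distills just that naming convention; the ChunkFileNames class, its chunkFileName method, and the values in main are hypothetical, written only for illustration, and are not part of Voldemort's API.

public final class ChunkFileNames {

    /**
     * Builds the base name of a chunk file: "partition_replica_chunk" when
     * keys are saved (e.g. "2_1_0"), or "partition_chunk" otherwise (e.g. "2_0").
     * The writer then appends ".index" or ".data", plus ".checksum" for the
     * checksum metadata files.
     */
    static String chunkFileName(int partitionId, int replicaType, int chunkId, boolean saveKeys) {
        String prefix = saveKeys
                ? partitionId + "_" + replicaType + "_"
                : partitionId + "_";
        return prefix + chunkId;
    }

    public static void main(String[] args) {
        System.out.println(chunkFileName(2, 1, 0, true));   // prints 2_1_0
        System.out.println(chunkFileName(2, -1, 3, false)); // prints 2_3
    }
}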

Example 2 with CheckSumMetadata

Use of voldemort.store.readonly.checksum.CheckSumMetadata in project voldemort by voldemort.

From class HadoopStoreBuilder, the processCheckSumMetadataFile() method:

/**
     * For the given node, the following three actions are performed:
     *
     * 1. Computes the checksum of checksums
     *
     * 2. Computes the total data size
     *
     * 3. Computes the total index size
     *
     * Finally, updates the metadata file with that information.
     *
     * @param directoryName
     * @param outputFs
     * @param checkSumGenerator
     * @param nodePath
     * @param metadata
     * @throws IOException
     */
private void processCheckSumMetadataFile(String directoryName, FileSystem outputFs, CheckSum checkSumGenerator, Path nodePath, ReadOnlyStorageMetadata metadata) throws IOException {
    long dataSizeInBytes = 0L;
    long indexSizeInBytes = 0L;
    FileStatus[] storeFiles = outputFs.listStatus(nodePath, new PathFilter() {

        @Override
        public boolean accept(Path arg0) {
            // Keep only the *.checksum metadata files, skipping hidden files
            return arg0.getName().endsWith("checksum") && !arg0.getName().startsWith(".");
        }
    });
    if (storeFiles != null && storeFiles.length > 0) {
        Arrays.sort(storeFiles, new IndexFileLastComparator());
        FSDataInputStream input = null;
        CheckSumMetadata checksumMetadata;
        for (FileStatus file : storeFiles) {
            try {
                // HDFS NameNodes can sometimes GC for extended periods
                // of time, hence the exponential back-off strategy below.
                // TODO: Refactor all BnP retry code into a pluggable mechanism
                int totalAttempts = 4;
                int attemptsRemaining = totalAttempts;
                while (attemptsRemaining > 0) {
                    try {
                        attemptsRemaining--;
                        input = outputFs.open(file.getPath());
                        // Opened successfully; stop retrying.
                        break;
                    } catch (Exception e) {
                        if (attemptsRemaining < 1) {
                            throw e;
                        }
                        int attemptsMade = totalAttempts - attemptsRemaining;
                        // Exponential back-off sleep times: 5s, 20s, 45s.
                        int sleepTime = attemptsMade * attemptsMade * 5;
                        logger.error("Error getting checksum file from HDFS. Retries left: " + attemptsRemaining + ". Back-off until next retry: " + sleepTime + " seconds.", e);
                        Thread.sleep(sleepTime * 1000);
                    }
                }
                checksumMetadata = new CheckSumMetadata(input);
                if (checkSumType != CheckSumType.NONE) {
                    byte[] fileChecksum = checksumMetadata.getCheckSum();
                    logger.debug("Checksum for file " + file.toString() + " - " + new String(Hex.encodeHex(fileChecksum)));
                    checkSumGenerator.update(fileChecksum);
                }
                /*
                 * If this is a 'data checksum' file, add the data file size
                 * to 'dataSizeInBytes'
                 */
                String dataFileSizeInBytes = (String) checksumMetadata.get(CheckSumMetadata.DATA_FILE_SIZE_IN_BYTES);
                if (dataFileSizeInBytes != null) {
                    dataSizeInBytes += Long.parseLong(dataFileSizeInBytes);
                }
                /*
                 * If this is an 'index checksum' file, add the index file
                 * size to 'indexSizeInBytes'
                 */
                String indexFileSizeInBytes = (String) checksumMetadata.get(CheckSumMetadata.INDEX_FILE_SIZE_IN_BYTES);
                if (indexFileSizeInBytes != null) {
                    indexSizeInBytes += Long.parseLong(indexFileSizeInBytes);
                }
            } catch (Exception e) {
                logger.error("Error getting checksum file from HDFS", e);
            } finally {
                if (input != null)
                    input.close();
            }
            outputFs.delete(file.getPath(), false);
        }
        // update metadata
        String checkSum = "NONE";
        if (checkSumType != CheckSumType.NONE) {
            metadata.add(ReadOnlyStorageMetadata.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
            checkSum = new String(Hex.encodeHex(checkSumGenerator.getCheckSum()));
            metadata.add(ReadOnlyStorageMetadata.CHECKSUM, checkSum);
        }
        long diskSizeForNodeInBytes = dataSizeInBytes + indexSizeInBytes;
        logger.debug(directoryName + ": Checksum = " + checkSum + ", Size = " + (diskSizeForNodeInBytes / ByteUtils.BYTES_PER_KB) + " KB");
        metadata.add(ReadOnlyStorageMetadata.DISK_SIZE_IN_BYTES, Long.toString(diskSizeForNodeInBytes));
    }
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus), CheckSumMetadata (voldemort.store.readonly.checksum.CheckSumMetadata), VoldemortException (voldemort.VoldemortException), IOException (java.io.IOException), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)
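
The "checksum of checksums" computed by processCheckSumMetadataFile() can be illustrated without any Hadoop or Voldemort dependencies. The sketch below is a minimal, self-contained approximation: it assumes MD5 (one of the algorithms CheckSumType supports) and hard-codes two per-file checksums as stand-ins for the values parsed from the *.checksum files; the ChecksumOfChecksums class itself is hypothetical.

import java.security.MessageDigest;

public final class ChecksumOfChecksums {

    public static void main(String[] args) throws Exception {
        // One digest accumulates all per-file checksums, mirroring
        // checkSumGenerator.update(fileChecksum) in the loop above.
        MessageDigest aggregate = MessageDigest.getInstance("MD5");

        // Stand-ins for the checksums read out of each *.checksum file.
        byte[][] perFileChecksums = {
                MessageDigest.getInstance("MD5").digest("chunk-0".getBytes()),
                MessageDigest.getInstance("MD5").digest("chunk-1".getBytes())
        };
        for (byte[] checksum : perFileChecksums) {
            aggregate.update(checksum);
        }

        // Hex-encode the final digest, as the builder does with Hex.encodeHex()
        // before storing it under ReadOnlyStorageMetadata.CHECKSUM.
        StringBuilder hex = new StringBuilder();
        for (byte b : aggregate.digest()) {
            hex.append(String.format("%02x", b));
        }
        System.out.println("Checksum of checksums: " + hex);
    }
}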

Aggregations

Path (org.apache.hadoop.fs.Path): 2
CheckSumMetadata (voldemort.store.readonly.checksum.CheckSumMetadata): 2
IOException (java.io.IOException): 1
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 1
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 1
FileStatus (org.apache.hadoop.fs.FileStatus): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
PathFilter (org.apache.hadoop.fs.PathFilter): 1
FsPermission (org.apache.hadoop.fs.permission.FsPermission): 1
VoldemortException (voldemort.VoldemortException): 1