Search in sources :

Example 1 with ReplicaInputStreams

use of org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams in project hadoop by apache.

the class BlockPoolSlice method validateIntegrityAndSetLength.

  /**
   * Find out the number of bytes in the block that match its crc.
   * This algorithm assumes that data corruption caused by unexpected
   * datanode shutdown occurs only in the last crc chunk. So it checks
   * only the last chunk.
   * @param blockFile the block file
   * @param genStamp generation stamp of the block
   * @return the number of valid bytes
   */
private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
    // Any IOException raised below is caught at the end of the method and
    // reported as 0 valid bytes.
    try {
        final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
        long blockFileLen = blockFile.length();
        long metaFileLen = metaFile.length();
        int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
        // Nothing can be validated without a non-empty block file and a meta
        // file that is at least as long as the checksum header.
        if (!blockFile.exists() || blockFileLen == 0 || !metaFile.exists() || metaFileLen < crcHeaderLen) {
            return 0;
        try (DataInputStream checksumIn = new DataInputStream(new BufferedInputStream(fileIoProvider.getFileInputStream(volume, metaFile), ioFileBufferSize))) {
            // read and handle the common header here. For now just a version
            final DataChecksum checksum = BlockMetadataHeader.readDataChecksum(checksumIn, metaFile);
            int bytesPerChecksum = checksum.getBytesPerChecksum();
            int checksumSize = checksum.getChecksumSize();
            // Number of chunks that can be checked: the smaller of the chunks
            // covered by the block data (rounded up) and the whole checksums
            // stored in the meta file after its header.
            long numChunks = Math.min((blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum, (metaFileLen - crcHeaderLen) / checksumSize);
            if (numChunks == 0) {
                return 0;
            try (InputStream blockIn = fileIoProvider.getFileInputStream(volume, blockFile);
                ReplicaInputStreams ris = new ReplicaInputStreams(blockIn, checksumIn, volume.obtainReference(), fileIoProvider)) {
                // Skip the stored checksums of every chunk except the last one.
                ris.skipChecksumFully((numChunks - 1) * checksumSize);
                long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
                // The last chunk may be shorter than a full chunk.
                int lastChunkSize = (int) Math.min(bytesPerChecksum, blockFileLen - lastChunkStartPos);
                // buf layout: [0, lastChunkSize) holds the chunk data, followed by
                // checksumSize bytes holding the stored checksum of that chunk.
                byte[] buf = new byte[lastChunkSize + checksumSize];
                ris.readChecksumFully(buf, lastChunkSize, checksumSize);
                // NOTE(review): nothing visible in this excerpt advances the block
                // data stream to lastChunkStartPos before the read below; confirm
                // against the upstream source.
                ris.readDataFully(buf, 0, lastChunkSize);
                checksum.update(buf, 0, lastChunkSize);
                long validFileLength;
                // NOTE(review): the left part of this condition is missing from this
                // excerpt; presumably it compares the checksum computed above with
                // the stored one held at offset lastChunkSize of buf -- confirm
                // against the upstream source.
                if (, lastChunkSize)) {
                    // last chunk matches crc
                    validFileLength = lastChunkStartPos + lastChunkSize;
                } else {
                    // last chunk is corrupt
                    validFileLength = lastChunkStartPos;
                // truncate if extra bytes are present without CRC
                if (blockFile.length() > validFileLength) {
                    try (RandomAccessFile blockRAF = fileIoProvider.getRandomAccessFile(volume, blockFile, "rw")) {
                        // truncate blockFile
                        // NOTE(review): the statement that performs the truncation
                        // is not present in this excerpt.
                return validFileLength;
    } catch (IOException e) {
        // Treated as "no valid bytes"; the exception itself is not propagated.
        return 0;
Also used : ReplicaInputStreams(org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams) RandomAccessFile( BufferedInputStream( DataInputStream( BufferedInputStream( FileInputStream( InputStream( IOException( DataInputStream( RandomAccessFile( File( DataChecksum(org.apache.hadoop.util.DataChecksum)

Example 2 with ReplicaInputStreams

use of org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams in project hadoop by apache.

the class BlockReceiver method computePartialChunkCrc.

  /**
   * Reads in the partial crc chunk and computes the checksum
   * of the pre-existing data in the partial chunk.
   */
private Checksum computePartialChunkCrc(long blkoff, long ckoff) throws IOException {
    // find offset of the beginning of partial chunk.
    // sizePartialChunk is the number of bytes past the last chunk boundary;
    // blkoff is then moved back to that boundary.
    int sizePartialChunk = (int) (blkoff % bytesPerChecksum);
    blkoff = blkoff - sizePartialChunk;
    if (LOG.isDebugEnabled()) {
        LOG.debug("computePartialChunkCrc for " + block + ": sizePartialChunk=" + sizePartialChunk + ", block offset=" + blkoff + ", metafile offset=" + ckoff);
    // create an input stream from the block file
    // and read in partial crc chunk into temporary buffer
    byte[] buf = new byte[sizePartialChunk];
    byte[] crcbuf = new byte[checksumSize];
    // NOTE(review): the expression that creates the ReplicaInputStreams is
    // truncated in this excerpt; only its trailing arguments (blkoff, ckoff)
    // are visible -- confirm against the upstream source.
    try (ReplicaInputStreams instr =, blkoff, ckoff)) {
        instr.readDataFully(buf, 0, sizePartialChunk);
        // open meta file and read in crc value computed earlier
        instr.readChecksumFully(crcbuf, 0, crcbuf.length);
    // compute crc of partial chunk from data read in the block file.
    final Checksum partialCrc = DataChecksum.newDataChecksum(diskChecksum.getChecksumType(), diskChecksum.getBytesPerChecksum());
    partialCrc.update(buf, 0, sizePartialChunk);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Read in partial CRC chunk from disk for " + block);
    // Compare the value decoded from the stored checksum bytes with the one
    // recalculated just now; a mismatch is reported as an IOException.
    if (partialCrc.getValue() != checksum2long(crcbuf)) {
        String msg = "Partial CRC " + partialCrc.getValue() + " does not match value computed the " + " last time file was closed " + checksum2long(crcbuf);
        throw new IOException(msg);
    // The returned checksum has already been updated with the partial chunk's data.
    return partialCrc;
Also used : ReplicaInputStreams(org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams) Checksum( DataChecksum(org.apache.hadoop.util.DataChecksum) IOException(

Example 3 with ReplicaInputStreams

use of org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams in project hadoop by apache.

the class FsDatasetImpl method getTmpInputStreams.

  /**
   * Returns handles to the block file and its metadata file.
   */
// FsDatasetSpi
public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkOffset, long metaOffset) throws IOException {
    // The replica lookup and the opening of both streams happen while the
    // dataset lock is held.
    try (AutoCloseableLock lock = datasetLock.acquire()) {
        ReplicaInfo info = getReplicaInfo(b);
        // Volume reference obtained here is handed to the returned
        // ReplicaInputStreams on success.
        FsVolumeReference ref = info.getVolume().obtainReference();
        try {
            // Block data stream, opened at blkOffset.
            InputStream blockInStream = info.getDataInputStream(blkOffset);
            try {
                // Metadata stream, opened at metaOffset.
                InputStream metaInStream = info.getMetadataInputStream(metaOffset);
                return new ReplicaInputStreams(blockInStream, metaInStream, ref, datanode.getFileIoProvider());
            } catch (IOException e) {
                // Opening the metadata stream failed: hand the already opened
                // block stream to IOUtils.cleanup (presumably closes it) and rethrow.
                IOUtils.cleanup(null, blockInStream);
                throw e;
        } catch (IOException e) {
            // Either stream failed to open: hand the volume reference to
            // IOUtils.cleanup (presumably releases it) and rethrow.
            IOUtils.cleanup(null, ref);
            throw e;
Also used : ReplicaInputStreams(org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams) FsVolumeReference(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference) ReplicaInfo(org.apache.hadoop.hdfs.server.datanode.ReplicaInfo) LengthInputStream(org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream) FileInputStream( InputStream( AutoCloseableLock(org.apache.hadoop.util.AutoCloseableLock) IOException( MultipleIOException(


IOException ( ReplicaInputStreams (org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams)3 FileInputStream ( InputStream ( DataChecksum (org.apache.hadoop.util.DataChecksum)2 BufferedInputStream ( DataInputStream ( File ( RandomAccessFile ( Checksum ( ReplicaInfo (org.apache.hadoop.hdfs.server.datanode.ReplicaInfo)1 FsVolumeReference (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference)1 LengthInputStream (org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream)1 MultipleIOException ( AutoCloseableLock (org.apache.hadoop.util.AutoCloseableLock)1