
Example 1 with ReplicaInputStreams

Use of org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams in project hadoop by apache.

From class BlockPoolSlice, method validateIntegrityAndSetLength.

/**
   * Find out the number of bytes in the block that match its crc.
   *
   * This algorithm assumes that data corruption caused by unexpected
   * datanode shutdown occurs only in the last crc chunk. So it checks
   * only the last chunk.
   *
   * @param blockFile the block file
   * @param genStamp generation stamp of the block
   * @return the number of valid bytes
   */
private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
    try {
        final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
        long blockFileLen = blockFile.length();
        long metaFileLen = metaFile.length();
        int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
        if (!blockFile.exists() || blockFileLen == 0 || !metaFile.exists() || metaFileLen < crcHeaderLen) {
            return 0;
        }
        try (DataInputStream checksumIn = new DataInputStream(new BufferedInputStream(fileIoProvider.getFileInputStream(volume, metaFile), ioFileBufferSize))) {
            // read and handle the common header here. For now just a version
            final DataChecksum checksum = BlockMetadataHeader.readDataChecksum(checksumIn, metaFile);
            int bytesPerChecksum = checksum.getBytesPerChecksum();
            int checksumSize = checksum.getChecksumSize();
            long numChunks = Math.min((blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum, (metaFileLen - crcHeaderLen) / checksumSize);
            if (numChunks == 0) {
                return 0;
            }
            try (InputStream blockIn = fileIoProvider.getFileInputStream(volume, blockFile);
                ReplicaInputStreams ris = new ReplicaInputStreams(blockIn, checksumIn, volume.obtainReference(), fileIoProvider)) {
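                // position both streams at the start of the last chunk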
                ris.skipChecksumFully((numChunks - 1) * checksumSize);
                long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
                ris.skipDataFully(lastChunkStartPos);
                int lastChunkSize = (int) Math.min(bytesPerChecksum, blockFileLen - lastChunkStartPos);
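                // buf layout: [0, lastChunkSize) holds the chunk data,
                // [lastChunkSize, lastChunkSize + checksumSize) the stored crc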
                byte[] buf = new byte[lastChunkSize + checksumSize];
                ris.readChecksumFully(buf, lastChunkSize, checksumSize);
                ris.readDataFully(buf, 0, lastChunkSize);
                checksum.update(buf, 0, lastChunkSize);
                long validFileLength;
                if (checksum.compare(buf, lastChunkSize)) {
                    // last chunk matches crc
                    validFileLength = lastChunkStartPos + lastChunkSize;
                } else {
                    // last chunk is corrupt
                    validFileLength = lastChunkStartPos;
                }
                // truncate if extra bytes are present without CRC
                if (blockFile.length() > validFileLength) {
                    try (RandomAccessFile blockRAF = fileIoProvider.getRandomAccessFile(volume, blockFile, "rw")) {
                        // truncate blockFile
                        blockRAF.setLength(validFileLength);
                    }
                }
                return validFileLength;
            }
        }
    } catch (IOException e) {
        FsDatasetImpl.LOG.warn(e);
        return 0;
    }
}
Also used: java.io.BufferedInputStream, java.io.DataInputStream, java.io.File, java.io.FileInputStream, java.io.InputStream, java.io.IOException, java.io.RandomAccessFile, org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams, org.apache.hadoop.util.DataChecksum
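
The check above leans on Hadoop-internal types (ReplicaInputStreams, DataChecksum, FileIoProvider). Below is a minimal self-contained sketch of the same last-chunk idea, using plain java.io and java.util.zip.CRC32 in place of DataChecksum; the constants BYTES_PER_CHECKSUM, CHECKSUM_SIZE, and HEADER_LEN are assumptions standing in for values Hadoop actually reads from the meta file header.

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.zip.CRC32;

public class LastChunkCheck {

    // Assumed layout constants; Hadoop reads these from the meta file
    // header via BlockMetadataHeader rather than hard-coding them.
    private static final int BYTES_PER_CHECKSUM = 512;
    private static final int CHECKSUM_SIZE = 4;  // CRC32 stored as 4 bytes, big-endian
    private static final int HEADER_LEN = 7;     // assumed meta header length

    /** blockFile must be opened "rw" so an unverifiable tail can be truncated. */
    static long validLength(RandomAccessFile blockFile, RandomAccessFile metaFile)
            throws IOException {
        long blockLen = blockFile.length();
        long metaLen = metaFile.length();
        if (blockLen == 0 || metaLen < HEADER_LEN) {
            return 0;
        }
        // A chunk counts only if both its data and its stored crc exist.
        long numChunks = Math.min(
                (blockLen + BYTES_PER_CHECKSUM - 1) / BYTES_PER_CHECKSUM,
                (metaLen - HEADER_LEN) / CHECKSUM_SIZE);
        if (numChunks == 0) {
            return 0;
        }
        long lastChunkStart = (numChunks - 1) * (long) BYTES_PER_CHECKSUM;
        int lastChunkSize = (int) Math.min(BYTES_PER_CHECKSUM, blockLen - lastChunkStart);

        byte[] data = new byte[lastChunkSize];
        blockFile.seek(lastChunkStart);
        blockFile.readFully(data);

        byte[] stored = new byte[CHECKSUM_SIZE];
        metaFile.seek(HEADER_LEN + (numChunks - 1) * CHECKSUM_SIZE);
        metaFile.readFully(stored);

        CRC32 crc = new CRC32();
        crc.update(data, 0, lastChunkSize);
        long storedCrc = ((stored[0] & 0xffL) << 24) | ((stored[1] & 0xffL) << 16)
                       | ((stored[2] & 0xffL) << 8) | (stored[3] & 0xffL);

        // Count the last chunk only if its recomputed crc matches the stored one.
        long valid = crc.getValue() == storedCrc
                ? lastChunkStart + lastChunkSize : lastChunkStart;
        // Drop trailing bytes that no crc covers, mirroring the truncation above.
        if (blockLen > valid) {
            blockFile.setLength(valid);
        }
        return valid;
    }
}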

Example 2 with ReplicaInputStreams

Use of org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams in project hadoop by apache.

From class BlockReceiver, method computePartialChunkCrc.

/**
   * reads in the partial crc chunk and computes checksum
   * of pre-existing data in partial chunk.
   */
private Checksum computePartialChunkCrc(long blkoff, long ckoff) throws IOException {
    // find offset of the beginning of partial chunk.
    //
    int sizePartialChunk = (int) (blkoff % bytesPerChecksum);
    blkoff = blkoff - sizePartialChunk;
    if (LOG.isDebugEnabled()) {
        LOG.debug("computePartialChunkCrc for " + block + ": sizePartialChunk=" + sizePartialChunk + ", block offset=" + blkoff + ", metafile offset=" + ckoff);
    }
    // create an input stream from the block file
    // and read in partial crc chunk into temporary buffer
    //
    byte[] buf = new byte[sizePartialChunk];
    byte[] crcbuf = new byte[checksumSize];
    try (ReplicaInputStreams instr = datanode.data.getTmpInputStreams(block, blkoff, ckoff)) {
        instr.readDataFully(buf, 0, sizePartialChunk);
        // open meta file and read in crc value computed earlier
        instr.readChecksumFully(crcbuf, 0, crcbuf.length);
    }
    // compute crc of partial chunk from data read in the block file.
    final Checksum partialCrc = DataChecksum.newDataChecksum(diskChecksum.getChecksumType(), diskChecksum.getBytesPerChecksum());
    partialCrc.update(buf, 0, sizePartialChunk);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Read in partial CRC chunk from disk for " + block);
    }
    // verify that the pre-computed crc matches what we recalculated just now
    if (partialCrc.getValue() != checksum2long(crcbuf)) {
        String msg = "Partial CRC " + partialCrc.getValue() + " does not match value computed the last time file was closed " + checksum2long(crcbuf);
        throw new IOException(msg);
    }
    return partialCrc;
}
Also used: java.io.IOException, java.util.zip.Checksum, org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams, org.apache.hadoop.util.DataChecksum
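
The recomputed partial CRC is handed back to the writer so that newly appended bytes can extend it into the full-chunk checksum. This works because CRC32 state is incrementally extendable: seeding it with the pre-existing bytes and then updating with the new bytes yields the same value as checksumming the whole chunk at once. A small self-contained demonstration with illustrative data (not Hadoop API):

import java.util.zip.CRC32;

public class PartialChunkCrcDemo {
    public static void main(String[] args) {
        byte[] existing = "bytes-already-on-disk".getBytes(); // the partial chunk
        byte[] appended = "-newly-written".getBytes();        // data arriving now

        // Seed a checksum with the pre-existing partial chunk, which is
        // what computePartialChunkCrc does by re-reading it from disk.
        CRC32 partial = new CRC32();
        partial.update(existing, 0, existing.length);

        // The writer then extends the same state with the new bytes.
        partial.update(appended, 0, appended.length);

        // Recomputing over the whole chunk from scratch agrees, so the
        // full-chunk crc can be written out without re-reading old data.
        CRC32 full = new CRC32();
        full.update("bytes-already-on-disk-newly-written".getBytes());
        System.out.println(partial.getValue() == full.getValue()); // true
    }
}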

Example 3 with ReplicaInputStreams

Use of org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams in project hadoop by apache.

From class FsDatasetImpl, method getTmpInputStreams.

/**
   * Returns handles to the block file and its metadata file
   */
@Override // FsDatasetSpi
public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkOffset, long metaOffset) throws IOException {
    try (AutoCloseableLock lock = datasetLock.acquire()) {
        ReplicaInfo info = getReplicaInfo(b);
        FsVolumeReference ref = info.getVolume().obtainReference();
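        // if either stream fails to open, the catch blocks below release
        // whatever was acquired before rethrowing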
        try {
            InputStream blockInStream = info.getDataInputStream(blkOffset);
            try {
                InputStream metaInStream = info.getMetadataInputStream(metaOffset);
                return new ReplicaInputStreams(blockInStream, metaInStream, ref, datanode.getFileIoProvider());
            } catch (IOException e) {
                IOUtils.cleanup(null, blockInStream);
                throw e;
            }
        } catch (IOException e) {
            IOUtils.cleanup(null, ref);
            throw e;
        }
    }
}
Also used: java.io.FileInputStream, java.io.InputStream, java.io.IOException, org.apache.hadoop.hdfs.server.datanode.ReplicaInfo, org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference, org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream, org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams, org.apache.hadoop.io.MultipleIOException, org.apache.hadoop.util.AutoCloseableLock
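
The nested try/catch blocks implement an acquire-or-unwind pattern: each resource obtained so far (first the volume reference, then the block stream) is released if a later acquisition fails, so nothing leaks on partial failure. A generic sketch of that pattern, with a hypothetical Acquirer interface standing in for the individual obtain/open calls:

import java.io.Closeable;
import java.io.IOException;

public class AcquireAllOrNone {

    /** A single acquisition step, e.g. opening a stream or taking a reference. */
    interface Acquirer {
        Closeable acquire() throws IOException;
    }

    /**
     * Acquires every resource in order; if any step throws, everything
     * acquired so far is closed in reverse order before rethrowing.
     */
    static Closeable[] acquireAll(Acquirer... steps) throws IOException {
        Closeable[] acquired = new Closeable[steps.length];
        int n = 0;
        try {
            for (; n < steps.length; n++) {
                acquired[n] = steps[n].acquire();
            }
            return acquired;
        } catch (IOException e) {
            // Unwind partial progress, keeping secondary close() failures
            // attached to the original exception instead of masking it.
            for (int i = n - 1; i >= 0; i--) {
                try {
                    acquired[i].close();
                } catch (IOException suppressed) {
                    e.addSuppressed(suppressed);
                }
            }
            throw e;
        }
    }
}

Seen this way, getTmpInputStreams could be read as acquireAll over obtainReference, getDataInputStream, and getMetadataInputStream, with the three results wrapped in a ReplicaInputStreams on success.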

Aggregations

IOException (java.io.IOException): 3
ReplicaInputStreams (org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams): 3
FileInputStream (java.io.FileInputStream): 2
InputStream (java.io.InputStream): 2
DataChecksum (org.apache.hadoop.util.DataChecksum): 2
BufferedInputStream (java.io.BufferedInputStream): 1
DataInputStream (java.io.DataInputStream): 1
File (java.io.File): 1
RandomAccessFile (java.io.RandomAccessFile): 1
Checksum (java.util.zip.Checksum): 1
ReplicaInfo (org.apache.hadoop.hdfs.server.datanode.ReplicaInfo): 1
FsVolumeReference (org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference): 1
LengthInputStream (org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream): 1
MultipleIOException (org.apache.hadoop.io.MultipleIOException): 1
AutoCloseableLock (org.apache.hadoop.util.AutoCloseableLock): 1