Use of org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams in project hadoop by apache.
The class BlockPoolSlice, method validateIntegrityAndSetLength.
/**
 * Find out the number of bytes in the block that match its crc.
 *
 * This algorithm assumes that data corruption caused by unexpected
 * datanode shutdown occurs only in the last crc chunk. So it checks
 * only the last chunk.
 *
 * @param blockFile the block file
 * @param genStamp generation stamp of the block
 * @return the number of valid bytes
 */
private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
  try {
    final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
    long blockFileLen = blockFile.length();
    long metaFileLen = metaFile.length();
    int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
    if (!blockFile.exists() || blockFileLen == 0 ||
        !metaFile.exists() || metaFileLen < crcHeaderLen) {
      return 0;
    }
    try (DataInputStream checksumIn = new DataInputStream(
        new BufferedInputStream(
            fileIoProvider.getFileInputStream(volume, metaFile),
            ioFileBufferSize))) {
      // read and handle the common header here. For now just a version
      final DataChecksum checksum =
          BlockMetadataHeader.readDataChecksum(checksumIn, metaFile);
      int bytesPerChecksum = checksum.getBytesPerChecksum();
      int checksumSize = checksum.getChecksumSize();
      long numChunks = Math.min(
          (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum,
          (metaFileLen - crcHeaderLen) / checksumSize);
      if (numChunks == 0) {
        return 0;
      }
      try (InputStream blockIn =
               fileIoProvider.getFileInputStream(volume, blockFile);
           ReplicaInputStreams ris = new ReplicaInputStreams(blockIn,
               checksumIn, volume.obtainReference(), fileIoProvider)) {
        ris.skipChecksumFully((numChunks - 1) * checksumSize);
        long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
        ris.skipDataFully(lastChunkStartPos);
        int lastChunkSize = (int) Math.min(
            bytesPerChecksum, blockFileLen - lastChunkStartPos);
        byte[] buf = new byte[lastChunkSize + checksumSize];
        ris.readChecksumFully(buf, lastChunkSize, checksumSize);
        ris.readDataFully(buf, 0, lastChunkSize);
        checksum.update(buf, 0, lastChunkSize);
        long validFileLength;
        if (checksum.compare(buf, lastChunkSize)) {
          // last chunk matches crc
          validFileLength = lastChunkStartPos + lastChunkSize;
        } else {
          // last chunk is corrupt
          validFileLength = lastChunkStartPos;
        }
        // truncate if extra bytes are present without CRC
        if (blockFile.length() > validFileLength) {
          try (RandomAccessFile blockRAF = fileIoProvider
              .getRandomAccessFile(volume, blockFile, "rw")) {
            // truncate blockFile
            blockRAF.setLength(validFileLength);
          }
        }
        return validFileLength;
      }
    }
  } catch (IOException e) {
    FsDatasetImpl.LOG.warn(e);
    return 0;
  }
}
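The pattern above (seek to the last checksum chunk, recompute its checksum, and treat everything past the last verified byte as invalid) can be illustrated without Hadoop's internal types. Below is a minimal sketch assuming a hypothetical, headerless meta layout of one 4-byte big-endian CRC32 per chunk, which is not the real HDFS meta-file format; it only shows the chunk arithmetic and the final comparison, with LastChunkValidator and validLength being made-up names.

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.zip.CRC32;

public class LastChunkValidator {
  /**
   * Returns the number of leading bytes of dataPath that are covered by a
   * checksum that still verifies. Hypothetical layout: crcPath holds one
   * 4-byte big-endian CRC32 per bytesPerChunk bytes of data, no header.
   */
  static long validLength(String dataPath, String crcPath, int bytesPerChunk)
      throws IOException {
    try (RandomAccessFile data = new RandomAccessFile(dataPath, "r");
         RandomAccessFile crcs = new RandomAccessFile(crcPath, "r")) {
      long dataLen = data.length();
      // number of chunks both files agree on (same min() as in the HDFS code)
      long numChunks = Math.min(
          (dataLen + bytesPerChunk - 1) / bytesPerChunk,
          crcs.length() / 4);
      if (numChunks == 0) {
        return 0;
      }
      long lastChunkStart = (numChunks - 1) * bytesPerChunk;
      int lastChunkSize = (int) Math.min(bytesPerChunk, dataLen - lastChunkStart);

      // read the last chunk of data and its stored checksum
      byte[] chunk = new byte[lastChunkSize];
      data.seek(lastChunkStart);
      data.readFully(chunk);
      crcs.seek((numChunks - 1) * 4);
      long storedCrc = crcs.readInt() & 0xFFFFFFFFL;

      // recompute and compare; on mismatch only the earlier chunks are valid
      CRC32 crc = new CRC32();
      crc.update(chunk, 0, lastChunkSize);
      return crc.getValue() == storedCrc
          ? lastChunkStart + lastChunkSize
          : lastChunkStart;
    }
  }
}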
Use of org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams in project hadoop by apache.
The class BlockReceiver, method computePartialChunkCrc.
/**
 * Reads in the partial crc chunk and computes the checksum
 * of the pre-existing data in the partial chunk.
 */
private Checksum computePartialChunkCrc(long blkoff, long ckoff)
    throws IOException {
  // find offset of the beginning of partial chunk.
  //
  int sizePartialChunk = (int) (blkoff % bytesPerChecksum);
  blkoff = blkoff - sizePartialChunk;
  if (LOG.isDebugEnabled()) {
    LOG.debug("computePartialChunkCrc for " + block
        + ": sizePartialChunk=" + sizePartialChunk
        + ", block offset=" + blkoff
        + ", metafile offset=" + ckoff);
  }
  // create an input stream from the block file
  // and read in partial crc chunk into temporary buffer
  //
  byte[] buf = new byte[sizePartialChunk];
  byte[] crcbuf = new byte[checksumSize];
  try (ReplicaInputStreams instr =
      datanode.data.getTmpInputStreams(block, blkoff, ckoff)) {
    instr.readDataFully(buf, 0, sizePartialChunk);
    // open meta file and read in crc value computed earlier
    instr.readChecksumFully(crcbuf, 0, crcbuf.length);
  }
  // compute crc of partial chunk from data read in the block file.
  final Checksum partialCrc = DataChecksum.newDataChecksum(
      diskChecksum.getChecksumType(), diskChecksum.getBytesPerChecksum());
  partialCrc.update(buf, 0, sizePartialChunk);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Read in partial CRC chunk from disk for " + block);
  }
  // verify that the pre-computed crc matches what we recalculated just now
  if (partialCrc.getValue() != checksum2long(crcbuf)) {
    String msg = "Partial CRC " + partialCrc.getValue()
        + " does not match value computed the last time file was closed "
        + checksum2long(crcbuf);
    throw new IOException(msg);
  }
  return partialCrc;
}
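computePartialChunkCrc is called when a write resumes in the middle of a checksum chunk: the bytes already on disk for that chunk are re-read, their checksum is recomputed, and the result must match the value persisted when the file was last closed. A minimal sketch of the same offset arithmetic and verification, again using plain CRC32 and the hypothetical headerless 4-bytes-per-chunk meta layout as a stand-in for Hadoop's DataChecksum machinery:

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.zip.CRC32;

public class PartialChunkCrc {
  /**
   * Recomputes the checksum of the partial chunk that ends at blockOffset and
   * verifies it against the stored value. Hypothetical meta layout: one
   * 4-byte big-endian CRC32 per bytesPerChunk bytes of data, no header.
   */
  static CRC32 computePartialChunkCrc(String dataPath, String crcPath,
      long blockOffset, int bytesPerChunk) throws IOException {
    // find the offset of the beginning of the partial chunk
    int sizePartialChunk = (int) (blockOffset % bytesPerChunk);
    long chunkStart = blockOffset - sizePartialChunk;
    long crcOffset = (chunkStart / bytesPerChunk) * 4;  // matching CRC slot

    byte[] buf = new byte[sizePartialChunk];
    long storedCrc;
    try (RandomAccessFile data = new RandomAccessFile(dataPath, "r");
         RandomAccessFile crcs = new RandomAccessFile(crcPath, "r")) {
      data.seek(chunkStart);
      data.readFully(buf);                  // pre-existing bytes of the chunk
      crcs.seek(crcOffset);
      storedCrc = crcs.readInt() & 0xFFFFFFFFL;
    }

    // recompute the checksum of the pre-existing data and verify it
    CRC32 partialCrc = new CRC32();
    partialCrc.update(buf, 0, sizePartialChunk);
    if (partialCrc.getValue() != storedCrc) {
      throw new IOException("Partial CRC " + partialCrc.getValue()
          + " does not match value computed the last time the file was closed "
          + storedCrc);
    }
    return partialCrc;
  }
}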
Use of org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams in project hadoop by apache.
The class FsDatasetImpl, method getTmpInputStreams.
/**
 * Returns handles to the block file and its metadata file.
 */
@Override // FsDatasetSpi
public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b,
    long blkOffset, long metaOffset) throws IOException {
  try (AutoCloseableLock lock = datasetLock.acquire()) {
    ReplicaInfo info = getReplicaInfo(b);
    FsVolumeReference ref = info.getVolume().obtainReference();
    try {
      InputStream blockInStream = info.getDataInputStream(blkOffset);
      try {
        InputStream metaInStream = info.getMetadataInputStream(metaOffset);
        return new ReplicaInputStreams(
            blockInStream, metaInStream, ref, datanode.getFileIoProvider());
      } catch (IOException e) {
        IOUtils.cleanup(null, blockInStream);
        throw e;
      }
    } catch (IOException e) {
      IOUtils.cleanup(null, ref);
      throw e;
    }
  }
}
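getTmpInputStreams acquires its resources in order (volume reference, block data stream, metadata stream) and hands ownership of all of them to the returned ReplicaInputStreams; if a later acquisition fails, the ones already acquired are closed before the exception propagates. A generic sketch of that acquire-or-clean-up pattern, with hypothetical openA/openB placeholders standing in for the HDFS-specific calls:

import java.io.Closeable;
import java.io.IOException;

public class AcquireOrCleanUp {
  /** Takes ownership of both resources once fully constructed. */
  static class Both implements Closeable {
    final Closeable first, second;
    Both(Closeable first, Closeable second) {
      this.first = first;
      this.second = second;
    }
    @Override
    public void close() throws IOException {
      try {
        first.close();
      } finally {
        second.close();
      }
    }
  }

  /**
   * Opens two resources in order; if the second open fails, the first is
   * closed before rethrowing so nothing leaks. openA/openB are hypothetical
   * placeholders for the real acquisitions.
   */
  static Both openBoth() throws IOException {
    Closeable a = openA();
    try {
      Closeable b = openB();
      return new Both(a, b);  // success: the caller now owns both
    } catch (IOException e) {
      try {
        a.close();
      } catch (IOException suppressed) {
        e.addSuppressed(suppressed);
      }
      throw e;
    }
  }

  static Closeable openA() throws IOException {
    return () -> { };  // placeholder resource
  }

  static Closeable openB() throws IOException {
    return () -> { };  // placeholder resource
  }
}

At the call site the returned container is then managed with try-with-resources, which is how computePartialChunkCrc above consumes the ReplicaInputStreams returned by getTmpInputStreams.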