Use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.
The class StripedWriter, method init().
void init() throws IOException {
  DataChecksum checksum = reconstructor.getChecksum();
  checksumSize = checksum.getChecksumSize();
  bytesPerChecksum = checksum.getBytesPerChecksum();
  int chunkSize = bytesPerChecksum + checksumSize;
  maxChunksPerPacket = Math.max(
      (WRITE_PACKET_SIZE - PacketHeader.PKT_MAX_HEADER_LEN) / chunkSize, 1);
  int maxPacketSize = chunkSize * maxChunksPerPacket
      + PacketHeader.PKT_MAX_HEADER_LEN;
  packetBuf = new byte[maxPacketSize];
  int tmpLen = checksumSize
      * (reconstructor.getBufferSize() / bytesPerChecksum);
  checksumBuf = new byte[tmpLen];
  if (initTargetStreams() == 0) {
    String error = "All targets are failed.";
    throw new IOException(error);
  }
}
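
The packet-sizing arithmetic above (a chunk is bytesPerChecksum data bytes plus checksumSize checksum bytes, and as many chunks as fit under the packet limit) can be exercised on its own. A minimal standalone sketch, assuming a CRC32C/512 checksum and using stand-in constants for WRITE_PACKET_SIZE and PacketHeader.PKT_MAX_HEADER_LEN (the real values come from HDFS configuration and the packet header class):

import org.apache.hadoop.util.DataChecksum;

public class PacketSizingSketch {
  // Stand-in constants; the real values come from the DFS client/datanode
  // configuration and PacketHeader.PKT_MAX_HEADER_LEN.
  private static final int WRITE_PACKET_SIZE = 64 * 1024;
  private static final int PKT_MAX_HEADER_LEN = 33;

  public static void main(String[] args) {
    DataChecksum checksum =
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);
    int checksumSize = checksum.getChecksumSize();         // 4 for CRC32C
    int bytesPerChecksum = checksum.getBytesPerChecksum(); // 512
    // One chunk on the wire = data bytes followed by their checksum.
    int chunkSize = bytesPerChecksum + checksumSize;
    int maxChunksPerPacket =
        Math.max((WRITE_PACKET_SIZE - PKT_MAX_HEADER_LEN) / chunkSize, 1);
    int maxPacketSize = chunkSize * maxChunksPerPacket + PKT_MAX_HEADER_LEN;
    System.out.println("chunkSize=" + chunkSize
        + " maxChunksPerPacket=" + maxChunksPerPacket
        + " maxPacketSize=" + maxPacketSize);
  }
}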
Use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.
The class BlockPoolSlice, method validateIntegrityAndSetLength().
/**
* Find out the number of bytes in the block that match its crc.
*
* This algorithm assumes that data corruption caused by unexpected
* datanode shutdown occurs only in the last crc chunk. So it checks
* only the last chunk.
*
* @param blockFile the block file
* @param genStamp generation stamp of the block
* @return the number of valid bytes
*/
private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
  try {
    final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
    long blockFileLen = blockFile.length();
    long metaFileLen = metaFile.length();
    int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
    if (!blockFile.exists() || blockFileLen == 0
        || !metaFile.exists() || metaFileLen < crcHeaderLen) {
      return 0;
    }
    try (DataInputStream checksumIn = new DataInputStream(
        new BufferedInputStream(
            fileIoProvider.getFileInputStream(volume, metaFile),
            ioFileBufferSize))) {
      // read and handle the common header here. For now just a version
      final DataChecksum checksum =
          BlockMetadataHeader.readDataChecksum(checksumIn, metaFile);
      int bytesPerChecksum = checksum.getBytesPerChecksum();
      int checksumSize = checksum.getChecksumSize();
      long numChunks = Math.min(
          (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum,
          (metaFileLen - crcHeaderLen) / checksumSize);
      if (numChunks == 0) {
        return 0;
      }
      try (InputStream blockIn =
               fileIoProvider.getFileInputStream(volume, blockFile);
           ReplicaInputStreams ris = new ReplicaInputStreams(blockIn,
               checksumIn, volume.obtainReference(), fileIoProvider)) {
        ris.skipChecksumFully((numChunks - 1) * checksumSize);
        long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
        ris.skipDataFully(lastChunkStartPos);
        int lastChunkSize = (int) Math.min(
            bytesPerChecksum, blockFileLen - lastChunkStartPos);
        byte[] buf = new byte[lastChunkSize + checksumSize];
        ris.readChecksumFully(buf, lastChunkSize, checksumSize);
        ris.readDataFully(buf, 0, lastChunkSize);
        checksum.update(buf, 0, lastChunkSize);
        long validFileLength;
        if (checksum.compare(buf, lastChunkSize)) {
          // last chunk matches crc
          validFileLength = lastChunkStartPos + lastChunkSize;
        } else {
          // last chunk is corrupt
          validFileLength = lastChunkStartPos;
        }
        // truncate if extra bytes are present without CRC
        if (blockFile.length() > validFileLength) {
          try (RandomAccessFile blockRAF = fileIoProvider.getRandomAccessFile(
              volume, blockFile, "rw")) {
            // truncate blockFile
            blockRAF.setLength(validFileLength);
          }
        }
        return validFileLength;
      }
    }
  } catch (IOException e) {
    FsDatasetImpl.LOG.warn(e);
    return 0;
  }
}
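
The core of the check above is DataChecksum#update over the last chunk's data followed by DataChecksum#compare against the checksum bytes read from the meta file. A minimal sketch of just that step, using in-memory data instead of block and meta files; the chunkMatches helper is hypothetical, not part of BlockPoolSlice:

import org.apache.hadoop.util.DataChecksum;

public class LastChunkCheckSketch {

  /** Returns true if the stored checksum bytes at sumOff match the chunk. */
  static boolean chunkMatches(DataChecksum checksum, byte[] chunk,
      int chunkLen, byte[] storedSum, int sumOff) {
    checksum.reset();
    checksum.update(chunk, 0, chunkLen);
    return checksum.compare(storedSum, sumOff);
  }

  public static void main(String[] args) throws Exception {
    DataChecksum checksum =
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
    byte[] data = "a partial last chunk".getBytes("UTF-8");

    // Produce the "stored" checksum the way a writer would have.
    checksum.update(data, 0, data.length);
    byte[] stored = new byte[checksum.getChecksumSize()];
    checksum.writeValue(stored, 0, true); // also resets the checksum

    System.out.println(chunkMatches(checksum, data, data.length, stored, 0));
    data[0] ^= 1; // corrupt one byte: compare should now fail
    System.out.println(chunkMatches(checksum, data, data.length, stored, 0));
  }
}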
Use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.
The class TestDiskError, method testReplicationError().
/**
* Test that when there is a failure replicating a block the temporary
* and meta files are cleaned up and subsequent replication succeeds.
*/
@Test
public void testReplicationError() throws Exception {
  // create a file of replication factor of 1
  final Path fileName = new Path("/test.txt");
  final int fileLen = 1;
  DFSTestUtil.createFile(fs, fileName, 1, (short) 1, 1L);
  DFSTestUtil.waitReplication(fs, fileName, (short) 1);
  // get the block belonging to the created file
  LocatedBlocks blocks = NameNodeAdapter.getBlockLocations(
      cluster.getNameNode(), fileName.toString(), 0, (long) fileLen);
  assertEquals("Should only find 1 block", 1, blocks.locatedBlockCount());
  LocatedBlock block = blocks.get(0);
  // bring up a second datanode
  cluster.startDataNodes(conf, 1, true, null, null);
  cluster.waitActive();
  final int sndNode = 1;
  DataNode datanode = cluster.getDataNodes().get(sndNode);
  FsDatasetTestUtils utils = cluster.getFsDatasetTestUtils(datanode);
  // replicate the block to the second datanode
  InetSocketAddress target = datanode.getXferAddress();
  Socket s = new Socket(target.getAddress(), target.getPort());
  // write the header.
  DataOutputStream out = new DataOutputStream(s.getOutputStream());
  DataChecksum checksum =
      DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
  new Sender(out).writeBlock(block.getBlock(), StorageType.DEFAULT,
      BlockTokenSecretManager.DUMMY_TOKEN, "", new DatanodeInfo[0],
      new StorageType[0], null, BlockConstructionStage.PIPELINE_SETUP_CREATE,
      1, 0L, 0L, 0L, checksum, CachingStrategy.newDefaultStrategy(),
      false, false, null);
  out.flush();
  // close the connection before sending the content of the block
  out.close();
  // the temporary block & meta files should be deleted
  String bpid = cluster.getNamesystem().getBlockPoolId();
  while (utils.getStoredReplicas(bpid).hasNext()) {
    Thread.sleep(100);
  }
  // then increase the file's replication factor
  fs.setReplication(fileName, (short) 2);
  // replication to the second datanode should succeed
  DFSTestUtil.waitReplication(fs, fileName, (short) 2);
  // clean up the file
  fs.delete(fileName, false);
}
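
The CRC32/512 checksum constructed above is what the writer negotiates for the transfer: each 512-byte chunk of packet data carries a 4-byte CRC. A standalone sketch of that chunked computation and verification with DataChecksum#calculateChunkedSums and #verifyChunkedSums, using in-memory buffers rather than a real block transfer:

import java.nio.ByteBuffer;
import org.apache.hadoop.util.DataChecksum;

public class ChunkedSumsSketch {
  public static void main(String[] args) throws Exception {
    // Same parameters the test passes to Sender#writeBlock.
    DataChecksum checksum =
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);

    byte[] blockData = new byte[1300]; // two full 512-byte chunks + one partial
    int numChunks = (blockData.length + 511) / 512;
    ByteBuffer data = ByteBuffer.wrap(blockData);
    ByteBuffer sums =
        ByteBuffer.allocate(numChunks * checksum.getChecksumSize());

    // One CRC32 per 512-byte chunk, as computed for each outgoing packet.
    checksum.calculateChunkedSums(data, sums);

    // Re-verify; throws ChecksumException on any mismatch.
    checksum.verifyChunkedSums(data, sums, "sketch-block", 0);
    System.out.println("verified " + numChunks + " chunks");
  }
}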
Use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.
The class TestSimulatedFSDataset, method testGetMetaData().
@Test
public void testGetMetaData() throws IOException {
  final SimulatedFSDataset fsdataset = getSimulatedFSDataset();
  ExtendedBlock b = new ExtendedBlock(bpid, FIRST_BLK_ID, 5, 0);
  try {
    assertTrue(fsdataset.getMetaDataInputStream(b) == null);
    assertTrue("Expected an IO exception", false);
  } catch (IOException e) {
    // ok - as expected
  }
  // Only need to add one but ....
  addSomeBlocks(fsdataset);
  b = new ExtendedBlock(bpid, FIRST_BLK_ID, 0, 0);
  InputStream metaInput = fsdataset.getMetaDataInputStream(b);
  DataInputStream metaDataInput = new DataInputStream(metaInput);
  short version = metaDataInput.readShort();
  assertEquals(BlockMetadataHeader.VERSION, version);
  DataChecksum checksum = DataChecksum.newDataChecksum(metaDataInput);
  assertEquals(DataChecksum.Type.NULL, checksum.getChecksumType());
  assertEquals(0, checksum.getChecksumSize());
}
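
The metadata stream the test reads back is a version short followed by the serialized checksum header (one type byte plus a four-byte bytesPerChecksum). That layout can be reproduced round-trip with DataChecksum#writeHeader and DataChecksum#newDataChecksum(DataInputStream); the VERSION constant below is a stand-in for BlockMetadataHeader.VERSION:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.util.DataChecksum;

public class MetaHeaderRoundTripSketch {
  public static void main(String[] args) throws Exception {
    final short VERSION = 1; // stand-in for BlockMetadataHeader.VERSION

    // Write: version short, then the checksum header.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.writeShort(VERSION);
    DataChecksum written =
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
    written.writeHeader(out);
    out.flush();

    // Read it back the same way the test does.
    DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    short version = in.readShort();
    DataChecksum read = DataChecksum.newDataChecksum(in);
    System.out.println("version=" + version
        + " type=" + read.getChecksumType()
        + " bytesPerChecksum=" + read.getBytesPerChecksum()
        + " checksumSize=" + read.getChecksumSize());
  }
}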
Use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.
The class FsVolumeImpl, method loadLastPartialChunkChecksum().
@Override
public byte[] loadLastPartialChunkChecksum(File blockFile, File metaFile)
    throws IOException {
  // readHeader closes the temporary FileInputStream.
  DataChecksum dcs;
  try (FileInputStream fis =
           fileIoProvider.getFileInputStream(this, metaFile)) {
    dcs = BlockMetadataHeader.readHeader(fis).getChecksum();
  }
  final int checksumSize = dcs.getChecksumSize();
  final long onDiskLen = blockFile.length();
  final int bytesPerChecksum = dcs.getBytesPerChecksum();
  if (onDiskLen % bytesPerChecksum == 0) {
    // the last chunk is complete; no need to preserve its checksum
    // because it will not be modified.
    return null;
  }
  long offsetInChecksum = BlockMetadataHeader.getHeaderSize()
      + (onDiskLen / bytesPerChecksum) * checksumSize;
  byte[] lastChecksum = new byte[checksumSize];
  try (RandomAccessFile raf =
           fileIoProvider.getRandomAccessFile(this, metaFile, "r")) {
    raf.seek(offsetInChecksum);
    int readBytes = raf.read(lastChecksum, 0, checksumSize);
    if (readBytes == -1) {
      throw new IOException("Expected to read " + checksumSize
          + " bytes from offset " + offsetInChecksum
          + " but reached end of file.");
    } else if (readBytes != checksumSize) {
      throw new IOException("Expected to read " + checksumSize
          + " bytes from offset " + offsetInChecksum
          + " but read " + readBytes + " bytes.");
    }
  }
  return lastChecksum;
}
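
The offset arithmetic above (metadata header size, plus one checksum entry for every complete chunk already on disk) can be isolated into a small helper. A sketch under the assumption that the metadata header is a 2-byte version followed by the checksum header; the lastPartialChunkChecksumOffset helper is hypothetical, not part of FsVolumeImpl:

import org.apache.hadoop.util.DataChecksum;

public class PartialChunkOffsetSketch {
  // Assumed metadata header size: 2-byte version + checksum header,
  // mirroring what BlockMetadataHeader.getHeaderSize() returns.
  private static final int META_HEADER_SIZE =
      2 + DataChecksum.getChecksumHeaderSize();

  /** Offset in the meta file of the checksum covering the last partial chunk,
   *  or -1 if the last chunk is complete. */
  static long lastPartialChunkChecksumOffset(long onDiskLen, DataChecksum dcs) {
    int bytesPerChecksum = dcs.getBytesPerChecksum();
    if (onDiskLen % bytesPerChecksum == 0) {
      return -1; // last chunk is complete; nothing to preserve
    }
    return META_HEADER_SIZE
        + (onDiskLen / bytesPerChecksum) * dcs.getChecksumSize();
  }

  public static void main(String[] args) {
    DataChecksum dcs =
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
    // 1300 bytes on disk = 2 complete chunks + a 276-byte partial chunk,
    // so the wanted checksum is the third 4-byte entry after the header.
    System.out.println(lastPartialChunkChecksumOffset(1300, dcs));
  }
}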