Search in sources :

Example 1 with DataChecksum

use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.

the class StripedWriter method init.

void init() throws IOException {
    DataChecksum checksum = reconstructor.getChecksum();
    checksumSize = checksum.getChecksumSize();
    bytesPerChecksum = checksum.getBytesPerChecksum();
    int chunkSize = bytesPerChecksum + checksumSize;
    maxChunksPerPacket = Math.max((WRITE_PACKET_SIZE - PacketHeader.PKT_MAX_HEADER_LEN) / chunkSize, 1);
    int maxPacketSize = chunkSize * maxChunksPerPacket + PacketHeader.PKT_MAX_HEADER_LEN;
    packetBuf = new byte[maxPacketSize];
    int tmpLen = checksumSize * (reconstructor.getBufferSize() / bytesPerChecksum);
    checksumBuf = new byte[tmpLen];
    if (initTargetStreams() == 0) {
        String error = "All targets are failed.";
        throw new IOException(error);
Also used : IOException(java.io.IOException) DataChecksum(org.apache.hadoop.util.DataChecksum)

Example 2 with DataChecksum

use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.

the class BlockPoolSlice method validateIntegrityAndSetLength.

  /**
   * Find out the number of bytes in the block that match its crc.
   * This algorithm assumes that data corruption caused by unexpected
   * datanode shutdown occurs only in the last crc chunk. So it checks
   * only the last chunk.
   * @param blockFile the block file
   * @param genStamp generation stamp of the block
   * @return the number of valid bytes
   */
private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
    try {
        final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
        long blockFileLen = blockFile.length();
        long metaFileLen = metaFile.length();
        int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
        if (!blockFile.exists() || blockFileLen == 0 || !metaFile.exists() || metaFileLen < crcHeaderLen) {
            return 0;
        try (DataInputStream checksumIn = new DataInputStream(new BufferedInputStream(fileIoProvider.getFileInputStream(volume, metaFile), ioFileBufferSize))) {
            // read and handle the common header here. For now just a version
            final DataChecksum checksum = BlockMetadataHeader.readDataChecksum(checksumIn, metaFile);
            int bytesPerChecksum = checksum.getBytesPerChecksum();
            int checksumSize = checksum.getChecksumSize();
            long numChunks = Math.min((blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum, (metaFileLen - crcHeaderLen) / checksumSize);
            if (numChunks == 0) {
                return 0;
            try (InputStream blockIn = fileIoProvider.getFileInputStream(volume, blockFile);
                ReplicaInputStreams ris = new ReplicaInputStreams(blockIn, checksumIn, volume.obtainReference(), fileIoProvider)) {
                ris.skipChecksumFully((numChunks - 1) * checksumSize);
                long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
                int lastChunkSize = (int) Math.min(bytesPerChecksum, blockFileLen - lastChunkStartPos);
                byte[] buf = new byte[lastChunkSize + checksumSize];
                ris.readChecksumFully(buf, lastChunkSize, checksumSize);
                ris.readDataFully(buf, 0, lastChunkSize);
                checksum.update(buf, 0, lastChunkSize);
                long validFileLength;
                if (, lastChunkSize)) {
                    // last chunk matches crc
                    validFileLength = lastChunkStartPos + lastChunkSize;
                } else {
                    // last chunk is corrupt
                    validFileLength = lastChunkStartPos;
                // truncate if extra bytes are present without CRC
                if (blockFile.length() > validFileLength) {
                    try (RandomAccessFile blockRAF = fileIoProvider.getRandomAccessFile(volume, blockFile, "rw")) {
                        // truncate blockFile
                return validFileLength;
    } catch (IOException e) {
        return 0;
Also used : ReplicaInputStreams(org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams) RandomAccessFile(java.io.RandomAccessFile) BufferedInputStream(java.io.BufferedInputStream) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) File(java.io.File) DataChecksum(org.apache.hadoop.util.DataChecksum)

Example 3 with DataChecksum

use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.

the class TestDiskError method testReplicationError.

  /**
   * Test that when there is a failure replicating a block the temporary
   * and meta files are cleaned up and subsequent replication succeeds.
   */
public void testReplicationError() throws Exception {
    // create a file of replication factor of 1
    final Path fileName = new Path("/test.txt");
    final int fileLen = 1;
    DFSTestUtil.createFile(fs, fileName, 1, (short) 1, 1L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 1);
    // get the block belonged to the created file
    LocatedBlocks blocks = NameNodeAdapter.getBlockLocations(cluster.getNameNode(), fileName.toString(), 0, (long) fileLen);
    assertEquals("Should only find 1 block", blocks.locatedBlockCount(), 1);
    LocatedBlock block = blocks.get(0);
    // bring up a second datanode
    cluster.startDataNodes(conf, 1, true, null, null);
    final int sndNode = 1;
    DataNode datanode = cluster.getDataNodes().get(sndNode);
    FsDatasetTestUtils utils = cluster.getFsDatasetTestUtils(datanode);
    // replicate the block to the second datanode
    InetSocketAddress target = datanode.getXferAddress();
    Socket s = new Socket(target.getAddress(), target.getPort());
    // write the header.
    DataOutputStream out = new DataOutputStream(s.getOutputStream());
    DataChecksum checksum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
    new Sender(out).writeBlock(block.getBlock(), StorageType.DEFAULT, BlockTokenSecretManager.DUMMY_TOKEN, "", new DatanodeInfo[0], new StorageType[0], null, BlockConstructionStage.PIPELINE_SETUP_CREATE, 1, 0L, 0L, 0L, checksum, CachingStrategy.newDefaultStrategy(), false, false, null);
    // close the connection before sending the content of the block
    // the temporary block & meta files should be deleted
    String bpid = cluster.getNamesystem().getBlockPoolId();
    while (utils.getStoredReplicas(bpid).hasNext()) {
    // then increase the file's replication factor
    fs.setReplication(fileName, (short) 2);
    // replication should succeed
    DFSTestUtil.waitReplication(fs, fileName, (short) 1);
    // clean up the file
    fs.delete(fileName, false);
Also used : Path(org.apache.hadoop.fs.Path) Sender(org.apache.hadoop.hdfs.protocol.datatransfer.Sender) InetSocketAddress(java.net.InetSocketAddress) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) Socket(java.net.Socket) DataChecksum(org.apache.hadoop.util.DataChecksum) Test(org.junit.Test)

Example 4 with DataChecksum

use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.

the class TestSimulatedFSDataset method testGetMetaData.

public void testGetMetaData() throws IOException {
    final SimulatedFSDataset fsdataset = getSimulatedFSDataset();
    ExtendedBlock b = new ExtendedBlock(bpid, FIRST_BLK_ID, 5, 0);
    try {
        assertTrue(fsdataset.getMetaDataInputStream(b) == null);
        assertTrue("Expected an IO exception", false);
    } catch (IOException e) {
    // ok - as expected
    // Only need to add one but ....
    b = new ExtendedBlock(bpid, FIRST_BLK_ID, 0, 0);
    InputStream metaInput = fsdataset.getMetaDataInputStream(b);
    DataInputStream metaDataInput = new DataInputStream(metaInput);
    short version = metaDataInput.readShort();
    assertEquals(BlockMetadataHeader.VERSION, version);
    DataChecksum checksum = DataChecksum.newDataChecksum(metaDataInput);
    assertEquals(DataChecksum.Type.NULL, checksum.getChecksumType());
    assertEquals(0, checksum.getChecksumSize());
Also used : DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) IOException(java.io.IOException) DataChecksum(org.apache.hadoop.util.DataChecksum) Test(org.junit.Test)

Example 5 with DataChecksum

use of org.apache.hadoop.util.DataChecksum in project hadoop by apache.

the class FsVolumeImpl method loadLastPartialChunkChecksum.

public byte[] loadLastPartialChunkChecksum(File blockFile, File metaFile) throws IOException {
    // readHeader closes the temporary FileInputStream.
    DataChecksum dcs;
    try (FileInputStream fis = fileIoProvider.getFileInputStream(this, metaFile)) {
        dcs = BlockMetadataHeader.readHeader(fis).getChecksum();
    final int checksumSize = dcs.getChecksumSize();
    final long onDiskLen = blockFile.length();
    final int bytesPerChecksum = dcs.getBytesPerChecksum();
    if (onDiskLen % bytesPerChecksum == 0) {
        // because it will not be modified.
        return null;
    long offsetInChecksum = BlockMetadataHeader.getHeaderSize() + (onDiskLen / bytesPerChecksum) * checksumSize;
    byte[] lastChecksum = new byte[checksumSize];
    try (RandomAccessFile raf = fileIoProvider.getRandomAccessFile(this, metaFile, "r")) {;
        int readBytes =, 0, checksumSize);
        if (readBytes == -1) {
            throw new IOException("Expected to read " + checksumSize + " bytes from offset " + offsetInChecksum + " but reached end of file.");
        } else if (readBytes != checksumSize) {
            throw new IOException("Expected to read " + checksumSize + " bytes from offset " + offsetInChecksum + " but read " + readBytes + " bytes.");
    return lastChecksum;
Also used : RandomAccessFile(java.io.RandomAccessFile) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) DataChecksum(org.apache.hadoop.util.DataChecksum)


DataChecksum (org.apache.hadoop.util.DataChecksum)21 IOException ( DataInputStream ( FileInputStream ( DataOutputStream ( File ( InputStream ( RandomAccessFile ( ByteBuffer (java.nio.ByteBuffer)4 BufferedInputStream ( BufferedOutputStream ( Path (org.apache.hadoop.fs.Path)3 Test (org.junit.Test)3 FileOutputStream ( InterruptedIOException ( InvocationTargetException (java.lang.reflect.InvocationTargetException)2 InetSocketAddress ( Socket ( HadoopIllegalArgumentException (org.apache.hadoop.HadoopIllegalArgumentException)2 Configuration (org.apache.hadoop.conf.Configuration)2