use of org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader in project hadoop by apache.
the class BlockSender method writePacketHeader.
/**
* Write the packet header into {@code pkt} and
* return the length of the header written.
*/
private int writePacketHeader(ByteBuffer pkt, int dataLen, int packetLen) {
pkt.clear();
// both syncBlock and syncPacket are false
PacketHeader header = new PacketHeader(packetLen, offset, seqno, (dataLen == 0), dataLen, false);
int size = header.getSerializedSize();
pkt.position(PacketHeader.PKT_MAX_HEADER_LEN - size);
header.putInBuffer(pkt);
return size;
}
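For context, a minimal standalone sketch (an assumption about how the caller lays out the buffer, not code taken from BlockSender): the header is serialized right-aligned against PacketHeader.PKT_MAX_HEADER_LEN so that it ends immediately where the payload begins, letting header and data be sent from one contiguous buffer region. The class name and the sizes below are illustrative.
import java.nio.ByteBuffer;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;

public class PacketHeaderLayoutSketch {
  public static void main(String[] args) {
    int dataLen = 512;            // illustrative payload size
    int packetLen = dataLen + 4;  // illustrative; the real value also counts checksum bytes
    ByteBuffer pkt = ByteBuffer.allocate(PacketHeader.PKT_MAX_HEADER_LEN + dataLen);

    // Serialize the header so that it ends exactly at PKT_MAX_HEADER_LEN.
    PacketHeader header = new PacketHeader(packetLen, 0L, 1, false, dataLen, false);
    int size = header.getSerializedSize();
    pkt.clear();
    pkt.position(PacketHeader.PKT_MAX_HEADER_LEN - size);
    header.putInBuffer(pkt);

    int headerStart = PacketHeader.PKT_MAX_HEADER_LEN - size;
    // The payload is written starting at PKT_MAX_HEADER_LEN, so the bytes
    // [headerStart, PKT_MAX_HEADER_LEN + dataLen) can go to the socket in one write.
    System.out.println("header occupies [" + headerStart + ", " + PacketHeader.PKT_MAX_HEADER_LEN + ")");
  }
}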
use of org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader in project hadoop by apache.
the class BlockReceiver method receivePacket.
/**
* Receives and processes a packet. It can contain many chunks.
* Returns the number of data bytes that the packet has.
*/
private int receivePacket() throws IOException {
// read the next packet
packetReceiver.receiveNextPacket(in);
PacketHeader header = packetReceiver.getHeader();
if (LOG.isDebugEnabled()) {
LOG.debug("Receiving one packet for block " + block + ": " + header);
}
// Sanity check the header
if (header.getOffsetInBlock() > replicaInfo.getNumBytes()) {
throw new IOException("Received an out-of-sequence packet for " + block + "from " + inAddr + " at offset " + header.getOffsetInBlock() + ". Expecting packet starting at " + replicaInfo.getNumBytes());
}
if (header.getDataLen() < 0) {
throw new IOException("Got wrong length during writeBlock(" + block + ") from " + inAddr + " at offset " + header.getOffsetInBlock() + ": " + header.getDataLen());
}
long offsetInBlock = header.getOffsetInBlock();
long seqno = header.getSeqno();
boolean lastPacketInBlock = header.isLastPacketInBlock();
final int len = header.getDataLen();
boolean syncBlock = header.getSyncBlock();
// avoid double sync'ing on close
if (syncBlock && lastPacketInBlock) {
this.syncOnClose = false;
}
// update received bytes
final long firstByteInBlock = offsetInBlock;
offsetInBlock += len;
if (replicaInfo.getNumBytes() < offsetInBlock) {
replicaInfo.setNumBytes(offsetInBlock);
}
// put in queue for pending acks, unless sync was requested
if (responder != null && !syncBlock && !shouldVerifyChecksum()) {
((PacketResponder) responder.getRunnable()).enqueue(seqno, lastPacketInBlock, offsetInBlock, Status.SUCCESS);
}
// Drop heartbeat for testing.
if (seqno < 0 && len == 0 && DataNodeFaultInjector.get().dropHeartbeatPacket()) {
return 0;
}
//First write the packet to the mirror:
if (mirrorOut != null && !mirrorError) {
try {
long begin = Time.monotonicNow();
// For testing. Normally no-op.
DataNodeFaultInjector.get().stopSendingPacketDownstream(mirrorAddr);
packetReceiver.mirrorPacketTo(mirrorOut);
mirrorOut.flush();
long now = Time.monotonicNow();
setLastSentTime(now);
long duration = now - begin;
DataNodeFaultInjector.get().logDelaySendingPacketDownstream(mirrorAddr, duration);
trackSendPacketToLastNodeInPipeline(duration);
if (duration > datanodeSlowLogThresholdMs) {
LOG.warn("Slow BlockReceiver write packet to mirror took " + duration + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
}
} catch (IOException e) {
handleMirrorOutError(e);
}
}
ByteBuffer dataBuf = packetReceiver.getDataSlice();
ByteBuffer checksumBuf = packetReceiver.getChecksumSlice();
if (lastPacketInBlock || len == 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("Receiving an empty packet or the end of the block " + block);
}
// sync block if requested
if (syncBlock) {
flushOrSync(true);
}
} else {
final int checksumLen = diskChecksum.getChecksumSize(len);
final int checksumReceivedLen = checksumBuf.capacity();
if (checksumReceivedLen > 0 && checksumReceivedLen != checksumLen) {
throw new IOException("Invalid checksum length: received length is " + checksumReceivedLen + " but expected length is " + checksumLen);
}
if (checksumReceivedLen > 0 && shouldVerifyChecksum()) {
try {
verifyChunks(dataBuf, checksumBuf);
} catch (IOException ioe) {
// checksum error detected locally. there is no reason to continue.
if (responder != null) {
try {
((PacketResponder) responder.getRunnable()).enqueue(seqno, lastPacketInBlock, offsetInBlock, Status.ERROR_CHECKSUM);
// Wait until the responder sends back the response
// and interrupt this thread.
Thread.sleep(3000);
} catch (InterruptedException e) {
}
}
throw new IOException("Terminating due to a checksum error." + ioe);
}
if (needsChecksumTranslation) {
// overwrite the checksums in the packet buffer with the
// appropriate polynomial for the disk storage.
translateChunks(dataBuf, checksumBuf);
}
}
if (checksumReceivedLen == 0 && !streams.isTransientStorage()) {
// checksum is missing, need to calculate it
checksumBuf = ByteBuffer.allocate(checksumLen);
diskChecksum.calculateChunkedSums(dataBuf, checksumBuf);
}
// by this point, the data in the buffer uses the disk checksum
final boolean shouldNotWriteChecksum = checksumReceivedLen == 0 && streams.isTransientStorage();
try {
long onDiskLen = replicaInfo.getBytesOnDisk();
if (onDiskLen < offsetInBlock) {
// Normally the beginning of an incoming packet is aligned with the
// existing data on disk. If the beginning packet data offset is not
// checksum chunk aligned, the end of packet will not go beyond the
// next chunk boundary.
// When a failure-recovery is involved, the client state and the
// the datanode state may not exactly agree. I.e. the client may
// resend part of data that is already on disk. Correct number of
// bytes should be skipped when writing the data and checksum
// buffers out to disk.
long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;
boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
// If the end of the on-disk data is not chunk-aligned, the last
// checksum needs to be overwritten.
boolean overwriteLastCrc = !alignedOnDisk && !shouldNotWriteChecksum;
// If the starting offset of the packet data is at the last chunk
// boundary of the data on disk, the partial checksum recalculation
// can be skipped and the checksum supplied by the client can be used
// instead. This reduces disk reads and cpu load.
boolean doCrcRecalc = overwriteLastCrc && (lastChunkBoundary != firstByteInBlock);
// A packet that starts mid-chunk must contain only that partial chunk;
// its data may not extend past the next chunk boundary.
if (!alignedInPacket && len > bytesPerChecksum) {
throw new IOException("Unexpected packet data length for " + block + " from " + inAddr + ": a partial chunk must be " + " sent in an individual packet (data length = " + len + " > bytesPerChecksum = " + bytesPerChecksum + ")");
}
// If the last portion of the block file is not a full chunk,
// then read in pre-existing partial data chunk and recalculate
// the checksum so that the checksum calculation can continue
// from the right state. If the client provided the checksum for
// the whole chunk, this is not necessary.
Checksum partialCrc = null;
if (doCrcRecalc) {
if (LOG.isDebugEnabled()) {
LOG.debug("receivePacket for " + block + ": previous write did not end at the chunk boundary." + " onDiskLen=" + onDiskLen);
}
long offsetInChecksum = BlockMetadataHeader.getHeaderSize() + onDiskLen / bytesPerChecksum * checksumSize;
partialCrc = computePartialChunkCrc(onDiskLen, offsetInChecksum);
}
// The data buffer position where write will begin. If the packet
// data and on-disk data have no overlap, this will not be at the
// beginning of the buffer.
int startByteToDisk = (int) (onDiskLen - firstByteInBlock) + dataBuf.arrayOffset() + dataBuf.position();
// Actual number of data bytes to write.
int numBytesToDisk = (int) (offsetInBlock - onDiskLen);
// Write data to disk.
long begin = Time.monotonicNow();
streams.writeDataToDisk(dataBuf.array(), startByteToDisk, numBytesToDisk);
long duration = Time.monotonicNow() - begin;
if (duration > datanodeSlowLogThresholdMs) {
LOG.warn("Slow BlockReceiver write data to disk cost:" + duration + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
}
if (duration > maxWriteToDiskMs) {
maxWriteToDiskMs = duration;
}
final byte[] lastCrc;
if (shouldNotWriteChecksum) {
lastCrc = null;
} else {
int skip = 0;
byte[] crcBytes = null;
// First, prepare to overwrite the partial crc at the end.
if (overwriteLastCrc) {
// not chunk-aligned on disk
// prepare to overwrite last checksum
adjustCrcFilePosition();
}
// If a partial chunk CRC was computed above, complete that chunk's
// CRC by reading the rest of the chunk from the packet data, then write it out.
if (doCrcRecalc) {
// Calculate new crc for this chunk.
int bytesToReadForRecalc = (int) (bytesPerChecksum - partialChunkSizeOnDisk);
if (numBytesToDisk < bytesToReadForRecalc) {
bytesToReadForRecalc = numBytesToDisk;
}
partialCrc.update(dataBuf.array(), startByteToDisk, bytesToReadForRecalc);
byte[] buf = FSOutputSummer.convertToByteStream(partialCrc, checksumSize);
crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
checksumOut.write(buf);
if (LOG.isDebugEnabled()) {
LOG.debug("Writing out partial crc for data len " + len + ", skip=" + skip);
}
// For the partial chunk that was just read.
skip++;
}
// Determine how many checksums need to be skipped up to the last
// boundary. The checksum after the boundary was already counted
// above. Only count the number of checksums skipped up to the
// boundary here.
long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
if (skippedDataBytes > 0) {
skip += (int) (skippedDataBytes / bytesPerChecksum) + ((skippedDataBytes % bytesPerChecksum == 0) ? 0 : 1);
}
// Convert to number of bytes
skip *= checksumSize;
// write the rest of checksum
final int offset = checksumBuf.arrayOffset() + checksumBuf.position() + skip;
final int end = offset + checksumLen - skip;
// If the recalculated partial CRC already covers every checksum received in
// this packet (the skip reaches the end), there is nothing more to write after that.
if (offset >= end && doCrcRecalc) {
lastCrc = crcBytes;
} else {
final int remainingBytes = checksumLen - skip;
lastCrc = copyLastChunkChecksum(checksumBuf.array(), checksumSize, end);
checksumOut.write(checksumBuf.array(), offset, remainingBytes);
}
}
// flush entire packet, sync if requested
flushOrSync(syncBlock);
replicaInfo.setLastChecksumAndDataLen(offsetInBlock, lastCrc);
datanode.metrics.incrBytesWritten(len);
datanode.metrics.incrTotalWriteTime(duration);
manageWriterOsCache(offsetInBlock);
}
} catch (IOException iex) {
// Volume error check moved to FileIoProvider
throw iex;
}
}
// If sync was requested, put in queue for pending acks here (after the fsync finished).
if (responder != null && (syncBlock || shouldVerifyChecksum())) {
((PacketResponder) responder.getRunnable()).enqueue(seqno, lastPacketInBlock, offsetInBlock, Status.SUCCESS);
}
/*
* Send in-progress responses for the replaceBlock() calls back to caller to
* avoid timeouts due to balancer throttling. HDFS-6247
*/
if (isReplaceBlock && (Time.monotonicNow() - lastResponseTime > responseInterval)) {
BlockOpResponseProto.Builder response = BlockOpResponseProto.newBuilder().setStatus(Status.IN_PROGRESS);
response.build().writeDelimitedTo(replyOut);
replyOut.flush();
lastResponseTime = Time.monotonicNow();
}
if (throttler != null) {
// throttle I/O
throttler.throttle(len);
}
return lastPacketInBlock ? -1 : len;
}
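The partial-chunk handling above comes down to a little arithmetic: how many checksum bytes a packet of a given length carries, where the last chunk boundary falls relative to the bytes already on disk, and how many received checksums to skip when the packet overlaps data that was already written. The standalone sketch below walks that arithmetic for one example; the chunk size (512) and CRC size (4) are illustrative assumptions rather than values read from configuration, and the class is not part of Hadoop.
public class ChunkAlignmentSketch {
  public static void main(String[] args) {
    final int bytesPerChecksum = 512; // assumed chunk size
    final int checksumSize = 4;       // assumed CRC width

    long onDiskLen = 1000;        // bytes already on disk (ends mid-chunk)
    long firstByteInBlock = 512;  // where the incoming packet starts
    int len = 1024;               // packet data length
    long offsetInBlock = firstByteInBlock + len;

    // Number of checksum bytes expected for `len` bytes of data (one CRC per chunk).
    int checksumLen = ((len + bytesPerChecksum - 1) / bytesPerChecksum) * checksumSize;

    long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;   // 488
    long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;  // 512
    boolean alignedOnDisk = partialChunkSizeOnDisk == 0;          // false -> last CRC must be overwritten

    // Bytes of the packet already on disk are skipped; only the remainder is written.
    int startByteToSkip = (int) (onDiskLen - firstByteInBlock);   // 488
    int numBytesToDisk = (int) (offsetInBlock - onDiskLen);       // 536

    // Checksums to skip: one per chunk between the packet start and the last
    // chunk boundary (the partial chunk's CRC is recalculated separately).
    long skippedDataBytes = lastChunkBoundary - firstByteInBlock; // 0 here
    int skip = (int) (skippedDataBytes / bytesPerChecksum)
        + ((skippedDataBytes % bytesPerChecksum == 0) ? 0 : 1);

    System.out.printf("checksumLen=%d skipBytes=%d writeBytes=%d skipCrcs=%d alignedOnDisk=%b%n",
        checksumLen, startByteToSkip, numBytesToDisk, skip, alignedOnDisk);
  }
}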
use of org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader in project hadoop by apache.
the class TestDataTransferProtocol method testPacketHeader.
@Test
public void testPacketHeader() throws IOException {
PacketHeader hdr = new PacketHeader(
4, // size of packet
1024, // offset in block
100, // sequence number
false, // lastPacketInBlock
4096, // chunk length
false); // syncBlock
ByteArrayOutputStream baos = new ByteArrayOutputStream();
hdr.write(new DataOutputStream(baos));
// Read back using DataInput
PacketHeader readBack = new PacketHeader();
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
readBack.readFields(new DataInputStream(bais));
assertEquals(hdr, readBack);
// Read back using ByteBuffer
readBack = new PacketHeader();
readBack.readFields(ByteBuffer.wrap(baos.toByteArray()));
assertEquals(hdr, readBack);
assertTrue(hdr.sanityCheck(99));
assertFalse(hdr.sanityCheck(100));
}
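The last two assertions constrain sanityCheck's behaviour: a header with seqno 100 passes sanityCheck(99) but fails sanityCheck(100), which suggests the check requires the packet's sequence number to be exactly one greater than the last sequence number seen. The lines below are a hedged extrapolation of that reading (an assumption inferred from the test, not documented behaviour), reusing the constructor arguments from above with a different seqno.
// Assumption inferred from the assertions above, not from PacketHeader's docs:
// sanityCheck(lastSeqNo) appears to require seqno == lastSeqNo + 1.
PacketHeader next = new PacketHeader(4, 1024, 101, false, 4096, false);
assertTrue(next.sanityCheck(100));  // 101 == 100 + 1
assertFalse(next.sanityCheck(101)); // sequence number did not advance by exactly one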
use of org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader in project hadoop by apache.
the class TestDataTransferProtocol method testDataTransferProtocol.
@Test
public void testDataTransferProtocol() throws IOException {
Random random = new Random();
int oneMil = 1024 * 1024;
Path file = new Path("dataprotocol.dat");
int numDataNodes = 1;
Configuration conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, numDataNodes);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
try {
cluster.waitActive();
datanode = cluster.getFileSystem().getDataNodeStats(DatanodeReportType.LIVE)[0];
dnAddr = NetUtils.createSocketAddr(datanode.getXferAddr());
FileSystem fileSys = cluster.getFileSystem();
int fileLen = Math.min(conf.getInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 4096), 4096);
DFSTestUtil.createFile(fileSys, file, fileLen, fileLen, fileSys.getDefaultBlockSize(file), fileSys.getDefaultReplication(file), 0L);
// get the first blockid for the file
final ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(fileSys, file);
final String poolId = firstBlock.getBlockPoolId();
long newBlockId = firstBlock.getBlockId() + 1;
recvBuf.reset();
sendBuf.reset();
// bad version
recvOut.writeShort((short) (DataTransferProtocol.DATA_TRANSFER_VERSION - 1));
sendOut.writeShort((short) (DataTransferProtocol.DATA_TRANSFER_VERSION - 1));
sendRecvData("Wrong Version", true);
// bad ops
sendBuf.reset();
sendOut.writeShort((short) DataTransferProtocol.DATA_TRANSFER_VERSION);
sendOut.writeByte(Op.WRITE_BLOCK.code - 1);
sendRecvData("Wrong Op Code", true);
/* Test OP_WRITE_BLOCK */
sendBuf.reset();
DataChecksum badChecksum = Mockito.spy(DEFAULT_CHECKSUM);
Mockito.doReturn(-1).when(badChecksum).getBytesPerChecksum();
writeBlock(poolId, newBlockId, badChecksum);
recvBuf.reset();
sendResponse(Status.ERROR, null, null, recvOut);
sendRecvData("wrong bytesPerChecksum while writing", true);
sendBuf.reset();
recvBuf.reset();
writeBlock(poolId, ++newBlockId, DEFAULT_CHECKSUM);
PacketHeader hdr = new PacketHeader(
4, // size of packet
0, // offset in block
100, // seqno
false, // last packet
-1 - random.nextInt(oneMil), // bad datalen
false); // syncBlock
hdr.write(sendOut);
sendResponse(Status.SUCCESS, "", null, recvOut);
new PipelineAck(100, new int[] { PipelineAck.combineHeader(PipelineAck.ECN.DISABLED, Status.ERROR) }).write(recvOut);
sendRecvData("negative DATA_CHUNK len while writing block " + newBlockId, true);
// test for writing a valid zero size block
sendBuf.reset();
recvBuf.reset();
writeBlock(poolId, ++newBlockId, DEFAULT_CHECKSUM);
hdr = new PacketHeader(
8, // size of packet
0, // offset in block
100, // sequence number
true, // lastPacketInBlock
0, // chunk length
false); // syncBlock
hdr.write(sendOut);
// zero checksum
sendOut.writeInt(0);
sendOut.flush();
//ok finally write a block with 0 len
sendResponse(Status.SUCCESS, "", null, recvOut);
new PipelineAck(100, new int[] { PipelineAck.combineHeader(PipelineAck.ECN.DISABLED, Status.SUCCESS) }).write(recvOut);
sendRecvData("Writing a zero len block blockid " + newBlockId, false);
/* Test OP_READ_BLOCK */
String bpid = cluster.getNamesystem().getBlockPoolId();
ExtendedBlock blk = new ExtendedBlock(bpid, firstBlock.getLocalBlock());
long blkid = blk.getBlockId();
// bad block id
sendBuf.reset();
recvBuf.reset();
blk.setBlockId(blkid - 1);
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", 0L, fileLen, true, CachingStrategy.newDefaultStrategy());
sendRecvData("Wrong block ID " + newBlockId + " for read", false);
// negative block start offset -1L
sendBuf.reset();
blk.setBlockId(blkid);
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", -1L, fileLen, true, CachingStrategy.newDefaultStrategy());
sendRecvData("Negative start-offset for read for block " + firstBlock.getBlockId(), false);
// bad block start offset
sendBuf.reset();
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", fileLen, fileLen, true, CachingStrategy.newDefaultStrategy());
sendRecvData("Wrong start-offset for reading block " + firstBlock.getBlockId(), false);
// negative length is ok. Datanode assumes we want to read the whole block.
recvBuf.reset();
BlockOpResponseProto.newBuilder().setStatus(Status.SUCCESS).setReadOpChecksumInfo(ReadOpChecksumInfoProto.newBuilder().setChecksum(DataTransferProtoUtil.toProto(DEFAULT_CHECKSUM)).setChunkOffset(0L)).build().writeDelimitedTo(recvOut);
sendBuf.reset();
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", 0L, -1L - random.nextInt(oneMil), true, CachingStrategy.newDefaultStrategy());
sendRecvData("Negative length for reading block " + firstBlock.getBlockId(), false);
// length is more than size of block.
recvBuf.reset();
sendResponse(Status.ERROR, null, "opReadBlock " + firstBlock + " received exception java.io.IOException: " + "Offset 0 and length 4097 don't match block " + firstBlock + " ( blockLen 4096 )", recvOut);
sendBuf.reset();
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", 0L, fileLen + 1, true, CachingStrategy.newDefaultStrategy());
sendRecvData("Wrong length for reading block " + firstBlock.getBlockId(), false);
//At the end of all this, read the file to make sure that succeeds finally.
sendBuf.reset();
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", 0L, fileLen, true, CachingStrategy.newDefaultStrategy());
readFile(fileSys, file, fileLen);
} finally {
cluster.shutdown();
}
}
use of org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader in project hadoop by apache.
the class TestDataTransferProtocol method writeZeroLengthPacket.
private void writeZeroLengthPacket(ExtendedBlock block, String description) throws IOException {
PacketHeader hdr = new PacketHeader(
8, // size of packet
block.getNumBytes(), // offset in block
100, // sequence number
true, // lastPacketInBlock
0, // chunk length
false); // sync block
hdr.write(sendOut);
// zero checksum
sendOut.writeInt(0);
//ok finally write a block with 0 len
sendResponse(Status.SUCCESS, "", null, recvOut);
new PipelineAck(100, new int[] { PipelineAck.combineHeader(PipelineAck.ECN.DISABLED, Status.SUCCESS) }).write(recvOut);
sendRecvData(description, false);
}