
Example 1 with Checksum

use of java.util.zip.Checksum in project hadoop by apache.

the class TestShuffleHandler method createIndexFile.

private static void createIndexFile(File indexFile, Configuration conf) throws IOException {
    if (indexFile.exists()) {
        System.out.println("Deleting existing file");
        indexFile.delete();
    }
    indexFile.createNewFile();
    FSDataOutputStream output = FileSystem.getLocal(conf).getRaw().append(new Path(indexFile.getAbsolutePath()));
    Checksum crc = new PureJavaCrc32();
    crc.reset();
    CheckedOutputStream chk = new CheckedOutputStream(output, crc);
    String msg = "Writing new index file. This file will be used only " + "for the testing.";
    chk.write(Arrays.copyOf(msg.getBytes(), MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH));
    output.writeLong(chk.getChecksum().getValue());
    output.close();
}
Also used : Path(org.apache.hadoop.fs.Path) PureJavaCrc32(org.apache.hadoop.util.PureJavaCrc32) Checksum(java.util.zip.Checksum) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) CheckedOutputStream(java.util.zip.CheckedOutputStream)
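
The pattern above is worth isolating: route the payload through a CheckedOutputStream so the Checksum is updated as a side effect of writing, then append the checksum value itself as a trailer. Below is a minimal, self-contained sketch of the same round trip using only JDK classes; CRC32 stands in for Hadoop's PureJavaCrc32, and the file name and record length are invented for illustration.

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;
import java.util.zip.CheckedOutputStream;

public class IndexChecksumDemo {

    // Hypothetical record size, standing in for MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH.
    static final int RECORD_LENGTH = 24;

    public static void main(String[] args) throws IOException {
        File f = new File("index.demo"); // hypothetical file name
        byte[] record = Arrays.copyOf("demo index record".getBytes(), RECORD_LENGTH);

        // Write side: the record goes through the CheckedOutputStream (updating the
        // CRC as it passes), then the CRC value is appended as a long trailer.
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(f))) {
            CheckedOutputStream chk = new CheckedOutputStream(out, new CRC32());
            chk.write(record);
            out.writeLong(chk.getChecksum().getValue());
        }

        // Read side: recompute the CRC while reading the record, then compare it
        // against the stored trailer.
        try (FileInputStream fin = new FileInputStream(f)) {
            CheckedInputStream chk = new CheckedInputStream(fin, new CRC32());
            DataInputStream in = new DataInputStream(chk);
            in.readFully(new byte[RECORD_LENGTH]);
            long computed = chk.getChecksum().getValue(); // capture before the trailer
            long stored = in.readLong();
            if (stored != computed) {
                throw new IOException("checksum mismatch: stored=" + stored + ", computed=" + computed);
            }
        }
    }
}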

Example 2 with Checksum

use of java.util.zip.Checksum in project hadoop by apache.

the class GenSort method outputRecords.

public static void outputRecords(OutputStream out, boolean useAscii, Unsigned16 firstRecordNumber, Unsigned16 recordsToGenerate, Unsigned16 checksum) throws IOException {
    byte[] row = new byte[100];
    Unsigned16 recordNumber = new Unsigned16(firstRecordNumber);
    Unsigned16 lastRecordNumber = new Unsigned16(firstRecordNumber);
    Checksum crc = new PureJavaCrc32();
    Unsigned16 tmp = new Unsigned16();
    lastRecordNumber.add(recordsToGenerate);
    Unsigned16 ONE = new Unsigned16(1);
    Unsigned16 rand = Random16.skipAhead(firstRecordNumber);
    while (!recordNumber.equals(lastRecordNumber)) {
        Random16.nextRand(rand);
        if (useAscii) {
            generateAsciiRecord(row, rand, recordNumber);
        } else {
            generateRecord(row, rand, recordNumber);
        }
        if (checksum != null) {
            crc.reset();
            crc.update(row, 0, row.length);
            tmp.set(crc.getValue());
            checksum.add(tmp);
        }
        recordNumber.add(ONE);
        out.write(row);
    }
}
Also used : PureJavaCrc32(org.apache.hadoop.util.PureJavaCrc32) Checksum(java.util.zip.Checksum)
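
The idiom to take from this example is checksum-instance reuse: a single Checksum object serves every record via reset()/update()/getValue(), and the per-record values are folded into a running total. A standalone sketch of the same accumulation, with CRC32 in place of PureJavaCrc32 and a plain long in place of Unsigned16 (adequate until the sum overflows 64 bits):

import java.util.zip.CRC32;
import java.util.zip.Checksum;

public class RecordChecksumDemo {

    /** Sums per-record CRCs the way GenSort accumulates its dataset checksum. */
    static long checksumRecords(byte[][] records) {
        Checksum crc = new CRC32();
        long total = 0; // GenSort uses Unsigned16; a long suffices until it overflows
        for (byte[] record : records) {
            crc.reset();                       // one Checksum instance, reset per record
            crc.update(record, 0, record.length);
            total += crc.getValue();           // fold this record's CRC into the total
        }
        return total;
    }

    public static void main(String[] args) {
        byte[][] records = { "row-1".getBytes(), "row-2".getBytes() };
        System.out.printf("dataset checksum: %x%n", checksumRecords(records));
    }
}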

Example 3 with Checksum

use of java.util.zip.Checksum in project hadoop by apache.

the class BlockReceiver method receivePacket.

/**
   * Receives and processes a packet. It can contain many chunks.
   * Returns the number of data bytes in the packet.
   */
private int receivePacket() throws IOException {
    // read the next packet
    packetReceiver.receiveNextPacket(in);
    PacketHeader header = packetReceiver.getHeader();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Receiving one packet for block " + block + ": " + header);
    }
    // Sanity check the header
    if (header.getOffsetInBlock() > replicaInfo.getNumBytes()) {
        throw new IOException("Received an out-of-sequence packet for " + block + "from " + inAddr + " at offset " + header.getOffsetInBlock() + ". Expecting packet starting at " + replicaInfo.getNumBytes());
    }
    if (header.getDataLen() < 0) {
        throw new IOException("Got wrong length during writeBlock(" + block + ") from " + inAddr + " at offset " + header.getOffsetInBlock() + ": " + header.getDataLen());
    }
    long offsetInBlock = header.getOffsetInBlock();
    long seqno = header.getSeqno();
    boolean lastPacketInBlock = header.isLastPacketInBlock();
    final int len = header.getDataLen();
    boolean syncBlock = header.getSyncBlock();
    // avoid double sync'ing on close
    if (syncBlock && lastPacketInBlock) {
        this.syncOnClose = false;
    }
    // update received bytes
    final long firstByteInBlock = offsetInBlock;
    offsetInBlock += len;
    if (replicaInfo.getNumBytes() < offsetInBlock) {
        replicaInfo.setNumBytes(offsetInBlock);
    }
    // put in queue for pending acks, unless sync was requested
    if (responder != null && !syncBlock && !shouldVerifyChecksum()) {
        ((PacketResponder) responder.getRunnable()).enqueue(seqno, lastPacketInBlock, offsetInBlock, Status.SUCCESS);
    }
    // Drop heartbeat for testing.
    if (seqno < 0 && len == 0 && DataNodeFaultInjector.get().dropHeartbeatPacket()) {
        return 0;
    }
    // First write the packet to the mirror:
    if (mirrorOut != null && !mirrorError) {
        try {
            long begin = Time.monotonicNow();
            // For testing. Normally no-op.
            DataNodeFaultInjector.get().stopSendingPacketDownstream(mirrorAddr);
            packetReceiver.mirrorPacketTo(mirrorOut);
            mirrorOut.flush();
            long now = Time.monotonicNow();
            setLastSentTime(now);
            long duration = now - begin;
            DataNodeFaultInjector.get().logDelaySendingPacketDownstream(mirrorAddr, duration);
            trackSendPacketToLastNodeInPipeline(duration);
            if (duration > datanodeSlowLogThresholdMs) {
                LOG.warn("Slow BlockReceiver write packet to mirror took " + duration + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
            }
        } catch (IOException e) {
            handleMirrorOutError(e);
        }
    }
    ByteBuffer dataBuf = packetReceiver.getDataSlice();
    ByteBuffer checksumBuf = packetReceiver.getChecksumSlice();
    if (lastPacketInBlock || len == 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Receiving an empty packet or the end of the block " + block);
        }
        // sync block if requested
        if (syncBlock) {
            flushOrSync(true);
        }
    } else {
        final int checksumLen = diskChecksum.getChecksumSize(len);
        final int checksumReceivedLen = checksumBuf.capacity();
        if (checksumReceivedLen > 0 && checksumReceivedLen != checksumLen) {
            throw new IOException("Invalid checksum length: received length is " + checksumReceivedLen + " but expected length is " + checksumLen);
        }
        if (checksumReceivedLen > 0 && shouldVerifyChecksum()) {
            try {
                verifyChunks(dataBuf, checksumBuf);
            } catch (IOException ioe) {
                // Checksum error detected locally; there is no reason to continue.
                if (responder != null) {
                    try {
                        ((PacketResponder) responder.getRunnable()).enqueue(seqno, lastPacketInBlock, offsetInBlock, Status.ERROR_CHECKSUM);
                        // Wait until the responder sends back the response
                        // and interrupts this thread.
                        Thread.sleep(3000);
                    } catch (InterruptedException e) {
                        // Ignore; this thread exits via the IOException below.
                    }
                }
                throw new IOException("Terminating due to a checksum error. " + ioe);
            }
            if (needsChecksumTranslation) {
                // overwrite the checksums in the packet buffer with the
                // appropriate polynomial for the disk storage.
                translateChunks(dataBuf, checksumBuf);
            }
        }
        if (checksumReceivedLen == 0 && !streams.isTransientStorage()) {
            // checksum is missing, need to calculate it
            checksumBuf = ByteBuffer.allocate(checksumLen);
            diskChecksum.calculateChunkedSums(dataBuf, checksumBuf);
        }
        // by this point, the data in the buffer uses the disk checksum
        final boolean shouldNotWriteChecksum = checksumReceivedLen == 0 && streams.isTransientStorage();
        try {
            long onDiskLen = replicaInfo.getBytesOnDisk();
            if (onDiskLen < offsetInBlock) {
                // Normally the beginning of an incoming packet is aligned with the
                // existing data on disk. If the beginning packet data offset is not
                // checksum chunk aligned, the end of packet will not go beyond the
                // next chunk boundary.
                // When a failure-recovery is involved, the client state and the
                // datanode state may not exactly agree. I.e. the client may
                // resend part of the data that is already on disk. The correct
                // number of bytes must be skipped when writing the data and
                // checksum buffers out to disk.
                long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
                long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;
                boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
                boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
                // If the end of the on-disk data is not chunk-aligned, the last
                // checksum needs to be overwritten.
                boolean overwriteLastCrc = !alignedOnDisk && !shouldNotWriteChecksum;
                // If the starting offset of the packet data is at the last chunk
                // boundary of the data on disk, the partial checksum recalculation
                // can be skipped and the checksum supplied by the client can be used
                // instead. This reduces disk reads and cpu load.
                boolean doCrcRecalc = overwriteLastCrc && (lastChunkBoundary != firstByteInBlock);
                // If the packet starts off a chunk boundary, it must end at or
                // before the next chunk boundary.
                if (!alignedInPacket && len > bytesPerChecksum) {
                    throw new IOException("Unexpected packet data length for " + block + " from " + inAddr + ": a partial chunk must be " + " sent in an individual packet (data length = " + len + " > bytesPerChecksum = " + bytesPerChecksum + ")");
                }
                // If the last portion of the block file is not a full chunk,
                // then read in pre-existing partial data chunk and recalculate
                // the checksum so that the checksum calculation can continue
                // from the right state. If the client provided the checksum for
                // the whole chunk, this is not necessary.
                Checksum partialCrc = null;
                if (doCrcRecalc) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("receivePacket for " + block + ": previous write did not end at the chunk boundary." + " onDiskLen=" + onDiskLen);
                    }
                    long offsetInChecksum = BlockMetadataHeader.getHeaderSize() + onDiskLen / bytesPerChecksum * checksumSize;
                    partialCrc = computePartialChunkCrc(onDiskLen, offsetInChecksum);
                }
                // The data buffer position where write will begin. If the packet
                // data and on-disk data have no overlap, this will not be at the
                // beginning of the buffer.
                int startByteToDisk = (int) (onDiskLen - firstByteInBlock) + dataBuf.arrayOffset() + dataBuf.position();
                // Actual number of data bytes to write.
                int numBytesToDisk = (int) (offsetInBlock - onDiskLen);
                // Write data to disk.
                long begin = Time.monotonicNow();
                streams.writeDataToDisk(dataBuf.array(), startByteToDisk, numBytesToDisk);
                long duration = Time.monotonicNow() - begin;
                if (duration > datanodeSlowLogThresholdMs) {
                    LOG.warn("Slow BlockReceiver write data to disk cost:" + duration + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
                }
                if (duration > maxWriteToDiskMs) {
                    maxWriteToDiskMs = duration;
                }
                final byte[] lastCrc;
                if (shouldNotWriteChecksum) {
                    lastCrc = null;
                } else {
                    int skip = 0;
                    byte[] crcBytes = null;
                    // First, prepare to overwrite the partial crc at the end.
                    if (overwriteLastCrc) {
                        // not chunk-aligned on disk
                        // prepare to overwrite last checksum
                        adjustCrcFilePosition();
                    }
                    // If recalculation was needed, finish computing the partial
                    // chunk's CRC by reading the rest of the chunk, then write it out.
                    if (doCrcRecalc) {
                        // Calculate new crc for this chunk.
                        int bytesToReadForRecalc = (int) (bytesPerChecksum - partialChunkSizeOnDisk);
                        if (numBytesToDisk < bytesToReadForRecalc) {
                            bytesToReadForRecalc = numBytesToDisk;
                        }
                        partialCrc.update(dataBuf.array(), startByteToDisk, bytesToReadForRecalc);
                        byte[] buf = FSOutputSummer.convertToByteStream(partialCrc, checksumSize);
                        crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
                        checksumOut.write(buf);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Writing out partial crc for data len " + len + ", skip=" + skip);
                        }
                        //  For the partial chunk that was just read.
                        skip++;
                    }
                    // Determine how many checksums need to be skipped up to the last
                    // boundary. The checksum after the boundary was already counted
                    // above. Only count the number of checksums skipped up to the
                    // boundary here.
                    long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
                    if (skippedDataBytes > 0) {
                        skip += (int) (skippedDataBytes / bytesPerChecksum) + ((skippedDataBytes % bytesPerChecksum == 0) ? 0 : 1);
                    }
                    // Convert to number of bytes
                    skip *= checksumSize;
                    // write the rest of checksum
                    final int offset = checksumBuf.arrayOffset() + checksumBuf.position() + skip;
                    final int end = offset + checksumLen - skip;
                    // If the recalculated partial-chunk checksum was the only one
                    // in this packet, there is nothing more to write after it.
                    if (offset >= end && doCrcRecalc) {
                        lastCrc = crcBytes;
                    } else {
                        final int remainingBytes = checksumLen - skip;
                        lastCrc = copyLastChunkChecksum(checksumBuf.array(), checksumSize, end);
                        checksumOut.write(checksumBuf.array(), offset, remainingBytes);
                    }
                }
                // Flush the entire packet, syncing if requested.
                flushOrSync(syncBlock);
                replicaInfo.setLastChecksumAndDataLen(offsetInBlock, lastCrc);
                datanode.metrics.incrBytesWritten(len);
                datanode.metrics.incrTotalWriteTime(duration);
                manageWriterOsCache(offsetInBlock);
            }
        } catch (IOException iex) {
            // Volume error check moved to FileIoProvider
            throw iex;
        }
    }
    // If sync was requested, enqueue the ack only now (after the fsync finished).
    if (responder != null && (syncBlock || shouldVerifyChecksum())) {
        ((PacketResponder) responder.getRunnable()).enqueue(seqno, lastPacketInBlock, offsetInBlock, Status.SUCCESS);
    }
    /*
     * Send in-progress responses for the replaceBlock() calls back to caller to
     * avoid timeouts due to balancer throttling. HDFS-6247
     */
    if (isReplaceBlock && (Time.monotonicNow() - lastResponseTime > responseInterval)) {
        BlockOpResponseProto.Builder response = BlockOpResponseProto.newBuilder().setStatus(Status.IN_PROGRESS);
        response.build().writeDelimitedTo(replyOut);
        replyOut.flush();
        lastResponseTime = Time.monotonicNow();
    }
    if (throttler != null) {
        // throttle I/O
        throttler.throttle(len);
    }
    return lastPacketInBlock ? -1 : len;
}
Also used : Checksum(java.util.zip.Checksum) DataChecksum(org.apache.hadoop.util.DataChecksum) BlockOpResponseProto(org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto) PacketHeader(org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer)
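
The subtlest step above is the partial-chunk CRC recalculation. When the data already on disk ends mid-chunk, computePartialChunkCrc seeds a Checksum with the partial bytes re-read from disk, and the later partialCrc.update call continues it over the newly received bytes; because Checksum.update is incremental, the result equals a one-shot CRC over the whole chunk. A minimal sketch of that property, with an invented chunk size and contents:

import java.util.Arrays;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

public class PartialChunkCrcDemo {
    public static void main(String[] args) {
        byte[] chunk = new byte[512];          // one full checksum chunk (size is illustrative)
        Arrays.fill(chunk, (byte) 7);
        int onDisk = 300;                      // bytes already on disk before the new packet

        // CRC over the complete chunk in one pass.
        Checksum whole = new CRC32();
        whole.update(chunk, 0, chunk.length);

        // CRC seeded from the partial on-disk prefix, then continued with the
        // new bytes, mirroring computePartialChunkCrc() plus partialCrc.update().
        Checksum partial = new CRC32();
        partial.update(chunk, 0, onDisk);                     // re-read from disk
        partial.update(chunk, onDisk, chunk.length - onDisk); // newly received bytes

        // Incremental updates produce the same value as a one-shot pass.
        System.out.println(whole.getValue() == partial.getValue()); // true
    }
}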

Example 4 with Checksum

use of java.util.zip.Checksum in project neo4j by neo4j.

the class EphemeralFileSystemAbstraction method checksum.

public long checksum() {
    Checksum checksum = new CRC32();
    byte[] data = new byte[(int) ByteUnit.kibiBytes(1)];
    // Go through file name list in sorted order, so that checksum is consistent
    List<File> names = new ArrayList<>(files.size());
    names.addAll(files.keySet());
    names.sort(Comparator.comparing(File::getAbsolutePath));
    for (File name : names) {
        EphemeralFileData file = files.get(name);
        ByteBuffer buf = file.fileAsBuffer.buf();
        buf.position(0);
        while (buf.position() < buf.limit()) {
            int len = Math.min(data.length, buf.limit() - buf.position());
            // Read only the remaining bytes; buf.get(data) would throw
            // BufferUnderflowException when fewer than data.length bytes remain.
            buf.get(data, 0, len);
            checksum.update(data, 0, len);
        }
    }
    return checksum.getValue();
}
Also used : CRC32(java.util.zip.CRC32) Checksum(java.util.zip.Checksum) ArrayList(java.util.ArrayList) File(java.io.File) ByteBuffer(java.nio.ByteBuffer) MappedByteBuffer(java.nio.MappedByteBuffer)
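
The sort by absolute path is essential because a Checksum is order-sensitive: the same bytes fed in a different order produce a different value, so iterating an unordered map would yield a different checksum on every run. A quick illustration:

import java.util.zip.CRC32;
import java.util.zip.Checksum;

public class OrderSensitivityDemo {
    public static void main(String[] args) {
        byte[] a = "alpha".getBytes();
        byte[] b = "beta".getBytes();

        Checksum ab = new CRC32();
        ab.update(a, 0, a.length);
        ab.update(b, 0, b.length);

        Checksum ba = new CRC32();
        ba.update(b, 0, b.length);
        ba.update(a, 0, a.length);

        // Different input orders, different values; hence the sorted file list above.
        System.out.println(ab.getValue() != ba.getValue()); // true
    }
}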

Example 5 with Checksum

use of java.util.zip.Checksum in project netty by netty.

the class FastLzFrameEncoder method encode.

@Override
protected void encode(ChannelHandlerContext ctx, ByteBuf in, ByteBuf out) throws Exception {
    final Checksum checksum = this.checksum;
    for (; ; ) {
        if (!in.isReadable()) {
            return;
        }
        final int idx = in.readerIndex();
        final int length = Math.min(in.readableBytes(), MAX_CHUNK_LENGTH);
        final int outputIdx = out.writerIndex();
        out.setMedium(outputIdx, MAGIC_NUMBER);
        int outputOffset = outputIdx + CHECKSUM_OFFSET + (checksum != null ? 4 : 0);
        final byte blockType;
        final int chunkLength;
        if (length < MIN_LENGTH_TO_COMPRESSION) {
            blockType = BLOCK_TYPE_NON_COMPRESSED;
            out.ensureWritable(outputOffset + 2 + length);
            final byte[] output = out.array();
            final int outputPtr = out.arrayOffset() + outputOffset + 2;
            if (checksum != null) {
                final byte[] input;
                final int inputPtr;
                if (in.hasArray()) {
                    input = in.array();
                    inputPtr = in.arrayOffset() + idx;
                } else {
                    input = new byte[length];
                    in.getBytes(idx, input);
                    inputPtr = 0;
                }
                checksum.reset();
                checksum.update(input, inputPtr, length);
                out.setInt(outputIdx + CHECKSUM_OFFSET, (int) checksum.getValue());
                System.arraycopy(input, inputPtr, output, outputPtr, length);
            } else {
                in.getBytes(idx, output, outputPtr, length);
            }
            chunkLength = length;
        } else {
            // try to compress
            final byte[] input;
            final int inputPtr;
            if (in.hasArray()) {
                input = in.array();
                inputPtr = in.arrayOffset() + idx;
            } else {
                input = new byte[length];
                in.getBytes(idx, input);
                inputPtr = 0;
            }
            if (checksum != null) {
                checksum.reset();
                checksum.update(input, inputPtr, length);
                out.setInt(outputIdx + CHECKSUM_OFFSET, (int) checksum.getValue());
            }
            final int maxOutputLength = calculateOutputBufferLength(length);
            out.ensureWritable(outputOffset + 4 + maxOutputLength);
            final byte[] output = out.array();
            final int outputPtr = out.arrayOffset() + outputOffset + 4;
            final int compressedLength = compress(input, inputPtr, length, output, outputPtr, level);
            if (compressedLength < length) {
                blockType = BLOCK_TYPE_COMPRESSED;
                chunkLength = compressedLength;
                out.setShort(outputOffset, chunkLength);
                outputOffset += 2;
            } else {
                blockType = BLOCK_TYPE_NON_COMPRESSED;
                System.arraycopy(input, inputPtr, output, outputPtr - 2, length);
                chunkLength = length;
            }
        }
        out.setShort(outputOffset, length);
        out.setByte(outputIdx + OPTIONS_OFFSET, blockType | (checksum != null ? BLOCK_WITH_CHECKSUM : BLOCK_WITHOUT_CHECKSUM));
        out.writerIndex(outputOffset + 2 + chunkLength);
        in.skipBytes(length);
    }
}
Also used : Checksum(java.util.zip.Checksum)
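
The decoder's side of this contract is to recompute the checksum over each received chunk and compare it with the stored 32-bit value. Since the encoder stores (int) checksum.getValue(), the comparison belongs in int space. A generic sketch of that check, independent of the FastLZ frame layout (the class name and the choice of Adler32 are illustrative, not netty's API):

import java.util.zip.Adler32;
import java.util.zip.Checksum;

public class ChunkVerifier {

    // Any Checksum implementation works, as long as it matches the encoder's.
    private final Checksum checksum = new Adler32();

    /** Returns true if the payload matches the 32-bit checksum stored in the frame. */
    boolean verify(byte[] payload, int offset, int length, int storedChecksum) {
        checksum.reset();
        checksum.update(payload, offset, length);
        // The encoder wrote (int) checksum.getValue(), so compare the low 32 bits.
        return (int) checksum.getValue() == storedChecksum;
    }
}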

Aggregations

Checksum (java.util.zip.Checksum): 87
CRC32 (java.util.zip.CRC32): 29
IOException (java.io.IOException): 18
ByteBuffer (java.nio.ByteBuffer): 15
Adler32 (java.util.zip.Adler32): 14
File (java.io.File): 9
EOFException (java.io.EOFException): 7
InputStream (java.io.InputStream): 7
FileInputStream (java.io.FileInputStream): 6
Path (java.nio.file.Path): 6
Test (org.junit.Test): 5
Test (org.junit.jupiter.api.Test): 5
StoreChannel (org.neo4j.io.fs.StoreChannel): 5
BufferedOutputStream (java.io.BufferedOutputStream): 4
CheckedInputStream (java.util.zip.CheckedInputStream): 4
BinaryInputArchive (org.apache.jute.BinaryInputArchive): 4
Record (org.apache.jute.Record): 4
TxnHeader (org.apache.zookeeper.txn.TxnHeader): 4
ByteArrayInputStream (java.io.ByteArrayInputStream): 3
FileOutputStream (java.io.FileOutputStream): 3