Search in sources :

Example 46 with Checksum

use of java.util.zip.Checksum in project zookeeper by apache.

the class FileTxnLog method append.

/**
 * Appends one transaction entry to the transaction log.
 * <p>
 * When no log stream is open, a new log file (named via
 * {@code Util.makeLogName} from the entry's zxid) is created, and its file
 * header is serialized and flushed before {@code padFile} is invoked. The
 * entry itself is written as the checksum of the serialized bytes followed
 * by the bytes.
 *
 * @param hdr the header of the transaction
 * @param txn the transaction part of the entry
 * @return true iff something was appended; false when {@code hdr} is null
 * @throws IOException if the header and txn serialize to null or to zero
 *         bytes, or if one of the underlying stream operations fails
 */
public synchronized boolean append(TxnHeader hdr, Record txn) throws IOException {
    if (hdr == null) {
        return false;
    }
    final long zxid = hdr.getZxid();
    if (zxid > lastZxidSeen) {
        lastZxidSeen = zxid;
    } else {
        // A non-increasing zxid is only reported; the entry is still appended below.
        LOG.warn("Current zxid " + zxid + " is <= " + lastZxidSeen + " for " + hdr.getType());
    }
    if (logStream == null) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Creating new log file: " + Util.makeLogName(zxid));
        }
        logFileWrite = new File(logDir, Util.makeLogName(zxid));
        fos = new FileOutputStream(logFileWrite);
        logStream = new BufferedOutputStream(fos);
        oa = BinaryOutputArchive.getArchive(logStream);
        final FileHeader fileHeader = new FileHeader(TXNLOG_MAGIC, VERSION, dbId);
        fileHeader.serialize(oa, "fileheader");
        // Make sure that the magic number is written before padding.
        logStream.flush();
        currentSize = fos.getChannel().position();
        streamsToFlush.add(fos);
    }
    currentSize = padFile(fos.getChannel());
    final byte[] entryBytes = Util.marshallTxnEntry(hdr, txn);
    final boolean nothingSerialized = entryBytes == null || entryBytes.length == 0;
    if (nothingSerialized) {
        throw new IOException("Faulty serialization for header " + "and txn");
    }
    // The checksum is written ahead of the entry bytes it covers.
    final Checksum entryChecksum = makeChecksumAlgorithm();
    entryChecksum.update(entryBytes, 0, entryBytes.length);
    final long checksumValue = entryChecksum.getValue();
    oa.writeLong(checksumValue, "txnEntryCRC");
    Util.writeTxnBytes(oa, entryBytes);
    return true;
}
Also used : Checksum(java.util.zip.Checksum) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Example 47 with Checksum

use of java.util.zip.Checksum in project processdash by dtuma.

the class PackageLaunchProfile method calculateContentToken.

/**
 * Builds a short token derived from the files selected by the configured
 * filesets.
 * <p>
 * Every included file of every fileset is collected, the list is ordered
 * with {@code FILENAME_SORTER}, and each file is passed in that order to
 * {@code calcChecksum} together with one shared {@link Adler32} instance.
 * The resulting checksum value is rendered in radix
 * {@link Character#MAX_RADIX}.
 *
 * @return the absolute checksum value as a string in the maximum radix
 * @throws IOException declared for {@code calcChecksum}; presumably raised
 *         when a file cannot be read (verify against that helper)
 * @throws BuildException if the filesets select no files at all
 */
private String calculateContentToken() throws IOException {
    List<File> selectedFiles = new ArrayList<File>();
    for (FileSet fileset : filesets) {
        DirectoryScanner scanner = fileset.getDirectoryScanner(getProject());
        String[] includedNames = scanner.getIncludedFiles();
        for (int i = 0; i < includedNames.length; i++) {
            selectedFiles.add(new File(scanner.getBasedir(), includedNames[i]));
        }
    }
    if (selectedFiles.isEmpty()) {
        throw new BuildException("You must designate at least one file " + "to include in the launch profile.");
    }
    // Fixed ordering so the same set of files always feeds the checksum in the same sequence.
    Collections.sort(selectedFiles, FILENAME_SORTER);
    Checksum contentChecksum = new Adler32();
    for (File selected : selectedFiles) {
        calcChecksum(selected, contentChecksum);
    }
    long magnitude = Math.abs(contentChecksum.getValue());
    return Long.toString(magnitude, Character.MAX_RADIX);
}
Also used : FileSet(org.apache.tools.ant.types.FileSet) Checksum(java.util.zip.Checksum) DirectoryScanner(org.apache.tools.ant.DirectoryScanner) ArrayList(java.util.ArrayList) BuildException(org.apache.tools.ant.BuildException) File(java.io.File) Adler32(java.util.zip.Adler32)

Example 48 with Checksum

use of java.util.zip.Checksum in project zookeeper by apache.

the class LogFormatter method main.

/**
 * Command-line entry point that prints every transaction found in a
 * transaction log file to standard output.
 * <p>
 * The file header is read first and its magic number compared against
 * {@code FileTxnLog.TXNLOG_MAGIC}. Each following record is read as a
 * checksum value plus a byte buffer; the buffer is verified with
 * {@link Adler32}, deserialized and printed. Reading stops at end of file
 * or at the first empty record.
 * <p>
 * The process exits with status 2 when the argument count is wrong or the
 * magic number does not match.
 *
 * @param args exactly one element: the path of the log file to format
 * @throws Exception if the file cannot be opened or read, if a record's
 *         checksum does not match ({@link IOException}), or if a record is
 *         not followed by its end-of-record byte ({@link EOFException})
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("USAGE: LogFormatter log_file");
        System.exit(2);
    }
    // try-with-resources: the file handle is released on every return and
    // on every exception thrown while formatting (it used to be left open).
    try (FileInputStream fis = new FileInputStream(args[0])) {
        BinaryInputArchive logStream = BinaryInputArchive.getArchive(fis);
        FileHeader fhdr = new FileHeader();
        fhdr.deserialize(logStream, "fileheader");
        if (fhdr.getMagic() != FileTxnLog.TXNLOG_MAGIC) {
            System.err.println("Invalid magic number for " + args[0]);
            System.exit(2);
        }
        System.out.println("ZooKeeper Transactional Log File with dbid " + fhdr.getDbid() + " txnlog format version " + fhdr.getVersion());
        // The formatter does not depend on the record, so build it once.
        final DateFormat timestampFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.LONG);
        int count = 0;
        while (true) {
            long crcValue;
            byte[] bytes;
            try {
                crcValue = logStream.readLong("crcvalue");
                bytes = logStream.readBuffer("txnEntry");
            } catch (EOFException e) {
                System.out.println("EOF reached after " + count + " txns.");
                return;
            }
            if (bytes.length == 0) {
                // Since we preallocate, we define EOF to be an
                // empty transaction
                System.out.println("EOF reached after " + count + " txns.");
                return;
            }
            Checksum crc = new Adler32();
            crc.update(bytes, 0, bytes.length);
            if (crcValue != crc.getValue()) {
                throw new IOException("CRC doesn't match " + crcValue + " vs " + crc.getValue());
            }
            TxnHeader hdr = new TxnHeader();
            Record txn = SerializeUtils.deserializeTxn(bytes, hdr);
            System.out.println(timestampFormat.format(new Date(hdr.getTime())) + " session 0x" + Long.toHexString(hdr.getClientId()) + " cxid 0x" + Long.toHexString(hdr.getCxid()) + " zxid 0x" + Long.toHexString(hdr.getZxid()) + " " + TraceFormatter.op2String(hdr.getType()) + " " + txn);
            // Every complete record is terminated by the byte 'B'.
            if (logStream.readByte("EOR") != 'B') {
                LOG.error("Last transaction was partial.");
                throw new EOFException("Last transaction was partial.");
            }
            count++;
        }
    }
}
Also used : IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Adler32(java.util.zip.Adler32) Date(java.util.Date) BinaryInputArchive(org.apache.jute.BinaryInputArchive) Checksum(java.util.zip.Checksum) EOFException(java.io.EOFException) Record(org.apache.jute.Record) FileHeader(org.apache.zookeeper.server.persistence.FileHeader) TxnHeader(org.apache.zookeeper.txn.TxnHeader)

Example 49 with Checksum

use of java.util.zip.Checksum in project nifi by apache.

the class TailFile method processTailFile.

/**
 * Consumes newly written data from one tailed file and routes it to
 * {@code REL_SUCCESS} as a FlowFile, then stores and persists the updated
 * tailing state (reader, position, timestamp, length, checksum).
 * <p>
 * Steps, in order: handle a change of the tailed file according to the
 * {@code START_POSITION} property (or recover rolled-over files), create a
 * reader if the state has none, detect rotation, read the available data
 * into a new FlowFile, and finally commit the session before persisting
 * state.
 *
 * @param context the ProcessContext, used for property lookup, yielding and state persistence
 * @param session the ProcessSession used to create, write and transfer the FlowFile
 * @param tailFile the path of the file to tail; also the key into {@code states}
 */
private void processTailFile(final ProcessContext context, final ProcessSession session, final String tailFile) {
    // If user changes the file that is being tailed, we need to consume the already-rolled-over data according
    // to the Initial Start Position property
    boolean rolloverOccurred;
    TailFileObject tfo = states.get(tailFile);
    if (tfo.isTailFileChanged()) {
        rolloverOccurred = false;
        final String recoverPosition = context.getProperty(START_POSITION).getValue();
        if (START_BEGINNING_OF_TIME.getValue().equals(recoverPosition)) {
            recoverRolledFiles(context, session, tailFile, tfo.getExpectedRecoveryChecksum(), tfo.getState().getTimestamp(), tfo.getState().getPosition());
        } else if (START_CURRENT_FILE.getValue().equals(recoverPosition)) {
            cleanup();
            tfo.setState(new TailFileState(tailFile, null, null, 0L, 0L, 0L, null, tfo.getState().getBuffer()));
        } else {
            final String filename = tailFile;
            final File file = new File(filename);
            // Declared outside the try so that the catch block can release the channel if setup fails
            // before the channel has been handed over to the state object.
            FileChannel fileChannel = null;
            try {
                fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ);
                getLogger().debug("Created FileChannel {} for {}", new Object[] { fileChannel, file });
                final Checksum checksum = new CRC32();
                final long position = file.length();
                final long timestamp = file.lastModified();
                // Read the existing content once so that the checksum covers everything up to 'position'.
                try (final InputStream fis = new FileInputStream(file);
                    final CheckedInputStream in = new CheckedInputStream(fis, checksum)) {
                    StreamUtils.copy(in, new NullOutputStream(), position);
                }
                fileChannel.position(position);
                cleanup();
                tfo.setState(new TailFileState(filename, file, fileChannel, position, timestamp, file.length(), checksum, tfo.getState().getBuffer()));
            } catch (final IOException ioe) {
                // Only the open, the checksum pre-read and the positioning can raise IOException here, and all of
                // them happen before the channel is stored in the state. Nothing else would ever close it.
                if (fileChannel != null) {
                    try {
                        fileChannel.close();
                    } catch (final IOException closeFailure) {
                        ioe.addSuppressed(closeFailure);
                    }
                }
                getLogger().error("Attempted to position Reader at current position in file {} but failed to do so due to {}", new Object[] { file, ioe.toString() }, ioe);
                context.yield();
                return;
            }
        }
        tfo.setTailFileChanged(false);
    } else {
        // Recover any data that may have rolled over since the last time that this processor ran.
        // If expectedRecoveryChecksum != null, that indicates that this is the first iteration since processor was started, so use whatever checksum value
        // was present when the state was last persisted. In this case, we must then null out the value so that the next iteration won't keep using the "recovered"
        // value. If the value is null, then we know that either the processor has already recovered that data, or there was no state persisted. In either case,
        // use whatever checksum value is currently in the state.
        Long expectedChecksumValue = tfo.getExpectedRecoveryChecksum();
        if (expectedChecksumValue == null) {
            expectedChecksumValue = tfo.getState().getChecksum() == null ? null : tfo.getState().getChecksum().getValue();
        }
        rolloverOccurred = recoverRolledFiles(context, session, tailFile, expectedChecksumValue, tfo.getState().getTimestamp(), tfo.getState().getPosition());
        tfo.setExpectedRecoveryChecksum(null);
    }
    // initialize local variables from state object; this is done so that we can easily change the values throughout
    // the onTrigger method and then create a new state object after we finish processing the files.
    TailFileState state = tfo.getState();
    File file = state.getFile();
    FileChannel reader = state.getReader();
    Checksum checksum = state.getChecksum();
    if (checksum == null) {
        checksum = new CRC32();
    }
    long position = state.getPosition();
    long timestamp = state.getTimestamp();
    long length = state.getLength();
    // Create a reader if necessary.
    if (file == null || reader == null) {
        file = new File(tailFile);
        reader = createReader(file, position);
        if (reader == null) {
            context.yield();
            return;
        }
    }
    final long startNanos = System.nanoTime();
    // Check if file has rotated
    // We determine that the file has rotated if any of the following conditions are met:
    // 1. 'rolloverOccured' == true, which indicates that we have found a new file matching the rollover pattern.
    // 2. The file was modified after the timestamp in our state, AND the file is smaller than we expected. This satisfies
    // the case where we are tailing File A, and that file is then renamed (say to B) and a new file named A is created
    // and is written to. In such a case, File A may have a file size smaller than we have in our state, so we know that
    // it rolled over.
    // 3. The File Channel that we have indicates that the size of the file is different than file.length() indicates, AND
    // the File Channel also indicates that we have read all data in the file. This case may also occur in the same scenario
    // as #2, above. In this case, the File Channel is pointing to File A, but the 'file' object is pointing to File B. They
    // both have the same name but are different files. As a result, once we have consumed all data from the File Channel,
    // we want to roll over and consume data from the new file.
    boolean rotated = rolloverOccurred;
    if (!rotated) {
        final long fileLength = file.length();
        if (length > fileLength) {
            rotated = true;
        } else {
            try {
                final long readerSize = reader.size();
                final long readerPosition = reader.position();
                if (readerSize == readerPosition && readerSize != fileLength) {
                    rotated = true;
                }
            } catch (final IOException e) {
                getLogger().warn("Failed to determined the size or position of the File Channel when " + "determining if the file has rolled over. Will assume that the file being tailed has not rolled over", e);
            }
        }
    }
    if (rotated) {
        // Since file has rotated, we close the reader, create a new one, and then reset our state.
        try {
            reader.close();
            getLogger().debug("Closed FileChannel {}", new Object[] { reader });
        } catch (final IOException ioe) {
            getLogger().warn("Failed to close reader for {} due to {}", new Object[] { file, ioe });
        }
        // NOTE(review): createReader can return null (see the null check above); that case is not
        // handled here and the reader is used below when the file has data - confirm whether a guard is needed.
        reader = createReader(file, 0L);
        position = 0L;
        checksum.reset();
    }
    if (file.length() == position || !file.exists()) {
        // no data to consume so rather than continually running, yield to allow other processors to use the thread.
        getLogger().debug("No data to consume; created no FlowFiles");
        tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum, state.getBuffer()));
        persistState(tfo, context);
        context.yield();
        return;
    }
    // If there is data to consume, read as much as we can.
    final TailFileState currentState = state;
    final Checksum chksum = checksum;
    // data has been written to file. Stream it to a new FlowFile.
    FlowFile flowFile = session.create();
    final FileChannel fileReader = reader;
    final AtomicLong positionHolder = new AtomicLong(position);
    flowFile = session.write(flowFile, new OutputStreamCallback() {

        @Override
        public void process(final OutputStream rawOut) throws IOException {
            try (final OutputStream out = new BufferedOutputStream(rawOut)) {
                positionHolder.set(readLines(fileReader, currentState.getBuffer(), out, chksum));
            }
        }
    });
    // If there ended up being no data, just remove the FlowFile
    if (flowFile.getSize() == 0) {
        session.remove(flowFile);
        getLogger().debug("No data to consume; removed created FlowFile");
    } else {
        // determine filename for FlowFile by using <base filename of log file>.<initial offset>-<final offset>.<extension>
        final String tailFilename = file.getName();
        final String baseName = StringUtils.substringBeforeLast(tailFilename, ".");
        final String flowFileName;
        if (baseName.length() < tailFilename.length()) {
            flowFileName = baseName + "." + position + "-" + positionHolder.get() + "." + StringUtils.substringAfterLast(tailFilename, ".");
        } else {
            flowFileName = baseName + "." + position + "-" + positionHolder.get();
        }
        final Map<String, String> attributes = new HashMap<>(3);
        attributes.put(CoreAttributes.FILENAME.key(), flowFileName);
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put("tailfile.original.path", tailFile);
        flowFile = session.putAllAttributes(flowFile, attributes);
        session.getProvenanceReporter().receive(flowFile, file.toURI().toString(), "FlowFile contains bytes " + position + " through " + positionHolder.get() + " of source file", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
        session.transfer(flowFile, REL_SUCCESS);
        position = positionHolder.get();
        // Set timestamp to the latest of when the file was modified and the current timestamp stored in the state.
        // We do this because when we read a file that has been rolled over, we set the state to 1 millisecond later than the last mod date
        // in order to avoid ingesting that file again. If we then read from this file during the same second (or millisecond, depending on the
        // operating system file last mod precision), then we could set the timestamp to a smaller value, which could result in reading in the
        // rotated file a second time.
        timestamp = Math.max(state.getTimestamp(), file.lastModified());
        length = file.length();
        getLogger().debug("Created {} and routed to success", new Object[] { flowFile });
    }
    // Create a new state object to represent our current position, timestamp, etc.
    tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum, state.getBuffer()));
    // We must commit session before persisting state in order to avoid data loss on restart
    session.commit();
    persistState(tfo, context);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) CRC32(java.util.zip.CRC32) HashMap(java.util.HashMap) FileChannel(java.nio.channels.FileChannel) CheckedInputStream(java.util.zip.CheckedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) NullOutputStream(org.apache.nifi.stream.io.NullOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) CheckedInputStream(java.util.zip.CheckedInputStream) AtomicLong(java.util.concurrent.atomic.AtomicLong) Checksum(java.util.zip.Checksum) AtomicLong(java.util.concurrent.atomic.AtomicLong) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) FlowFile(org.apache.nifi.flowfile.FlowFile) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) NullOutputStream(org.apache.nifi.stream.io.NullOutputStream)

Example 50 with Checksum

use of java.util.zip.Checksum in project nifi by apache.

the class TailFile method recoverState.

/**
 * Restores the tailing state of one file from previously stored state
 * values: records the "expected recovery checksum" and, when the file on
 * disk still matches that checksum, opens a reader positioned at the stored
 * offset so that the next
 * {@link #onTrigger(ProcessContext, ProcessSession)} call can resume there.
 * <p>
 * The state is reset instead when any of the filename, position, timestamp
 * or length entries is missing, when no checksum was stored, or when the
 * stored filename differs from {@code filePath}.
 *
 * @param context the ProcessContext
 * @param stateValues the values that were recovered from state that was
 * previously stored. This Map should be populated with the keys defined in
 * {@link TailFileState.StateKeys}.
 * @param filePath the path of the file for which state must be recovered
 * @throws IOException if unable to seek to the appropriate location in the
 * tailed file.
 */
private void recoverState(final ProcessContext context, final Map<String, String> stateValues, final String filePath) throws IOException {
    final String keyPrefix = MAP_PREFIX + states.get(filePath).getFilenameIndex() + '.';
    // All four entries are mandatory; if any one is missing there is nothing usable to recover.
    final boolean stateIncomplete = !stateValues.containsKey(keyPrefix + TailFileState.StateKeys.FILENAME)
            || !stateValues.containsKey(keyPrefix + TailFileState.StateKeys.POSITION)
            || !stateValues.containsKey(keyPrefix + TailFileState.StateKeys.TIMESTAMP)
            || !stateValues.containsKey(keyPrefix + TailFileState.StateKeys.LENGTH);
    if (stateIncomplete) {
        resetState(filePath);
        return;
    }
    final String storedChecksum = stateValues.get(keyPrefix + TailFileState.StateKeys.CHECKSUM);
    final String storedFilename = stateValues.get(keyPrefix + TailFileState.StateKeys.FILENAME);
    final long position = Long.parseLong(stateValues.get(keyPrefix + TailFileState.StateKeys.POSITION));
    final long timestamp = Long.parseLong(stateValues.get(keyPrefix + TailFileState.StateKeys.TIMESTAMP));
    final long length = Long.parseLong(stateValues.get(keyPrefix + TailFileState.StateKeys.LENGTH));
    if (storedChecksum == null || !filePath.equals(storedFilename)) {
        // No checksum to compare against, or the stored state belongs to a different file.
        resetState(filePath);
    } else {
        states.get(filePath).setExpectedRecoveryChecksum(Long.parseLong(storedChecksum));
        // We have an expected checksum and the currently configured filename is the same as the state file.
        // We need to check if the existing file is the same as the one referred to in the state file based on
        // the checksum.
        final Checksum crc = new CRC32();
        final File candidateFile = new File(storedFilename);
        // Both stay null unless the checksum comparison below succeeds.
        FileChannel channel = null;
        File recoveredFile = null;
        if (candidateFile.length() < position) {
            // fewer bytes than our position, so we know we weren't already reading from this file. Keep reader at a position of 0.
            getLogger().debug("When recovering state, existing file to tail is only {} bytes but position flag is {}; " + "this indicates that the file has rotated. Will begin tailing current file from beginning.", new Object[] { candidateFile.length(), position });
        } else {
            try (final InputStream rawIn = new FileInputStream(candidateFile);
                final CheckedInputStream checkedIn = new CheckedInputStream(rawIn, crc)) {
                try {
                    // NOTE(review): the number of bytes read comes from the in-memory state's position, not
                    // from the 'position' value parsed above - confirm that the two always agree here.
                    StreamUtils.copy(checkedIn, new NullOutputStream(), states.get(filePath).getState().getPosition());
                } catch (final EOFException eof) {
                    // If we hit EOFException, then the file is smaller than we expected. Assume rollover.
                    getLogger().debug("When recovering state, file being tailed has less data than was stored in the state. " + "Assuming rollover. Will begin tailing current file from beginning.");
                }
                final long actualChecksum = checkedIn.getChecksum().getValue();
                if (actualChecksum == states.get(filePath).getExpectedRecoveryChecksum()) {
                    // Checksums match. This means that we want to resume reading from where we left off.
                    // So we will populate the reader object so that it will be used in onTrigger. If the
                    // checksums do not match, then we will leave the reader object null, so that the next
                    // call to onTrigger will result in a new Reader being created and starting at the
                    // beginning of the file.
                    getLogger().debug("When recovering state, checksum of tailed file matches the stored checksum. Will resume where left off.");
                    recoveredFile = candidateFile;
                    channel = FileChannel.open(recoveredFile.toPath(), StandardOpenOption.READ);
                    getLogger().debug("Created FileChannel {} for {} in recoverState", new Object[] { channel, recoveredFile });
                    channel.position(position);
                } else {
                    // we don't seek the reader to the position, so our reader will start at beginning of file.
                    getLogger().debug("When recovering state, checksum of tailed file does not match the stored checksum. Will begin tailing current file from beginning.");
                }
            }
        }
        states.get(filePath).setState(new TailFileState(filePath, recoveredFile, channel, position, timestamp, length, crc, ByteBuffer.allocate(65536)));
    }
    getLogger().debug("Recovered state {}", new Object[] { states.get(filePath).getState() });
}
Also used : CRC32(java.util.zip.CRC32) FileChannel(java.nio.channels.FileChannel) CheckedInputStream(java.util.zip.CheckedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileInputStream(java.io.FileInputStream) CheckedInputStream(java.util.zip.CheckedInputStream) Checksum(java.util.zip.Checksum) EOFException(java.io.EOFException) FlowFile(org.apache.nifi.flowfile.FlowFile) File(java.io.File) NullOutputStream(org.apache.nifi.stream.io.NullOutputStream)

Aggregations

Checksum (java.util.zip.Checksum)84 CRC32 (java.util.zip.CRC32)29 IOException (java.io.IOException)16 ByteBuffer (java.nio.ByteBuffer)15 Adler32 (java.util.zip.Adler32)12 File (java.io.File)8 InputStream (java.io.InputStream)7 FileInputStream (java.io.FileInputStream)6 Path (java.nio.file.Path)6 EOFException (java.io.EOFException)5 Test (org.junit.Test)5 Test (org.junit.jupiter.api.Test)5 StoreChannel (org.neo4j.io.fs.StoreChannel)5 CheckedInputStream (java.util.zip.CheckedInputStream)4 BufferedOutputStream (java.io.BufferedOutputStream)3 ByteArrayInputStream (java.io.ByteArrayInputStream)3 UnsupportedEncodingException (java.io.UnsupportedEncodingException)3 ArrayList (java.util.ArrayList)3 PureJavaCrc32 (org.apache.hadoop.util.PureJavaCrc32)3 BinaryInputArchive (org.apache.jute.BinaryInputArchive)3