Example 1 with TimestampedLong

Use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.

Class EventIdFirstSchemaRecordWriter, method writeRecord:

@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
    if (isDirty()) {
        throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository");
    }
    final long lockStart;
    final long writeStart;
    final long startBytes;
    final long endBytes;
    final long recordIdentifier;
    final long serializeStart = System.nanoTime();
    final ByteArrayDataOutputStream bados = streamCache.checkOut();
    try {
        writeRecord(record, 0L, bados.getDataOutputStream());
        lockStart = System.nanoTime();
        synchronized (this) {
            writeStart = System.nanoTime();
            try {
                recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
                startBytes = getBytesWritten();
                ensureStreamState(recordIdentifier, startBytes);
                final DataOutputStream out = getBufferedOutputStream();
                final int recordIdOffset = (int) (recordIdentifier - firstEventId);
                out.writeInt(recordIdOffset);
                final ByteArrayOutputStream baos = bados.getByteArrayOutputStream();
                out.writeInt(baos.size());
                baos.writeTo(out);
                recordCount.incrementAndGet();
                endBytes = getBytesWritten();
            } catch (final IOException ioe) {
                markDirty();
                throw ioe;
            }
        }
    } finally {
        streamCache.checkIn(bados);
    }
    if (logger.isDebugEnabled()) {
        // Collect stats and periodically dump them if the log level is set to at least debug.
        final long writeNanos = System.nanoTime() - writeStart;
        writeTimes.add(new TimestampedLong(writeNanos));
        final long serializeNanos = lockStart - serializeStart;
        serializeTimes.add(new TimestampedLong(serializeNanos));
        final long lockNanos = writeStart - lockStart;
        lockTimes.add(new TimestampedLong(lockNanos));
        bytesWritten.add(new TimestampedLong(endBytes - startBytes));
        final long recordCount = totalRecordCount.incrementAndGet();
        if (recordCount % 1_000_000 == 0) {
            final long sixtySecondsAgo = System.currentTimeMillis() - 60000L;
            final Long writeNanosLast60 = writeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long lockNanosLast60 = lockTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long serializeNanosLast60 = serializeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long bytesWrittenLast60 = bytesWritten.getAggregateValue(sixtySecondsAgo).getValue();
            logger.debug("In the last 60 seconds, have spent {} millis writing to file ({} MB), {} millis waiting on synchronize block, {} millis serializing events", TimeUnit.NANOSECONDS.toMillis(writeNanosLast60), bytesWrittenLast60 / 1024 / 1024, TimeUnit.NANOSECONDS.toMillis(lockNanosLast60), TimeUnit.NANOSECONDS.toMillis(serializeNanosLast60));
        }
    }
    final long serializedLength = endBytes - startBytes;
    final TocWriter tocWriter = getTocWriter();
    final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
    final File file = getFile();
    final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
    return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayDataOutputStream(org.apache.nifi.provenance.util.ByteArrayDataOutputStream) IOException(java.io.IOException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) TocWriter(org.apache.nifi.provenance.toc.TocWriter) AtomicLong(java.util.concurrent.atomic.AtomicLong) File(java.io.File)
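
The writeTimes, serializeTimes, lockTimes, and bytesWritten fields used above are declared elsewhere and not shown in this excerpt. A minimal sketch of how such a stats buffer is presumably declared and queried with NiFi's TimedBuffer and LongEntityAccess; the one-second bin precision, 300-bin retention, and the WriteStats class name are assumptions for illustration:

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.timebuffer.LongEntityAccess;
import org.apache.nifi.util.timebuffer.TimedBuffer;
import org.apache.nifi.util.timebuffer.TimestampedLong;

public class WriteStats {
    // One bin per second, retained for 300 seconds (assumed), so the buffer can
    // answer "how much happened in the last N <= 300 seconds?" queries.
    private final TimedBuffer<TimestampedLong> writeTimes =
            new TimedBuffer<>(TimeUnit.SECONDS, 300, new LongEntityAccess());

    public void recordWrite(final long writeNanos) {
        writeTimes.add(new TimestampedLong(writeNanos));
    }

    public long millisWritingInLast60Seconds() {
        final TimestampedLong aggregate = writeTimes.getAggregateValue(System.currentTimeMillis() - 60_000L);
        // getAggregateValue may return null when nothing was recorded in the window
        return aggregate == null ? 0L : TimeUnit.NANOSECONDS.toMillis(aggregate.getValue());
    }
}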

Example 2 with TimestampedLong

Use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.

Class PersistentProvenanceRepository, method rollover:

/**
 * <p>
 * MUST be called with the write lock held.
 * </p>
 *
 * Rolls over the data in the journal files, merging them into a single
 * Provenance Event Log File, and compressing and indexing as needed.
 *
 * @param force if true, will force a rollover regardless of whether or not
 * data has been written
 * @throws IOException if unable to complete rollover
 */
private void rollover(final boolean force) throws IOException {
    if (!configuration.isAllowRollover()) {
        return;
    }
    // Only roll over if forced, or if we have written records since the last rollover or any writer is dirty
    if (force || recordsWrittenSinceRollover.get() > 0L || dirtyWriterCount.get() > 0) {
        final List<File> journalsToMerge = new ArrayList<>();
        for (final RecordWriter writer : writers) {
            if (!writer.isClosed()) {
                final File writerFile = writer.getFile();
                journalsToMerge.add(writerFile);
                try {
                    writer.close();
                } catch (final IOException ioe) {
                    logger.warn("Failed to close {} due to {}", writer, ioe.toString());
                    if (logger.isDebugEnabled()) {
                        logger.warn("", ioe);
                    }
                }
            }
        }
        if (logger.isDebugEnabled()) {
            if (journalsToMerge.isEmpty()) {
                logger.debug("No journals to merge; all RecordWriters were already closed");
            } else {
                logger.debug("Going to merge {} files for journals starting with ID {}", journalsToMerge.size(), LuceneUtil.substringBefore(journalsToMerge.get(0).getName(), "."));
            }
        }
        // Choose a storage directory to store the merged file in.
        final long storageDirIdx = storageDirectoryIndex.getAndIncrement();
        final List<File> storageDirs = new ArrayList<>(configuration.getStorageDirectories().values());
        final File storageDir = storageDirs.get((int) (storageDirIdx % storageDirs.size()));
        Future<?> future = null;
        if (!journalsToMerge.isEmpty()) {
            // Run the rollover logic in a background thread.
            final AtomicReference<Future<?>> futureReference = new AtomicReference<>();
            final AtomicInteger retryAttempts = new AtomicInteger(MAX_JOURNAL_ROLLOVER_RETRIES);
            final int recordsWritten = recordsWrittenSinceRollover.getAndSet(0);
            final Runnable rolloverRunnable = new Runnable() {

                @Override
                public void run() {
                    File fileRolledOver = null;
                    try {
                        try {
                            fileRolledOver = mergeJournals(journalsToMerge, getMergeFile(journalsToMerge, storageDir), eventReporter);
                        } catch (final IOException ioe) {
                            logger.error("Failed to merge Journal Files {} into a Provenance Log File due to {}", journalsToMerge, ioe.toString());
                            logger.error("", ioe);
                        }
                        if (fileRolledOver != null) {
                            final File file = fileRolledOver;
                            // update our map of id to Path
                            // We need to make sure that another thread doesn't also update the map at the same time. We cannot
                            // use the write lock when purging old events, and we want to use the same approach here.
                            boolean updated = false;
                            final Long fileFirstEventId = Long.valueOf(LuceneUtil.substringBefore(fileRolledOver.getName(), "."));
                            while (!updated) {
                                final SortedMap<Long, Path> existingPathMap = idToPathMap.get();
                                final SortedMap<Long, Path> newIdToPathMap = new TreeMap<>(new PathMapComparator());
                                newIdToPathMap.putAll(existingPathMap);
                                newIdToPathMap.put(fileFirstEventId, file.toPath());
                                updated = idToPathMap.compareAndSet(existingPathMap, newIdToPathMap);
                            }
                            final TimedCountSize countSize = updateCounts.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES));
                            logger.info("Successfully Rolled over Provenance Event file containing {} records. In the past 5 minutes, " + "{} events have been written to the Provenance Repository, totaling {}", recordsWritten, countSize.getCount(), FormatUtils.formatDataSize(countSize.getSize()));
                        }
                        // if files were rolled over or if out of retries stop the future
                        if (fileRolledOver != null || retryAttempts.decrementAndGet() == 0) {
                            if (fileRolledOver == null && retryAttempts.get() == 0) {
                                logger.error("Failed to merge Journal Files {} after {} attempts.", journalsToMerge, MAX_JOURNAL_ROLLOVER_RETRIES);
                            }
                            rolloverCompletions.getAndIncrement();
                            // Cancel the future so that we don't run anymore
                            Future<?> future;
                            while ((future = futureReference.get()) == null) {
                                try {
                                    Thread.sleep(10L);
                                } catch (final InterruptedException ie) {
                                    // ignore and retry; we are only waiting for the scheduler to populate futureReference
                                }
                            }
                            future.cancel(false);
                        } else {
                            logger.warn("Couldn't merge journals. Will try again. journalsToMerge: {}, storageDir: {}", journalsToMerge, storageDir);
                        }
                    } catch (final Exception e) {
                        logger.error("Failed to merge journals. Will try again. journalsToMerge: {}, storageDir: {}, cause: {}", journalsToMerge, storageDir, e.toString());
                        logger.error("", e);
                    }
                }
            };
            // We are going to schedule the future to run immediately and then repeat every 10 seconds. This allows us to keep retrying if we
            // fail for some reason. When we succeed or if retries are exceeded, the Runnable will cancel itself.
            future = rolloverExecutor.scheduleWithFixedDelay(rolloverRunnable, 0, getRolloverRetryMillis(), TimeUnit.MILLISECONDS);
            futureReference.set(future);
        }
        streamStartTime.set(System.currentTimeMillis());
        bytesWrittenSinceRollover.set(0);
        // We don't want to create new 'writers' until the number of unmerged journals falls below our threshold. So we wait
        // here before we repopulate the 'writers' member variable and release the lock.
        int journalFileCount = getJournalCount();
        long repoSize = getSize(getLogFiles(), 0L);
        final int journalCountThreshold = configuration.getJournalCount() * 5;
        final long sizeThreshold = (long) (configuration.getMaxStorageCapacity() * ROLLOVER_HIGH_WATER);
        // If we have too many unmerged journal files, or the repository has grown too large,
        // block (slowing down the flow) until that is no longer the case.
        if (journalFileCount > journalCountThreshold || repoSize > sizeThreshold) {
            final long stopTheWorldStart = System.nanoTime();
            logger.warn("The rate of the dataflow is exceeding the provenance recording rate. " + "Slowing down flow to accommodate. Currently, there are {} journal files ({} bytes) and " + "threshold for blocking is {} ({} bytes)", journalFileCount, repoSize, journalCountThreshold, sizeThreshold);
            eventReporter.reportEvent(Severity.WARNING, "Provenance Repository", "The rate of the dataflow is " + "exceeding the provenance recording rate. Slowing down flow to accommodate");
            while (journalFileCount > journalCountThreshold || repoSize > sizeThreshold) {
                // if a shutdown happens while we are in this loop, kill the rollover thread and break
                if (this.closed.get()) {
                    if (future != null) {
                        future.cancel(true);
                    }
                    break;
                }
                if (repoSize > sizeThreshold) {
                    logger.debug("Provenance Repository has exceeded its size threshold; will trigger purging of oldest events");
                    purgeOldEvents();
                    journalFileCount = getJournalCount();
                    repoSize = getSize(getLogFiles(), 0L);
                    continue;
                } else {
                    // journal count is above the threshold but size is not; just wait for the
                    // journals to be merged, which will happen due to the runnable that we scheduled above
                    try {
                        Thread.sleep(100L);
                    } catch (final InterruptedException ie) {
                        // ignore; the thresholds are re-checked on the next loop iteration
                    }
                }
                logger.debug("Provenance Repository is still behind. Keeping flow slowed down " + "to accommodate. Currently, there are {} journal files ({} bytes) and " + "threshold for blocking is {} ({} bytes)", journalFileCount, repoSize, journalCountThreshold, sizeThreshold);
                journalFileCount = getJournalCount();
                repoSize = getSize(getLogFiles(), 0L);
            }
            final long stopTheWorldNanos = System.nanoTime() - stopTheWorldStart;
            // note: despite its name, backpressurePauseMillis stores nanoseconds (see formatNanos below)
            backpressurePauseMillis.add(new TimestampedLong(stopTheWorldNanos));
            final TimestampedLong pauseNanosLastFiveMinutes = backpressurePauseMillis.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES));
            logger.info("Provenance Repository has now caught up with rolling over journal files. Current number of " + "journal files to be rolled over is {}. Provenance Repository Back Pressure paused Session commits for {} ({} total in the last 5 minutes).", journalFileCount, FormatUtils.formatNanos(stopTheWorldNanos, true), FormatUtils.formatNanos(pauseNanosLastFiveMinutes.getValue(), true));
        }
        // we've finished rolling over successfully. Create new writers and reset state.
        writers = createWriters(configuration, idGenerator.get());
        dirtyWriterCount.set(0);
        streamStartTime.set(System.currentTimeMillis());
        recordsWrittenSinceRollover.getAndSet(0);
    }
}
Also used : Path(java.nio.file.Path) ArrayList(java.util.ArrayList) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) TreeMap(java.util.TreeMap) TimedCountSize(org.apache.nifi.util.timebuffer.TimedCountSize) IndexNotFoundException(org.apache.lucene.index.IndexNotFoundException) ResourceNotFoundException(org.apache.nifi.web.ResourceNotFoundException) AccessDeniedException(org.apache.nifi.authorization.AccessDeniedException) ExecutionException(java.util.concurrent.ExecutionException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AtomicLong(java.util.concurrent.atomic.AtomicLong) Future(java.util.concurrent.Future) File(java.io.File)
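
One detail worth calling out in rollover(): the background thread updates idToPathMap without holding the repository's write lock, using a copy-on-write compareAndSet loop (the while (!updated) block above). A self-contained sketch of the same pattern, with illustrative names that are not NiFi's:

import java.nio.file.Path;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicReference;

public class IdToPathIndex {
    private final AtomicReference<SortedMap<Long, Path>> idToPathMap =
            new AtomicReference<>(new TreeMap<>());

    // Copy-on-write insert: snapshot the current map, copy it, mutate the copy,
    // then compareAndSet. If another thread swapped the map in the meantime,
    // the CAS fails and we retry against the fresh snapshot.
    public void put(final long firstEventId, final Path path) {
        boolean updated = false;
        while (!updated) {
            final SortedMap<Long, Path> existing = idToPathMap.get();
            final SortedMap<Long, Path> replacement = new TreeMap<>(existing);
            replacement.put(firstEventId, path);
            updated = idToPathMap.compareAndSet(existing, replacement);
        }
    }
}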

Example 3 with TimestampedLong

Use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.

Class LuceneEventIndex, method incrementAndReportStats:

private void incrementAndReportStats() {
    final long fiveMinutesAgo = System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(5);
    final TimestampedLong nanosLastFive = queuePauseNanos.getAggregateValue(fiveMinutesAgo);
    if (nanosLastFive == null) {
        return;
    }
    final TimestampedLong eventsLast5 = eventsIndexed.getAggregateValue(fiveMinutesAgo);
    if (eventsLast5 == null) {
        return;
    }
    final long numEventsLast5 = eventsLast5.getValue();
    final long millis = TimeUnit.NANOSECONDS.toMillis(nanosLastFive.getValue());
    logger.debug("In the last 5 minutes, have spent {} CPU-millis waiting to enqueue events for indexing and have indexed {} events ({} since NiFi started)", millis, numEventsLast5, eventCount.get());
}
Also used : TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong)
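
Both aggregates are nullable, hence the early returns. If a derived figure such as the average enqueue pause per indexed event were wanted, it could be computed from the same two values; a hypothetical helper, not part of LuceneEventIndex:

import java.util.concurrent.TimeUnit;

// Hypothetical helper: average enqueue pause per indexed event over the window,
// guarding against division by zero when no events were indexed.
static long averagePauseMicrosPerEvent(final long pauseNanos, final long eventsIndexed) {
    return eventsIndexed == 0 ? 0L : TimeUnit.NANOSECONDS.toMicros(pauseNanos) / eventsIndexed;
}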

Example 4 with TimestampedLong

Use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.

Class EncryptedSchemaRecordWriter, method writeRecord:

@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
    final long encryptStart = System.nanoTime();
    byte[] cipherBytes;
    try {
        byte[] serialized;
        try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(256);
            final DataOutputStream dos = new DataOutputStream(baos)) {
            writeRecord(record, 0L, dos);
            serialized = baos.toByteArray();
        }
        String eventId = record.getBestEventIdentifier();
        cipherBytes = encrypt(serialized, eventId);
    } catch (EncryptionException e) {
        logger.error("Encountered an error: ", e);
        throw new IOException("Error encrypting the provenance record", e);
    }
    final long encryptStop = System.nanoTime();
    final long lockStart = System.nanoTime();
    final long writeStart;
    final long startBytes;
    final long endBytes;
    final long recordIdentifier;
    synchronized (this) {
        writeStart = System.nanoTime();
        try {
            recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
            startBytes = getBytesWritten();
            ensureStreamState(recordIdentifier, startBytes);
            final DataOutputStream out = getBufferedOutputStream();
            final int recordIdOffset = (int) (recordIdentifier - getFirstEventId());
            out.writeInt(recordIdOffset);
            out.writeInt(cipherBytes.length);
            out.write(cipherBytes);
            getRecordCount().incrementAndGet();
            endBytes = getBytesWritten();
        } catch (final IOException ioe) {
            markDirty();
            throw ioe;
        }
    }
    if (logger.isDebugEnabled()) {
        // Collect stats and periodically dump them if the log level is set to at least debug.
        final long writeNanos = System.nanoTime() - writeStart;
        getWriteTimes().add(new TimestampedLong(writeNanos));
        final long serializeNanos = lockStart - encryptStart;
        getSerializeTimes().add(new TimestampedLong(serializeNanos));
        final long encryptNanos = encryptStop - encryptStart;
        getEncryptTimes().add(new TimestampedLong(encryptNanos));
        final long lockNanos = writeStart - lockStart;
        getLockTimes().add(new TimestampedLong(lockNanos));
        getBytesWrittenBuffer().add(new TimestampedLong(endBytes - startBytes));
        final long recordCount = getTotalRecordCount().incrementAndGet();
        if (recordCount % debugFrequency == 0) {
            printStats();
        }
    }
    final long serializedLength = endBytes - startBytes;
    final TocWriter tocWriter = getTocWriter();
    final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
    final File file = getFile();
    final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
    return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File)
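
Note that both writeRecord() implementations shown here emit the same frame: a 4-byte record-ID offset, a 4-byte payload length, then the payload (schema-serialized bytes in Example 1, ciphertext here). A minimal sketch of a reader that walks that framing; the method name and the firstEventId parameter are illustrative, and decrypting or deserializing the payload is out of scope:

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;

// Reads one [int idOffset][int length][payload] frame as written above.
static byte[] readFrame(final InputStream in, final long firstEventId, final long[] eventIdOut) throws IOException {
    final DataInputStream dis = new DataInputStream(in);
    final int recordIdOffset = dis.readInt();      // offset relative to the block's first event ID
    eventIdOut[0] = firstEventId + recordIdOffset; // reconstruct the absolute event ID
    final byte[] payload = new byte[dis.readInt()];
    dis.readFully(payload);                        // length-prefixed payload bytes
    return payload;
}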

Example 5 with TimestampedLong

Use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.

Class LuceneEventIndex, method addEvent:

protected void addEvent(final ProvenanceEventRecord event, final StorageSummary location) {
    for (final CachedQuery cachedQuery : cachedQueries) {
        cachedQuery.update(event, location);
    }
    final Document document = eventConverter.convert(event, location);
    if (document == null) {
        logger.debug("Received Provenance Event {} to index but it contained no information that should be indexed, so skipping it", event.getEventId());
    } else {
        final StoredDocument doc = new StoredDocument(document, location);
        boolean added = false;
        while (!added && !closed) {
            added = documentQueue.offer(doc);
            if (!added) {
                final long start = System.nanoTime();
                try {
                    added = documentQueue.offer(doc, 1, TimeUnit.SECONDS);
                } catch (final InterruptedException e) {
                    Thread.currentThread().interrupt();
                    logger.warn("Interrupted while attempting to enqueue Provenance Event for indexing; this event will not be indexed");
                    return;
                }
                final long nanos = System.nanoTime() - start;
                queuePauseNanos.add(new TimestampedLong(nanos));
            }
            if (added) {
                final long totalEventCount = eventCount.incrementAndGet();
                if (totalEventCount % 1_000_000 == 0 && logger.isDebugEnabled()) {
                    incrementAndReportStats();
                }
            }
        }
    }
}
Also used : TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) Document(org.apache.lucene.document.Document)
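
The enqueue loop above is a backpressure pattern: try a non-blocking offer first, then fall back to timed offers while recording how long the producer was stalled. A generic, self-contained sketch of the same idea; the class, field names, and capacity are illustrative:

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

public class MeteredQueue<T> {
    private final BlockingQueue<T> queue = new LinkedBlockingQueue<>(10_000);
    private final AtomicLong totalPauseNanos = new AtomicLong();

    // Returns false only if interrupted while waiting for capacity.
    public boolean enqueue(final T item) {
        if (queue.offer(item)) {
            return true; // fast path: queue had room, no backpressure
        }
        final long start = System.nanoTime();
        try {
            // bounded waits, so a real caller could also re-check a shutdown flag each second
            while (!queue.offer(item, 1, TimeUnit.SECONDS)) {
                // still full; keep waiting
            }
            return true;
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        } finally {
            totalPauseNanos.addAndGet(System.nanoTime() - start);
        }
    }
}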

Aggregations

TimestampedLong (org.apache.nifi.util.timebuffer.TimestampedLong): 6
File (java.io.File): 4
IOException (java.io.IOException): 4
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 3
AtomicLong (java.util.concurrent.atomic.AtomicLong): 3
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 2
DataOutputStream (java.io.DataOutputStream): 2
EOFException (java.io.EOFException): 2
FileNotFoundException (java.io.FileNotFoundException): 2
Path (java.nio.file.Path): 2
ArrayList (java.util.ArrayList): 2
TreeMap (java.util.TreeMap): 2
ExecutionException (java.util.concurrent.ExecutionException): 2
Future (java.util.concurrent.Future): 2
AtomicReference (java.util.concurrent.atomic.AtomicReference): 2
RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter): 2
StorageSummary (org.apache.nifi.provenance.serialization.StorageSummary): 2
TocWriter (org.apache.nifi.provenance.toc.TocWriter): 2
TimedCountSize (org.apache.nifi.util.timebuffer.TimedCountSize): 2
ResourceNotFoundException (org.apache.nifi.web.ResourceNotFoundException): 2