Search in sources :

Example 6 with SwapContents

use of org.apache.nifi.controller.repository.SwapContents in project nifi by apache.

the class StandardFlowFileQueue method migrateSwapToActive.

/**
 * If there are FlowFiles waiting on the swap queue, move them to the active
 * queue until we meet our threshold. This prevents us from having to swap
 * them to disk and then back out.
 *
 * If {@code swapLocations} is not empty, the first swap location is swapped in
 * instead and the in-memory swap queue is not touched by this call.
 *
 * This method MUST be called with the writeLock held.
 */
private void migrateSwapToActive() {
    // Migrate as many FlowFiles as we can from the Swap Queue to the Active Queue, so that we don't
    // have to swap them out & then swap them back in.
    // If we don't do this, we could get into a situation where we have potentially thousands of FlowFiles
    // sitting on the Swap Queue but not getting processed because there aren't enough to be swapped out.
    // In particular, this can happen if the queue is typically filled with surges.
    // For example, if the queue has 25,000 FlowFiles come in, it may process 20,000 of them and leave
    // 5,000 sitting on the Swap Queue. If it then takes an hour for an additional 5,000 FlowFiles to come in,
    // those FlowFiles sitting on the Swap Queue will sit there for an hour, waiting to be swapped out and
    // swapped back in again.
    // Calling this method when records are polled prevents this condition by migrating FlowFiles from the
    // Swap Queue to the Active Queue. However, we don't do this if there are FlowFiles already swapped out
    // to disk, because we want them to be swapped back in in the same order that they were swapped out.

    // Nothing to do while the active queue is non-empty and already holds more than
    // (swapThreshold - SWAP_RECORD_POLL_SIZE) FlowFiles.
    final int activeQueueSize = activeQueue.size();
    if (activeQueueSize > 0 && activeQueueSize > swapThreshold - SWAP_RECORD_POLL_SIZE) {
        return;
    }
    // If there are swap files waiting to be swapped in, swap those in first, so that FlowFiles
    // that were swapped out first are also the ones swapped back in first.
    if (!swapLocations.isEmpty()) {
        final String swapLocation = swapLocations.get(0);
        boolean partialContents = false;
        SwapContents swapContents = null;
        try {
            swapContents = swapManager.swapIn(swapLocation, this);
            swapLocations.remove(0);
        } catch (final IncompleteSwapFileException isfe) {
            // The swap file was truncated: keep whatever records were read and drop the location.
            logger.error("Failed to swap in all FlowFiles from Swap File {}; Swap File ended prematurely. The records that were present will still be swapped in", swapLocation);
            logger.error("", isfe);
            swapContents = isfe.getPartialContents();
            partialContents = true;
            swapLocations.remove(0);
        } catch (final FileNotFoundException fnfe) {
            // The swap file is gone, so the location is dropped and nothing is swapped in.
            // NOTE(review): the swap queue size counters are not adjusted on this path — confirm
            // whether the swapped count is corrected elsewhere.
            logger.error("Failed to swap in FlowFiles from Swap File {} because the Swap File can no longer be found", swapLocation);
            if (eventReporter != null) {
                eventReporter.reportEvent(Severity.ERROR, "Swap File", "Failed to swap in FlowFiles from Swap File " + swapLocation + " because the Swap File can no longer be found");
            }
            swapLocations.remove(0);
            return;
        } catch (final IOException ioe) {
            logger.error("Failed to swap in FlowFiles from Swap File {}; Swap File appears to be corrupt!", swapLocation);
            logger.error("", ioe);
            if (eventReporter != null) {
                eventReporter.reportEvent(Severity.ERROR, "Swap File", "Failed to swap in FlowFiles from Swap File " + swapLocation + "; Swap File appears to be corrupt! Some FlowFiles in the queue may not be accessible. See logs for more information.");
            }
            // We do not remove the Swap File from swapLocations here (unlike the handlers above),
            // presumably because the IOException may be recoverable later: for instance, the file may be
            // on a network drive and we may have connectivity problems, etc.
            return;
        } catch (final Throwable t) {
            logger.error("Failed to swap in FlowFiles from Swap File {}", swapLocation, t);
            // We do not remove the Swap File from swapLocations; because it is still
            // in swapLocations, we will continue to retry.
            throw t;
        }
        // The swap queue counters are decremented by the totals recorded in the swap file's summary,
        // even when only part of the file could be read.
        final QueueSize swapSize = swapContents.getSummary().getQueueSize();
        final long contentSize = swapSize.getByteCount();
        final int flowFileCount = swapSize.getObjectCount();
        // presumably the third argument is the change in the number of swap files — confirm
        incrementSwapQueueSize(-flowFileCount, -contentSize, -1);
        if (partialContents) {
            // if we have partial results, we need to calculate the content size of the flowfiles
            // actually swapped back in.
            long contentSizeSwappedIn = 0L;
            for (final FlowFileRecord swappedIn : swapContents.getFlowFiles()) {
                contentSizeSwappedIn += swappedIn.getSize();
            }
            incrementActiveQueueSize(swapContents.getFlowFiles().size(), contentSizeSwappedIn);
        } else {
            // we swapped in the whole swap file. We can just use the info that we got from the summary.
            incrementActiveQueueSize(flowFileCount, contentSize);
        }
        activeQueue.addAll(swapContents.getFlowFiles());
        return;
    }
    // Nothing is counted as swapped and the swap queue is empty. The original comment calls this out
    // as the cheap first check that avoids the expense of other checks for 99.999% of the cases.
    if (size.get().swappedCount == 0 && swapQueue.isEmpty()) {
        return;
    }
    if (size.get().swappedCount > swapQueue.size()) {
        // More FlowFiles are counted as swapped than the in-memory swap queue holds (presumably the
        // remainder is in swap files — confirm), so we do not migrate the swap queue; we wait for
        // the files to be swapped back in first
        return;
    }
    // Move FlowFiles from the head of the swap queue into the active queue until the active queue
    // reaches swapThreshold or the swap queue is exhausted.
    int recordsMigrated = 0;
    long bytesMigrated = 0L;
    final Iterator<FlowFileRecord> swapItr = swapQueue.iterator();
    while (activeQueue.size() < swapThreshold && swapItr.hasNext()) {
        final FlowFileRecord toMigrate = swapItr.next();
        activeQueue.add(toMigrate);
        bytesMigrated += toMigrate.getSize();
        recordsMigrated++;
        swapItr.remove();
    }
    if (recordsMigrated > 0) {
        // Keep the size counters in step with what was actually moved.
        incrementActiveQueueSize(recordsMigrated, bytesMigrated);
        incrementSwapQueueSize(-recordsMigrated, -bytesMigrated, 0);
    }
    // Once nothing is counted as swapped any more, clear the swapMode flag.
    if (size.get().swappedCount == 0) {
        swapMode = false;
    }
}
Also used : QueueSize(org.apache.nifi.controller.queue.QueueSize) IncompleteSwapFileException(org.apache.nifi.controller.repository.IncompleteSwapFileException) SwapContents(org.apache.nifi.controller.repository.SwapContents) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) FlowFileRecord(org.apache.nifi.controller.repository.FlowFileRecord)

Example 7 with SwapContents

use of org.apache.nifi.controller.repository.SwapContents in project nifi by apache.

the class SimpleSwapDeserializer method deserializeFlowFiles.

/**
 * Reads the swap file header (encoding version, connection id, record count, content size and,
 * for encoding versions above 7, the max record id) and then delegates to the record-level
 * deserializer for the FlowFiles themselves.
 *
 * @param in           stream positioned at the start of the swap file
 * @param swapLocation location of the swap file, used in error reporting
 * @param queue        queue the FlowFiles are being swapped into; its identifier must equal the one stored in the file
 * @param claimManager passed through to the record-level deserializer
 * @return the contents read from the swap file
 * @throws IOException if the encoding version is newer than {@code SWAP_ENCODING_VERSION}, or on a read failure
 * @throws IncompleteSwapFileException if the stream ends while the count/size/max-id header fields are being read
 * @throws IllegalArgumentException if the swap file belongs to a different connection than {@code queue}
 */
@Override
public SwapContents deserializeFlowFiles(final DataInputStream in, final String swapLocation, final FlowFileQueue queue, final ResourceClaimManager claimManager) throws IOException {
    final int encodingVersion = in.readInt();
    final boolean versionSupported = encodingVersion <= SWAP_ENCODING_VERSION;
    if (!versionSupported) {
        throw new IOException("Cannot swap FlowFiles in from SwapFile because the encoding version is " + encodingVersion + ", which is too new (expecting " + SWAP_ENCODING_VERSION + " or less)");
    }

    // The file records which connection it was written for; refuse to load it into any other.
    final String ownerConnectionId = in.readUTF();
    final String targetConnectionId = queue.getIdentifier();
    if (!ownerConnectionId.equals(targetConnectionId)) {
        throw new IllegalArgumentException("Cannot deserialize FlowFiles from Swap File at location " + swapLocation + " because those FlowFiles belong to Connection with ID " + ownerConnectionId + " and an attempt was made to swap them into a Connection with ID " + targetConnectionId);
    }

    // These start at their defaults so that, if the header is truncated, the partial summary
    // reflects only the fields that were actually read.
    int recordCount = 0;
    long totalBytes = 0L;
    Long highestRecordId = null;
    try {
        recordCount = in.readInt();
        totalBytes = in.readLong();
        // The max record id is only present for encoding versions above 7.
        if (encodingVersion >= 8) {
            highestRecordId = in.readLong();
        }
    } catch (final EOFException truncatedHeader) {
        // No FlowFile records were reached: report an incomplete file with empty contents.
        final SwapSummary partialSummary = new StandardSwapSummary(new QueueSize(recordCount, totalBytes), highestRecordId, Collections.emptyList());
        throw new IncompleteSwapFileException(swapLocation, new StandardSwapContents(partialSummary, Collections.emptyList()));
    }

    return deserializeFlowFiles(in, new QueueSize(recordCount, totalBytes), highestRecordId, encodingVersion, claimManager, swapLocation);
}
Also used : SwapSummary(org.apache.nifi.controller.repository.SwapSummary) IOException(java.io.IOException) QueueSize(org.apache.nifi.controller.queue.QueueSize) IncompleteSwapFileException(org.apache.nifi.controller.repository.IncompleteSwapFileException) EOFException(java.io.EOFException) SwapContents(org.apache.nifi.controller.repository.SwapContents)

Example 8 with SwapContents

use of org.apache.nifi.controller.repository.SwapContents in project nifi by apache.

the class SimpleSwapDeserializer method getSwapSummary.

/**
 * Produces the summary of a swap file by reading its header and then deserializing its records.
 * An empty summary is returned when the header reports zero records or when the stream ends
 * before the header has been fully read.
 *
 * @param in           stream positioned at the start of the swap file
 * @param swapLocation location of the swap file, used in log and error messages
 * @param claimManager passed through to the record-level deserializer
 * @return the summary of the swap file, or {@code StandardSwapSummary.EMPTY_SUMMARY}
 * @throws IOException if the encoding version is newer than {@code SWAP_ENCODING_VERSION}, or on a read failure
 */
@Override
public SwapSummary getSwapSummary(final DataInputStream in, final String swapLocation, final ResourceClaimManager claimManager) throws IOException {
    final int encodingVersion = in.readInt();
    if (encodingVersion > SWAP_ENCODING_VERSION) {
        throw new IOException("Cannot swap FlowFiles in from " + swapLocation + " because the encoding version is " + encodingVersion + ", which is too new (expecting " + SWAP_ENCODING_VERSION + " or less)");
    }

    final Long headerMaxRecordId;
    final QueueSize headerQueueSize;
    try {
        // The connection id is not needed for a summary; read it only to advance the stream.
        in.readUTF();
        final int recordCount = in.readInt();
        final long totalBytes = in.readLong();
        if (recordCount == 0) {
            return StandardSwapSummary.EMPTY_SUMMARY;
        }

        // The max record id is only present for encoding versions above 7.
        headerMaxRecordId = (encodingVersion >= 8) ? Long.valueOf(in.readLong()) : null;
        headerQueueSize = new QueueSize(recordCount, totalBytes);
    } catch (final EOFException truncatedHeader) {
        logger.warn("Found premature End-of-File when reading Swap File {}. EOF occurred before any FlowFiles were encountered", swapLocation);
        return StandardSwapSummary.EMPTY_SUMMARY;
    }

    return deserializeFlowFiles(in, headerQueueSize, headerMaxRecordId, encodingVersion, claimManager, swapLocation).getSummary();
}
Also used : QueueSize(org.apache.nifi.controller.queue.QueueSize) EOFException(java.io.EOFException) SwapContents(org.apache.nifi.controller.repository.SwapContents) IOException(java.io.IOException)

Example 9 with SwapContents

use of org.apache.nifi.controller.repository.SwapContents in project nifi by apache.

the class SimpleSwapDeserializer method deserializeFlowFiles.

/**
 * Reads {@code queueSize.getObjectCount()} serialized FlowFile records from the stream, honouring
 * the field layout of the given serialization version.
 *
 * If the stream ends before all records have been read, the records that were completely read so
 * far are handed back inside an {@link IncompleteSwapFileException}. The summary in that case still
 * carries the {@code queueSize} that was passed in, not the number of records actually read.
 *
 * @param in                   stream positioned at the first FlowFile record
 * @param queueSize            record count and content size as declared by the swap file header
 * @param maxRecordId          max record id from the header, or {@code null} if the header had none
 * @param serializationVersion encoding version of the swap file; selects which fields are present
 * @param claimManager         used to look up (or, if missing, create) the Resource Claim of each record
 * @param location             swap file location, used for error reporting
 * @return the deserialized FlowFiles together with their summary
 * @throws IncompleteSwapFileException if the stream ends prematurely
 * @throws IOException if a legacy record ({@code serializationVersion < 3}) does not start with the
 *         expected action marker, or on any other read failure
 */
private static SwapContents deserializeFlowFiles(final DataInputStream in, final QueueSize queueSize, final Long maxRecordId, final int serializationVersion, final ResourceClaimManager claimManager, final String location) throws IOException {
    final List<FlowFileRecord> flowFiles = new ArrayList<>(queueSize.getObjectCount());
    final List<ResourceClaim> resourceClaims = new ArrayList<>(queueSize.getObjectCount());
    // Tracks the largest record id seen; starts from the header value, if there was one.
    Long maxId = maxRecordId;
    for (int i = 0; i < queueSize.getObjectCount(); i++) {
        try {
            // legacy encoding had an "action" because it used to be coupled with FlowFile Repository code
            if (serializationVersion < 3) {
                // readUnsignedByte() yields the same value as read() for a byte that is present, but
                // throws EOFException at end of stream instead of returning -1. A file truncated
                // exactly on a record boundary is therefore reported as an incomplete swap file
                // (keeping the records already read) rather than as a bad record type.
                final int action = in.readUnsignedByte();
                if (action != 1) {
                    throw new IOException("Swap File is version " + serializationVersion + " but did not contain a 'UPDATE' record type");
                }
            }
            final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder();
            final long recordId = in.readLong();
            if (maxId == null || recordId > maxId) {
                maxId = recordId;
            }
            ffBuilder.id(recordId);
            ffBuilder.entryDate(in.readLong());
            if (serializationVersion > 1) {
                // Lineage information was added in version 2
                if (serializationVersion < 10) {
                    // Versions below 10 carry a list of lineage identifiers, which is no longer used.
                    final int numLineageIdentifiers = in.readInt();
                    for (int lineageIdIdx = 0; lineageIdIdx < numLineageIdentifiers; lineageIdIdx++) {
                        // skip each identifier
                        in.readUTF();
                    }
                }
                final long lineageStartDate = in.readLong();
                // version 9 adds in a 'lineage start index'
                final long lineageStartIndex;
                if (serializationVersion > 8) {
                    lineageStartIndex = in.readLong();
                } else {
                    lineageStartIndex = 0L;
                }
                ffBuilder.lineageStart(lineageStartDate, lineageStartIndex);
                if (serializationVersion > 5) {
                    // The last-queued date is present from version 6 on
                    final long lastQueueDate = in.readLong();
                    // Version 9 adds in a 'queue date index'
                    final long queueDateIndex;
                    if (serializationVersion > 8) {
                        queueDateIndex = in.readLong();
                    } else {
                        queueDateIndex = 0L;
                    }
                    ffBuilder.lastQueued(lastQueueDate, queueDateIndex);
                }
            }
            ffBuilder.size(in.readLong());
            if (serializationVersion < 3) {
                // connection Id (read only to advance the stream)
                readString(in);
            }
            final boolean hasClaim = in.readBoolean();
            ResourceClaim resourceClaim = null;
            if (hasClaim) {
                // Versions below 5 stored the claim id as a long; later versions store it as a String.
                final String claimId;
                if (serializationVersion < 5) {
                    claimId = String.valueOf(in.readLong());
                } else {
                    claimId = in.readUTF();
                }
                final String container = in.readUTF();
                final String section = in.readUTF();
                // Resource offset and length are only present from version 6 on.
                final long resourceOffset;
                final long resourceLength;
                if (serializationVersion < 6) {
                    resourceOffset = 0L;
                    resourceLength = -1L;
                } else {
                    resourceOffset = in.readLong();
                    resourceLength = in.readLong();
                }
                final long claimOffset = in.readLong();
                // The loss-tolerant flag is only present from version 4 on.
                final boolean lossTolerant;
                if (serializationVersion >= 4) {
                    lossTolerant = in.readBoolean();
                } else {
                    lossTolerant = false;
                }
                resourceClaim = claimManager.getResourceClaim(container, section, claimId);
                if (resourceClaim == null) {
                    logger.error("Swap file indicates that FlowFile was referencing Resource Claim at container={}, section={}, claimId={}, " + "but this Resource Claim cannot be found! Will create a temporary Resource Claim, but this may affect the framework's " + "ability to properly clean up this resource", container, section, claimId);
                    resourceClaim = claimManager.newResourceClaim(container, section, claimId, lossTolerant, true);
                }
                final StandardContentClaim claim = new StandardContentClaim(resourceClaim, resourceOffset);
                claim.setLength(resourceLength);
                ffBuilder.contentClaim(claim);
                ffBuilder.contentClaimOffset(claimOffset);
            }
            // Versions below 3 carry an explicit flag; later versions always include the attributes.
            boolean attributesChanged = true;
            if (serializationVersion < 3) {
                attributesChanged = in.readBoolean();
            }
            if (attributesChanged) {
                final int numAttributes = in.readInt();
                for (int j = 0; j < numAttributes; j++) {
                    final String key = readString(in);
                    final String value = readString(in);
                    ffBuilder.addAttribute(key, value);
                }
            }
            // Only a fully-read record is added, so a truncated record never reaches the result lists.
            final FlowFileRecord record = ffBuilder.build();
            if (resourceClaim != null) {
                resourceClaims.add(resourceClaim);
            }
            flowFiles.add(record);
        } catch (final EOFException eof) {
            // Premature end of file: hand back whatever was completely read.
            final SwapSummary swapSummary = new StandardSwapSummary(queueSize, maxId, resourceClaims);
            final SwapContents partialContents = new StandardSwapContents(swapSummary, flowFiles);
            throw new IncompleteSwapFileException(location, partialContents);
        }
    }
    final SwapSummary swapSummary = new StandardSwapSummary(queueSize, maxId, resourceClaims);
    return new StandardSwapContents(swapSummary, flowFiles);
}
Also used : StandardFlowFileRecord(org.apache.nifi.controller.repository.StandardFlowFileRecord) ArrayList(java.util.ArrayList) SwapSummary(org.apache.nifi.controller.repository.SwapSummary) IOException(java.io.IOException) StandardContentClaim(org.apache.nifi.controller.repository.claim.StandardContentClaim) IncompleteSwapFileException(org.apache.nifi.controller.repository.IncompleteSwapFileException) EOFException(java.io.EOFException) SwapContents(org.apache.nifi.controller.repository.SwapContents) ResourceClaim(org.apache.nifi.controller.repository.claim.ResourceClaim) FlowFileRecord(org.apache.nifi.controller.repository.FlowFileRecord) StandardFlowFileRecord(org.apache.nifi.controller.repository.StandardFlowFileRecord)

Aggregations

SwapContents (org.apache.nifi.controller.repository.SwapContents)9 IOException (java.io.IOException)5 FlowFileRecord (org.apache.nifi.controller.repository.FlowFileRecord)5 File (java.io.File)4 ArrayList (java.util.ArrayList)4 QueueSize (org.apache.nifi.controller.queue.QueueSize)4 IncompleteSwapFileException (org.apache.nifi.controller.repository.IncompleteSwapFileException)4 DataInputStream (java.io.DataInputStream)3 EOFException (java.io.EOFException)3 FileInputStream (java.io.FileInputStream)3 FlowFileQueue (org.apache.nifi.controller.queue.FlowFileQueue)3 SwapSummary (org.apache.nifi.controller.repository.SwapSummary)3 Test (org.junit.Test)3 BufferedInputStream (java.io.BufferedInputStream)2 FileNotFoundException (java.io.FileNotFoundException)2 FileOutputStream (java.io.FileOutputStream)2 InputStream (java.io.InputStream)2 HashMap (java.util.HashMap)2 ResourceClaimManager (org.apache.nifi.controller.repository.claim.ResourceClaimManager)2 StandardResourceClaimManager (org.apache.nifi.controller.repository.claim.StandardResourceClaimManager)2