Example 26 with ContentClaim

use of org.apache.nifi.controller.repository.claim.ContentClaim in project nifi by apache.

the class VolatileContentRepository method purge.

@Override
public void purge() {
    for (final ContentClaim claim : claimMap.keySet()) {
        claimManager.decrementClaimantCount(resolveClaim(claim).getResourceClaim());
        final ContentClaim backup = getBackupClaim(claim);
        if (backup != null) {
            getBackupRepository().remove(backup);
        }
    }
}
Also used : ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim) StandardContentClaim(org.apache.nifi.controller.repository.claim.StandardContentClaim)
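
The pattern behind purge() is reference counting on resource claims: each ContentClaim held by the repository takes a reference on its ResourceClaim, and purge gives every outstanding reference back before the repository forgets its claims. A minimal, self-contained sketch of that bookkeeping, assuming a hypothetical ClaimCounter class with String ids standing in for ResourceClaim (this is an illustration of the pattern only, not NiFi API):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

public class ClaimCounter {

    // one counter per resource identifier, created lazily
    private final ConcurrentMap<String, AtomicInteger> counts = new ConcurrentHashMap<>();

    public int increment(final String resourceId) {
        return counts.computeIfAbsent(resourceId, id -> new AtomicInteger()).incrementAndGet();
    }

    public int decrement(final String resourceId) {
        final AtomicInteger count = counts.get(resourceId);
        return count == null ? 0 : count.decrementAndGet();
    }

    // purge-style sweep: give back one reference for every claim still tracked, then forget them,
    // mirroring how VolatileContentRepository.purge() walks claimMap and decrements each claimant count
    public void purge() {
        for (final AtomicInteger count : counts.values()) {
            count.decrementAndGet();
        }
        counts.clear();
    }
}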

Example 27 with ContentClaim

use of org.apache.nifi.controller.repository.claim.ContentClaim in project nifi by apache.

the class WriteAheadFlowFileRepository method loadFlowFiles.

@Override
public long loadFlowFiles(final QueueProvider queueProvider, final long minimumSequenceNumber) throws IOException {
    final Map<String, FlowFileQueue> queueMap = new HashMap<>();
    for (final FlowFileQueue queue : queueProvider.getAllQueues()) {
        queueMap.put(queue.getIdentifier(), queue);
    }
    serdeFactory.setQueueMap(queueMap);
    // Since we used to use the MinimalLockingWriteAheadLog, we need to ensure that if the FlowFile
    // Repo was written using that impl, we properly recover from that implementation.
    Collection<RepositoryRecord> recordList = wal.recoverRecords();
    // If no records were recovered, the repository may have been written by the other write-ahead log implementation,
    // so check that implementation for records we can migrate; after migrating, we will update the new WAL (with fsync())
    // and delete the old repository so that we won't recover it again.
    if (recordList == null || recordList.isEmpty()) {
        if (walImplementation.equals(SEQUENTIAL_ACCESS_WAL)) {
            // Configured to use Sequential Access WAL but it has no records. Check if there are records in
            // a MinimalLockingWriteAheadLog that we can recover.
            recordList = migrateFromMinimalLockingLog(wal).orElse(new ArrayList<>());
        } else {
            // Configured to use Minimal Locking WAL but it has no records. Check if there are records in
            // a SequentialAccess Log that we can recover.
            recordList = migrateFromSequentialAccessLog(wal).orElse(new ArrayList<>());
        }
    }
    serdeFactory.setQueueMap(null);
    for (final RepositoryRecord record : recordList) {
        final ContentClaim claim = record.getCurrentClaim();
        if (claim != null) {
            claimManager.incrementClaimantCount(claim.getResourceClaim());
        }
    }
    // Determine the next sequence number for FlowFiles
    int numFlowFilesMissingQueue = 0;
    long maxId = minimumSequenceNumber;
    for (final RepositoryRecord record : recordList) {
        final long recordId = serdeFactory.getRecordIdentifier(record);
        if (recordId > maxId) {
            maxId = recordId;
        }
        final FlowFileRecord flowFile = record.getCurrent();
        final FlowFileQueue queue = record.getOriginalQueue();
        if (queue == null) {
            numFlowFilesMissingQueue++;
        } else {
            queue.put(flowFile);
        }
    }
    // Set the AtomicLong to 1 more than the max ID so that calls to #getNextFlowFileSequence() will
    // return the appropriate number.
    flowFileSequenceGenerator.set(maxId + 1);
    logger.info("Successfully restored {} FlowFiles", recordList.size() - numFlowFilesMissingQueue);
    if (numFlowFilesMissingQueue > 0) {
        logger.warn("On recovery, found {} FlowFiles whose queue no longer exists. These FlowFiles will be dropped.", numFlowFilesMissingQueue);
    }
    final Runnable checkpointRunnable = new Runnable() {

        @Override
        public void run() {
            try {
                logger.info("Initiating checkpoint of FlowFile Repository");
                final long start = System.nanoTime();
                final int numRecordsCheckpointed = checkpoint();
                final long end = System.nanoTime();
                final long millis = TimeUnit.MILLISECONDS.convert(end - start, TimeUnit.NANOSECONDS);
                logger.info("Successfully checkpointed FlowFile Repository with {} records in {} milliseconds", numRecordsCheckpointed, millis);
            } catch (final Throwable t) {
                logger.error("Unable to checkpoint FlowFile Repository due to " + t.toString(), t);
            }
        }
    };
    checkpointFuture = checkpointExecutor.scheduleWithFixedDelay(checkpointRunnable, checkpointDelayMillis, checkpointDelayMillis, TimeUnit.MILLISECONDS);
    return maxId;
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) FlowFileQueue(org.apache.nifi.controller.queue.FlowFileQueue) ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim)
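
One detail worth calling out in loadFlowFiles is how the FlowFile sequence is resumed: the generator is set to one past the largest recovered record id, so newly created FlowFiles can never collide with restored ones. A minimal sketch of that idiom, assuming a hypothetical SequenceRecovery class with plain record ids in place of RepositoryRecord:

import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

public class SequenceRecovery {

    private final AtomicLong sequence = new AtomicLong(0L);

    // records are represented by their ids only; in the real repository each id comes
    // from serdeFactory.getRecordIdentifier(record)
    public long recover(final List<Long> recoveredIds, final long minimumSequenceNumber) {
        long maxId = minimumSequenceNumber;
        for (final long id : recoveredIds) {
            if (id > maxId) {
                maxId = id;
            }
        }
        // set the generator to one past the max so the next id handed out is unused
        sequence.set(maxId + 1);
        return maxId;
    }

    public long nextId() {
        return sequence.getAndIncrement();
    }
}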

Example 28 with ContentClaim

use of org.apache.nifi.controller.repository.claim.ContentClaim in project nifi by apache.

the class WriteAheadFlowFileRepository method updateRepository.

private void updateRepository(final Collection<RepositoryRecord> records, final boolean sync) throws IOException {
    for (final RepositoryRecord record : records) {
        if (record.getType() != RepositoryRecordType.DELETE && record.getType() != RepositoryRecordType.CONTENTMISSING && record.getType() != RepositoryRecordType.CLEANUP_TRANSIENT_CLAIMS && record.getDestination() == null) {
            throw new IllegalArgumentException("Record " + record + " has no destination and Type is " + record.getType());
        }
    }
    // Partition records by whether or not their type is 'CLEANUP_TRANSIENT_CLAIMS'. We do this because we don't want to send
    // these types of records to the Write-Ahead Log.
    final Map<Boolean, List<RepositoryRecord>> partitionedRecords = records.stream().collect(Collectors.partitioningBy(record -> record.getType() == RepositoryRecordType.CLEANUP_TRANSIENT_CLAIMS));
    List<RepositoryRecord> recordsForWal = partitionedRecords.get(Boolean.FALSE);
    if (recordsForWal == null) {
        recordsForWal = Collections.emptyList();
    }
    // update the repository.
    final int partitionIndex = wal.update(recordsForWal, sync);
    // The below code is not entirely thread-safe, but we are OK with that because the results aren't really harmful.
    // Specifically, if two different threads call updateRepository with DELETE records for the same Content Claim,
    // it's quite possible for claimant count to be 0 below, which results in two different threads adding the Content
    // Claim to the 'claimsAwaitingDestruction' map. As a result, we can call #markDestructable with the same ContentClaim
    // multiple times, and the #markDestructable method is not necessarily idempotent.
    // However, the result of this is that the FileSystem Repository may end up trying to remove the content multiple times.
    // This does not, however, cause problems, as the ContentRepository should handle this.
    // This does indicate that some refactoring should probably be performed, though, as this is not a very clean interface.
    final Set<ResourceClaim> claimsToAdd = new HashSet<>();
    for (final RepositoryRecord record : records) {
        if (record.getType() == RepositoryRecordType.DELETE) {
            // For any DELETE record that we have, if claim is destructible, mark it so
            if (record.getCurrentClaim() != null && isDestructable(record.getCurrentClaim())) {
                claimsToAdd.add(record.getCurrentClaim().getResourceClaim());
            }
            // If the original claim is different than the current claim and the original claim is destructible, mark it so
            if (record.getOriginalClaim() != null && !record.getOriginalClaim().equals(record.getCurrentClaim()) && isDestructable(record.getOriginalClaim())) {
                claimsToAdd.add(record.getOriginalClaim().getResourceClaim());
            }
        } else if (record.getType() == RepositoryRecordType.UPDATE) {
            // if we have an update, and the original is no longer needed, mark original as destructible
            if (record.getOriginalClaim() != null && record.getCurrentClaim() != record.getOriginalClaim() && isDestructable(record.getOriginalClaim())) {
                claimsToAdd.add(record.getOriginalClaim().getResourceClaim());
            }
        }
        final List<ContentClaim> transientClaims = record.getTransientClaims();
        if (transientClaims != null) {
            for (final ContentClaim transientClaim : transientClaims) {
                if (isDestructable(transientClaim)) {
                    claimsToAdd.add(transientClaim.getResourceClaim());
                }
            }
        }
    }
    if (!claimsToAdd.isEmpty()) {
        // Get / register the BlockingQueue of ResourceClaims awaiting destruction for the given partition index
        final Integer partitionKey = Integer.valueOf(partitionIndex);
        BlockingQueue<ResourceClaim> claimQueue = claimsAwaitingDestruction.get(partitionKey);
        if (claimQueue == null) {
            claimQueue = new LinkedBlockingQueue<>();
            final BlockingQueue<ResourceClaim> existingClaimQueue = claimsAwaitingDestruction.putIfAbsent(partitionKey, claimQueue);
            if (existingClaimQueue != null) {
                claimQueue = existingClaimQueue;
            }
        }
        claimQueue.addAll(claimsToAdd);
    }
}
Also used : ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim) WriteAheadRepository(org.wali.WriteAheadRepository) ScheduledFuture(java.util.concurrent.ScheduledFuture) SortedSet(java.util.SortedSet) SequentialAccessWriteAheadLog(org.apache.nifi.wali.SequentialAccessWriteAheadLog) LoggerFactory(org.slf4j.LoggerFactory) SyncListener(org.wali.SyncListener) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashSet(java.util.HashSet) MinimalLockingWriteAheadLog(org.wali.MinimalLockingWriteAheadLog) ResourceClaim(org.apache.nifi.controller.repository.claim.ResourceClaim) Map(java.util.Map) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Path(java.nio.file.Path) Logger(org.slf4j.Logger) Files(java.nio.file.Files) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) IOException(java.io.IOException) BlockingQueue(java.util.concurrent.BlockingQueue) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) File(java.io.File) Executors(java.util.concurrent.Executors) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) ResourceClaimManager(org.apache.nifi.controller.repository.claim.ResourceClaimManager) List(java.util.List) FormatUtils(org.apache.nifi.util.FormatUtils) NiFiProperties(org.apache.nifi.util.NiFiProperties) Optional(java.util.Optional) Collections(java.util.Collections) FlowFileQueue(org.apache.nifi.controller.queue.FlowFileQueue)
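
The get / putIfAbsent / fallback sequence used above to register a per-partition queue in claimsAwaitingDestruction can be written more compactly with ConcurrentMap.computeIfAbsent, which performs the same atomic registration. A minimal sketch of that equivalent idiom, with a hypothetical DestructionQueues class and String ids standing in for ResourceClaim:

import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.LinkedBlockingQueue;

public class DestructionQueues {

    // one queue of claims awaiting destruction per write-ahead-log partition
    private final ConcurrentMap<Integer, BlockingQueue<String>> claimsAwaitingDestruction = new ConcurrentHashMap<>();

    public void addAll(final int partitionIndex, final List<String> claimsToAdd) {
        if (claimsToAdd.isEmpty()) {
            return;
        }
        // computeIfAbsent atomically registers a new queue if none exists for this partition,
        // replacing the explicit get / putIfAbsent / fallback sequence in the example above
        claimsAwaitingDestruction
            .computeIfAbsent(partitionIndex, key -> new LinkedBlockingQueue<>())
            .addAll(claimsToAdd);
    }
}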

Example 29 with ContentClaim

use of org.apache.nifi.controller.repository.claim.ContentClaim in project nifi by apache.

the class SchemaSwapSerializer method serializeFlowFiles.

@Override
public void serializeFlowFiles(final List<FlowFileRecord> toSwap, final FlowFileQueue queue, final String swapLocation, final OutputStream out) throws IOException {
    schema.writeTo(out);
    long contentSize = 0L;
    long maxFlowFileId = -1L;
    final List<ResourceClaim> resourceClaims = new ArrayList<>();
    for (final FlowFileRecord flowFile : toSwap) {
        contentSize += flowFile.getSize();
        if (flowFile.getId() > maxFlowFileId) {
            maxFlowFileId = flowFile.getId();
        }
        final ContentClaim contentClaim = flowFile.getContentClaim();
        if (contentClaim != null) {
            resourceClaims.add(contentClaim.getResourceClaim());
        }
    }
    final QueueSize queueSize = new QueueSize(toSwap.size(), contentSize);
    final SwapSummary swapSummary = new StandardSwapSummary(queueSize, maxFlowFileId, resourceClaims);
    final Record summaryRecord = new SwapSummaryFieldMap(swapSummary, queue.getIdentifier(), SwapSchema.SWAP_SUMMARY_SCHEMA_V1);
    final List<Record> flowFileRecords = toSwap.stream().map(flowFile -> new FlowFileRecordFieldMap(flowFile, flowFileSchema)).collect(Collectors.toList());
    // Create a simple record to hold the summary and the flowfile contents
    final RecordField summaryField = new SimpleRecordField(SwapSchema.SWAP_SUMMARY, FieldType.COMPLEX, Repetition.EXACTLY_ONE);
    final RecordField contentsField = new ComplexRecordField(SwapSchema.FLOWFILE_CONTENTS, Repetition.ZERO_OR_MORE, FlowFileSchema.FLOWFILE_SCHEMA_V2.getFields());
    final List<RecordField> fields = new ArrayList<>(2);
    fields.add(summaryField);
    fields.add(contentsField);
    final Map<RecordField, Object> swapFileMap = new LinkedHashMap<>();
    swapFileMap.put(summaryField, summaryRecord);
    swapFileMap.put(contentsField, flowFileRecords);
    final Record swapFileRecord = new FieldMapRecord(swapFileMap, new RecordSchema(fields));
    final SchemaRecordWriter writer = new SchemaRecordWriter();
    writer.writeRecord(swapFileRecord, out);
    out.flush();
}
Also used : OutputStream(java.io.OutputStream) ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim) FlowFileSchema(org.apache.nifi.controller.repository.schema.FlowFileSchema) Record(org.apache.nifi.repository.schema.Record) RecordField(org.apache.nifi.repository.schema.RecordField) ComplexRecordField(org.apache.nifi.repository.schema.ComplexRecordField) FlowFileRecord(org.apache.nifi.controller.repository.FlowFileRecord) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) QueueSize(org.apache.nifi.controller.queue.QueueSize) ArrayList(java.util.ArrayList) FieldMapRecord(org.apache.nifi.repository.schema.FieldMapRecord) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) LinkedHashMap(java.util.LinkedHashMap) SwapSummary(org.apache.nifi.controller.repository.SwapSummary) FieldType(org.apache.nifi.repository.schema.FieldType) List(java.util.List) ResourceClaim(org.apache.nifi.controller.repository.claim.ResourceClaim) Map(java.util.Map) FlowFileRecordFieldMap(org.apache.nifi.controller.repository.schema.FlowFileRecordFieldMap) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) Repetition(org.apache.nifi.repository.schema.Repetition) SchemaRecordWriter(org.apache.nifi.repository.schema.SchemaRecordWriter) FlowFileQueue(org.apache.nifi.controller.queue.FlowFileQueue)
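
The first loop in serializeFlowFiles folds three aggregates out of a single pass over the records being swapped: total content size, maximum FlowFile id, and the list of resource claims that feed the SwapSummary. A stand-alone sketch of that aggregation, assuming hypothetical SwapEntry and Summary types rather than FlowFileRecord and StandardSwapSummary:

import java.util.ArrayList;
import java.util.List;

public class SwapAggregation {

    // hypothetical stand-in for FlowFileRecord: an id, a size, and an optional claim id
    public record SwapEntry(long id, long size, String resourceClaimId) { }

    // hypothetical stand-in for StandardSwapSummary
    public record Summary(int count, long contentSize, long maxId, List<String> resourceClaimIds) { }

    public static Summary summarize(final List<SwapEntry> toSwap) {
        long contentSize = 0L;
        long maxId = -1L;
        final List<String> claimIds = new ArrayList<>();
        for (final SwapEntry entry : toSwap) {
            contentSize += entry.size();
            maxId = Math.max(maxId, entry.id());
            if (entry.resourceClaimId() != null) {
                claimIds.add(entry.resourceClaimId());
            }
        }
        return new Summary(toSwap.size(), contentSize, maxId, claimIds);
    }
}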

Example 30 with ContentClaim

use of org.apache.nifi.controller.repository.claim.ContentClaim in project nifi by apache.

the class SimpleSwapSerializer method serializeFlowFiles.

@Override
public void serializeFlowFiles(final List<FlowFileRecord> toSwap, final FlowFileQueue queue, final String swapLocation, final OutputStream destination) throws IOException {
    if (toSwap == null || toSwap.isEmpty()) {
        return;
    }
    long contentSize = 0L;
    for (final FlowFileRecord record : toSwap) {
        contentSize += record.getSize();
    }
    // persist record to disk via the swap file
    final DataOutputStream out = new DataOutputStream(destination);
    try {
        out.writeInt(SWAP_ENCODING_VERSION);
        out.writeUTF(queue.getIdentifier());
        out.writeInt(toSwap.size());
        out.writeLong(contentSize);
        // get the max record id and write that out so that we know it quickly for restoration
        long maxRecordId = 0L;
        for (final FlowFileRecord flowFile : toSwap) {
            if (flowFile.getId() > maxRecordId) {
                maxRecordId = flowFile.getId();
            }
        }
        out.writeLong(maxRecordId);
        for (final FlowFileRecord flowFile : toSwap) {
            out.writeLong(flowFile.getId());
            out.writeLong(flowFile.getEntryDate());
            out.writeLong(flowFile.getLineageStartDate());
            out.writeLong(flowFile.getLineageStartIndex());
            out.writeLong(flowFile.getLastQueueDate());
            out.writeLong(flowFile.getQueueDateIndex());
            out.writeLong(flowFile.getSize());
            final ContentClaim claim = flowFile.getContentClaim();
            if (claim == null) {
                out.writeBoolean(false);
            } else {
                out.writeBoolean(true);
                final ResourceClaim resourceClaim = claim.getResourceClaim();
                out.writeUTF(resourceClaim.getId());
                out.writeUTF(resourceClaim.getContainer());
                out.writeUTF(resourceClaim.getSection());
                out.writeLong(claim.getOffset());
                out.writeLong(claim.getLength());
                out.writeLong(flowFile.getContentClaimOffset());
                out.writeBoolean(resourceClaim.isLossTolerant());
            }
            final Map<String, String> attributes = flowFile.getAttributes();
            out.writeInt(attributes.size());
            for (final Map.Entry<String, String> entry : attributes.entrySet()) {
                writeString(entry.getKey(), out);
                writeString(entry.getValue(), out);
            }
        }
    } finally {
        out.flush();
    }
    logger.info("Successfully swapped out {} FlowFiles from {} to Swap File {}", toSwap.size(), queue, swapLocation);
}
Also used : ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim) DataOutputStream(java.io.DataOutputStream) ResourceClaim(org.apache.nifi.controller.repository.claim.ResourceClaim) FlowFileRecord(org.apache.nifi.controller.repository.FlowFileRecord) Map(java.util.Map)
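
Because this format is nothing more than a fixed sequence of DataOutputStream writes, any reader must consume the fields in exactly the same order with the matching DataInputStream calls. A minimal sketch that reads back only the header written above (encoding version, queue identifier, record count, total content size, max record id); the SwapHeaderReader and SwapHeader names are hypothetical, this is not the actual deserializer:

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;

public class SwapHeaderReader {

    // hypothetical holder for the header fields written by serializeFlowFiles
    public record SwapHeader(int encodingVersion, String queueId, int recordCount, long contentSize, long maxRecordId) { }

    public static SwapHeader readHeader(final InputStream in) throws IOException {
        final DataInputStream dis = new DataInputStream(in);
        final int encodingVersion = dis.readInt();   // matches out.writeInt(SWAP_ENCODING_VERSION)
        final String queueId = dis.readUTF();        // matches out.writeUTF(queue.getIdentifier())
        final int recordCount = dis.readInt();       // matches out.writeInt(toSwap.size())
        final long contentSize = dis.readLong();     // matches out.writeLong(contentSize)
        final long maxRecordId = dis.readLong();     // matches out.writeLong(maxRecordId)
        return new SwapHeader(encodingVersion, queueId, recordCount, contentSize, maxRecordId);
    }
}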

Aggregations

ContentClaim (org.apache.nifi.controller.repository.claim.ContentClaim): 79 usages
StandardContentClaim (org.apache.nifi.controller.repository.claim.StandardContentClaim): 51 usages
Test (org.junit.Test): 40 usages
OutputStream (java.io.OutputStream): 39 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 30 usages
IOException (java.io.IOException): 26 usages
InputStream (java.io.InputStream): 22 usages
ResourceClaim (org.apache.nifi.controller.repository.claim.ResourceClaim): 22 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 20 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 19 usages
Path (java.nio.file.Path): 18 usages
ArrayList (java.util.ArrayList): 16 usages
HashMap (java.util.HashMap): 16 usages
FlowFileQueue (org.apache.nifi.controller.queue.FlowFileQueue): 14 usages
Map (java.util.Map): 13 usages
FileOutputStream (java.io.FileOutputStream): 12 usages
FilterOutputStream (java.io.FilterOutputStream): 12 usages
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 12 usages
FlowFileAccessException (org.apache.nifi.processor.exception.FlowFileAccessException): 12 usages
ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord): 12 usages