Example usage of org.apache.nifi.controller.repository.FlowFileRecord in the Apache NiFi project: class FlowController, method replayFlowFile.
/**
 * Replays the content of a previously-processed FlowFile back into the flow.
 * A new FlowFile is created from the "previous" (pre-event) attributes and
 * Content Claim recorded on the given Provenance Event, a REPLAY provenance
 * event is registered, and the new FlowFile is enqueued onto the event's
 * original source queue.
 *
 * Preconditions enforced below: the event must not be null, must not be a
 * JOIN (multiple parents cannot be replayed), must carry complete "previous"
 * Content Claim information, and its source queue must still exist.
 *
 * @param event the Provenance Event describing the FlowFile to replay
 * @param user the user requesting the replay; recorded in the REPLAY event's
 *             details (assumed non-null — a null user would NPE at
 *             user.getIdentity(); TODO confirm callers guarantee this)
 * @return the REPLAY Provenance Event that was registered
 * @throws IOException if the FlowFile Repository cannot be updated
 * @throws NullPointerException if event is null
 * @throws IllegalArgumentException if the event type or claim info is unusable
 * @throws IllegalStateException if the source queue or content is gone
 */
public ProvenanceEventRecord replayFlowFile(final ProvenanceEventRecord event, final NiFiUser user) throws IOException {
if (event == null) {
throw new NullPointerException();
}
// Check that the event is a valid type.
final ProvenanceEventType type = event.getEventType();
if (type == ProvenanceEventType.JOIN) {
throw new IllegalArgumentException("Cannot replay events that are created from multiple parents");
}
// Make sure event has the Content Claim info
final Long contentSize = event.getPreviousFileSize();
final String contentClaimId = event.getPreviousContentClaimIdentifier();
final String contentClaimSection = event.getPreviousContentClaimSection();
final String contentClaimContainer = event.getPreviousContentClaimContainer();
if (contentSize == null || contentClaimId == null || contentClaimSection == null || contentClaimContainer == null) {
throw new IllegalArgumentException("Cannot replay data from Provenance Event because the event does not contain the required Content Claim");
}
// Make sure that the source queue exists
if (event.getSourceQueueIdentifier() == null) {
throw new IllegalArgumentException("Cannot replay data from Provenance Event because the event does not specify the Source FlowFile Queue");
}
// Linear scan of every connection under the root group to find the queue
// whose identifier matches the event's source queue.
final List<Connection> connections = getGroup(getRootGroupId()).findAllConnections();
FlowFileQueue queue = null;
for (final Connection connection : connections) {
if (event.getSourceQueueIdentifier().equals(connection.getIdentifier())) {
queue = connection.getFlowFileQueue();
break;
}
}
if (queue == null) {
throw new IllegalStateException("Cannot replay data from Provenance Event because the Source FlowFile Queue with ID " + event.getSourceQueueIdentifier() + " no longer exists");
}
// Create the ContentClaim. To do so, we first need the appropriate Resource Claim. Because we don't know whether or
// not the Resource Claim is still active, we first call ResourceClaimManager.getResourceClaim. If this returns
// null, then we know that the Resource Claim is no longer active and can just create a new one that is not writable.
// It's critical though that we first call getResourceClaim because otherwise, if the Resource Claim is active and we
// create a new one that is not writable, we could end up archiving or destroying the Resource Claim while it's still
// being written to by the Content Repository. This is important only because we are creating a FlowFile with this Resource
// Claim. If, for instance, we are simply creating the claim to request its content, as in #getContentAvailability, etc.
// then this is not necessary.
ResourceClaim resourceClaim = resourceClaimManager.getResourceClaim(event.getPreviousContentClaimContainer(), event.getPreviousContentClaimSection(), event.getPreviousContentClaimIdentifier());
if (resourceClaim == null) {
resourceClaim = resourceClaimManager.newResourceClaim(event.getPreviousContentClaimContainer(), event.getPreviousContentClaimSection(), event.getPreviousContentClaimIdentifier(), false, false);
}
// Increment Claimant Count, since we will now be referencing the Content Claim
resourceClaimManager.incrementClaimantCount(resourceClaim);
final long claimOffset = event.getPreviousContentClaimOffset() == null ? 0L : event.getPreviousContentClaimOffset().longValue();
final StandardContentClaim contentClaim = new StandardContentClaim(resourceClaim, claimOffset);
contentClaim.setLength(event.getPreviousFileSize() == null ? -1L : event.getPreviousFileSize());
// If the content is no longer accessible (e.g. archived or destroyed),
// release the claimant count we just took before failing.
if (!contentRepository.isAccessible(contentClaim)) {
resourceClaimManager.decrementClaimantCount(resourceClaim);
throw new IllegalStateException("Cannot replay data from Provenance Event because the data is no longer available in the Content Repository");
}
final String parentUUID = event.getFlowFileUuid();
final String newFlowFileUUID = UUID.randomUUID().toString();
// We need to create a new FlowFile by populating it with information from the
// Provenance Event. Particularly of note here is that we are setting the FlowFile's
// contentClaimOffset to 0. This is done for backward compatibility reasons. ContentClaim
// used to not have a concept of an offset, and the offset was tied only to the FlowFile. This
// was later refactored, so that the offset was part of the ContentClaim. If we set the offset
// in both places, we'll end up skipping over that many bytes twice instead of once (once to get
// to the beginning of the Content Claim and again to get to the offset within that Content Claim).
// To avoid this, we just always set the offset in the Content Claim itself and set the
// FlowFileRecord's contentClaimOffset to 0.
final FlowFileRecord flowFileRecord = new StandardFlowFileRecord.Builder().addAttributes(event.getPreviousAttributes()).contentClaim(contentClaim).contentClaimOffset(// use 0 because we used the content claim offset in the Content Claim itself
0L).entryDate(System.currentTimeMillis()).id(flowFileRepository.getNextFlowFileSequence()).lineageStart(event.getLineageStartDate(), 0L).size(contentSize.longValue()).addAttribute("flowfile.replay", "true").addAttribute("flowfile.replay.timestamp", String.valueOf(new Date())).addAttribute(CoreAttributes.UUID.key(), newFlowFileUUID).removeAttributes(CoreAttributes.DISCARD_REASON.key(), CoreAttributes.ALTERNATE_IDENTIFIER.key()).build();
// Register a Provenance Event to indicate that we replayed the data.
// The new FlowFile is a child of the original (parentUUID), and the claim
// recorded here is the event's *current* claim, not the "previous" one.
final ProvenanceEventRecord replayEvent = new StandardProvenanceEventRecord.Builder().setEventType(ProvenanceEventType.REPLAY).addChildUuid(newFlowFileUUID).addParentUuid(parentUUID).setFlowFileUUID(parentUUID).setAttributes(Collections.emptyMap(), flowFileRecord.getAttributes()).setCurrentContentClaim(event.getContentClaimContainer(), event.getContentClaimSection(), event.getContentClaimIdentifier(), event.getContentClaimOffset(), event.getFileSize()).setDetails("Replay requested by " + user.getIdentity()).setEventTime(System.currentTimeMillis()).setFlowFileEntryDate(System.currentTimeMillis()).setLineageStartDate(event.getLineageStartDate()).setComponentType(event.getComponentType()).setComponentId(event.getComponentId()).build();
provenanceRepository.registerEvent(replayEvent);
// Update the FlowFile Repository to indicate that we have added the FlowFile to the flow
final StandardRepositoryRecord record = new StandardRepositoryRecord(queue);
record.setWorking(flowFileRecord);
record.setDestination(queue);
flowFileRepository.updateRepository(Collections.singleton(record));
// Enqueue the data
queue.put(flowFileRecord);
return replayEvent;
}
Example usage of org.apache.nifi.controller.repository.FlowFileRecord in the Apache NiFi project: class TestStandardFlowFileQueue, method testExpire.
/**
 * Verifies FlowFile expiration: with a 1 ms expiration, all 100 queued
 * FlowFiles should be reported as expired by poll() (which returns null),
 * leaving both the active and unacknowledged queue sizes at zero.
 */
@Test
public void testExpire() {
    queue.setFlowFileExpiration("1 ms");
    for (int i = 0; i < 100; i++) {
        queue.put(new TestFlowFile());
    }
    // just make sure that the flowfiles have time to expire.
    try {
        Thread.sleep(100L);
    } catch (final InterruptedException ie) {
        // Never swallow an interrupt: restore the thread's interrupt status
        // so the test runner (or any caller) can observe the interruption.
        Thread.currentThread().interrupt();
    }
    final Set<FlowFileRecord> expiredRecords = new HashSet<>(100);
    // poll() must surface the expired records rather than returning them.
    final FlowFileRecord pulled = queue.poll(expiredRecords);
    assertNull(pulled);
    assertEquals(100, expiredRecords.size());
    // Expired FlowFiles must no longer count toward the active queue size...
    final QueueSize activeSize = queue.getActiveQueueSize();
    assertEquals(0, activeSize.getObjectCount());
    assertEquals(0L, activeSize.getByteCount());
    // ...nor toward the unacknowledged size (they were never handed out).
    final QueueSize unackSize = queue.getUnacknowledgedQueueSize();
    assertEquals(0, unackSize.getObjectCount());
    assertEquals(0L, unackSize.getByteCount());
}
Example usage of org.apache.nifi.controller.repository.FlowFileRecord in the Apache NiFi project: class TestStandardFlowFileQueue, method testBackPressureAfterPollFilter.
/**
 * Back pressure must be relieved by a filtered poll that expires everything:
 * once all ten FlowFiles pass their 10 ms expiration, a poll whose filter
 * rejects every FlowFile returns nothing, reports all ten as expired, and
 * leaves the queue empty and no longer full.
 */
@Test
public void testBackPressureAfterPollFilter() throws InterruptedException {
    queue.setBackPressureObjectThreshold(10);
    queue.setFlowFileExpiration("10 millis");
    // One below the threshold: the queue must not yet report full.
    for (int count = 1; count <= 9; count++) {
        queue.put(new TestFlowFile());
        assertFalse(queue.isFull());
    }
    // The tenth FlowFile hits the back-pressure threshold.
    queue.put(new TestFlowFile());
    assertTrue(queue.isFull());
    // Give every queued FlowFile time to pass its expiration.
    Thread.sleep(100L);
    // A filter that accepts nothing — the poll should only surface expirations.
    final FlowFileFilter rejectEverything = flowFile -> FlowFileFilterResult.REJECT_AND_CONTINUE;
    final Set<FlowFileRecord> expired = new HashSet<>();
    final List<FlowFileRecord> polledRecords = queue.poll(rejectEverything, expired);
    assertTrue(polledRecords.isEmpty());
    assertEquals(10, expired.size());
    assertFalse(queue.isFull());
    assertTrue(queue.isEmpty());
    assertTrue(queue.isActiveQueueEmpty());
}
Example usage of org.apache.nifi.controller.repository.FlowFileRecord in the Apache NiFi project: class TestStandardFlowFileQueue, method testQueueCountsUpdatedWhenIncompleteSwapFile.
/**
 * Verifies that the queue's size counters are corrected when a swap file is
 * incomplete: the swap file's header claims 10,000 FlowFiles but only 9,999
 * can be read back, so the queue must reconcile its counts on swap-in rather
 * than drifting by the missing record. TestFlowFile instances appear to have
 * a size of 1 byte each, since object and byte counts track together — TODO
 * confirm against the TestFlowFile definition.
 */
@Test
public void testQueueCountsUpdatedWhenIncompleteSwapFile() {
// 20,000 puts: enough to force one swap-out (10K swapped, per the
// assertions below) while 10K remain active.
for (int i = 1; i <= 20000; i++) {
queue.put(new TestFlowFile());
}
assertEquals(20000, queue.size().getObjectCount());
assertEquals(20000, queue.size().getByteCount());
assertEquals(1, swapManager.swappedOut.size());
// when we swap in, cause an IncompleteSwapFileException to be
// thrown and contain only 9,999 of the 10,000 FlowFiles
swapManager.enableIncompleteSwapFileException(9999);
final Set<FlowFileRecord> expired = Collections.emptySet();
FlowFileRecord flowFile;
// Drain the 10,000 active (non-swapped) FlowFiles first.
for (int i = 0; i < 10000; i++) {
flowFile = queue.poll(expired);
assertNotNull(flowFile);
queue.acknowledge(Collections.singleton(flowFile));
}
// 10,000 FlowFiles on queue - all swapped out
assertEquals(10000, queue.size().getObjectCount());
assertEquals(10000, queue.size().getByteCount());
assertEquals(1, swapManager.swappedOut.size());
assertEquals(0, swapManager.swapInCalledCount);
// Trigger swap in. This will remove 1 FlowFile from queue, leaving 9,999 but
// on swap in, we will get only 9,999 FlowFiles put onto the queue, and the queue size will
// be decremented by 10,000 (because the Swap File's header tells us that there are 10K
// FlowFiles, even though only 9999 are in the swap file)
flowFile = queue.poll(expired);
assertNotNull(flowFile);
queue.acknowledge(Collections.singleton(flowFile));
// size should be 9,998 because we lost 1 on Swap In, and then we pulled one above.
assertEquals(9998, queue.size().getObjectCount());
assertEquals(9998, queue.size().getByteCount());
assertEquals(0, swapManager.swappedOut.size());
assertEquals(1, swapManager.swapInCalledCount);
// Drain the remainder, checking that both counters decrement in lockstep
// after every single poll/acknowledge cycle.
for (int i = 0; i < 9998; i++) {
flowFile = queue.poll(expired);
assertNotNull("Null FlowFile when i = " + i, flowFile);
queue.acknowledge(Collections.singleton(flowFile));
final QueueSize queueSize = queue.size();
assertEquals(9998 - i - 1, queueSize.getObjectCount());
assertEquals(9998 - i - 1, queueSize.getByteCount());
}
// Fully drained: the queue must report empty and yield nothing further.
final QueueSize queueSize = queue.size();
assertEquals(0, queueSize.getObjectCount());
assertEquals(0L, queueSize.getByteCount());
flowFile = queue.poll(expired);
assertNull(flowFile);
}
Example usage of org.apache.nifi.controller.repository.FlowFileRecord in the Apache NiFi project: class TestStandardFlowFileQueue, method testBackPressureAfterPollSingle.
/**
 * Back pressure must be relieved by a single-record poll that expires
 * everything: once all ten FlowFiles pass their 10 ms expiration, poll()
 * returns null, reports all ten as expired, and leaves the queue empty and
 * no longer full.
 */
@Test
public void testBackPressureAfterPollSingle() throws InterruptedException {
    queue.setBackPressureObjectThreshold(10);
    queue.setFlowFileExpiration("10 millis");
    // One below the threshold: the queue must not yet report full.
    for (int count = 1; count <= 9; count++) {
        queue.put(new TestFlowFile());
        assertFalse(queue.isFull());
    }
    // The tenth FlowFile hits the back-pressure threshold.
    queue.put(new TestFlowFile());
    assertTrue(queue.isFull());
    // Give every queued FlowFile time to pass its expiration.
    Thread.sleep(100L);
    final Set<FlowFileRecord> expired = new HashSet<>();
    final FlowFileRecord result = queue.poll(expired);
    assertNull(result);
    assertEquals(10, expired.size());
    assertFalse(queue.isFull());
    assertTrue(queue.isEmpty());
    assertTrue(queue.isActiveQueueEmpty());
}
Aggregations