use of org.apache.nifi.controller.repository.FlowFileRecord in project nifi by apache.
the class TestStandardFlowFileQueue method testBackPressure.
/**
 * Checks the queue's back-pressure reporting: the queue is not full below the configured object
 * threshold, becomes full when the threshold is reached, stays full while a polled FlowFile is
 * unacknowledged, and stops being full once that FlowFile is acknowledged.
 */
@Test
public void testBackPressure() {
    final int objectThreshold = 10;
    queue.setBackPressureObjectThreshold(objectThreshold);

    // Nothing has been enqueued yet.
    assertTrue(queue.isEmpty());
    assertTrue(queue.isActiveQueueEmpty());
    assertFalse(queue.isFull());

    // Enqueue up to one short of the threshold; the queue must never report full.
    int enqueued = 0;
    while (enqueued < objectThreshold - 1) {
        queue.put(new TestFlowFile());
        enqueued++;

        assertFalse(queue.isFull());
        assertFalse(queue.isEmpty());
        assertFalse(queue.isActiveQueueEmpty());
    }

    // The next FlowFile reaches the threshold.
    queue.put(new TestFlowFile());
    assertTrue(queue.isFull());
    assertFalse(queue.isEmpty());
    assertFalse(queue.isActiveQueueEmpty());

    final Set<FlowFileRecord> expired = new HashSet<>();
    final FlowFileRecord pulled = queue.poll(expired);
    assertNotNull(pulled);
    assertTrue(expired.isEmpty());
    assertFalse(queue.isEmpty());
    assertFalse(queue.isActiveQueueEmpty());

    // Polling alone does not relieve back pressure: the FlowFile is still unacknowledged.
    assertTrue(queue.isFull());

    // Acknowledging the polled FlowFile drops the queue below the threshold again.
    queue.acknowledge(pulled);
    assertFalse(queue.isFull());
    assertFalse(queue.isEmpty());
    assertFalse(queue.isActiveQueueEmpty());
}
use of org.apache.nifi.controller.repository.FlowFileRecord in project nifi by apache.
the class TestStandardFlowFileQueue method testSwapIn.
/**
 * Checks that swapped-out FlowFiles are only swapped back in once the active queue has been
 * drained, and that a single swap-in brings the whole swap file back.
 */
@Test
public void testSwapIn() {
    // Enqueue 20,000 FlowFiles; exactly one swap file is expected as a result.
    int toEnqueue = 20000;
    while (toEnqueue > 0) {
        queue.put(new TestFlowFile());
        toEnqueue--;
    }
    assertEquals(1, swapManager.swappedOut.size());

    // One more FlowFile must not produce another swap file.
    queue.put(new TestFlowFile());
    assertEquals(1, swapManager.swappedOut.size());

    final Set<FlowFileRecord> expired = new HashSet<>();
    for (int polledCount = 0; polledCount < 9999; polledCount++) {
        final FlowFileRecord polled = queue.poll(expired);
        assertNotNull(polled);

        // The polled FlowFile is counted as unacknowledged until it is acknowledged.
        assertEquals(1, queue.getUnacknowledgedQueueSize().getObjectCount());
        assertEquals(1, queue.getUnacknowledgedQueueSize().getByteCount());

        queue.acknowledge(Collections.singleton(polled));
        assertEquals(0, queue.getUnacknowledgedQueueSize().getObjectCount());
        assertEquals(0, queue.getUnacknowledgedQueueSize().getByteCount());
    }

    // One FlowFile is left in the active queue, so nothing has been swapped in yet.
    assertEquals(0, swapManager.swapInCalledCount);
    assertEquals(1, queue.getActiveQueueSize().getObjectCount());

    // Taking the last active FlowFile still does not require a swap-in.
    assertNotNull(queue.poll(expired));
    assertEquals(0, swapManager.swapInCalledCount);
    assertEquals(0, queue.getActiveQueueSize().getObjectCount());
    assertEquals(1, swapManager.swapOutCalledCount);

    // The active queue is empty now: this poll should swap in 10,000 records and hand out one.
    assertNotNull(queue.poll(expired));
    assertEquals(1, swapManager.swapInCalledCount);
    assertEquals(9999, queue.getActiveQueueSize().getObjectCount());
    assertTrue(swapManager.swappedOut.isEmpty());

    queue.poll(expired);
}
use of org.apache.nifi.controller.repository.FlowFileRecord in project nifi by apache.
the class TestSchemaSwapSerializerDeserializer method testRoundTripSerializeDeserializeSummary.
/**
 * Serializes 10,000 FlowFiles to a swap file with the schema-based serializer, reads back only
 * the swap summary, and checks its queue size, maximum FlowFile id and resource claims.
 */
@Test
public void testRoundTripSerializeDeserializeSummary() throws IOException {
    final int flowFileCount = 10000;

    final ResourceClaimManager claimManager = new StandardResourceClaimManager();
    final ResourceClaim initialResourceClaim = claimManager.newResourceClaim("container", "section", "id", true, false);
    claimManager.incrementClaimantCount(initialResourceClaim);

    // The first two FlowFiles share this content claim; all others are built from the claim manager.
    final ContentClaim sharedClaim = MockFlowFile.createContentClaim("id", claimManager);

    final List<FlowFileRecord> flowFiles = new ArrayList<>(flowFileCount);
    final Map<String, String> attributes = new HashMap<>();
    long expectedByteCount = 0L;
    for (int index = 0; index < flowFileCount; index++) {
        attributes.put("i", String.valueOf(index));

        final FlowFileRecord record;
        if (index < 2) {
            record = new MockFlowFile(attributes, index, sharedClaim);
        } else {
            record = new MockFlowFile(attributes, index, claimManager);
        }
        flowFiles.add(record);

        // Each FlowFile is created with its index as its size.
        expectedByteCount += index;
    }

    final FlowFileQueue mockQueue = Mockito.mock(FlowFileQueue.class);
    Mockito.when(mockQueue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4");

    final String swapLocation = "target/testRoundTrip.swap";
    final File swapFile = new File(swapLocation);
    // Start from a clean slate in case an earlier run left the file behind.
    Files.deleteIfExists(swapFile.toPath());

    final SwapSerializer swapSerializer = new SchemaSwapSerializer();
    try (final FileOutputStream fileOut = new FileOutputStream(swapFile)) {
        swapSerializer.serializeFlowFiles(flowFiles, mockQueue, swapLocation, fileOut);
    }

    final SwapDeserializer swapDeserializer = new SchemaSwapDeserializer();
    final SwapSummary summary;
    try (final FileInputStream fileIn = new FileInputStream(swapFile);
         final DataInputStream dataIn = new DataInputStream(fileIn)) {
        summary = swapDeserializer.getSwapSummary(dataIn, swapLocation, claimManager);
    }

    assertEquals(flowFileCount, summary.getQueueSize().getObjectCount());
    assertEquals(expectedByteCount, summary.getQueueSize().getByteCount());
    assertEquals(flowFileCount - 1, summary.getMaxFlowFileId().intValue());

    final List<ResourceClaim> claims = summary.getResourceClaims();
    assertEquals(flowFileCount, claims.size());
    assertFalse(claims.stream().anyMatch(claim -> claim == null));

    // Exactly the two FlowFiles that share the content claim report the resource claim id "id".
    final long claimsWithSharedId = claims.stream()
            .filter(claim -> claim.getId().equals("id"))
            .count();
    assertEquals(2, claimsWithSharedId);

    // The two shared entries collapse into one, leaving 9,999 distinct claims.
    final Set<ResourceClaim> distinctClaims = new HashSet<>(claims);
    assertEquals(flowFileCount - 1, distinctClaims.size());
}
use of org.apache.nifi.controller.repository.FlowFileRecord in project nifi by apache.
the class TestSimpleSwapSerializerDeserializer method testWritePerformance.
/**
 * Manual benchmark: serializes the same 10,000 FlowFiles 1,000 times into a discarding stream
 * and prints the elapsed wall-clock time. Ignored during automated runs.
 */
@Test
@Ignore("For manual testing only. Not intended to be run as part of the automated unit tests but can " + "be convenient for determining a baseline for performance if making modifications.")
public void testWritePerformance() throws IOException, InterruptedException {
    final int flowFileCount = 10000;
    final ResourceClaimManager claimManager = new StandardResourceClaimManager();

    final List<FlowFileRecord> flowFiles = new ArrayList<>(flowFileCount);
    final Map<String, String> attributes = new HashMap<>();
    for (int index = 0; index < flowFileCount; index++) {
        attributes.put("i", String.valueOf(index));
        flowFiles.add(new MockFlowFile(attributes, index, claimManager));
    }

    final FlowFileQueue mockQueue = Mockito.mock(FlowFileQueue.class);
    Mockito.when(mockQueue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4");

    final String swapLocation = "target/testRoundTrip.swap";
    final int iterations = 1000;

    // The clock starts before the serializer is constructed, so construction is part of the measurement.
    final long startNanos = System.nanoTime();
    final SwapSerializer swapSerializer = new SimpleSwapSerializer();

    int completed = 0;
    while (completed < iterations) {
        // Output is discarded; only the serialization work is being timed.
        try (final OutputStream sink = new NullOutputStream()) {
            swapSerializer.serializeFlowFiles(flowFiles, mockQueue, swapLocation, sink);
        }
        completed++;
    }

    final long elapsedNanos = System.nanoTime() - startNanos;
    final long millis = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
    System.out.println("Wrote " + iterations + " Swap Files in " + millis + " millis");
}
use of org.apache.nifi.controller.repository.FlowFileRecord in project nifi by apache.
the class StandardFlowFileQueue method migrateSwapToActive.
/**
 * Refills the active queue from swapped data.
 * <p>
 * If at least one swap file location is registered, the first one is swapped in via the swap
 * manager and its FlowFiles are appended to the active queue. Otherwise, FlowFiles waiting on the
 * in-memory swap queue are moved to the active queue until the active queue reaches the swap
 * threshold. This prevents us from having to swap them to disk and then back in.
 * <p>
 * The method returns without doing anything when the active queue is non-empty and already holds
 * more than {@code swapThreshold - SWAP_RECORD_POLL_SIZE} FlowFiles.
 * <p>
 * Checked swap-in failures are logged (and reported through the event reporter where one is
 * configured) and cause an early return; any other {@code Throwable} is logged and rethrown.
 * <p>
 * This method MUST be called with the writeLock held.
 */
private void migrateSwapToActive() {
// Migrate as many FlowFiles as we can from the Swap Queue to the Active Queue, so that we don't
// have to swap them out & then swap them back in.
// If we don't do this, we could get into a situation where we have potentially thousands of FlowFiles
// sitting on the Swap Queue but not getting processed because there aren't enough to be swapped out.
// In particular, this can happen if the queue is typically filled with surges.
// For example, if the queue has 25,000 FlowFiles come in, it may process 20,000 of them and leave
// 5,000 sitting on the Swap Queue. If it then takes an hour for an additional 5,000 FlowFiles to come in,
// those FlowFiles sitting on the Swap Queue will sit there for an hour, waiting to be swapped out and
// swapped back in again.
// Calling this method when records are polled prevents this condition by migrating FlowFiles from the
// Swap Queue to the Active Queue. However, we don't do this if there are FlowFiles already swapped out
// to disk, because we want them to be swapped back in in the same order that they were swapped out.
final int activeQueueSize = activeQueue.size();
// The active queue still holds enough FlowFiles; there is no need to refill it yet.
if (activeQueueSize > 0 && activeQueueSize > swapThreshold - SWAP_RECORD_POLL_SIZE) {
return;
}
// If there are swap files waiting to be swapped in, swap in the first one before touching the
// in-memory swap queue, so that the FlowFiles that were swapped out first are swapped back in
// first.
if (!swapLocations.isEmpty()) {
final String swapLocation = swapLocations.get(0);
// Set when the swap file ended prematurely and only some of its FlowFiles could be recovered.
boolean partialContents = false;
SwapContents swapContents = null;
try {
swapContents = swapManager.swapIn(swapLocation, this);
swapLocations.remove(0);
} catch (final IncompleteSwapFileException isfe) {
logger.error("Failed to swap in all FlowFiles from Swap File {}; Swap File ended prematurely. The records that were present will still be swapped in", swapLocation);
logger.error("", isfe);
// Continue with whatever could be read, and drop the swap location so it is not read again.
swapContents = isfe.getPartialContents();
partialContents = true;
swapLocations.remove(0);
} catch (final FileNotFoundException fnfe) {
logger.error("Failed to swap in FlowFiles from Swap File {} because the Swap File can no longer be found", swapLocation);
if (eventReporter != null) {
eventReporter.reportEvent(Severity.ERROR, "Swap File", "Failed to swap in FlowFiles from Swap File " + swapLocation + " because the Swap File can no longer be found");
}
// The file is gone, so retrying cannot succeed: forget the location and give up for this call.
// NOTE(review): the swap queue size counters are not adjusted on this path - confirm that is intended.
swapLocations.remove(0);
return;
} catch (final IOException ioe) {
logger.error("Failed to swap in FlowFiles from Swap File {}; Swap File appears to be corrupt!", swapLocation);
logger.error("", ioe);
if (eventReporter != null) {
eventReporter.reportEvent(Severity.ERROR, "Swap File", "Failed to swap in FlowFiles from Swap File " + swapLocation + "; Swap File appears to be corrupt! Some FlowFiles in the queue may not be accessible. See logs for more information.");
}
// We do not remove the Swap File from swapLocations here, so a later call will try it again.
// Presumably the IOException may be recoverable later: for instance, the file may be on a network
// drive and we may have connectivity problems, etc.
return;
} catch (final Throwable t) {
logger.error("Failed to swap in FlowFiles from Swap File {}", swapLocation, t);
// We do not remove the Swap File from swapLocations because this is an unexpected failure
// that may be retry-able. By leaving the Swap File
// in swapLocations, we will continue to retry.
throw t;
}
// Totals recorded in the swap file's summary. For a partial read these may exceed what was
// actually recovered, which is why the active-queue increment is computed separately below.
final QueueSize swapSize = swapContents.getSummary().getQueueSize();
final long contentSize = swapSize.getByteCount();
final int flowFileCount = swapSize.getObjectCount();
// The swapped-out totals shrink by the summary's full amounts.
// NOTE(review): the third argument (-1) looks like the change in the number of swap files - confirm.
incrementSwapQueueSize(-flowFileCount, -contentSize, -1);
if (partialContents) {
// if we have partial results, we need to calculate the content size of the flowfiles
// actually swapped back in.
long contentSizeSwappedIn = 0L;
for (final FlowFileRecord swappedIn : swapContents.getFlowFiles()) {
contentSizeSwappedIn += swappedIn.getSize();
}
incrementActiveQueueSize(swapContents.getFlowFiles().size(), contentSizeSwappedIn);
} else {
// we swapped in the whole swap file. We can just use the info that we got from the summary.
incrementActiveQueueSize(flowFileCount, contentSize);
}
activeQueue.addAll(swapContents.getFlowFiles());
return;
}
// This is the most common condition (nothing is swapped out), so do this check first and avoid the expense
// of other checks for 99.999% of the cases.
if (size.get().swappedCount == 0 && swapQueue.isEmpty()) {
return;
}
if (size.get().swappedCount > swapQueue.size()) {
// More FlowFiles are counted as swapped than are sitting on the in-memory swap queue, so some
// are swapped out elsewhere. We won't migrate the swap queue; we will wait for
// the files to be swapped back in first
return;
}
// Move FlowFiles from the in-memory swap queue to the active queue until the active queue
// reaches the swap threshold or the swap queue runs out.
int recordsMigrated = 0;
long bytesMigrated = 0L;
final Iterator<FlowFileRecord> swapItr = swapQueue.iterator();
while (activeQueue.size() < swapThreshold && swapItr.hasNext()) {
final FlowFileRecord toMigrate = swapItr.next();
activeQueue.add(toMigrate);
bytesMigrated += toMigrate.getSize();
recordsMigrated++;
swapItr.remove();
}
if (recordsMigrated > 0) {
// Shift the migrated counts and bytes from the swap totals to the active totals; the third
// argument of 0 leaves the remaining swap counter unchanged.
incrementActiveQueueSize(recordsMigrated, bytesMigrated);
incrementSwapQueueSize(-recordsMigrated, -bytesMigrated, 0);
}
// Nothing is counted as swapped any more, so leave swap mode.
if (size.get().swappedCount == 0) {
swapMode = false;
}
}
Aggregations