Example 1 with MinimalLockingWriteAheadLog

Use of org.wali.MinimalLockingWriteAheadLog in the apache/nifi project.

From the class TestWriteAheadFlowFileRepository, method testUpdatePerformance:

@Test
@Ignore("Intended only for local performance testing before/after making changes")
public void testUpdatePerformance() throws IOException, InterruptedException {
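    // Stub FlowFileQueue: the repository records only need a destination queue with a stable identifier,
    // so every other method is a no-op or returns a placeholder value.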
    final FlowFileQueue queue = new FlowFileQueue() {

        @Override
        public String getIdentifier() {
            return "4444";
        }

        @Override
        public List<FlowFilePrioritizer> getPriorities() {
            return null;
        }

        @Override
        public SwapSummary recoverSwappedFlowFiles() {
            return null;
        }

        @Override
        public void purgeSwapFiles() {
        }

        @Override
        public int getSwapFileCount() {
            return 0;
        }

        @Override
        public void setPriorities(List<FlowFilePrioritizer> newPriorities) {
        }

        @Override
        public void setBackPressureObjectThreshold(long maxQueueSize) {
        }

        @Override
        public long getBackPressureObjectThreshold() {
            return 0;
        }

        @Override
        public void setBackPressureDataSizeThreshold(String maxDataSize) {
        }

        @Override
        public String getBackPressureDataSizeThreshold() {
            return null;
        }

        @Override
        public QueueSize size() {
            return null;
        }

        @Override
        public boolean isEmpty() {
            return false;
        }

        @Override
        public boolean isActiveQueueEmpty() {
            return false;
        }

        @Override
        public QueueSize getUnacknowledgedQueueSize() {
            return null;
        }

        @Override
        public QueueSize getActiveQueueSize() {
            return size();
        }

        @Override
        public QueueSize getSwapQueueSize() {
            return null;
        }

        @Override
        public void acknowledge(FlowFileRecord flowFile) {
        }

        @Override
        public void acknowledge(Collection<FlowFileRecord> flowFiles) {
        }

        @Override
        public boolean isAllActiveFlowFilesPenalized() {
            return false;
        }

        @Override
        public boolean isAnyActiveFlowFilePenalized() {
            return false;
        }

        @Override
        public boolean isFull() {
            return false;
        }

        @Override
        public void put(FlowFileRecord file) {
        }

        @Override
        public void putAll(Collection<FlowFileRecord> files) {
        }

        @Override
        public FlowFileRecord poll(Set<FlowFileRecord> expiredRecords) {
            return null;
        }

        @Override
        public List<FlowFileRecord> poll(int maxResults, Set<FlowFileRecord> expiredRecords) {
            return null;
        }

        @Override
        public long drainQueue(Queue<FlowFileRecord> sourceQueue, List<FlowFileRecord> destination, int maxResults, Set<FlowFileRecord> expiredRecords) {
            return 0;
        }

        @Override
        public List<FlowFileRecord> poll(FlowFileFilter filter, Set<FlowFileRecord> expiredRecords) {
            return null;
        }

        @Override
        public String getFlowFileExpiration() {
            return null;
        }

        @Override
        public int getFlowFileExpiration(TimeUnit timeUnit) {
            return 0;
        }

        @Override
        public void setFlowFileExpiration(String flowExpirationPeriod) {
        }

        @Override
        public DropFlowFileStatus dropFlowFiles(String requestIdentifier, String requestor) {
            return null;
        }

        @Override
        public DropFlowFileStatus getDropFlowFileStatus(String requestIdentifier) {
            return null;
        }

        @Override
        public DropFlowFileStatus cancelDropFlowFileRequest(String requestIdentifier) {
            return null;
        }

        @Override
        public ListFlowFileStatus listFlowFiles(String requestIdentifier, int maxResults) {
            return null;
        }

        @Override
        public ListFlowFileStatus getListFlowFileStatus(String requestIdentifier) {
            return null;
        }

        @Override
        public ListFlowFileStatus cancelListFlowFileRequest(String requestIdentifier) {
            return null;
        }

        @Override
        public FlowFileRecord getFlowFile(String flowFileUuid) throws IOException {
            return null;
        }

        @Override
        public void verifyCanList() throws IllegalStateException {
        }
    };
    final int numPartitions = 16;
    final int numThreads = 8;
    final int totalUpdates = 160_000_000;
    final int batchSize = 10;
    final Path path = Paths.get("target/minimal-locking-repo");
    deleteRecursively(path.toFile());
    assertTrue(path.toFile().mkdirs());
    final ResourceClaimManager claimManager = new StandardResourceClaimManager();
    final RepositoryRecordSerdeFactory serdeFactory = new RepositoryRecordSerdeFactory(claimManager);
    final WriteAheadRepository<RepositoryRecord> repo = new MinimalLockingWriteAheadLog<>(path, numPartitions, serdeFactory, null);
    final Collection<RepositoryRecord> initialRecs = repo.recoverRecords();
    assertTrue(initialRecs.isEmpty());
    final int updateCountPerThread = totalUpdates / numThreads;
    final Thread[] threads = new Thread[numThreads];
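    // Run the whole workload twice: the first pass (j == 0) is reported as a warm-up, the second is the measured run.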
    for (int j = 0; j < 2; j++) {
        for (int i = 0; i < numThreads; i++) {
            final Thread t = new Thread(new Runnable() {

                @Override
                public void run() {
                    final List<RepositoryRecord> records = new ArrayList<>();
                    final int numBatches = updateCountPerThread / batchSize;
                    final MockFlowFile baseFlowFile = new MockFlowFile(0L);
                    for (int i = 0; i < numBatches; i++) {
                        records.clear();
                        for (int k = 0; k < batchSize; k++) {
                            final FlowFileRecord flowFile = new MockFlowFile(i % 100_000, baseFlowFile);
                            final String uuid = flowFile.getAttribute("uuid");
                            final StandardRepositoryRecord record = new StandardRepositoryRecord(null, flowFile);
                            record.setDestination(queue);
                            final Map<String, String> updatedAttrs = Collections.singletonMap("uuid", uuid);
                            record.setWorking(flowFile, updatedAttrs);
                            records.add(record);
                        }
                        try {
                            repo.update(records, false);
                        } catch (IOException e) {
                            e.printStackTrace();
                            Assert.fail(e.toString());
                        }
                    }
                }
            });
            t.setDaemon(true);
            threads[i] = t;
        }
        final long start = System.nanoTime();
        for (final Thread t : threads) {
            t.start();
        }
        for (final Thread t : threads) {
            t.join();
        }
        final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
        if (j == 0) {
            System.out.println(millis + " ms to insert " + updateCountPerThread * numThreads + " updates using " + numPartitions + " partitions and " + numThreads + " threads, *as a warmup!*");
        } else {
            System.out.println(millis + " ms to insert " + updateCountPerThread * numThreads + " updates using " + numPartitions + " partitions and " + numThreads + " threads");
        }
    }
}
Also used: Set (java.util.Set), StandardResourceClaimManager (org.apache.nifi.controller.repository.claim.StandardResourceClaimManager), ResourceClaimManager (org.apache.nifi.controller.repository.claim.ResourceClaimManager), StandardFlowFileQueue (org.apache.nifi.controller.StandardFlowFileQueue), FlowFileQueue (org.apache.nifi.controller.queue.FlowFileQueue), TimeUnit (java.util.concurrent.TimeUnit), List (java.util.List), ArrayList (java.util.ArrayList), FlowFilePrioritizer (org.apache.nifi.flowfile.FlowFilePrioritizer), Queue (java.util.Queue), Path (java.nio.file.Path), FlowFileFilter (org.apache.nifi.processor.FlowFileFilter), IOException (java.io.IOException), MockFlowFile (org.apache.nifi.util.MockFlowFile), MinimalLockingWriteAheadLog (org.wali.MinimalLockingWriteAheadLog), Collection (java.util.Collection), Map (java.util.Map), HashMap (java.util.HashMap), Ignore (org.junit.Ignore), Test (org.junit.Test)
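
The test above exercises the full write-ahead-log lifecycle: construct the log, recover any previously persisted records, apply updates in batches, and shut the log down. The sketch below distills that lifecycle using only the calls that appear in these examples; the directory path, partition count, and method name are illustrative and not part of the NiFi sources.

// Minimal sketch of the WALI lifecycle exercised by testUpdatePerformance above.
void walLifecycleSketch(final Collection<RepositoryRecord> recordsToWrite) throws IOException {
    final Path path = Paths.get("target/sketch-wal");
    Files.createDirectories(path);
    final ResourceClaimManager claimManager = new StandardResourceClaimManager();
    final RepositoryRecordSerdeFactory serdeFactory = new RepositoryRecordSerdeFactory(claimManager);
    // Same constructor arguments as the test: directory, partition count, serde factory, no SyncListener.
    final WriteAheadRepository<RepositoryRecord> repo =
            new MinimalLockingWriteAheadLog<>(path, 16, serdeFactory, null);
    try {
        // recoverRecords() runs before the first update(); it replays anything persisted by a previous run.
        final Collection<RepositoryRecord> recovered = repo.recoverRecords();
        System.out.println("Recovered " + recovered.size() + " records");
        // 'false' means this particular update is not forcibly synced to disk.
        repo.update(recordsToWrite, false);
    } finally {
        // Closes the underlying partition files.
        repo.shutdown();
    }
}

The test's assertTrue(initialRecs.isEmpty()) reflects the same ordering: recovery is performed first on a fresh directory, and only then do the worker threads begin issuing updates.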

Example 2 with MinimalLockingWriteAheadLog

Use of org.wali.MinimalLockingWriteAheadLog in the apache/nifi project.

From the class WriteAheadFlowFileRepository, method migrateFromMinimalLockingLog:

@SuppressWarnings("deprecation")
private Optional<Collection<RepositoryRecord>> migrateFromMinimalLockingLog(final WriteAheadRepository<RepositoryRecord> toUpdate) throws IOException {
    final List<File> partitionDirs = new ArrayList<>();
    for (final File recoveryFile : recoveryFiles) {
        // listFiles() returns null if the recovery directory does not yet exist or cannot be read
        final File[] partitions = recoveryFile.listFiles(file -> file.getName().startsWith("partition-"));
        if (partitions != null) {
            for (final File partition : partitions) {
                partitionDirs.add(partition);
            }
        }
    }
    if (partitionDirs.isEmpty()) {
        return Optional.empty();
    }
    logger.info("Encountered FlowFile Repository that was written using the 'Minimal Locking Write-Ahead Log'. " + "Will recover from this version and re-write the repository using the new version of the Write-Ahead Log.");
    final SortedSet<Path> paths = recoveryFiles.stream().map(File::toPath).collect(Collectors.toCollection(TreeSet::new));
    final Collection<RepositoryRecord> recordList;
    final MinimalLockingWriteAheadLog<RepositoryRecord> minimalLockingWal = new MinimalLockingWriteAheadLog<>(paths, partitionDirs.size(), serdeFactory, null);
    try {
        recordList = minimalLockingWal.recoverRecords();
    } finally {
        minimalLockingWal.shutdown();
    }
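    // Re-write every recovered record into the new log, forcing a sync, before any old files are deleted.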
    toUpdate.update(recordList, true);
    // Delete the old repository
    logger.info("Successfully recovered files from existing Write-Ahead Log and transitioned to new implementation. Will now delete old files.");
    for (final File partitionDir : partitionDirs) {
        deleteRecursively(partitionDir);
    }
    for (final File recoveryFile : recoveryFiles) {
        final File snapshotFile = new File(recoveryFile, "snapshot");
        if (!snapshotFile.delete() && snapshotFile.exists()) {
            logger.warn("Failed to delete old file {}; this file should be cleaned up manually", snapshotFile);
        }
        final File partialFile = new File(recoveryFile, "snapshot.partial");
        if (!partialFile.delete() && partialFile.exists()) {
            logger.warn("Failed to delete old file {}; this file should be cleaned up manually", partialFile);
        }
    }
    return Optional.of(recordList);
}
Also used: Path (java.nio.file.Path), ArrayList (java.util.ArrayList), MinimalLockingWriteAheadLog (org.wali.MinimalLockingWriteAheadLog), File (java.io.File)

Example 3 with MinimalLockingWriteAheadLog

Use of org.wali.MinimalLockingWriteAheadLog in the apache/nifi project.

From the class WriteAheadFlowFileRepository, method initialize:

@Override
public void initialize(final ResourceClaimManager claimManager) throws IOException {
    this.claimManager = claimManager;
    for (final File file : flowFileRepositoryPaths) {
        Files.createDirectories(file.toPath());
    }
    // TODO: Should ensure that only 1 instance running and pointing at a particular path
    // TODO: Allow for backup path that can be used if disk out of space?? Would allow a snapshot to be stored on
    // backup and then the data deleted from the normal location; then can move backup to normal location and
    // delete backup. On restore, if no files exist in partition's directory, would have to check backup directory
    serdeFactory = new RepositoryRecordSerdeFactory(claimManager);
    if (walImplementation.equals(SEQUENTIAL_ACCESS_WAL)) {
        wal = new SequentialAccessWriteAheadLog<>(flowFileRepositoryPaths.get(0), serdeFactory, this);
    } else if (walImplementation.equals(MINIMAL_LOCKING_WALI)) {
        final SortedSet<Path> paths = flowFileRepositoryPaths.stream().map(File::toPath).collect(Collectors.toCollection(TreeSet::new));
        wal = new MinimalLockingWriteAheadLog<>(paths, numPartitions, serdeFactory, this);
    } else {
        throw new IllegalStateException("Cannot create Write-Ahead Log because the configured property '" + WRITE_AHEAD_LOG_IMPL + "' has an invalid value of '" + walImplementation + "'. Please update nifi.properties to indicate a valid value for this property.");
    }
    logger.info("Initialized FlowFile Repository using {} partitions", numPartitions);
}
Also used: TreeSet (java.util.TreeSet), MinimalLockingWriteAheadLog (org.wali.MinimalLockingWriteAheadLog), File (java.io.File), SortedSet (java.util.SortedSet)
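
Which branch initialize() takes is driven by the write-ahead-log implementation property in nifi.properties (the key is the value of the WRITE_AHEAD_LOG_IMPL constant referenced in the exception message above). As a rough guide for NiFi 1.x releases that ship both implementations, the property and its two accepted values look like the snippet below; verify the exact key against the constants in WriteAheadFlowFileRepository for your version.

# nifi.properties (illustrative)
nifi.flowfile.repository.wal.implementation=org.apache.nifi.wali.SequentialAccessWriteAheadLog
# or, to keep the legacy implementation shown in these examples:
# nifi.flowfile.repository.wal.implementation=org.wali.MinimalLockingWriteAheadLog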

Aggregations

MinimalLockingWriteAheadLog (org.wali.MinimalLockingWriteAheadLog): 3
File (java.io.File): 2
Path (java.nio.file.Path): 2
ArrayList (java.util.ArrayList): 2
IOException (java.io.IOException): 1
Collection (java.util.Collection): 1
HashMap (java.util.HashMap): 1
List (java.util.List): 1
Map (java.util.Map): 1
Queue (java.util.Queue): 1
Set (java.util.Set): 1
SortedSet (java.util.SortedSet): 1
TreeSet (java.util.TreeSet): 1
TimeUnit (java.util.concurrent.TimeUnit): 1
StandardFlowFileQueue (org.apache.nifi.controller.StandardFlowFileQueue): 1
FlowFileQueue (org.apache.nifi.controller.queue.FlowFileQueue): 1
ResourceClaimManager (org.apache.nifi.controller.repository.claim.ResourceClaimManager): 1
StandardResourceClaimManager (org.apache.nifi.controller.repository.claim.StandardResourceClaimManager): 1
FlowFilePrioritizer (org.apache.nifi.flowfile.FlowFilePrioritizer): 1
FlowFileFilter (org.apache.nifi.processor.FlowFileFilter): 1