
Example 11 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class PersistentProvenanceRepository, method purgeOldEvents:

/**
 * Purges old events from the repository
 *
 * @throws IOException if unable to purge old events due to an I/O problem
 */
synchronized void purgeOldEvents() throws IOException {
    while (!recoveryFinished.get()) {
        try {
            Thread.sleep(100L);
        } catch (final InterruptedException ie) {
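            // Ignore the interruption and keep waiting until recovery has finished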
        }
    }
    final List<File> toPurge = new ArrayList<>();
    final long timeCutoff = System.currentTimeMillis() - configuration.getMaxRecordLife(TimeUnit.MILLISECONDS);
    final List<File> sortedByBasename = getLogFiles();
    long bytesUsed = getSize(sortedByBasename, timeCutoff);
    for (final Path path : idToPathMap.get().values()) {
        final File file = path.toFile();
        final long lastModified = file.lastModified();
        if (lastModified > 0L && lastModified < timeCutoff) {
            toPurge.add(file);
        }
    }
    // This comparator sorts the data based on the "basename" of the files. I.e., the numeric portion.
    // We do this because the numeric portion represents the ID of the first event in the log file.
    // As a result, we are sorting based on time, since the ID is monotonically increasing. By doing this,
    // we are able to avoid continually hitting disk to check timestamps
    final Comparator<File> sortByBasenameComparator = new Comparator<File>() {

        @Override
        public int compare(final File o1, final File o2) {
            final String baseName1 = LuceneUtil.substringBefore(o1.getName(), ".");
            final String baseName2 = LuceneUtil.substringBefore(o2.getName(), ".");
            Long id1 = null;
            Long id2 = null;
            try {
                id1 = Long.parseLong(baseName1);
            } catch (final NumberFormatException nfe) {
                id1 = null;
            }
            try {
                id2 = Long.parseLong(baseName2);
            } catch (final NumberFormatException nfe) {
                id2 = null;
            }
            if (id1 == null && id2 == null) {
                return 0;
            }
            if (id1 == null) {
                return 1;
            }
            if (id2 == null) {
                return -1;
            }
            return Long.compare(id1, id2);
        }
    };
    // If we have too much data (at least 90% of our max capacity), start aging it off
    if (bytesUsed > configuration.getMaxStorageCapacity() * PURGE_OLD_EVENTS_HIGH_WATER) {
        Collections.sort(sortedByBasename, sortByBasenameComparator);
        for (final File file : sortedByBasename) {
            toPurge.add(file);
            bytesUsed -= file.length();
            if (bytesUsed < configuration.getMaxStorageCapacity() * PURGE_OLD_EVENTS_LOW_WATER) {
                // we've shrunk the repo size down enough to stop
                break;
            }
        }
    }
    // Sort all of the files that we want to purge such that the oldest events are aged off first
    Collections.sort(toPurge, sortByBasenameComparator);
    logger.debug("Purging old event files: {}", toPurge);
    // Remove any duplicates that we may have.
    final Set<File> uniqueFilesToPurge = new LinkedHashSet<>(toPurge);
    // Age off the data.
    final Set<String> removed = new LinkedHashSet<>();
    for (File file : uniqueFilesToPurge) {
        final String baseName = LuceneUtil.substringBefore(file.getName(), ".");
        ExpirationAction currentAction = null;
        try {
            for (final ExpirationAction action : expirationActions) {
                currentAction = action;
                if (!action.hasBeenPerformed(file)) {
                    final File fileBeforeAction = file;
                    final StopWatch stopWatch = new StopWatch(true);
                    file = action.execute(file);
                    stopWatch.stop();
                    logger.info("Successfully performed Expiration Action {} on Provenance Event file {} in {}", action, fileBeforeAction, stopWatch.getDuration());
                }
            }
            removed.add(baseName);
        } catch (final FileNotFoundException fnf) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} because the file no longer exists; will not " + "perform additional Expiration Actions on this file", currentAction, file);
            removed.add(baseName);
        } catch (final Throwable t) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} due to {}; will not perform additional " + "Expiration Actions on this file at this time", currentAction, file, t.toString());
            logger.warn("", t);
            eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to perform Expiration Action " + currentAction + " on Provenance Event file " + file + " due to " + t.toString() + "; will not perform additional Expiration Actions " + "on this file at this time");
        }
    }
    // Update the ID-to-Path map so that it no longer includes the removed files
    // We cannot obtain the write lock here because there may be a need for the lock in the rollover method,
    // if we have 'backpressure applied'. This would result in a deadlock because the rollover method would be
    // waiting for purgeOldEvents, and purgeOldEvents would be waiting for the write lock held by rollover.
    boolean updated = false;
    while (!updated) {
        final SortedMap<Long, Path> existingPathMap = idToPathMap.get();
        final SortedMap<Long, Path> newPathMap = new TreeMap<>(new PathMapComparator());
        newPathMap.putAll(existingPathMap);
        final Iterator<Map.Entry<Long, Path>> itr = newPathMap.entrySet().iterator();
        while (itr.hasNext()) {
            final Map.Entry<Long, Path> entry = itr.next();
            final String filename = entry.getValue().toFile().getName();
            final String baseName = LuceneUtil.substringBefore(filename, ".");
            if (removed.contains(baseName)) {
                itr.remove();
            }
        }
        updated = idToPathMap.compareAndSet(existingPathMap, newPathMap);
        logger.debug("After expiration, path map: {}", newPathMap);
    }
    purgeExpiredIndexes();
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Comparator(java.util.Comparator) ExpirationAction(org.apache.nifi.provenance.expiration.ExpirationAction) Path(java.nio.file.Path) TreeMap(java.util.TreeMap) StopWatch(org.apache.nifi.util.StopWatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) File(java.io.File) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)
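
Distilled from the snippet above, the timing idiom is small: construct the StopWatch with true so it starts immediately, call stop() after the unit of work, then read the duration for logging. A minimal, self-contained sketch of that idiom (the class name StopWatchSketch and the Thread.sleep standing in for an ExpirationAction are illustrative, not part of NiFi):

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class StopWatchSketch {
    public static void main(final String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(true); // 'true' starts the watch immediately
        Thread.sleep(250L);                              // stands in for the work being timed
        stopWatch.stop();
        // getDuration(TimeUnit) returns a numeric duration; getDuration() a formatted string
        System.out.println("Took " + stopWatch.getDuration(TimeUnit.MILLISECONDS) + " ms ("
                + stopWatch.getDuration() + ")");
    }
}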

Example 12 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class PutFile, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final Path configuredRootDirPath = Paths.get(context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue());
    final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();
    final Integer maxDestinationFiles = context.getProperty(MAX_DESTINATION_FILES).asInteger();
    final ComponentLog logger = getLogger();
    Path tempDotCopyFile = null;
    try {
        final Path rootDirPath = configuredRootDirPath;
        final Path tempCopyFile = rootDirPath.resolve("." + flowFile.getAttribute(CoreAttributes.FILENAME.key()));
        final Path copyFile = rootDirPath.resolve(flowFile.getAttribute(CoreAttributes.FILENAME.key()));
        if (!Files.exists(rootDirPath)) {
            if (context.getProperty(CREATE_DIRS).asBoolean()) {
                Files.createDirectories(rootDirPath);
            } else {
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_FAILURE);
                logger.error("Penalizing {} and routing to 'failure' because the output directory {} does not exist and Processor is " + "configured not to create missing directories", new Object[] { flowFile, rootDirPath });
                return;
            }
        }
        final Path dotCopyFile = tempCopyFile;
        tempDotCopyFile = dotCopyFile;
        Path finalCopyFile = copyFile;
        final Path finalCopyFileDir = finalCopyFile.getParent();
        if (Files.exists(finalCopyFileDir) && maxDestinationFiles != null) {
            // check if too many files already
            final int numFiles = finalCopyFileDir.toFile().list().length;
            if (numFiles >= maxDestinationFiles) {
                flowFile = session.penalize(flowFile);
                logger.warn("Penalizing {} and routing to 'failure' because the output directory {} has {} files, which exceeds the " + "configured maximum number of files", new Object[] { flowFile, finalCopyFileDir, numFiles });
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
        if (Files.exists(finalCopyFile)) {
            switch(conflictResponse) {
                case REPLACE_RESOLUTION:
                    Files.delete(finalCopyFile);
                    logger.info("Deleted {} as configured in order to replace with the contents of {}", new Object[] { finalCopyFile, flowFile });
                    break;
                case IGNORE_RESOLUTION:
                    session.transfer(flowFile, REL_SUCCESS);
                    logger.info("Transferring {} to success because file with same name already exists", new Object[] { flowFile });
                    return;
                case FAIL_RESOLUTION:
                    flowFile = session.penalize(flowFile);
                    logger.warn("Penalizing {} and routing to failure as configured because file with the same name already exists", new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                default:
                    break;
            }
        }
        session.exportTo(flowFile, dotCopyFile, false);
        final String lastModifiedTime = context.getProperty(CHANGE_LAST_MODIFIED_TIME).evaluateAttributeExpressions(flowFile).getValue();
        if (lastModifiedTime != null && !lastModifiedTime.trim().isEmpty()) {
            try {
                final DateFormat formatter = new SimpleDateFormat(FILE_MODIFY_DATE_ATTR_FORMAT, Locale.US);
                final Date fileModifyTime = formatter.parse(lastModifiedTime);
                dotCopyFile.toFile().setLastModified(fileModifyTime.getTime());
            } catch (Exception e) {
                logger.warn("Could not set file lastModifiedTime to {} because {}", new Object[] { lastModifiedTime, e });
            }
        }
        final String permissions = context.getProperty(CHANGE_PERMISSIONS).evaluateAttributeExpressions(flowFile).getValue();
        if (permissions != null && !permissions.trim().isEmpty()) {
            try {
                String perms = stringPermissions(permissions);
                if (!perms.isEmpty()) {
                    Files.setPosixFilePermissions(dotCopyFile, PosixFilePermissions.fromString(perms));
                }
            } catch (Exception e) {
                logger.warn("Could not set file permissions to {} because {}", new Object[] { permissions, e });
            }
        }
        final String owner = context.getProperty(CHANGE_OWNER).evaluateAttributeExpressions(flowFile).getValue();
        if (owner != null && !owner.trim().isEmpty()) {
            try {
                UserPrincipalLookupService lookupService = dotCopyFile.getFileSystem().getUserPrincipalLookupService();
                Files.setOwner(dotCopyFile, lookupService.lookupPrincipalByName(owner));
            } catch (Exception e) {
                logger.warn("Could not set file owner to {} because {}", new Object[] { owner, e });
            }
        }
        final String group = context.getProperty(CHANGE_GROUP).evaluateAttributeExpressions(flowFile).getValue();
        if (group != null && !group.trim().isEmpty()) {
            try {
                UserPrincipalLookupService lookupService = dotCopyFile.getFileSystem().getUserPrincipalLookupService();
                PosixFileAttributeView view = Files.getFileAttributeView(dotCopyFile, PosixFileAttributeView.class);
                view.setGroup(lookupService.lookupPrincipalByGroupName(group));
            } catch (Exception e) {
                logger.warn("Could not set file group to {} because {}", new Object[] { group, e });
            }
        }
        boolean renamed = false;
        for (int i = 0; i < 10; i++) {
            // try rename up to 10 times.
            if (dotCopyFile.toFile().renameTo(finalCopyFile.toFile())) {
                renamed = true;
                // rename was successful
                break;
            }
            // try waiting a few ms to let whatever might cause rename failure to resolve
            Thread.sleep(100L);
        }
        if (!renamed) {
            if (Files.exists(dotCopyFile) && dotCopyFile.toFile().delete()) {
                logger.debug("Deleted dot copy file {}", new Object[] { dotCopyFile });
            }
            throw new ProcessException("Could not rename: " + dotCopyFile);
        } else {
            logger.info("Produced copy of {} at location {}", new Object[] { flowFile, finalCopyFile });
        }
        session.getProvenanceReporter().send(flowFile, finalCopyFile.toFile().toURI().toString(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Throwable t) {
        if (tempDotCopyFile != null) {
            try {
                Files.deleteIfExists(tempDotCopyFile);
            } catch (final Exception e) {
                logger.error("Unable to remove temporary file {} due to {}", new Object[] { tempDotCopyFile, e });
            }
        }
        flowFile = session.penalize(flowFile);
        logger.error("Penalizing {} and transferring to failure due to {}", new Object[] { flowFile, t });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : Path(java.nio.file.Path) UserPrincipalLookupService(java.nio.file.attribute.UserPrincipalLookupService) FlowFile(org.apache.nifi.flowfile.FlowFile) ComponentLog(org.apache.nifi.logging.ComponentLog) Date(java.util.Date) ProcessException(org.apache.nifi.processor.exception.ProcessException) StopWatch(org.apache.nifi.util.StopWatch) PosixFileAttributeView(java.nio.file.attribute.PosixFileAttributeView) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat)
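
Note that this example never calls stop(): the elapsed time is read from the still-running watch via getElapsed(TimeUnit.MILLISECONDS) when the send is reported to the provenance reporter. A minimal sketch of that pattern (ElapsedSketch and the sleep are illustrative placeholders):

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class ElapsedSketch {
    public static void main(final String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(true);
        Thread.sleep(100L); // stands in for exporting the FlowFile content to disk
        // Read the elapsed time without stopping the watch, as PutFile does
        final long millis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
        System.out.println("Copy took roughly " + millis + " ms");
    }
}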

Example 13 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class PutFileTransfer, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final String hostname = context.getProperty(FileTransfer.HOSTNAME).evaluateAttributeExpressions(flowFile).getValue();
    final int maxNumberOfFiles = context.getProperty(FileTransfer.BATCH_SIZE).asInteger();
    int fileCount = 0;
    try (final T transfer = getFileTransfer(context)) {
        do {
            final String rootPath = context.getProperty(FileTransfer.REMOTE_PATH).evaluateAttributeExpressions(flowFile).getValue();
            final String workingDirPath;
            if (rootPath == null) {
                workingDirPath = null;
            } else {
                File workingDirectory = new File(rootPath);
                if (!workingDirectory.getPath().startsWith("/") && !workingDirectory.getPath().startsWith("\\")) {
                    workingDirectory = new File(transfer.getHomeDirectory(flowFile), workingDirectory.getPath());
                }
                workingDirPath = workingDirectory.getPath().replace("\\", "/");
            }
            final boolean rejectZeroByteFiles = context.getProperty(FileTransfer.REJECT_ZERO_BYTE).asBoolean();
            final ConflictResult conflictResult = identifyAndResolveConflictFile(context.getProperty(FileTransfer.CONFLICT_RESOLUTION).getValue(), transfer, workingDirPath, flowFile, rejectZeroByteFiles, logger);
            if (conflictResult.isTransfer()) {
                final StopWatch stopWatch = new StopWatch();
                stopWatch.start();
                beforePut(flowFile, context, transfer);
                final FlowFile flowFileToTransfer = flowFile;
                final AtomicReference<String> fullPathRef = new AtomicReference<>(null);
                session.read(flowFile, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream in) throws IOException {
                        try (final InputStream bufferedIn = new BufferedInputStream(in)) {
                            if (workingDirPath != null && context.getProperty(SFTPTransfer.CREATE_DIRECTORY).asBoolean()) {
                                transfer.ensureDirectoryExists(flowFileToTransfer, new File(workingDirPath));
                            }
                            fullPathRef.set(transfer.put(flowFileToTransfer, workingDirPath, conflictResult.getFileName(), bufferedIn));
                        }
                    }
                });
                afterPut(flowFile, context, transfer);
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(flowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                logger.info("Successfully transferred {} to {} on remote host {} in {} milliseconds at a rate of {}", new Object[] { flowFile, fullPathRef.get(), hostname, millis, dataRate });
                String fullPathWithSlash = fullPathRef.get();
                if (!fullPathWithSlash.startsWith("/")) {
                    fullPathWithSlash = "/" + fullPathWithSlash;
                }
                final String destinationUri = transfer.getProtocolName() + "://" + hostname + fullPathWithSlash;
                session.getProvenanceReporter().send(flowFile, destinationUri, millis);
            }
            if (conflictResult.isPenalize()) {
                flowFile = session.penalize(flowFile);
            }
            session.transfer(flowFile, conflictResult.getRelationship());
            session.commit();
        } while (isScheduled() && (getRelationships().size() == context.getAvailableRelationships().size()) && (++fileCount < maxNumberOfFiles) && ((flowFile = session.get()) != null));
    } catch (final IOException e) {
        context.yield();
        logger.error("Unable to transfer {} to remote host {} due to {}", new Object[] { flowFile, hostname, e });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    } catch (final FlowFileAccessException e) {
        context.yield();
        logger.error("Unable to transfer {} to remote host {} due to {}", new Object[] { flowFile, hostname, e.getCause() });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    } catch (final ProcessException e) {
        context.yield();
        logger.error("Unable to transfer {} to remote host {} due to {}: {}; routing to failure", new Object[] { flowFile, hostname, e, e.getCause() });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) StopWatch(org.apache.nifi.util.StopWatch) ProcessException(org.apache.nifi.processor.exception.ProcessException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) File(java.io.File)
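
Here the watch is started and stopped explicitly, and calculateDataRate(bytes) turns the measured interval into a human-readable transfer rate for the log message. A minimal sketch under those assumptions (DataRateSketch, the byte count, and the sleep are illustrative):

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class DataRateSketch {
    public static void main(final String[] args) throws InterruptedException {
        final long bytesTransferred = 4_194_304L; // stands in for flowFile.getSize()
        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        Thread.sleep(200L);                       // stands in for the remote put
        stopWatch.stop();
        final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        System.out.println("Sent " + bytesTransferred + " bytes in " + millis
                + " ms at " + stopWatch.calculateDataRate(bytesTransferred));
    }
}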

Example 14 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class PutTCP, method onTrigger:

/**
 * Event handler method invoked when the framework forwards a FlowFile to this Processor. The FlowFile content is sent over a TCP connection using an acquired ChannelSender object.
 * If the content is sent successfully, the FlowFile is routed to the success relationship; if an error occurs, it is routed to the failure relationship.
 *
 * @param context
 *            - the current process context.
 *
 * @param sessionFactory
 *            - a factory object to obtain a process session.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    final ProcessSession session = sessionFactory.createSession();
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        final PruneResult result = pruneIdleSenders(context.getProperty(IDLE_EXPIRATION).asTimePeriod(TimeUnit.MILLISECONDS).longValue());
        // yield if we closed an idle connection, or if there were no connections in the first place
        if (result.getNumClosed() > 0 || (result.getNumClosed() == 0 && result.getNumConsidered() == 0)) {
            context.yield();
        }
        return;
    }
    ChannelSender sender = acquireSender(context, session, flowFile);
    if (sender == null) {
        return;
    }
    // Verify the sender is a SocketChannelSender so that it can be cast later to obtain the OutputStream
    if (!(sender instanceof SocketChannelSender)) {
        getLogger().error("Processor can only be used with a SocketChannelSender, but obtained: " + sender.getClass().getCanonicalName());
        context.yield();
        return;
    }
    boolean closeSender = isConnectionPerFlowFile(context);
    try {
        // We might keep the connection open across invocations of the processor so don't auto-close this
        final OutputStream out = ((SocketChannelSender) sender).getOutputStream();
        final String delimiter = getOutgoingMessageDelimiter(context, flowFile);
        final StopWatch stopWatch = new StopWatch(true);
        try (final InputStream rawIn = session.read(flowFile);
            final BufferedInputStream in = new BufferedInputStream(rawIn)) {
            IOUtils.copy(in, out);
            if (delimiter != null) {
                final Charset charSet = Charset.forName(context.getProperty(CHARSET).getValue());
                out.write(delimiter.getBytes(charSet), 0, delimiter.length());
            }
            out.flush();
        } catch (final Exception e) {
            closeSender = true;
            throw e;
        }
        session.getProvenanceReporter().send(flowFile, transitUri, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, REL_SUCCESS);
        session.commit();
    } catch (Exception e) {
        onFailure(context, session, flowFile);
        getLogger().error("Exception while handling a process session, transferring {} to failure.", new Object[] { flowFile }, e);
    } finally {
        if (closeSender) {
            getLogger().debug("Closing sender");
            sender.close();
        } else {
            getLogger().debug("Relinquishing sender");
            relinquishSender(sender);
        }
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) FlowFile(org.apache.nifi.flowfile.FlowFile) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) ChannelSender(org.apache.nifi.processor.util.put.sender.ChannelSender) SocketChannelSender(org.apache.nifi.processor.util.put.sender.SocketChannelSender) Charset(java.nio.charset.Charset) ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) StopWatch(org.apache.nifi.util.StopWatch)
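
The pattern here is to start the watch just before copying the FlowFile content to the socket and to read getElapsed(...) afterwards for the provenance SEND event. A minimal sketch of timing a stream copy (in-memory streams stand in for the FlowFile content and the TCP OutputStream; TimedCopySketch is an illustrative name, and commons-io IOUtils is assumed on the classpath as in the example above):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.IOUtils;
import org.apache.nifi.util.StopWatch;

public class TimedCopySketch {
    public static void main(final String[] args) throws IOException {
        final byte[] payload = "hello nifi".getBytes(StandardCharsets.UTF_8);
        final StopWatch stopWatch = new StopWatch(true);
        try (ByteArrayInputStream in = new ByteArrayInputStream(payload);
             ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            IOUtils.copy(in, out); // the work being timed
        }
        System.out.println("Copied " + payload.length + " bytes in "
                + stopWatch.getElapsed(TimeUnit.MILLISECONDS) + " ms");
    }
}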

Example 15 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class QueryDatabaseTable, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    // Fetch the column/table info once
    if (!setupComplete.get()) {
        super.setup(context);
    }
    ProcessSession session = sessionFactory.createSession();
    final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions().getValue();
    final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions().getValue();
    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions().asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions().asInteger();
    final Integer outputBatchSizeField = context.getProperty(OUTPUT_BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final int outputBatchSize = outputBatchSizeField == null ? 0 : outputBatchSizeField;
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions().asInteger() : 0;
    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder().recordName(tableName).maxRows(maxRowsPerFlowFile).convertNames(context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean()).useLogicalTypes(context.getProperty(USE_AVRO_LOGICAL_TYPES).asBoolean()).defaultPrecision(context.getProperty(DEFAULT_PRECISION).evaluateAttributeExpressions().asInteger()).defaultScale(context.getProperty(DEFAULT_SCALE).evaluateAttributeExpressions().asInteger()).build();
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        getLogger().error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
        context.yield();
        return;
    }
    // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
    // set as the current state map (after the session has been committed)
    final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
    // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
    for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
        String maxPropKey = maxProp.getKey().toLowerCase();
        String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
        if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
            String newMaxPropValue;
            // If a value exists in the state map under just the column name, fall back to it,
            // but store the new initial max value under the fully-qualified key.
            if (statePropertyMap.containsKey(maxPropKey)) {
                newMaxPropValue = statePropertyMap.get(maxPropKey);
            } else {
                newMaxPropValue = maxProp.getValue();
            }
            statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
        }
    }
    List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? null : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
    final String selectQuery = getQuery(dbAdapter, tableName, columnNames, maxValueColumnNameList, customWhereClause, statePropertyMap);
    final StopWatch stopWatch = new StopWatch(true);
    final String fragmentIdentifier = UUID.randomUUID().toString();
    try (final Connection con = dbcpService.getConnection();
        final Statement st = con.createStatement()) {
        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }
        String jdbcURL = "DBCPService";
        try {
            DatabaseMetaData databaseMetaData = con.getMetaData();
            if (databaseMetaData != null) {
                jdbcURL = databaseMetaData.getURL();
            }
        } catch (SQLException se) {
        // Ignore and use default JDBC URL. This shouldn't happen unless the driver doesn't implement getMetaData() properly
        }
        final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions().asTimePeriod(TimeUnit.SECONDS).intValue();
        // timeout in seconds
        st.setQueryTimeout(queryTimeout);
        try {
            logger.debug("Executing query {}", new Object[] { selectQuery });
            final ResultSet resultSet = st.executeQuery(selectQuery);
            int fragmentIndex = 0;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                FlowFile fileToProcess = session.create();
                try {
                    fileToProcess = session.write(fileToProcess, out -> {
                        // Max values will be updated in the state property map by the callback
                        final MaxValueResultSetRowCollector maxValCollector = new MaxValueResultSetRowCollector(tableName, statePropertyMap, dbAdapter);
                        try {
                            nrOfRows.set(JdbcCommon.convertToAvroStream(resultSet, out, options, maxValCollector));
                        } catch (SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records to Avro.", e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(fileToProcess);
                    throw e;
                }
                if (nrOfRows.get() > 0) {
                    // set attribute how many rows were selected
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_TABLENAME, tableName);
                    fileToProcess = session.putAttribute(fileToProcess, CoreAttributes.MIME_TYPE.key(), JdbcCommon.MIME_TYPE_AVRO_BINARY);
                    if (maxRowsPerFlowFile > 0) {
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.identifier", fragmentIdentifier);
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.index", String.valueOf(fragmentIndex));
                    }
                    logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { fileToProcess, nrOfRows.get() });
                    session.getProvenanceReporter().receive(fileToProcess, jdbcURL, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    resultSetFlowFiles.add(fileToProcess);
                    // If we've reached the batch size, send out the flow files
                    if (outputBatchSize > 0 && resultSetFlowFiles.size() >= outputBatchSize) {
                        session.transfer(resultSetFlowFiles, REL_SUCCESS);
                        session.commit();
                        resultSetFlowFiles.clear();
                    }
                } else {
                    // If there were no rows returned, don't send the flowfile
                    session.remove(fileToProcess);
                    context.yield();
                    break;
                }
                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }
            // Even though the maximum value and total count are known at this point, to maintain consistent behavior if Output Batch Size is set, do not store the attributes
            if (outputBatchSize == 0) {
                for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                    // Add maximum values as attributes
                    for (Map.Entry<String, String> entry : statePropertyMap.entrySet()) {
                        // Get just the column name from the key
                        String key = entry.getKey();
                        String colName = key.substring(key.lastIndexOf(NAMESPACE_DELIMITER) + NAMESPACE_DELIMITER.length());
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "maxvalue." + colName, entry.getValue()));
                    }
                    // set count on all FlowFiles
                    if (maxRowsPerFlowFile > 0) {
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
                    }
                }
            }
        } catch (final SQLException e) {
            throw e;
        }
        session.transfer(resultSetFlowFiles, REL_SUCCESS);
    } catch (final ProcessException | SQLException e) {
        logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
        if (!resultSetFlowFiles.isEmpty()) {
            session.remove(resultSetFlowFiles);
        }
        context.yield();
    } finally {
        session.commit();
        try {
            // Update the state
            stateManager.setState(statePropertyMap, Scope.CLUSTER);
        } catch (IOException ioe) {
            getLogger().error("{} failed to update State Manager, maximum observed values will not be recorded", new Object[] { this, ioe });
        }
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) StandardValidators(org.apache.nifi.processor.util.StandardValidators) Arrays(java.util.Arrays) Connection(java.sql.Connection) StringUtils(org.apache.commons.lang3.StringUtils) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Scope(org.apache.nifi.components.state.Scope) ResultSet(java.sql.ResultSet) Map(java.util.Map) ParseException(java.text.ParseException) TriggerSerially(org.apache.nifi.annotation.behavior.TriggerSerially) FlowFile(org.apache.nifi.flowfile.FlowFile) NORMALIZE_NAMES_FOR_AVRO(org.apache.nifi.processors.standard.util.JdbcCommon.NORMALIZE_NAMES_FOR_AVRO) Set(java.util.Set) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) UUID(java.util.UUID) StateMap(org.apache.nifi.components.state.StateMap) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) Stateful(org.apache.nifi.annotation.behavior.Stateful) List(java.util.List) DynamicProperty(org.apache.nifi.annotation.behavior.DynamicProperty) JdbcCommon(org.apache.nifi.processors.standard.util.JdbcCommon) StopWatch(org.apache.nifi.util.StopWatch) Tags(org.apache.nifi.annotation.documentation.Tags) DBCPService(org.apache.nifi.dbcp.DBCPService) ResultSetMetaData(java.sql.ResultSetMetaData) IntStream(java.util.stream.IntStream) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) USE_AVRO_LOGICAL_TYPES(org.apache.nifi.processors.standard.util.JdbcCommon.USE_AVRO_LOGICAL_TYPES) DatabaseMetaData(java.sql.DatabaseMetaData) HashMap(java.util.HashMap) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) DEFAULT_SCALE(org.apache.nifi.processors.standard.util.JdbcCommon.DEFAULT_SCALE) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) Relationship(org.apache.nifi.processor.Relationship) DEFAULT_PRECISION(org.apache.nifi.processors.standard.util.JdbcCommon.DEFAULT_PRECISION) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) StateManager(org.apache.nifi.components.state.StateManager) ProcessContext(org.apache.nifi.processor.ProcessContext) IOException(java.io.IOException) SeeAlso(org.apache.nifi.annotation.documentation.SeeAlso) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) Statement(java.sql.Statement) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) OnStopped(org.apache.nifi.annotation.lifecycle.OnStopped) Collections(java.util.Collections)
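
QueryDatabaseTable starts one watch before executing the query and reads getElapsed(...) repeatedly, once per result-set FlowFile it reports to the provenance reporter. A minimal sketch of re-reading a single running watch (RepeatedElapsedSketch and the loop body are illustrative):

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class RepeatedElapsedSketch {
    public static void main(final String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(true);
        for (int fragment = 0; fragment < 3; fragment++) {
            Thread.sleep(50L); // stands in for writing one Avro fragment
            System.out.println("fragment " + fragment + " produced after "
                    + stopWatch.getElapsed(TimeUnit.MILLISECONDS) + " ms");
        }
    }
}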

Aggregations

StopWatch (org.apache.nifi.util.StopWatch)72 FlowFile (org.apache.nifi.flowfile.FlowFile)59 IOException (java.io.IOException)41 ProcessException (org.apache.nifi.processor.exception.ProcessException)37 InputStream (java.io.InputStream)27 ComponentLog (org.apache.nifi.logging.ComponentLog)27 OutputStream (java.io.OutputStream)21 HashMap (java.util.HashMap)16 ArrayList (java.util.ArrayList)13 Map (java.util.Map)11 ProcessSession (org.apache.nifi.processor.ProcessSession)11 AtomicLong (java.util.concurrent.atomic.AtomicLong)10 InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)10 StreamCallback (org.apache.nifi.processor.io.StreamCallback)10 HashSet (java.util.HashSet)9 Path (org.apache.hadoop.fs.Path)9 Charset (java.nio.charset.Charset)8 AtomicReference (java.util.concurrent.atomic.AtomicReference)8 FileSystem (org.apache.hadoop.fs.FileSystem)8 PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor)8