
Example 16 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

From the class SetSavepoint, method getNextFlowFilex.

private Optional<FlowFile> getNextFlowFilex(ProcessContext context, ProcessSession session, SavepointProvider provider, PropertyValue pvSavepointId) {
    long expirationDuration = context.getProperty(EXPIRATION_DURATION).asTimePeriod(TimeUnit.MILLISECONDS);
    List<FlowFile> match = new ArrayList<>();
    List<FlowFile> noMatch = new LinkedList<>();
    session.get(session.getQueueSize().getObjectCount()).stream()
        .sorted(Comparator.comparing(FlowFile::getLastQueueDate).reversed())
        .forEach(f -> {
        boolean isMatch = false;
        if (match.isEmpty()) {
            final String savepointIdStr = pvSavepointId.evaluateAttributeExpressions(f).getValue();
            String processorId = getIdentifier();
            SavepointEntry entry = provider.lookupEntry(savepointIdStr);
            if (entry == null || entry.getState(processorId) == null || isExpired(f, expirationDuration)) {
                isMatch = true;
            } else if (SavepointEntry.SavePointState.WAIT != entry.getState(processorId)) {
                isMatch = true;
            }
            // route the flowfile to the match or noMatch list
            if (isMatch) {
                match.add(f);
            } else {
                noMatch.add(f);
            }
        } else {
            noMatch.add(f);
        }
    });
    // transfer the non-matching flowfiles back to the incoming queue
    session.transfer(noMatch);
    return match.isEmpty() ? Optional.empty() : Optional.of(match.get(0));
}
Also used : FlowFile (org.apache.nifi.flowfile.FlowFile), ArrayList (java.util.ArrayList), SavepointEntry (com.thinkbiganalytics.nifi.v2.core.savepoint.SavepointEntry), LinkedList (java.util.LinkedList)
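
For orientation, a caller would typically consume the Optional returned by getNextFlowFilex inside onTrigger. A minimal sketch, assuming a SAVEPOINT_ID property descriptor and a provider obtained from the savepoint controller service (both are assumptions, not shown in the snippet above):

// Hypothetical caller sketch; SAVEPOINT_ID and the provider wiring are assumptions
final PropertyValue pvSavepointId = context.getProperty(SAVEPOINT_ID);
final Optional<FlowFile> next = getNextFlowFilex(context, session, provider, pvSavepointId);
if (next.isPresent()) {
    FlowFile flowFile = next.get();
    // continue processing the selected flowfile here
}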

Example 17 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

From the class TriggerSavepoint, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // Fetch the controller
    final SavepointController controller = context.getProperty(SAVEPOINT_SERVICE).asControllerService(SavepointController.class);
    final SavepointProvider provider = controller.getProvider();
    final ComponentLog logger = getLogger();
    final PropertyValue pvSavepointId = context.getProperty(SAVEPOINT_ID);
    final PropertyValue pvBehavior = context.getProperty(BEHAVIOR);
    final PropertyValue pvMaxRetries = context.getProperty(MAX_RETRIES);
    // Handle the flowfile according to the configured behavior
    String behavior = pvBehavior.getValue();
    if (!FAIL.equals(behavior)) {
        final String savepointIdStr = pvSavepointId.evaluateAttributeExpressions(flowFile).getValue();
        Lock lock = null;
        try {
            lock = provider.lock(savepointIdStr);
            if (lock != null) {
                if (RELEASE.equals(behavior)) {
                    provider.release(savepointIdStr, lock, true);
                    flowFile = session.putAttribute(flowFile, SavepointProvenanceProperties.SAVE_POINT_BEHAVIOR_STATUS, behavior);
                    session.transfer(flowFile, REL_SUCCESS);
                } else if (RETRY.equals(behavior)) {
                    // Check the retry count from the SetSavepoint
                    String sRetryCount = flowFile.getAttribute(SetSavepoint.SAVEPOINT_RETRY_COUNT);
                    int retryCount = 0;
                    try {
                        if (sRetryCount != null) {
                            retryCount = Integer.parseInt(sRetryCount);
                        }
                    } catch (NumberFormatException nfe) {
                        logger.warn("{} has an invalid value '{}' on FlowFile {}", new Object[] { SetSavepoint.SAVEPOINT_RETRY_COUNT, sRetryCount, flowFile });
                    }
                    // Check retries
                    if (retryCount > pvMaxRetries.asInteger()) {
                        flowFile = session.putAttribute(flowFile, TriggerSavepoint.SAVE_POINT_MAX_RETRIES_EXCEEDED, sRetryCount);
                        session.transfer(flowFile, REL_MAX_RETRIES_EXCEEDED);
                        return;
                    }
                    // Penalize the flowfile once before retry is processed
                    String sRetryMarker = flowFile.getAttribute(SAVEPOINT_RETRY_MARKER);
                    if (StringUtils.isEmpty(sRetryMarker)) {
                        flowFile = session.penalize(flowFile);
                        flowFile = session.putAttribute(flowFile, SAVEPOINT_RETRY_MARKER, "1");
                        session.transfer(flowFile, REL_SELF);
                        return;
                    }
                    provider.retry(savepointIdStr, lock);
                    session.transfer(flowFile, REL_SUCCESS);
                }
            } else {
                // Unable to obtain lock. Try again
                session.transfer(flowFile, REL_SELF);
            }
        } catch (IOException | InvalidLockException | InvalidSetpointException e) {
            logger.info("Exception occurred for FlowFile {} exception {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
            // Check the trigger failure count on the flowfile
            String sTriggerFailureCount = flowFile.getAttribute(TriggerSavepoint.SAVEPOINT_TRIGGER_FAILURE_COUNT);
            int triggerFailureCount = 1;
            try {
                triggerFailureCount = (sTriggerFailureCount == null ? 0 : Integer.parseInt(sTriggerFailureCount));
                triggerFailureCount += 1;
            } catch (NumberFormatException nfe) {
                logger.info("Invalid attribute {}", new Object[] { TriggerSavepoint.SAVEPOINT_TRIGGER_FAILURE_COUNT });
            }
            flowFile = session.putAttribute(flowFile, TriggerSavepoint.SAVEPOINT_TRIGGER_FAILURE_COUNT, String.valueOf(triggerFailureCount));
            if (triggerFailureCount > MAX_FAILURES_ALLOWED) {
                logger.info("Maximum failures reached for sp {}, will route to fail.", new String[] { savepointIdStr });
                flowFile = session.putAttribute(flowFile, SavepointProvenanceProperties.SAVE_POINT_BEHAVIOR_STATUS, FAIL);
                flowFile = session.putAttribute(flowFile, TriggerSavepoint.SAVE_POINT_BEHAVIOR_STATUS_DESC, "Maximum failures at " + triggerFailureCount + " were reached.  Failing the flow");
                // add in the trigger flow id so ops manager can get the key to retry if needed
                String triggerFlowFile = flowFile.getAttribute(SavepointProvenanceProperties.PARENT_FLOWFILE_ID);
                if (StringUtils.isNotBlank(triggerFlowFile)) {
                    flowFile = session.putAttribute(flowFile, SavepointProvenanceProperties.SAVE_POINT_TRIGGER_FLOWFILE, triggerFlowFile);
                }
                session.transfer(flowFile, REL_FAILURE);
            } else {
                logger.info("Failed to process flowfile for savepoint {}", new String[] { savepointIdStr }, e);
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_SELF);
            }
        } finally {
            if (lock != null) {
                try {
                    provider.unlock(lock);
                } catch (IOException e) {
                    logger.warn("Unable to unlock {}", new String[] { savepointIdStr });
                }
            }
        }
    } else {
        // Route to failure
        flowFile = session.putAttribute(flowFile, SavepointProvenanceProperties.SAVE_POINT_BEHAVIOR_STATUS, behavior);
        String triggerFlowFile = flowFile.getAttribute(SavepointProvenanceProperties.PARENT_FLOWFILE_ID);
        if (StringUtils.isNotBlank(triggerFlowFile)) {
            flowFile = session.putAttribute(flowFile, SavepointProvenanceProperties.SAVE_POINT_TRIGGER_FLOWFILE, triggerFlowFile);
        }
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile (org.apache.nifi.flowfile.FlowFile), InvalidSetpointException (com.thinkbiganalytics.nifi.v2.core.savepoint.InvalidSetpointException), SavepointController (com.thinkbiganalytics.nifi.v2.core.savepoint.SavepointController), PropertyValue (org.apache.nifi.components.PropertyValue), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), Lock (com.thinkbiganalytics.nifi.v2.core.savepoint.Lock), SavepointProvider (com.thinkbiganalytics.nifi.v2.core.savepoint.SavepointProvider), InvalidLockException (com.thinkbiganalytics.nifi.v2.core.savepoint.InvalidLockException)
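
The defensive count parsing above appears twice (SAVEPOINT_RETRY_COUNT and SAVEPOINT_TRIGGER_FAILURE_COUNT). A small hypothetical helper, not part of the Kylo source, could centralize that pattern:

// Hypothetical helper, not part of TriggerSavepoint in the Kylo source
private int parseCountAttribute(final FlowFile flowFile, final String attributeName, final int defaultValue) {
    final String value = flowFile.getAttribute(attributeName);
    if (value == null) {
        return defaultValue;
    }
    try {
        return Integer.parseInt(value);
    } catch (final NumberFormatException nfe) {
        getLogger().warn("{} has an invalid value '{}' on FlowFile {}", new Object[] { attributeName, value, flowFile });
        return defaultValue;
    }
}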

Example 18 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

From the class MergeHiveTableMetadata, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    try {
        final String databaseNameField = context.getProperty(DATABASE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String databaseOwnerField = context.getProperty(DATABASE_OWNER).evaluateAttributeExpressions(flowFile).getValue();
        final String tableCreateTimeField = context.getProperty(TABLE_CREATE_TIME).evaluateAttributeExpressions(flowFile).getValue();
        final String tableNameField = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String tableTypeField = context.getProperty(TABLE_TYPE).evaluateAttributeExpressions(flowFile).getValue();
        final String columnNameField = context.getProperty(COLUMN_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String columnTypeField = context.getProperty(COLUMN_TYPE).evaluateAttributeExpressions(flowFile).getValue();
        final String columnCommentField = context.getProperty(COLUMN_COMMENT).evaluateAttributeExpressions(flowFile).getValue();
        final StringBuffer sb = new StringBuffer();
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(InputStream in) throws IOException {
                sb.append(IOUtils.toString(in, Charset.defaultCharset()));
            }
        });
        logger.debug("The json that was received is: " + sb.toString());
        flowFile = session.write(flowFile, new OutputStreamCallback() {

            @Override
            public void process(final OutputStream out) throws IOException {
                try {
                    JSONArray array = new JSONArray(sb.toString());
                    Map<String, Metadata> tables = new HashMap<>();
                    for (int i = 0; i < array.length(); i++) {
                        JSONObject jsonObj = array.getJSONObject(i);
                        String databaseName = jsonObj.getString(databaseNameField);
                        String databaseOwner = jsonObj.getString(databaseOwnerField);
                        String tableName = jsonObj.getString(tableNameField);
                        String tableCreateTime = jsonObj.getString(tableCreateTimeField);
                        String tableType = jsonObj.getString(tableTypeField);
                        String columnName = jsonObj.getString(columnNameField);
                        String columnType = jsonObj.getString(columnTypeField);
                        String columnComment = jsonObj.getString(columnCommentField);
                        String key = databaseName + tableName;
                        if (tables.containsKey(key)) {
                            Metadata meta = tables.get(key);
                            HiveColumn column = new HiveColumn();
                            column.setColumnName(columnName);
                            column.setColumnType(columnType);
                            column.setColumnComment(columnComment);
                            meta.getHiveColumns().add(column);
                        } else {
                            Metadata meta = new Metadata();
                            meta.setDatabaseName(databaseName);
                            meta.setDatabaseOwner(databaseOwner);
                            meta.setTableCreateTime(tableCreateTime);
                            meta.setTableName(tableName);
                            meta.setTableType(tableType);
                            HiveColumn column = new HiveColumn();
                            column.setColumnName(columnName);
                            column.setColumnType(columnType);
                            column.setColumnComment(columnComment);
                            meta.getHiveColumns().add(column);
                            tables.put(key, meta);
                        }
                    }
                    List<Metadata> tablesAsList = new ArrayList<>();
                    Iterator iter = tables.entrySet().iterator();
                    while (iter.hasNext()) {
                        Map.Entry pair = (Map.Entry) iter.next();
                        tablesAsList.add((Metadata) pair.getValue());
                    }
                    Gson gson = new Gson();
                    JsonElement element = gson.toJsonTree(tablesAsList, new TypeToken<List<Metadata>>() {
                    }.getType());
                    JsonArray jsonArray = element.getAsJsonArray();
                    out.write(jsonArray.toString().getBytes());
                } catch (final Exception e) {
                    throw new ProcessException(e);
                }
            }
        });
        logger.info("*** Completed with status ");
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        logger.error("Unable to execute merge hive json job", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : HashMap (java.util.HashMap), OutputStream (java.io.OutputStream), ArrayList (java.util.ArrayList), Gson (com.google.gson.Gson), Iterator (java.util.Iterator), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), JSONArray (org.codehaus.jettison.json.JSONArray), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), ProcessException (org.apache.nifi.processor.exception.ProcessException), JsonArray (com.google.gson.JsonArray), JSONObject (org.codehaus.jettison.json.JSONObject), JsonElement (com.google.gson.JsonElement), TypeToken (com.google.gson.reflect.TypeToken), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), Map (java.util.Map)
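
Because InputStreamCallback and OutputStreamCallback each declare a single process method, the read step above could also be written with a Java 8 lambda. A minimal sketch, assuming the same session and flowFile variables are in scope:

// Equivalent read step using a lambda instead of an anonymous InputStreamCallback
final StringBuilder sb = new StringBuilder();
session.read(flowFile, in -> sb.append(IOUtils.toString(in, Charset.defaultCharset())));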

Example 19 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

From the class DistCopyHDFS, method onTrigger.

/**
 * onTrigger is called when the flow file proceeds through the processor
 *
 * @param context passed in by the framework and provides access to the data configured in the processor
 * @param session passed in by the framework and provides access to the flow file
 * @throws ProcessException if any framework actions fail
 */
@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem fs = getFileSystem(context);
    if (fs == null) {
        getLog().error("Couldn't initialize HDFS");
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    String filesJSON = context.getProperty(FILES).evaluateAttributeExpressions(flowFile).getValue();
    String source = context.getProperty(SOURCE).evaluateAttributeExpressions(flowFile).getValue();
    String destination = context.getProperty(DESTINATION).evaluateAttributeExpressions(flowFile).getValue();
    Gson jsonParser = new Gson();
    File[] filesList;
    ArrayList<Path> pathsList = new ArrayList<>();
    try {
        if (filesJSON != null && !filesJSON.isEmpty()) {
            filesList = jsonParser.fromJson(filesJSON, File[].class);
            if (filesList == null) {
                filesList = new File[0];
            }
            if (source != null && !source.isEmpty()) {
                for (File f : filesList) {
                    pathsList.add(new Path(source, f.getName()));
                }
            } else {
                for (File f : filesList) {
                    pathsList.add(new Path(f.getName()));
                }
            }
        } else {
            if (source == null || source.isEmpty()) {
                getLog().error(String.format("At least one of attributes: %s or %s needs to be set", SOURCE.getName(), FILES.getName()));
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
            pathsList.add(new Path(source));
        }
        DistCp distCp = getDistCp(pathsList, new Path(destination));
        Job job = distCp.execute();
        job.waitForCompletion(false);
    } catch (JsonSyntaxException e) {
        getLog().error("Files list attribute does not contain a proper JSON array");
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (Exception e) {
        getLog().error("Exception during processor execution: " + e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    session.transfer(flowFile, REL_SUCCESS);
}
Also used : Path (org.apache.hadoop.fs.Path), FlowFile (org.apache.nifi.flowfile.FlowFile), ArrayList (java.util.ArrayList), Gson (com.google.gson.Gson), JsonSyntaxException (com.google.gson.JsonSyntaxException), ProcessException (org.apache.nifi.processor.exception.ProcessException), DistCp (org.apache.hadoop.tools.DistCp), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job)
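
For illustration, the FILES property is expected to hold a JSON array that Gson can map onto the processor's File type; assuming that type is a simple POJO with a single name field (an assumption, since its definition is not shown above), a value might look like this:

// Hypothetical FILES attribute value; assumes the File POJO exposes a "name" field via getName()
String filesJSON = "[{\"name\":\"part-00000\"},{\"name\":\"part-00001\"}]";
File[] filesList = new Gson().fromJson(filesJSON, File[].class);
// With SOURCE set to "/data/source", the loop above builds /data/source/part-00000 and /data/source/part-00001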

Example 20 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

From the class RemoveHDFSFolder, method onTrigger.

@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session) throws ProcessException {
    // Get file to process
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // Get file system
    FileSystem fileSystem = getFileSystem(context);
    if (fileSystem == null) {
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    // Delete the specified paths
    String[] directories = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue().split("\\r?\\n");
    for (String string : directories) {
        // Check for possible missing properties - accidentally deleting parent directory instead of child
        String pathString = string.trim();
        if (!pathString.endsWith("/")) {
            getLog().error("Path must end with a slash /: " + pathString);
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        if (pathString.contains("//")) {
            getLog().error("Path cannot contain double slashes //: " + pathString);
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        // Check for relative directories - accidentally deleting folder in home directory
        Path path = new Path(pathString);
        if (!path.isAbsolute()) {
            getLog().error("Path is not absolute: " + path);
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        // Delete path
        getLog().debug("Deleting path: " + path);
        try {
            if (!fileSystem.delete(path, true) && fileSystem.exists(path)) {
                getLog().error("Failed to remove path: " + path);
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        } catch (IOException e) {
            getLog().error("Failed to remove path: " + path, e);
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
    }
    // Return success
    session.transfer(flowFile, REL_SUCCESS);
}
Also used : Path (org.apache.hadoop.fs.Path), FlowFile (org.apache.nifi.flowfile.FlowFile), FileSystem (org.apache.hadoop.fs.FileSystem), IOException (java.io.IOException)
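
For illustration, the DIRECTORY property is split on newlines, so a single flowfile can drive the removal of several folders. A hypothetical value that passes the absolute-path and trailing-slash checks above:

// Hypothetical DIRECTORY value: newline-separated absolute paths, each ending with a slash
String directoryProperty = "/data/archive/2017/\n/tmp/kylo/staging/";
String[] directories = directoryProperty.split("\\r?\\n");  // two paths, both absolute and slash-terminated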

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile): 500
IOException (java.io.IOException): 236
ProcessException (org.apache.nifi.processor.exception.ProcessException): 193
HashMap (java.util.HashMap): 160
InputStream (java.io.InputStream): 145
OutputStream (java.io.OutputStream): 131
ComponentLog (org.apache.nifi.logging.ComponentLog): 119
Test (org.junit.Test): 116
ArrayList (java.util.ArrayList): 113
Map (java.util.Map): 105
MockFlowFile (org.apache.nifi.util.MockFlowFile): 103
ProcessSession (org.apache.nifi.processor.ProcessSession): 99
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 83
Relationship (org.apache.nifi.processor.Relationship): 78
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 78
HashSet (java.util.HashSet): 75
List (java.util.List): 67
StopWatch (org.apache.nifi.util.StopWatch): 59
Set (java.util.Set): 56
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 55