Example 76 with ProcessContext

Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

The class PartitionRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final Map<String, RecordPath> recordPaths;
    try {
        recordPaths = context.getProperties().keySet().stream().filter(prop -> prop.isDynamic()).collect(Collectors.toMap(prop -> prop.getName(), prop -> getRecordPath(context, prop, flowFile)));
    } catch (final Exception e) {
        getLogger().error("Failed to compile RecordPath for {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final Map<RecordValueMap, RecordSetWriter> writerMap = new HashMap<>();
    try (final InputStream in = session.read(flowFile)) {
        final Map<String, String> originalAttributes = flowFile.getAttributes();
        final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger());
        final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
        Record record;
        while ((record = reader.nextRecord()) != null) {
            final Map<String, List<ValueWrapper>> recordMap = new HashMap<>();
            // Evaluate all of the RecordPath's for this Record
            for (final Map.Entry<String, RecordPath> entry : recordPaths.entrySet()) {
                final String propName = entry.getKey();
                final RecordPath recordPath = entry.getValue();
                final Stream<FieldValue> fieldValueStream = recordPath.evaluate(record).getSelectedFields();
                final List<ValueWrapper> fieldValues = fieldValueStream.map(fieldVal -> new ValueWrapper(fieldVal.getValue())).collect(Collectors.toList());
                recordMap.put(propName, fieldValues);
            }
            final RecordValueMap recordValueMap = new RecordValueMap(recordMap);
            // Get the RecordSetWriter that contains the same values for all RecordPaths - or create one if none exists.
            RecordSetWriter writer = writerMap.get(recordValueMap);
            if (writer == null) {
                final FlowFile childFlowFile = session.create(flowFile);
                recordValueMap.setFlowFile(childFlowFile);
                final OutputStream out = session.write(childFlowFile);
                writer = writerFactory.createWriter(getLogger(), writeSchema, out);
                writer.beginRecordSet();
                writerMap.put(recordValueMap, writer);
            }
            writer.write(record);
        }
        // For each RecordSetWriter, finish the record set and close the writer.
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            final WriteResult writeResult = writer.finishRecordSet();
            writer.close();
            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(valueMap.getAttributes());
            attributes.putAll(writeResult.getAttributes());
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            FlowFile childFlowFile = valueMap.getFlowFile();
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            session.adjustCounter("Record Processed", writeResult.getRecordCount(), false);
        }
    } catch (final Exception e) {
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            try {
                writer.close();
            } catch (final IOException e1) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be cleaned up appropriately", new Object[] { flowFile, e1 });
            }
            session.remove(valueMap.getFlowFile());
        }
        getLogger().error("Failed to partition {}", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    // Transfer the child FlowFiles. We wait until the end to do this, rather than transferring them as they are created,
    // because we want to ensure that we are able to remove the child flowfiles in case of a failure.
    for (final RecordValueMap valueMap : writerMap.keySet()) {
        session.transfer(valueMap.getFlowFile(), REL_SUCCESS);
    }
    session.transfer(flowFile, REL_ORIGINAL);
}
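
Each dynamic property is compiled to a RecordPath, and every distinct combination of evaluated values yields one child FlowFile. A minimal TestRunner sketch of driving this processor, assuming the mock record reader/writer utilities from NiFi's test modules (nifi-mock-record-utils) and the usual public property and relationship constants; field names and record values are illustrative:

import org.apache.nifi.processors.standard.PartitionRecord;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.serialization.record.MockRecordParser;
import org.apache.nifi.serialization.record.MockRecordWriter;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

public class PartitionRecordSketch {
    public static void main(final String[] args) throws InitializationException {
        final TestRunner runner = TestRunners.newTestRunner(PartitionRecord.class);

        // Mock reader supplies three records; mock writer emits simple text output.
        final MockRecordParser reader = new MockRecordParser();
        reader.addSchemaField("name", RecordFieldType.STRING);
        reader.addSchemaField("country", RecordFieldType.STRING);
        reader.addRecord("Alice", "US");
        reader.addRecord("Bob", "FR");
        reader.addRecord("Carol", "US");
        final MockRecordWriter writer = new MockRecordWriter(null, false);

        runner.addControllerService("reader", reader);
        runner.enableControllerService(reader);
        runner.addControllerService("writer", writer);
        runner.enableControllerService(writer);
        runner.setProperty(PartitionRecord.RECORD_READER, "reader");
        runner.setProperty(PartitionRecord.RECORD_WRITER, "writer");

        // Dynamic property: the name becomes an attribute key, the value is a RecordPath.
        runner.setProperty("country", "/country");

        runner.enqueue(new byte[0]);
        runner.run();

        // Two distinct /country values -> two child FlowFiles, plus the original.
        runner.assertTransferCount(PartitionRecord.REL_SUCCESS, 2);
        runner.assertTransferCount(PartitionRecord.REL_ORIGINAL, 1);
    }
}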

Example 77 with ProcessContext

Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

The class EvaluateJsonPath, method onTrigger:

@Override
public void onTrigger(final ProcessContext processContext, final ProcessSession processSession) throws ProcessException {
    FlowFile flowFile = processSession.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    DocumentContext documentContext;
    try {
        documentContext = validateAndEstablishJsonContext(processSession, flowFile);
    } catch (InvalidJsonException e) {
        logger.error("FlowFile {} did not have valid JSON content.", new Object[] { flowFile });
        processSession.transfer(flowFile, REL_FAILURE);
        return;
    }
    Set<Map.Entry<String, JsonPath>> attributeJsonPathEntries = attributeToJsonPathEntrySetQueue.poll();
    if (attributeJsonPathEntries == null) {
        attributeJsonPathEntries = processContext.getProperties().entrySet().stream().filter(e -> e.getKey().isDynamic()).collect(Collectors.toMap(e -> e.getKey().getName(), e -> JsonPath.compile(e.getValue()))).entrySet();
    }
    try {
        // We'll only be using this map if destinationIsAttribute == true
        final Map<String, String> jsonPathResults = destinationIsAttribute ? new HashMap<>(attributeJsonPathEntries.size()) : Collections.emptyMap();
        for (final Map.Entry<String, JsonPath> attributeJsonPathEntry : attributeJsonPathEntries) {
            final String jsonPathAttrKey = attributeJsonPathEntry.getKey();
            final JsonPath jsonPathExp = attributeJsonPathEntry.getValue();
            Object result;
            try {
                Object potentialResult = documentContext.read(jsonPathExp);
                if (returnType.equals(RETURN_TYPE_SCALAR) && !isJsonScalar(potentialResult)) {
                    logger.error("Unable to return a scalar value for the expression {} for FlowFile {}. Evaluated value was {}. Transferring to {}.", new Object[] { jsonPathExp.getPath(), flowFile.getId(), potentialResult.toString(), REL_FAILURE.getName() });
                    processSession.transfer(flowFile, REL_FAILURE);
                    return;
                }
                result = potentialResult;
            } catch (PathNotFoundException e) {
                if (pathNotFound.equals(PATH_NOT_FOUND_WARN)) {
                    logger.warn("FlowFile {} could not find path {} for attribute key {}.", new Object[] { flowFile.getId(), jsonPathExp.getPath(), jsonPathAttrKey }, e);
                }
                if (destinationIsAttribute) {
                    jsonPathResults.put(jsonPathAttrKey, StringUtils.EMPTY);
                    continue;
                } else {
                    processSession.transfer(flowFile, REL_NO_MATCH);
                    return;
                }
            }
            final String resultRepresentation = getResultRepresentation(result, nullDefaultValue);
            if (destinationIsAttribute) {
                jsonPathResults.put(jsonPathAttrKey, resultRepresentation);
            } else {
                flowFile = processSession.write(flowFile, out -> {
                    try (OutputStream outputStream = new BufferedOutputStream(out)) {
                        outputStream.write(resultRepresentation.getBytes(StandardCharsets.UTF_8));
                    }
                });
                processSession.getProvenanceReporter().modifyContent(flowFile, "Replaced content with result of expression " + jsonPathExp.getPath());
            }
        }
        // jsonPathResults map will be empty if this is false
        if (destinationIsAttribute) {
            flowFile = processSession.putAllAttributes(flowFile, jsonPathResults);
        }
        processSession.transfer(flowFile, REL_MATCH);
    } finally {
        attributeToJsonPathEntrySetQueue.offer(attributeJsonPathEntries);
    }
}
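
A minimal usage sketch for this processor with TestRunner. It assumes EvaluateJsonPath's DESTINATION property and its flowfile-attribute destination constant are as defined in nifi-standard-processors; the dynamic property name and JSON payload are illustrative:

import java.nio.charset.StandardCharsets;

import org.apache.nifi.processors.standard.EvaluateJsonPath;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

public class EvaluateJsonPathSketch {
    public static void main(final String[] args) {
        final TestRunner runner = TestRunners.newTestRunner(EvaluateJsonPath.class);
        // Write results to attributes instead of replacing the FlowFile content.
        runner.setProperty(EvaluateJsonPath.DESTINATION, EvaluateJsonPath.DESTINATION_ATTRIBUTE);
        // Dynamic property: the name is the attribute key, the value a JsonPath expression.
        runner.setProperty("user.name", "$.user.name");

        runner.enqueue("{\"user\":{\"name\":\"nifi\"}}".getBytes(StandardCharsets.UTF_8));
        runner.run();

        runner.assertAllFlowFilesTransferred(EvaluateJsonPath.REL_MATCH, 1);
        runner.getFlowFilesForRelationship(EvaluateJsonPath.REL_MATCH)
                .get(0)
                .assertAttributeEquals("user.name", "nifi");
    }
}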

Example 78 with ProcessContext

Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

The class GenerateTableFetch, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    // Fetch the column/table info once (if the table name and max value columns are not dynamic). Otherwise do the setup later
    if (!isDynamicTableName && !isDynamicMaxValues && !setupComplete.get()) {
        super.setup(context);
    }
    ProcessSession session = sessionFactory.createSession();
    FlowFile fileToProcess = null;
    if (context.hasIncomingConnection()) {
        fileToProcess = session.get();
        if (fileToProcess == null) {
            // Incoming connection with no flow file available, do no work (see capability description)
            return;
        }
    }
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(fileToProcess).getValue();
    final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
    final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
    final int partitionSize = context.getProperty(PARTITION_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions(fileToProcess).getValue();
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    FlowFile finalFileToProcess = fileToProcess;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        logger.error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
        context.yield();
        return;
    }
    try {
        // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
        // set as the current state map (after the session has been committed)
        final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
        // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
        for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
            String maxPropKey = maxProp.getKey().toLowerCase();
            String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
            if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
                String newMaxPropValue;
                // If we can't find the value at the fully-qualified key name, it is possible (under a previous scheme)
                // the value has been stored under a key that is only the column name. Fall back to check the column name,
                // but store the new initial max value under the fully-qualified key.
                if (statePropertyMap.containsKey(maxPropKey)) {
                    newMaxPropValue = statePropertyMap.get(maxPropKey);
                } else {
                    newMaxPropValue = maxProp.getValue();
                }
                statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
            }
        }
        // Build a WHERE clause with maximum-value columns (if they exist), and a list of column names that will contain MAX(<column>) aliases. The
        // executed SQL query will retrieve the count of all records after the filter(s) have been applied, as well as the new maximum values for the
        // specified columns. This allows the processor to generate the correctly partitioned SQL statements as well as to update the state with the
        // latest observed maximum values.
        String whereClause = null;
        List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? new ArrayList<>(0) : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
        List<String> maxValueClauses = new ArrayList<>(maxValueColumnNameList.size());
        String columnsClause = null;
        List<String> maxValueSelectColumns = new ArrayList<>(maxValueColumnNameList.size() + 1);
        maxValueSelectColumns.add("COUNT(*)");
        // For each maximum-value column, get a WHERE filter and a MAX(column) alias
        IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
            String colName = maxValueColumnNameList.get(index);
            maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
            String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
            if (!StringUtils.isEmpty(maxValue)) {
                if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
                    // This means column type cache is clean after instance reboot. We should re-cache column type
                    super.setup(context, false, finalFileToProcess);
                }
                Integer type = getColumnType(tableName, colName);
                // Add a condition for the WHERE clause
                maxValueClauses.add(colName + (index == 0 ? " > " : " >= ") + getLiteralByType(type, maxValue, dbAdapter.getName()));
            }
        });
        if (customWhereClause != null) {
            // adding the custom WHERE clause (if defined) to the list of existing clauses.
            maxValueClauses.add("(" + customWhereClause + ")");
        }
        whereClause = StringUtils.join(maxValueClauses, " AND ");
        columnsClause = StringUtils.join(maxValueSelectColumns, ", ");
        // Build a SELECT query with maximum-value columns (if present)
        final String selectQuery = dbAdapter.getSelectStatement(tableName, columnsClause, whereClause, null, null, null);
        long rowCount = 0;
        try (final Connection con = dbcpService.getConnection();
            final Statement st = con.createStatement()) {
            final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asTimePeriod(TimeUnit.SECONDS).intValue();
            // timeout in seconds
            st.setQueryTimeout(queryTimeout);
            logger.debug("Executing {}", new Object[] { selectQuery });
            ResultSet resultSet;
            resultSet = st.executeQuery(selectQuery);
            if (resultSet.next()) {
                // Total row count is in the first column
                rowCount = resultSet.getLong(1);
                // Update the state map with the newly-observed maximum values
                ResultSetMetaData rsmd = resultSet.getMetaData();
                for (int i = 2; i <= rsmd.getColumnCount(); i++) {
                    // Some JDBC drivers consider the columns name and label to be very different things.
                    // Since this column has been aliased lets check the label first,
                    // if there is no label we'll use the column name.
                    String resultColumnName = (StringUtils.isNotEmpty(rsmd.getColumnLabel(i)) ? rsmd.getColumnLabel(i) : rsmd.getColumnName(i)).toLowerCase();
                    String fullyQualifiedStateKey = getStateKey(tableName, resultColumnName);
                    String resultColumnCurrentMax = statePropertyMap.get(fullyQualifiedStateKey);
                    if (StringUtils.isEmpty(resultColumnCurrentMax) && !isDynamicTableName) {
                        // If we can't find the value at the fully-qualified key name and the table name is static, it is possible (under a previous scheme)
                        // the value has been stored under a key that is only the column name. Fall back to check the column name; either way, when a new
                        // maximum value is observed, it will be stored under the fully-qualified key from then on.
                        resultColumnCurrentMax = statePropertyMap.get(resultColumnName);
                    }
                    int type = rsmd.getColumnType(i);
                    if (isDynamicTableName) {
                        // We haven't pre-populated the column type map if the table name is dynamic, so do it here
                        columnTypeMap.put(fullyQualifiedStateKey, type);
                    }
                    try {
                        String newMaxValue = getMaxValueFromRow(resultSet, i, type, resultColumnCurrentMax, dbAdapter.getName());
                        if (newMaxValue != null) {
                            statePropertyMap.put(fullyQualifiedStateKey, newMaxValue);
                        }
                    } catch (ParseException | IOException pie) {
                        // Fail the whole thing here before we start creating flow files and such
                        throw new ProcessException(pie);
                    }
                }
            } else {
                // Something is very wrong here, one row (even if count is zero) should be returned
                throw new SQLException("No rows returned from metadata query: " + selectQuery);
            }
            // For each maximum-value column, get a right-bounding WHERE condition
            IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
                String colName = maxValueColumnNameList.get(index);
                maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
                String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
                if (!StringUtils.isEmpty(maxValue)) {
                    if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
                        // This means column type cache is clean after instance reboot. We should re-cache column type
                        super.setup(context, false, finalFileToProcess);
                    }
                    Integer type = getColumnType(tableName, colName);
                    // Add a condition for the WHERE clause
                    maxValueClauses.add(colName + " <= " + getLiteralByType(type, maxValue, dbAdapter.getName()));
                }
            });
            // Update WHERE list to include new right hand boundaries
            whereClause = StringUtils.join(maxValueClauses, " AND ");
            final long numberOfFetches = (partitionSize == 0) ? 1 : (rowCount / partitionSize) + (rowCount % partitionSize == 0 ? 0 : 1);
            // Generate SQL statements to read "pages" of data
            for (long i = 0; i < numberOfFetches; i++) {
                Long limit = partitionSize == 0 ? null : (long) partitionSize;
                Long offset = partitionSize == 0 ? null : i * partitionSize;
                final String maxColumnNames = StringUtils.join(maxValueColumnNameList, ", ");
                final String query = dbAdapter.getSelectStatement(tableName, columnNames, whereClause, maxColumnNames, limit, offset);
                FlowFile sqlFlowFile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
                sqlFlowFile = session.write(sqlFlowFile, out -> out.write(query.getBytes()));
                sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.tableName", tableName);
                if (columnNames != null) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.columnNames", columnNames);
                }
                if (StringUtils.isNotBlank(whereClause)) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.whereClause", whereClause);
                }
                if (StringUtils.isNotBlank(maxColumnNames)) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.maxColumnNames", maxColumnNames);
                }
                sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.limit", String.valueOf(limit));
                if (partitionSize != 0) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.offset", String.valueOf(offset));
                }
                session.transfer(sqlFlowFile, REL_SUCCESS);
            }
            if (fileToProcess != null) {
                session.remove(fileToProcess);
            }
        } catch (SQLException e) {
            if (fileToProcess != null) {
                logger.error("Unable to execute SQL select query {} due to {}, routing {} to failure", new Object[] { selectQuery, e, fileToProcess });
                fileToProcess = session.putAttribute(fileToProcess, "generatetablefetch.sql.error", e.getMessage());
                session.transfer(fileToProcess, REL_FAILURE);
            } else {
                logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
                throw new ProcessException(e);
            }
        }
        session.commit();
        try {
            // Update the state
            stateManager.setState(statePropertyMap, Scope.CLUSTER);
        } catch (IOException ioe) {
            logger.error("{} failed to update State Manager, observed maximum values will not be recorded. " + "Also, any generated SQL statements may be duplicated.", new Object[] { this, ioe });
        }
    } catch (final ProcessException pe) {
        // Log the cause of the ProcessException if it is available
        Throwable t = (pe.getCause() == null ? pe : pe.getCause());
        logger.error("Error during processing: {}", new Object[] { t.getMessage() }, t);
        session.rollback();
        context.yield();
    }
}
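
The paging step above reduces to one formula: partitionSize == 0 means a single unpaged statement, otherwise the rows are divided into ceiling(rowCount / partitionSize) pages. A standalone sketch mirroring that computation:

public class FetchPagingSketch {

    // Mirrors the numberOfFetches computation in onTrigger above.
    static long numberOfFetches(final long rowCount, final int partitionSize) {
        return (partitionSize == 0)
                ? 1
                : (rowCount / partitionSize) + (rowCount % partitionSize == 0 ? 0 : 1);
    }

    public static void main(final String[] args) {
        // 10 rows in pages of 3 -> 4 statements: LIMIT 3 with OFFSET 0, 3, 6, 9.
        System.out.println(numberOfFetches(10, 3)); // 4
        // An exact multiple needs no partial trailing page.
        System.out.println(numberOfFetches(9, 3));  // 3
        // partitionSize == 0 -> one unpaged statement (limit and offset are null).
        System.out.println(numberOfFetches(42, 0)); // 1
    }
}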

Example 79 with ProcessContext

Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

The class FetchParquetTest, method testIOExceptionCreatingReaderShouldRouteToRetry:

@Test
public void testIOExceptionCreatingReaderShouldRouteToRetry() throws InitializationException, IOException {
    final FetchParquet proc = new FetchParquet() {

        @Override
        public HDFSRecordReader createHDFSRecordReader(ProcessContext context, FlowFile flowFile, Configuration conf, Path path) throws IOException {
            throw new IOException("IOException");
        }
    };
    configure(proc);
    final File parquetDir = new File(DIRECTORY);
    final File parquetFile = new File(parquetDir, "testFetchParquetToCSV.parquet");
    final int numUsers = 10;
    writeParquetUsers(parquetFile, numUsers);
    final Map<String, String> attributes = new HashMap<>();
    attributes.put(CoreAttributes.PATH.key(), parquetDir.getAbsolutePath());
    attributes.put(CoreAttributes.FILENAME.key(), parquetFile.getName());
    testRunner.enqueue("TRIGGER", attributes);
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(FetchParquet.REL_RETRY, 1);
    final MockFlowFile flowFile = testRunner.getFlowFilesForRelationship(FetchParquet.REL_RETRY).get(0);
    flowFile.assertContentEquals("TRIGGER");
}

Example 80 with ProcessContext

Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

The class SelectHiveQL, method onTrigger:

private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
    FlowFile flowfile = null;
    // If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
    // However, if we have no FlowFile and we have connections coming from other Processors, then
    // we know that we should run only if we have a FlowFile.
    if (context.hasIncomingConnection()) {
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }
    final ComponentLog logger = getLogger();
    final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class);
    final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
    final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());
    // Source the SQL
    final String selectQuery;
    if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
        selectQuery = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
    } else {
        // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
        // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
        final StringBuilder queryContents = new StringBuilder();
        session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
        selectQuery = queryContents.toString();
    }
    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() : 0;
    final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
    final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
    final StopWatch stopWatch = new StopWatch(true);
    final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue();
    final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue();
    final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
    // Escape flag from its own CSV escape property (HIVEQL_CSV_ESCAPE is assumed to be defined
    // alongside the other CSV properties; the snippet previously reused HIVEQL_CSV_HEADER here).
    final boolean escape = context.getProperty(HIVEQL_CSV_ESCAPE).asBoolean();
    final String fragmentIdentifier = UUID.randomUUID().toString();
    try (final Connection con = dbcpService.getConnection();
        final Statement st = (flowbased ? con.prepareStatement(selectQuery) : con.createStatement())) {
        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }
        final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
        try {
            logger.debug("Executing query {}", new Object[] { selectQuery });
            if (flowbased) {
                // Hive JDBC Doesn't Support this yet:
                // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
                // int paramCount = pmd.getParameterCount();
                // Alternate way to determine number of params in SQL.
                int paramCount = StringUtils.countMatches(selectQuery, "?");
                if (paramCount > 0) {
                    setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
                }
            }
            final ResultSet resultSet;
            try {
                resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(selectQuery));
            } catch (SQLException se) {
                // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
                flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
                fileToProcess = null;
                throw se;
            }
            int fragmentIndex = 0;
            String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                flowfile = (flowfile == null) ? session.create() : session.create(flowfile);
                if (baseFilename == null) {
                    baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
                }
                try {
                    flowfile = session.write(flowfile, out -> {
                        try {
                            if (AVRO.equals(outputFormat)) {
                                nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro));
                            } else if (CSV.equals(outputFormat)) {
                                CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile);
                                nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
                            } else {
                                nrOfRows.set(0L);
                                throw new ProcessException("Unsupported output format: " + outputFormat);
                            }
                        } catch (final SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records.", e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(flowfile);
                    throw e;
                }
                if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
                    final Map<String, String> attributes = new HashMap<>();
                    // Set attribute for how many rows were selected
                    attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
                    try {
                        // Set input/output table names by parsing the query
                        attributes.putAll(toQueryTableAttributes(findTableNames(selectQuery)));
                    } catch (Exception e) {
                        // If failed to parse the query, just log a warning message, but continue.
                        getLogger().warn("Failed to parse query: {} due to {}", new Object[] { selectQuery, e }, e);
                    }
                    // Set MIME type on output document and add extension to filename
                    if (AVRO.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
                        attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro");
                    } else if (CSV.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
                        attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv");
                    }
                    if (maxRowsPerFlowFile > 0) {
                        attributes.put("fragment.identifier", fragmentIdentifier);
                        attributes.put("fragment.index", String.valueOf(fragmentIndex));
                    }
                    flowfile = session.putAllAttributes(flowfile, attributes);
                    logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { flowfile, nrOfRows.get() });
                    if (context.hasIncomingConnection()) {
                        // If the flow file came from an incoming connection, issue a Fetch provenance event
                        session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    } else {
                        // If we created a flow file from rows received from Hive, issue a Receive provenance event
                        session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    }
                    resultSetFlowFiles.add(flowfile);
                } else {
                    // If there were no rows returned (and the first flow file has been sent), we're done processing, so remove the flowfile and carry on
                    session.remove(flowfile);
                    break;
                }
                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }
            for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                // Set count on all FlowFiles
                if (maxRowsPerFlowFile > 0) {
                    resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
                }
            }
        } catch (final SQLException e) {
            throw e;
        }
        session.transfer(resultSetFlowFiles, REL_SUCCESS);
    } catch (final ProcessException | SQLException e) {
        logger.error("Issue processing SQL {} due to {}.", new Object[] { selectQuery, e });
        if (flowfile == null) {
            // This can happen if any exceptions occur while setting up the connection, statement, etc.
            logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", new Object[] { selectQuery, e });
            context.yield();
        } else {
            if (context.hasIncomingConnection()) {
                logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, flowfile, e });
                flowfile = session.penalize(flowfile);
            } else {
                logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", new Object[] { selectQuery, e });
                context.yield();
            }
            session.transfer(flowfile, REL_FAILURE);
        }
    } finally {
        if (fileToProcess != null) {
            session.remove(fileToProcess);
        }
    }
}
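
Hive's JDBC driver does not support ParameterMetaData, so the flow-based branch above counts '?' characters in the query text to decide how many parameters to bind. A self-contained sketch of that textual count and its main caveat:

public class HiveParamCountSketch {

    // Same textual count the processor performs via StringUtils.countMatches(selectQuery, "?").
    static int countPlaceholders(final String sql) {
        int count = 0;
        for (int i = 0; i < sql.length(); i++) {
            if (sql.charAt(i) == '?') {
                count++;
            }
        }
        return count;
    }

    public static void main(final String[] args) {
        System.out.println(countPlaceholders("SELECT * FROM users WHERE country = ? AND age > ?")); // 2
        // Caveat: a '?' inside a string literal is counted too, even though it is not a bind parameter.
        System.out.println(countPlaceholders("SELECT * FROM notes WHERE body = 'why?' AND id = ?")); // 2, but only 1 real parameter
    }
}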
