Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
From the class TestConsumeKafkaRecord_1_0, method setup.
@Before
public void setup() throws InitializationException {
    // Mock the consumer pool and lease so the processor never talks to a real Kafka broker.
    mockLease = mock(ConsumerLease.class);
    mockConsumerPool = mock(ConsumerPool.class);
    ConsumeKafkaRecord_1_0 proc = new ConsumeKafkaRecord_1_0() {
        @Override
        protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
            return mockConsumerPool;
        }
    };
    runner = TestRunners.newTestRunner(proc);
    runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234");

    // Register the record reader and writer controller services required by the record-oriented processor.
    final String readerId = "record-reader";
    final MockRecordParser readerService = new MockRecordParser();
    readerService.addSchemaField("name", RecordFieldType.STRING);
    readerService.addSchemaField("age", RecordFieldType.INT);
    runner.addControllerService(readerId, readerService);
    runner.enableControllerService(readerService);

    final String writerId = "record-writer";
    final RecordSetWriterFactory writerService = new MockRecordWriter("name, age");
    runner.addControllerService(writerId, writerService);
    runner.enableControllerService(writerService);

    runner.setProperty(ConsumeKafkaRecord_1_0.RECORD_READER, readerId);
    runner.setProperty(ConsumeKafkaRecord_1_0.RECORD_WRITER, writerId);
}
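With the consumer pool mocked out, tests in this class can drive the processor without a broker. The sketch below is hypothetical, not taken from the NiFi test class: it assumes ConsumeKafkaRecord_1_0 exposes TOPICS, GROUP_ID, and REL_SUCCESS constants, that ConsumerPool#obtainConsumer takes a ProcessSession and a ProcessContext, and that the Mockito static imports (when, any, verify, times) and org.apache.nifi.processor.ProcessSession are already imported.
@Test
public void validateLeaseObtainedFromMockedPool() throws Exception {
    // Illustrative topic/group values; the property constants are assumptions.
    runner.setProperty(ConsumeKafkaRecord_1_0.TOPICS, "foo");
    runner.setProperty(ConsumeKafkaRecord_1_0.GROUP_ID, "my-group");

    // Assumed signature: ConsumerPool#obtainConsumer(ProcessSession, ProcessContext).
    when(mockConsumerPool.obtainConsumer(any(ProcessSession.class), any(ProcessContext.class))).thenReturn(mockLease);

    // Run a single iteration without the stop lifecycle; the mocked lease reports nothing to poll.
    runner.run(1, false);

    // No records are produced by the mock; the point is that the processor ran
    // against the mocked pool instead of a live broker.
    runner.assertTransferCount(ConsumeKafkaRecord_1_0.REL_SUCCESS, 0);
    verify(mockConsumerPool, times(1)).obtainConsumer(any(ProcessSession.class), any(ProcessContext.class));
}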
Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
From the class TestPublishKafkaRecord_1_0, method setup.
@Before
public void setup() throws InitializationException, IOException {
    // Mock the publisher pool and lease so publishing can be verified without a Kafka broker.
    mockPool = mock(PublisherPool.class);
    mockLease = mock(PublisherLease.class);
    Mockito.doCallRealMethod().when(mockLease).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
            any(RecordSchema.class), any(String.class), any(String.class));
    when(mockPool.obtainPublisher()).thenReturn(mockLease);

    runner = TestRunners.newTestRunner(new PublishKafkaRecord_1_0() {
        @Override
        protected PublisherPool createPublisherPool(final ProcessContext context) {
            return mockPool;
        }
    });
    runner.setProperty(PublishKafkaRecord_1_0.TOPIC, TOPIC_NAME);

    // Register the record reader and writer controller services used to (de)serialize FlowFile content.
    final String readerId = "record-reader";
    final MockRecordParser readerService = new MockRecordParser();
    readerService.addSchemaField("name", RecordFieldType.STRING);
    readerService.addSchemaField("age", RecordFieldType.INT);
    runner.addControllerService(readerId, readerService);
    runner.enableControllerService(readerService);

    final String writerId = "record-writer";
    final RecordSetWriterFactory writerService = new MockRecordWriter("name, age");
    runner.addControllerService(writerId, writerService);
    runner.enableControllerService(writerService);

    runner.setProperty(PublishKafkaRecord_1_0.RECORD_READER, readerId);
    runner.setProperty(PublishKafkaRecord_1_0.RECORD_WRITER, writerId);
    runner.setProperty(PublishKafka_1_0.DELIVERY_GUARANTEE, PublishKafka_1_0.DELIVERY_REPLICATED);
}
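A minimal hypothetical test built on this setup. It assumes the processor short-circuits when no FlowFile is queued, so the mocked pool is never asked for a lease, and that any remaining required properties have defaults; the publish(...) signature is the one stubbed above, and the usual Mockito static imports are assumed.
@Test
public void validateNoPublishWithoutInput() {
    // Nothing enqueued: onTrigger is expected to return without requesting a lease.
    runner.run();

    verify(mockPool, times(0)).obtainPublisher();
    verify(mockLease, times(0)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
            any(RecordSchema.class), any(String.class), any(String.class));
}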
Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
From the class FetchParquetTest, method testIOExceptionWhileReadingShouldRouteToRetry.
@Test
public void testIOExceptionWhileReadingShouldRouteToRetry() throws IOException, InitializationException {
    // Override createHDFSRecordReader to inject a reader that always fails, so the
    // retry path can be exercised deterministically.
    final FetchParquet proc = new FetchParquet() {
        @Override
        public HDFSRecordReader createHDFSRecordReader(ProcessContext context, FlowFile flowFile, Configuration conf, Path path) throws IOException {
            return new HDFSRecordReader() {
                @Override
                public Record nextRecord() throws IOException {
                    throw new IOException("IOException");
                }

                @Override
                public void close() throws IOException {
                }
            };
        }
    };
    configure(proc);

    final File parquetDir = new File(DIRECTORY);
    final File parquetFile = new File(parquetDir, "testFetchParquetToCSV.parquet");
    final int numUsers = 10;
    writeParquetUsers(parquetFile, numUsers);

    final Map<String, String> attributes = new HashMap<>();
    attributes.put(CoreAttributes.PATH.key(), parquetDir.getAbsolutePath());
    attributes.put(CoreAttributes.FILENAME.key(), parquetFile.getName());
    testRunner.enqueue("TRIGGER", attributes);
    testRunner.run();

    // The failing read should route the original FlowFile to retry with its content untouched.
    testRunner.assertAllFlowFilesTransferred(FetchParquet.REL_RETRY, 1);
    final MockFlowFile flowFile = testRunner.getFlowFilesForRelationship(FetchParquet.REL_RETRY).get(0);
    flowFile.assertContentEquals("TRIGGER");
}
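The same injection technique can simulate a failure while opening the file rather than while reading it. The variant below is a hypothetical sketch: it reuses the configure(...) and writeParquetUsers(...) helpers from the test class above, and the REL_RETRY expectation is an assumption extrapolated from the read-time behavior, not verified against FetchParquet's actual routing for creation-time errors.
@Test
public void testIOExceptionCreatingReaderSketch() throws IOException, InitializationException {
    final FetchParquet proc = new FetchParquet() {
        @Override
        public HDFSRecordReader createHDFSRecordReader(ProcessContext context, FlowFile flowFile, Configuration conf, Path path) throws IOException {
            // Fail before a reader is ever produced, simulating an unreadable file.
            throw new IOException("simulated failure opening " + path);
        }
    };
    configure(proc);

    final File parquetDir = new File(DIRECTORY);
    final File parquetFile = new File(parquetDir, "testFetchParquetToCSV.parquet");
    writeParquetUsers(parquetFile, 10);

    final Map<String, String> attributes = new HashMap<>();
    attributes.put(CoreAttributes.PATH.key(), parquetDir.getAbsolutePath());
    attributes.put(CoreAttributes.FILENAME.key(), parquetFile.getName());
    testRunner.enqueue("TRIGGER", attributes);
    testRunner.run();

    // Assumed to follow the same routing as the read-time failure above.
    testRunner.assertAllFlowFilesTransferred(FetchParquet.REL_RETRY, 1);
}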
Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
From the class QueryDatabaseTable, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    // Fetch the column/table info once
    if (!setupComplete.get()) {
        super.setup(context);
    }
    ProcessSession session = sessionFactory.createSession();
    final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions().getValue();
    final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions().getValue();
    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions().asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions().asInteger();
    final Integer outputBatchSizeField = context.getProperty(OUTPUT_BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final int outputBatchSize = outputBatchSizeField == null ? 0 : outputBatchSizeField;
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions().asInteger() : 0;
    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder()
            .recordName(tableName)
            .maxRows(maxRowsPerFlowFile)
            .convertNames(context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean())
            .useLogicalTypes(context.getProperty(USE_AVRO_LOGICAL_TYPES).asBoolean())
            .defaultPrecision(context.getProperty(DEFAULT_PRECISION).evaluateAttributeExpressions().asInteger())
            .defaultScale(context.getProperty(DEFAULT_SCALE).evaluateAttributeExpressions().asInteger())
            .build();
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        getLogger().error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
        context.yield();
        return;
    }
    // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
    // set as the current state map (after the session has been committed)
    final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
    // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
    for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
        String maxPropKey = maxProp.getKey().toLowerCase();
        String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
        if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
            String newMaxPropValue;
            // If the value cannot be found under the fully-qualified key, it may have been stored under a
            // legacy key that is only the column name. Fall back to checking the column name,
            // but store the new initial max value under the fully-qualified key.
            if (statePropertyMap.containsKey(maxPropKey)) {
                newMaxPropValue = statePropertyMap.get(maxPropKey);
            } else {
                newMaxPropValue = maxProp.getValue();
            }
            statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
        }
    }
    List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? null : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
    final String selectQuery = getQuery(dbAdapter, tableName, columnNames, maxValueColumnNameList, customWhereClause, statePropertyMap);
    final StopWatch stopWatch = new StopWatch(true);
    final String fragmentIdentifier = UUID.randomUUID().toString();
    try (final Connection con = dbcpService.getConnection();
         final Statement st = con.createStatement()) {
        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }
        String jdbcURL = "DBCPService";
        try {
            DatabaseMetaData databaseMetaData = con.getMetaData();
            if (databaseMetaData != null) {
                jdbcURL = databaseMetaData.getURL();
            }
        } catch (SQLException se) {
            // Ignore and use default JDBC URL. This shouldn't happen unless the driver doesn't implement getMetaData() properly
        }
        final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions().asTimePeriod(TimeUnit.SECONDS).intValue();
        // timeout in seconds
        st.setQueryTimeout(queryTimeout);
        try {
            logger.debug("Executing query {}", new Object[] { selectQuery });
            final ResultSet resultSet = st.executeQuery(selectQuery);
            int fragmentIndex = 0;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                FlowFile fileToProcess = session.create();
                try {
                    fileToProcess = session.write(fileToProcess, out -> {
                        // Max values will be updated in the state property map by the callback
                        final MaxValueResultSetRowCollector maxValCollector = new MaxValueResultSetRowCollector(tableName, statePropertyMap, dbAdapter);
                        try {
                            nrOfRows.set(JdbcCommon.convertToAvroStream(resultSet, out, options, maxValCollector));
                        } catch (SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records to Avro.", e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(fileToProcess);
                    throw e;
                }
                if (nrOfRows.get() > 0) {
                    // set attribute how many rows were selected
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_TABLENAME, tableName);
                    fileToProcess = session.putAttribute(fileToProcess, CoreAttributes.MIME_TYPE.key(), JdbcCommon.MIME_TYPE_AVRO_BINARY);
                    if (maxRowsPerFlowFile > 0) {
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.identifier", fragmentIdentifier);
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.index", String.valueOf(fragmentIndex));
                    }
                    logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { fileToProcess, nrOfRows.get() });
                    session.getProvenanceReporter().receive(fileToProcess, jdbcURL, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    resultSetFlowFiles.add(fileToProcess);
                    // If we've reached the batch size, send out the flow files
                    if (outputBatchSize > 0 && resultSetFlowFiles.size() >= outputBatchSize) {
                        session.transfer(resultSetFlowFiles, REL_SUCCESS);
                        session.commit();
                        resultSetFlowFiles.clear();
                    }
                } else {
                    // If there were no rows returned, don't send the flowfile
                    session.remove(fileToProcess);
                    context.yield();
                    break;
                }
                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }
            // Even though the maximum value and total count are known at this point, to maintain consistent behavior if Output Batch Size is set, do not store the attributes
            if (outputBatchSize == 0) {
                for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                    // Add maximum values as attributes
                    for (Map.Entry<String, String> entry : statePropertyMap.entrySet()) {
                        // Get just the column name from the key
                        String key = entry.getKey();
                        String colName = key.substring(key.lastIndexOf(NAMESPACE_DELIMITER) + NAMESPACE_DELIMITER.length());
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "maxvalue." + colName, entry.getValue()));
                    }
                    // set count on all FlowFiles
                    if (maxRowsPerFlowFile > 0) {
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
                    }
                }
            }
        } catch (final SQLException e) {
            throw e;
        }
        session.transfer(resultSetFlowFiles, REL_SUCCESS);
    } catch (final ProcessException | SQLException e) {
        logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
        if (!resultSetFlowFiles.isEmpty()) {
            session.remove(resultSetFlowFiles);
        }
        context.yield();
    } finally {
        session.commit();
        try {
            // Update the state
            stateManager.setState(statePropertyMap, Scope.CLUSTER);
        } catch (IOException ioe) {
            getLogger().error("{} failed to update State Manager, maximum observed values will not be recorded", new Object[] { this, ioe });
        }
    }
}
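The state bookkeeping above can be hard to follow in isolation. The standalone sketch below mimics the initial-max-value sync loop with plain maps; the "@!@" delimiter and the initial.maxvalue.<column> dynamic-property convention are assumptions about QueryDatabaseTable's configuration, not code copied from it.
// Suppose the processor is configured with the dynamic property "initial.maxvalue.ID" = "100"
// for table "users", and cluster state is currently empty.
Map<String, String> statePropertyMap = new HashMap<>();
Map<String, String> maxValueProperties = new HashMap<>();
maxValueProperties.put("id", "100");   // key already lower-cased, as in the loop above

String tableName = "users";
String namespaceDelimiter = "@!@";     // assumed value of NAMESPACE_DELIMITER

for (Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
    String fullyQualifiedKey = tableName + namespaceDelimiter + maxProp.getKey();   // e.g. "users@!@id"
    if (!statePropertyMap.containsKey(fullyQualifiedKey)) {
        // Fall back to a legacy, column-only key if present; otherwise use the configured initial value.
        String initial = statePropertyMap.getOrDefault(maxProp.getKey(), maxProp.getValue());
        statePropertyMap.put(fullyQualifiedKey, initial);
    }
}
// statePropertyMap now holds {users@!@id=100}; subsequent queries are constrained by roughly
// "id > 100", and MaxValueResultSetRowCollector advances the stored value as larger ids are observed.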
Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
From the class SplitJson, method onTrigger.
@Override
public void onTrigger(final ProcessContext processContext, final ProcessSession processSession) {
    FlowFile original = processSession.get();
    if (original == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    DocumentContext documentContext;
    try {
        documentContext = validateAndEstablishJsonContext(processSession, original);
    } catch (InvalidJsonException e) {
        logger.error("FlowFile {} did not have valid JSON content.", new Object[] { original });
        processSession.transfer(original, REL_FAILURE);
        return;
    }
    final JsonPath jsonPath = JSON_PATH_REF.get();
    Object jsonPathResult;
    try {
        jsonPathResult = documentContext.read(jsonPath);
    } catch (PathNotFoundException e) {
        logger.warn("JsonPath {} could not be found for FlowFile {}", new Object[] { jsonPath.getPath(), original });
        processSession.transfer(original, REL_FAILURE);
        return;
    }
    if (!(jsonPathResult instanceof List)) {
        logger.error("The evaluated value {} of {} was not a JSON Array compatible type and cannot be split.", new Object[] { jsonPathResult, jsonPath.getPath() });
        processSession.transfer(original, REL_FAILURE);
        return;
    }
    List resultList = (List) jsonPathResult;
    Map<String, String> attributes = new HashMap<>();
    final String fragmentId = UUID.randomUUID().toString();
    attributes.put(FRAGMENT_ID.key(), fragmentId);
    attributes.put(FRAGMENT_COUNT.key(), Integer.toString(resultList.size()));
    // One split FlowFile per array element, each tagged with the shared fragment attributes.
    for (int i = 0; i < resultList.size(); i++) {
        Object resultSegment = resultList.get(i);
        FlowFile split = processSession.create(original);
        split = processSession.write(split, (out) -> {
            String resultSegmentContent = getResultRepresentation(resultSegment, nullDefaultValue);
            out.write(resultSegmentContent.getBytes(StandardCharsets.UTF_8));
        });
        attributes.put(SEGMENT_ORIGINAL_FILENAME.key(), split.getAttribute(CoreAttributes.FILENAME.key()));
        attributes.put(FRAGMENT_INDEX.key(), Integer.toString(i));
        processSession.transfer(processSession.putAllAttributes(split, attributes), REL_SPLIT);
    }
    original = copyAttributesToOriginal(processSession, original, fragmentId, resultList.size());
    processSession.transfer(original, REL_ORIGINAL);
    logger.info("Split {} into {} FlowFiles", new Object[] { original, resultList.size() });
}
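A minimal TestRunner sketch for the splitting behavior above. The REL_SPLIT, REL_ORIGINAL, and REL_FAILURE relationships come straight from the onTrigger code; the ARRAY_JSON_PATH_EXPRESSION property constant and the literal fragment attribute names ("fragment.index", "fragment.count") are assumptions.
final TestRunner runner = TestRunners.newTestRunner(new SplitJson());
runner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$[*]");   // assumed property constant

// A two-element JSON array should yield two splits plus the original FlowFile.
runner.enqueue("[{\"name\":\"a\"},{\"name\":\"b\"}]".getBytes(StandardCharsets.UTF_8));
runner.run();

runner.assertTransferCount(SplitJson.REL_SPLIT, 2);
runner.assertTransferCount(SplitJson.REL_ORIGINAL, 1);
runner.assertTransferCount(SplitJson.REL_FAILURE, 0);

final MockFlowFile firstSplit = runner.getFlowFilesForRelationship(SplitJson.REL_SPLIT).get(0);
firstSplit.assertAttributeEquals("fragment.index", "0");   // assumed FRAGMENT_INDEX key
firstSplit.assertAttributeEquals("fragment.count", "2");   // assumed FRAGMENT_COUNT key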