Use of org.apache.nifi.processor.ProcessContext in the Apache NiFi project.
The class PartitionRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final Map<String, RecordPath> recordPaths;
try {
recordPaths = context.getProperties().keySet().stream().filter(prop -> prop.isDynamic()).collect(Collectors.toMap(prop -> prop.getName(), prop -> getRecordPath(context, prop, flowFile)));
} catch (final Exception e) {
getLogger().error("Failed to compile RecordPath for {}; routing to failure", new Object[] { flowFile, e });
session.transfer(flowFile, REL_FAILURE);
return;
}
final Map<RecordValueMap, RecordSetWriter> writerMap = new HashMap<>();
try (final InputStream in = session.read(flowFile)) {
final Map<String, String> originalAttributes = flowFile.getAttributes();
final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger());
final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
Record record;
while ((record = reader.nextRecord()) != null) {
final Map<String, List<ValueWrapper>> recordMap = new HashMap<>();
// Evaluate all of the RecordPaths for this Record
for (final Map.Entry<String, RecordPath> entry : recordPaths.entrySet()) {
final String propName = entry.getKey();
final RecordPath recordPath = entry.getValue();
final Stream<FieldValue> fieldValueStream = recordPath.evaluate(record).getSelectedFields();
final List<ValueWrapper> fieldValues = fieldValueStream.map(fieldVal -> new ValueWrapper(fieldVal.getValue())).collect(Collectors.toList());
recordMap.put(propName, fieldValues);
}
final RecordValueMap recordValueMap = new RecordValueMap(recordMap);
// Get the RecordSetWriter that contains the same values for all RecordPaths - or create one if none exists.
RecordSetWriter writer = writerMap.get(recordValueMap);
if (writer == null) {
final FlowFile childFlowFile = session.create(flowFile);
recordValueMap.setFlowFile(childFlowFile);
final OutputStream out = session.write(childFlowFile);
writer = writerFactory.createWriter(getLogger(), writeSchema, out);
writer.beginRecordSet();
writerMap.put(recordValueMap, writer);
}
writer.write(record);
}
// For each RecordSetWriter, finish the record set and close the writer.
for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
final RecordValueMap valueMap = entry.getKey();
final RecordSetWriter writer = entry.getValue();
final WriteResult writeResult = writer.finishRecordSet();
writer.close();
final Map<String, String> attributes = new HashMap<>();
attributes.putAll(valueMap.getAttributes());
attributes.putAll(writeResult.getAttributes());
attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
FlowFile childFlowFile = valueMap.getFlowFile();
childFlowFile = session.putAllAttributes(childFlowFile, attributes);
session.adjustCounter("Record Processed", writeResult.getRecordCount(), false);
}
} catch (final Exception e) {
for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
final RecordValueMap valueMap = entry.getKey();
final RecordSetWriter writer = entry.getValue();
try {
writer.close();
} catch (final IOException e1) {
getLogger().warn("Failed to close Record Writer for {}; some resources may not be cleaned up appropriately", new Object[] { flowFile, e1 });
}
session.remove(valueMap.getFlowFile());
}
getLogger().error("Failed to partition {}", new Object[] { flowFile, e });
session.transfer(flowFile, REL_FAILURE);
return;
}
// Transfer the child FlowFiles only now, after all writers have been finished and closed, because we want to ensure that we are able to remove the child flowfiles in case of a failure.
for (final RecordValueMap valueMap : writerMap.keySet()) {
session.transfer(valueMap.getFlowFile(), REL_SUCCESS);
}
session.transfer(flowFile, REL_ORIGINAL);
}
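The partitioning above is driven entirely by RecordPath: each dynamic property is compiled once and evaluated against every record, and records whose paths yield the same values share one RecordSetWriter. Below is a minimal, self-contained sketch of that evaluation step. It assumes the nifi-record and nifi-record-path modules are on the classpath; the schema, field name, and path are invented for illustration and are not taken from the processor.

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.nifi.record.path.FieldValue;
import org.apache.nifi.record.path.RecordPath;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;

public class RecordPathEvaluationSketch {

    public static void main(String[] args) {
        // Invented schema and data: a single "country" field to partition on.
        final List<RecordField> fields = Arrays.asList(new RecordField("country", RecordFieldType.STRING.getDataType()));
        final SimpleRecordSchema schema = new SimpleRecordSchema(fields);

        final Map<String, Object> values = new HashMap<>();
        values.put("country", "US");
        final Record record = new MapRecord(schema, values);

        // Compile once and evaluate per record, the same pattern PartitionRecord applies to each dynamic property.
        final RecordPath countryPath = RecordPath.compile("/country");
        final List<Object> selected = countryPath.evaluate(record).getSelectedFields()
                .map(FieldValue::getValue)
                .collect(Collectors.toList());

        // Records that produce equal value lists would be written by the same RecordSetWriter.
        System.out.println(selected); // prints [US]
    }
}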
Use of org.apache.nifi.processor.ProcessContext in the Apache NiFi project.
The class EvaluateJsonPath, method onTrigger.
@Override
public void onTrigger(final ProcessContext processContext, final ProcessSession processSession) throws ProcessException {
FlowFile flowFile = processSession.get();
if (flowFile == null) {
return;
}
final ComponentLog logger = getLogger();
DocumentContext documentContext;
try {
documentContext = validateAndEstablishJsonContext(processSession, flowFile);
} catch (InvalidJsonException e) {
logger.error("FlowFile {} did not have valid JSON content.", new Object[] { flowFile });
processSession.transfer(flowFile, REL_FAILURE);
return;
}
Set<Map.Entry<String, JsonPath>> attributeJsonPathEntries = attributeToJsonPathEntrySetQueue.poll();
if (attributeJsonPathEntries == null) {
attributeJsonPathEntries = processContext.getProperties().entrySet().stream().filter(e -> e.getKey().isDynamic()).collect(Collectors.toMap(e -> e.getKey().getName(), e -> JsonPath.compile(e.getValue()))).entrySet();
}
try {
// We'll only be using this map if destinationIsAttribute == true
final Map<String, String> jsonPathResults = destinationIsAttribute ? new HashMap<>(attributeJsonPathEntries.size()) : Collections.emptyMap();
for (final Map.Entry<String, JsonPath> attributeJsonPathEntry : attributeJsonPathEntries) {
final String jsonPathAttrKey = attributeJsonPathEntry.getKey();
final JsonPath jsonPathExp = attributeJsonPathEntry.getValue();
Object result;
try {
Object potentialResult = documentContext.read(jsonPathExp);
if (returnType.equals(RETURN_TYPE_SCALAR) && !isJsonScalar(potentialResult)) {
logger.error("Unable to return a scalar value for the expression {} for FlowFile {}. Evaluated value was {}. Transferring to {}.", new Object[] { jsonPathExp.getPath(), flowFile.getId(), potentialResult.toString(), REL_FAILURE.getName() });
processSession.transfer(flowFile, REL_FAILURE);
return;
}
result = potentialResult;
} catch (PathNotFoundException e) {
if (pathNotFound.equals(PATH_NOT_FOUND_WARN)) {
logger.warn("FlowFile {} could not find path {} for attribute key {}.", new Object[] { flowFile.getId(), jsonPathExp.getPath(), jsonPathAttrKey }, e);
}
if (destinationIsAttribute) {
jsonPathResults.put(jsonPathAttrKey, StringUtils.EMPTY);
continue;
} else {
processSession.transfer(flowFile, REL_NO_MATCH);
return;
}
}
final String resultRepresentation = getResultRepresentation(result, nullDefaultValue);
if (destinationIsAttribute) {
jsonPathResults.put(jsonPathAttrKey, resultRepresentation);
} else {
flowFile = processSession.write(flowFile, out -> {
try (OutputStream outputStream = new BufferedOutputStream(out)) {
outputStream.write(resultRepresentation.getBytes(StandardCharsets.UTF_8));
}
});
processSession.getProvenanceReporter().modifyContent(flowFile, "Replaced content with result of expression " + jsonPathExp.getPath());
}
}
// jsonPathResults map will be empty if this is false
if (destinationIsAttribute) {
flowFile = processSession.putAllAttributes(flowFile, jsonPathResults);
}
processSession.transfer(flowFile, REL_MATCH);
} finally {
attributeToJsonPathEntrySetQueue.offer(attributeJsonPathEntries);
}
}
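The dynamic properties above are compiled into Jayway JsonPath objects once and recycled through a queue so each onTrigger call can reuse them. The sketch below shows the underlying json-path calls (compile, parse, read, and the PathNotFoundException case); the document and paths are invented for illustration, and the processor's scalar and return-type handling is omitted.

import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.PathNotFoundException;

public class JsonPathEvaluationSketch {

    public static void main(String[] args) {
        // Invented document and paths for illustration only.
        final String json = "{\"person\":{\"name\":\"Ada\",\"age\":36}}";

        // Compile once and reuse, mirroring the processor's queue of pre-compiled expressions.
        final JsonPath namePath = JsonPath.compile("$.person.name");
        final JsonPath missingPath = JsonPath.compile("$.person.email");

        final DocumentContext documentContext = JsonPath.parse(json);

        final String name = documentContext.read(namePath);
        System.out.println(name); // prints Ada

        try {
            documentContext.read(missingPath);
        } catch (final PathNotFoundException e) {
            // EvaluateJsonPath maps this case to an empty attribute value or the unmatched relationship,
            // depending on the destination and the configured path-not-found behavior.
            System.out.println("no match");
        }
    }
}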
Use of org.apache.nifi.processor.ProcessContext in the Apache NiFi project.
The class GenerateTableFetch, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
// Fetch the column/table info once (if the table name and max value columns are not dynamic). Otherwise do the setup later
if (!isDynamicTableName && !isDynamicMaxValues && !setupComplete.get()) {
super.setup(context);
}
ProcessSession session = sessionFactory.createSession();
FlowFile fileToProcess = null;
if (context.hasIncomingConnection()) {
fileToProcess = session.get();
if (fileToProcess == null) {
// Incoming connection with no flow file available, do no work (see capability description)
return;
}
}
final ComponentLog logger = getLogger();
final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(fileToProcess).getValue();
final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
final int partitionSize = context.getProperty(PARTITION_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions(fileToProcess).getValue();
final StateManager stateManager = context.getStateManager();
final StateMap stateMap;
FlowFile finalFileToProcess = fileToProcess;
try {
stateMap = stateManager.getState(Scope.CLUSTER);
} catch (final IOException ioe) {
logger.error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
context.yield();
return;
}
try {
// Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
// set as the current state map (after the session has been committed)
final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
// If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
String maxPropKey = maxProp.getKey().toLowerCase();
String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
String newMaxPropValue;
// If we can't find the value at the fully-qualified key name, it is possible (under a previous scheme) the value has been stored under a key that is only the column name. Fall back to check the column name, but store the new initial max value under the fully-qualified key.
if (statePropertyMap.containsKey(maxPropKey)) {
newMaxPropValue = statePropertyMap.get(maxPropKey);
} else {
newMaxPropValue = maxProp.getValue();
}
statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
}
}
// Build a WHERE clause with maximum-value columns (if they exist), and a list of column names that will contain MAX(<column>) aliases. The
// executed SQL query will retrieve the count of all records after the filter(s) have been applied, as well as the new maximum values for the
// specified columns. This allows the processor to generate the correctly partitioned SQL statements as well as to update the state with the
// latest observed maximum values.
String whereClause = null;
List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? new ArrayList<>(0) : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
List<String> maxValueClauses = new ArrayList<>(maxValueColumnNameList.size());
String columnsClause = null;
List<String> maxValueSelectColumns = new ArrayList<>(maxValueColumnNameList.size() + 1);
maxValueSelectColumns.add("COUNT(*)");
// For each maximum-value column, get a WHERE filter and a MAX(column) alias
IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
String colName = maxValueColumnNameList.get(index);
maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
if (!StringUtils.isEmpty(maxValue)) {
if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
// This means the column type cache was cleared after an instance restart, so re-cache the column types
super.setup(context, false, finalFileToProcess);
}
Integer type = getColumnType(tableName, colName);
// Add a condition for the WHERE clause
maxValueClauses.add(colName + (index == 0 ? " > " : " >= ") + getLiteralByType(type, maxValue, dbAdapter.getName()));
}
});
if (customWhereClause != null) {
// adding the custom WHERE clause (if defined) to the list of existing clauses.
maxValueClauses.add("(" + customWhereClause + ")");
}
whereClause = StringUtils.join(maxValueClauses, " AND ");
columnsClause = StringUtils.join(maxValueSelectColumns, ", ");
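// Example with invented values, assuming state already holds maximum values for both columns: for max-value columns id and updated_at plus a custom clause status = 'ACTIVE',
// whereClause becomes "id > 100 AND updated_at >= '2018-01-01 00:00:00' AND (status = 'ACTIVE')" and columnsClause becomes "COUNT(*), MAX(id) id, MAX(updated_at) updated_at".
// Exact literal formatting depends on the column type and the selected database adapter.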
// Build a SELECT query with maximum-value columns (if present)
final String selectQuery = dbAdapter.getSelectStatement(tableName, columnsClause, whereClause, null, null, null);
long rowCount = 0;
try (final Connection con = dbcpService.getConnection();
final Statement st = con.createStatement()) {
final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asTimePeriod(TimeUnit.SECONDS).intValue();
// timeout in seconds
st.setQueryTimeout(queryTimeout);
logger.debug("Executing {}", new Object[] { selectQuery });
ResultSet resultSet;
resultSet = st.executeQuery(selectQuery);
if (resultSet.next()) {
// Total row count is in the first column
rowCount = resultSet.getLong(1);
// Update the state map with the newly-observed maximum values
ResultSetMetaData rsmd = resultSet.getMetaData();
for (int i = 2; i <= rsmd.getColumnCount(); i++) {
// Some JDBC drivers consider a column's name and label to be very different things.
// Since this column has been aliased, let's check the label first;
// if there is no label, we'll use the column name.
String resultColumnName = (StringUtils.isNotEmpty(rsmd.getColumnLabel(i)) ? rsmd.getColumnLabel(i) : rsmd.getColumnName(i)).toLowerCase();
String fullyQualifiedStateKey = getStateKey(tableName, resultColumnName);
String resultColumnCurrentMax = statePropertyMap.get(fullyQualifiedStateKey);
if (StringUtils.isEmpty(resultColumnCurrentMax) && !isDynamicTableName) {
// If we can't find the value at the fully-qualified key name and the table name is static, it is possible (under a previous scheme)
// the value has been stored under a key that is only the column name. Fall back to check the column name; either way, when a new
// maximum value is observed, it will be stored under the fully-qualified key from then on.
resultColumnCurrentMax = statePropertyMap.get(resultColumnName);
}
int type = rsmd.getColumnType(i);
if (isDynamicTableName) {
// We haven't pre-populated the column type map if the table name is dynamic, so do it here
columnTypeMap.put(fullyQualifiedStateKey, type);
}
try {
String newMaxValue = getMaxValueFromRow(resultSet, i, type, resultColumnCurrentMax, dbAdapter.getName());
if (newMaxValue != null) {
statePropertyMap.put(fullyQualifiedStateKey, newMaxValue);
}
} catch (ParseException | IOException pie) {
// Fail the whole thing here before we start creating flow files and such
throw new ProcessException(pie);
}
}
} else {
// Something is very wrong here, one row (even if count is zero) should be returned
throw new SQLException("No rows returned from metadata query: " + selectQuery);
}
// for each maximum-value column get a right bounding WHERE condition
IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
String colName = maxValueColumnNameList.get(index);
maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
if (!StringUtils.isEmpty(maxValue)) {
if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
// This means the column type cache was cleared after an instance restart, so re-cache the column types
super.setup(context, false, finalFileToProcess);
}
Integer type = getColumnType(tableName, colName);
// Add a condition for the WHERE clause
maxValueClauses.add(colName + " <= " + getLiteralByType(type, maxValue, dbAdapter.getName()));
}
});
// Update WHERE list to include new right hand boundaries
whereClause = StringUtils.join(maxValueClauses, " AND ");
final long numberOfFetches = (partitionSize == 0) ? 1 : (rowCount / partitionSize) + (rowCount % partitionSize == 0 ? 0 : 1);
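// Example: rowCount = 10 with partitionSize = 3 gives 4 fetches (limit 3, offsets 0, 3, 6, 9); partitionSize = 0 gives a single fetch with no limit or offset.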
// Generate SQL statements to read "pages" of data
for (long i = 0; i < numberOfFetches; i++) {
Long limit = partitionSize == 0 ? null : (long) partitionSize;
Long offset = partitionSize == 0 ? null : i * partitionSize;
final String maxColumnNames = StringUtils.join(maxValueColumnNameList, ", ");
final String query = dbAdapter.getSelectStatement(tableName, columnNames, whereClause, maxColumnNames, limit, offset);
FlowFile sqlFlowFile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
sqlFlowFile = session.write(sqlFlowFile, out -> out.write(query.getBytes()));
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.tableName", tableName);
if (columnNames != null) {
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.columnNames", columnNames);
}
if (StringUtils.isNotBlank(whereClause)) {
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.whereClause", whereClause);
}
if (StringUtils.isNotBlank(maxColumnNames)) {
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.maxColumnNames", maxColumnNames);
}
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.limit", String.valueOf(limit));
if (partitionSize != 0) {
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.offset", String.valueOf(offset));
}
session.transfer(sqlFlowFile, REL_SUCCESS);
}
if (fileToProcess != null) {
session.remove(fileToProcess);
}
} catch (SQLException e) {
if (fileToProcess != null) {
logger.error("Unable to execute SQL select query {} due to {}, routing {} to failure", new Object[] { selectQuery, e, fileToProcess });
fileToProcess = session.putAttribute(fileToProcess, "generatetablefetch.sql.error", e.getMessage());
session.transfer(fileToProcess, REL_FAILURE);
} else {
logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
throw new ProcessException(e);
}
}
session.commit();
try {
// Update the state
stateManager.setState(statePropertyMap, Scope.CLUSTER);
} catch (IOException ioe) {
logger.error("{} failed to update State Manager, observed maximum values will not be recorded. " + "Also, any generated SQL statements may be duplicated.", new Object[] { this, ioe });
}
} catch (final ProcessException pe) {
// Log the cause of the ProcessException if it is available
Throwable t = (pe.getCause() == null ? pe : pe.getCause());
logger.error("Error during processing: {}", new Object[] { t.getMessage() }, t);
session.rollback();
context.yield();
}
}
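The LIMIT/OFFSET planning in the loop above is simple enough to pull into a standalone helper, which makes it easier to reason about how many flow files a given row count and Partition Size will produce. The sketch below mirrors only that arithmetic; the class and method names are invented and no NiFi APIs are used.

import java.util.ArrayList;
import java.util.List;

public class FetchPagePlanner {

    // One generated fetch: a LIMIT/OFFSET pair; null means "no bound", as in the processor.
    static final class Page {
        final Long limit;
        final Long offset;

        Page(final Long limit, final Long offset) {
            this.limit = limit;
            this.offset = offset;
        }

        @Override
        public String toString() {
            return "limit=" + limit + ", offset=" + offset;
        }
    }

    static List<Page> plan(final long rowCount, final int partitionSize) {
        final long numberOfFetches = (partitionSize == 0) ? 1 : (rowCount / partitionSize) + (rowCount % partitionSize == 0 ? 0 : 1);
        final List<Page> pages = new ArrayList<>();
        for (long i = 0; i < numberOfFetches; i++) {
            final Long limit = partitionSize == 0 ? null : (long) partitionSize;
            final Long offset = partitionSize == 0 ? null : i * partitionSize;
            pages.add(new Page(limit, offset));
        }
        return pages;
    }

    public static void main(String[] args) {
        System.out.println(plan(10, 3)); // [limit=3, offset=0, limit=3, offset=3, limit=3, offset=6, limit=3, offset=9]
        System.out.println(plan(10, 0)); // [limit=null, offset=null]
    }
}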
Use of org.apache.nifi.processor.ProcessContext in the Apache NiFi project.
The class FetchParquetTest, method testIOExceptionCreatingReaderShouldRouteToRetry.
@Test
public void testIOExceptionCreatingReaderShouldRouteToRetry() throws InitializationException, IOException {
final FetchParquet proc = new FetchParquet() {
@Override
public HDFSRecordReader createHDFSRecordReader(ProcessContext context, FlowFile flowFile, Configuration conf, Path path) throws IOException {
throw new IOException("IOException");
}
};
configure(proc);
final File parquetDir = new File(DIRECTORY);
final File parquetFile = new File(parquetDir, "testFetchParquetToCSV.parquet");
final int numUsers = 10;
writeParquetUsers(parquetFile, numUsers);
final Map<String, String> attributes = new HashMap<>();
attributes.put(CoreAttributes.PATH.key(), parquetDir.getAbsolutePath());
attributes.put(CoreAttributes.FILENAME.key(), parquetFile.getName());
testRunner.enqueue("TRIGGER", attributes);
testRunner.run();
testRunner.assertAllFlowFilesTransferred(FetchParquet.REL_RETRY, 1);
final MockFlowFile flowFile = testRunner.getFlowFilesForRelationship(FetchParquet.REL_RETRY).get(0);
flowFile.assertContentEquals("TRIGGER");
}
Use of org.apache.nifi.processor.ProcessContext in the Apache NiFi project.
The class SelectHiveQL, method onTrigger.
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
FlowFile flowfile = null;
// If we have no FlowFile, and all incoming connections are self-loops, then we can continue on. However, if we have no FlowFile and we have connections coming from other Processors, then we know that we should run only if we have a FlowFile.
if (context.hasIncomingConnection()) {
if (fileToProcess == null && context.hasNonLoopConnection()) {
return;
}
}
final ComponentLog logger = getLogger();
final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class);
final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());
// Source the SQL
final String selectQuery;
if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
selectQuery = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
} else {
// If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
// If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
final StringBuilder queryContents = new StringBuilder();
session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
selectQuery = queryContents.toString();
}
final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger();
final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() : 0;
final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
final StopWatch stopWatch = new StopWatch(true);
final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue();
final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue();
final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
final boolean escape = context.getProperty(HIVEQL_CSV_ESCAPE).asBoolean();
final String fragmentIdentifier = UUID.randomUUID().toString();
try (final Connection con = dbcpService.getConnection();
final Statement st = (flowbased ? con.prepareStatement(selectQuery) : con.createStatement())) {
if (fetchSize != null && fetchSize > 0) {
try {
st.setFetchSize(fetchSize);
} catch (SQLException se) {
// Not all drivers support this, just log the error (at debug level) and move on
logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se);
}
}
final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
try {
logger.debug("Executing query {}", new Object[] { selectQuery });
if (flowbased) {
// Hive JDBC Doesn't Support this yet:
// ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
// int paramCount = pmd.getParameterCount();
// Alternate way to determine number of params in SQL.
int paramCount = StringUtils.countMatches(selectQuery, "?");
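// e.g. selectQuery = "SELECT * FROM users WHERE id = ? AND region = ?" gives paramCount = 2; setParameters(...) below fills the placeholders from the incoming FlowFile's attributes.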
if (paramCount > 0) {
setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
}
}
final ResultSet resultSet;
try {
resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(selectQuery));
} catch (SQLException se) {
// If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
fileToProcess = null;
throw se;
}
int fragmentIndex = 0;
String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null;
while (true) {
final AtomicLong nrOfRows = new AtomicLong(0L);
flowfile = (flowfile == null) ? session.create() : session.create(flowfile);
if (baseFilename == null) {
baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
}
try {
flowfile = session.write(flowfile, out -> {
try {
if (AVRO.equals(outputFormat)) {
nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro));
} else if (CSV.equals(outputFormat)) {
CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile);
nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
} else {
nrOfRows.set(0L);
throw new ProcessException("Unsupported output format: " + outputFormat);
}
} catch (final SQLException | RuntimeException e) {
throw new ProcessException("Error during database query or conversion of records.", e);
}
});
} catch (ProcessException e) {
// Add flowfile to results before rethrowing so it will be removed from session in outer catch
resultSetFlowFiles.add(flowfile);
throw e;
}
if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
final Map<String, String> attributes = new HashMap<>();
// Set attribute for how many rows were selected
attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
try {
// Set input/output table names by parsing the query
attributes.putAll(toQueryTableAttributes(findTableNames(selectQuery)));
} catch (Exception e) {
// If failed to parse the query, just log a warning message, but continue.
getLogger().warn("Failed to parse query: {} due to {}", new Object[] { selectQuery, e }, e);
}
// Set MIME type on output document and add extension to filename
if (AVRO.equals(outputFormat)) {
attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro");
} else if (CSV.equals(outputFormat)) {
attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv");
}
if (maxRowsPerFlowFile > 0) {
attributes.put("fragment.identifier", fragmentIdentifier);
attributes.put("fragment.index", String.valueOf(fragmentIndex));
}
flowfile = session.putAllAttributes(flowfile, attributes);
logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { flowfile, nrOfRows.get() });
if (context.hasIncomingConnection()) {
// If the flow file came from an incoming connection, issue a Fetch provenance event
session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
} else {
// If we created a flow file from rows received from Hive, issue a Receive provenance event
session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
}
resultSetFlowFiles.add(flowfile);
} else {
// If there were no rows returned (and the first flow file has already been sent), we're done processing, so remove the flowfile and carry on
session.remove(flowfile);
break;
}
fragmentIndex++;
if (maxFragments > 0 && fragmentIndex >= maxFragments) {
break;
}
}
for (int i = 0; i < resultSetFlowFiles.size(); i++) {
// Set count on all FlowFiles
if (maxRowsPerFlowFile > 0) {
resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
}
}
} catch (final SQLException e) {
throw e;
}
session.transfer(resultSetFlowFiles, REL_SUCCESS);
} catch (final ProcessException | SQLException e) {
logger.error("Issue processing SQL {} due to {}.", new Object[] { selectQuery, e });
if (flowfile == null) {
// This can happen if any exceptions occur while setting up the connection, statement, etc.
logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", new Object[] { selectQuery, e });
context.yield();
} else {
if (context.hasIncomingConnection()) {
logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, flowfile, e });
flowfile = session.penalize(flowfile);
} else {
logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", new Object[] { selectQuery, e });
context.yield();
}
session.transfer(flowfile, REL_FAILURE);
}
} finally {
if (fileToProcess != null) {
session.remove(fileToProcess);
}
}
}
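When Max Rows Per Flow File is greater than zero, the code above stamps fragment.identifier, fragment.index, and fragment.count on every output flow file so a downstream step (for example a defragmenting merge) can reassemble the pages in order. The sketch below shows one way plain Java could regroup such attribute maps; it is illustrative only and the helper names are invented.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class FragmentGroupingSketch {

    // Group attribute maps (one per flow file) by fragment.identifier and order each group by fragment.index.
    static Map<String, List<Map<String, String>>> groupFragments(final List<Map<String, String>> flowFileAttributes) {
        final Map<String, List<Map<String, String>>> byIdentifier = new HashMap<>();
        for (final Map<String, String> attrs : flowFileAttributes) {
            byIdentifier.computeIfAbsent(attrs.get("fragment.identifier"), k -> new ArrayList<>()).add(attrs);
        }
        for (final List<Map<String, String>> fragments : byIdentifier.values()) {
            fragments.sort(Comparator.comparingInt((Map<String, String> attrs) -> Integer.parseInt(attrs.get("fragment.index"))));
        }
        return byIdentifier;
    }

    public static void main(String[] args) {
        final Map<String, String> second = new HashMap<>();
        second.put("fragment.identifier", "abc");
        second.put("fragment.index", "1");
        final Map<String, String> first = new HashMap<>();
        first.put("fragment.identifier", "abc");
        first.put("fragment.index", "0");
        // The fragments arrive out of order but are regrouped and sorted by index.
        System.out.println(groupFragments(Arrays.asList(second, first)));
    }
}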