Use of org.apache.nifi.util.StopWatch in project kylo by Teradata.
Class ExecuteHQL, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = null;
    try {
        if (context.hasIncomingConnection()) {
            flowFile = session.get();
            if (flowFile == null) {
                return;
            }
        }
    } catch (NoSuchMethodError e) {
        logger.error("Failed to get incoming", e);
    }
    FlowFile outgoing = (flowFile == null ? session.create() : flowFile);
    final ThriftService thriftService = context.getProperty(THRIFT_SERVICE).asControllerService(ThriftService.class);
    final String selectQuery = context.getProperty(SQL_SELECT_QUERY).evaluateAttributeExpressions(outgoing).getValue();
    final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).asTimePeriod(TimeUnit.SECONDS).intValue();
    final StopWatch stopWatch = new StopWatch(true);
    try (final Connection con = thriftService.getConnection();
         final Statement st = con.createStatement()) {
        setQueryTimeout(st, queryTimeout);
        final AtomicLong nrOfRows = new AtomicLong(0L);
        outgoing = session.write(outgoing, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                try {
                    logger.debug("Executing query {}", new Object[] { selectQuery });
                    final ResultSet resultSet = new ResultSetAdapter(st.executeQuery(selectQuery));
                    nrOfRows.set(JdbcCommon.convertToAvroStream(resultSet, out));
                } catch (final SQLException e) {
                    throw new ProcessException(e);
                }
            }
        });
        // set attribute how many rows were selected
        outgoing = session.putAttribute(outgoing, RESULT_ROW_COUNT, Long.toString(nrOfRows.get()));
logger.info("{} contains {} Avro records", new Object[] { nrOfRows.get() });
logger.info("Transferred {} to 'success'", new Object[] { outgoing });
session.getProvenanceReporter().modifyContent(outgoing, "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
session.transfer(outgoing, REL_SUCCESS);
} catch (final ProcessException | SQLException e) {
logger.error("Unable to execute SQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, outgoing, e });
session.transfer(outgoing, REL_FAILURE);
}
}
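All of the snippets on this page share the same basic timing idiom: the watch is started at construction with new StopWatch(true) and its running time is read with getElapsed(...) when the provenance event is reported; ExecuteHQL never needs to call stop() for that read. A minimal, standalone sketch of the idiom (the class name and the Thread.sleep stand-in are illustrative only, not part of kylo):

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class StopWatchElapsedSketch {

    public static void main(String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(true);   // true = start timing immediately

        Thread.sleep(50);                                  // stand-in for the query-and-write work shown above

        // Read the running time without stopping the watch, as the processors above do
        // when calling session.getProvenanceReporter().modifyContent(...).
        final long elapsedMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
        System.out.println("Retrieved rows in " + elapsedMillis + " ms");
    }
}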
Use of org.apache.nifi.util.StopWatch in project kylo by Teradata.
Class CreateHDFSFolder, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    try {
        final FileSystem hdfs = getFileSystem(context);
        if (hdfs == null) {
            getLog().error("HDFS not configured properly");
            session.transfer(flowFile, REL_FAILURE);
            context.yield();
            return;
        }
        String owner = context.getProperty(REMOTE_OWNER).evaluateAttributeExpressions(flowFile).getValue();
        String group = context.getProperty(REMOTE_GROUP).evaluateAttributeExpressions(flowFile).getValue();
        HDFSSupport hdfsSupport = new HDFSSupport(hdfs);
        String pathString = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
        String[] paths = pathString.split("\\r?\\n");
        // Create for each path defined
        for (String path : paths) {
            getLog().info("Creating folder " + path);
            final Path folderPath = new Path(path.trim());
            hdfsSupport.createFolder(folderPath, owner, group);
        }
        stopWatch.stop();
        final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        getLog().info("created folders {} in {} milliseconds", new Object[] { pathString, millis });
        session.transfer(flowFile, REL_SUCCESS);
    } catch (Exception e) {
        getLog().error("failed folder creation {}", new Object[] { e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
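CreateHDFSFolder reads the time differently: it calls stop() first and then getDuration(...) for the total. A minimal, standalone sketch of that variant (class name and the Thread.sleep stand-in are illustrative only):

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class StopWatchDurationSketch {

    public static void main(String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(true);   // starts timing immediately
        Thread.sleep(50);                                  // stand-in for the per-path HDFS folder creation above
        stopWatch.stop();                                  // freeze the measurement before reading it
        final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        System.out.println("created folders in " + millis + " milliseconds");
    }
}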
Use of org.apache.nifi.util.StopWatch in project kylo by Teradata.
Class ImportSqoop, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }
    final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
    final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
    final SqoopConnectionService sqoopConnectionService = context.getProperty(SQOOP_CONNECTION_SERVICE).asControllerService(SqoopConnectionService.class);
    final String sourceTableName = context.getProperty(SOURCE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceTableFields = context.getProperty(SOURCE_TABLE_FIELDS).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceTableWhereClause = context.getProperty(SOURCE_TABLE_WHERE_CLAUSE).evaluateAttributeExpressions(flowFile).getValue();
    final SqoopLoadStrategy sourceLoadStrategy = SqoopLoadStrategy.valueOf(context.getProperty(SOURCE_LOAD_STRATEGY).getValue());
    final String sourceCheckColumnName = context.getProperty(SOURCE_CHECK_COLUMN_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String sourcePropertyWatermark = context.getProperty(SOURCE_PROPERTY_WATERMARK).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceCheckColumnLastValue = context.getProperty(SOURCE_CHECK_COLUMN_LAST_VALUE).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceSplitByField = context.getProperty(SOURCE_SPLIT_BY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceBoundaryQuery = context.getProperty(SOURCE_BOUNDARY_QUERY).evaluateAttributeExpressions(flowFile).getValue();
    final Integer clusterMapTasks = context.getProperty(CLUSTER_MAP_TASKS).evaluateAttributeExpressions(flowFile).asInteger();
    final String clusterUIJobName = context.getProperty(CLUSTER_UI_JOB_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String targetHdfsDirectory = context.getProperty(TARGET_HDFS_DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
    final TargetHdfsDirExistsStrategy targetHdfsDirExistsStrategy = TargetHdfsDirExistsStrategy.valueOf(context.getProperty(TARGET_HDFS_DIRECTORY_EXISTS_STRATEGY).getValue());
    final ExtractDataFormat targetExtractDataFormat = ExtractDataFormat.valueOf(context.getProperty(TARGET_EXTRACT_DATA_FORMAT).getValue());
    final String targetHdfsFileFieldDelimiter = context.getProperty(TARGET_HDFS_FILE_FIELD_DELIMITER).evaluateAttributeExpressions(flowFile).getValue();
    final String targetHdfsFileRecordDelimiter = context.getProperty(TARGET_HDFS_FILE_RECORD_DELIMITER).evaluateAttributeExpressions(flowFile).getValue();
    final HiveDelimStrategy targetHiveDelimStrategy = HiveDelimStrategy.valueOf(context.getProperty(TARGET_HIVE_DELIM_STRATEGY).getValue());
    final String targetHiveReplaceDelim = context.getProperty(TARGET_HIVE_REPLACE_DELIM).evaluateAttributeExpressions(flowFile).getValue();
    final CompressionAlgorithm targetCompressionAlgorithm = CompressionAlgorithm.valueOf(context.getProperty(TARGET_COMPRESSION_ALGORITHM).getValue());
    final String targetColumnTypeMapping = context.getProperty(TARGET_COLUMN_TYPE_MAPPING).evaluateAttributeExpressions(flowFile).getValue();
    final String sqoopCodeGenDirectory = context.getProperty(SQOOP_CODEGEN_DIR).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceSpecificSqlServerSchema = context.getProperty(SOURCESPECIFIC_SQLSERVER_SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final String systemProperties = context.getProperty(SQOOP_SYSTEM_PROPERTIES).evaluateAttributeExpressions(flowFile).getValue();
    final String additionalArguments = context.getProperty(SQOOP_ADDITIONAL_ARGUMENTS).evaluateAttributeExpressions(flowFile).getValue();
    final String COMMAND_SHELL = "/bin/bash";
    final String COMMAND_SHELL_FLAGS = "-c";
    final StopWatch stopWatch = new StopWatch(false);
    KerberosConfig kerberosConfig = new KerberosConfig()
        .setLogger(logger)
        .setKerberosPrincipal(kerberosPrincipal)
        .setKerberosKeytab(kerberosKeyTab);
    SqoopBuilder sqoopBuilder = new SqoopBuilder();
    String sqoopCommand = sqoopBuilder
        .setLogger(logger)
        .setSourceConnectionString(sqoopConnectionService.getConnectionString())
        .setSourceUserName(sqoopConnectionService.getUserName())
        .setPasswordMode(sqoopConnectionService.getPasswordMode())
        .setSourcePasswordHdfsFile(sqoopConnectionService.getPasswordHdfsFile())
        .setSourcePasswordPassphrase(sqoopConnectionService.getPasswordPassphrase())
        .setSourceEnteredPassword(sqoopConnectionService.getEnteredPassword())
        .setSourceConnectionManager(sqoopConnectionService.getConnectionManager())
        .setSourceDriver(sqoopConnectionService.getDriver())
        .setSourceTableName(sourceTableName)
        .setSourceTableFields(sourceTableFields)
        .setSourceTableWhereClause(sourceTableWhereClause)
        .setSourceLoadStrategy(sourceLoadStrategy)
        .setSourceCheckColumnName(sourceCheckColumnName)
        .setSourceCheckColumnLastValue(sourceCheckColumnLastValue)
        .setSourceSplitByField(sourceSplitByField)
        .setSourceBoundaryQuery(sourceBoundaryQuery)
        .setClusterMapTasks(clusterMapTasks)
        .setClusterUIJobName(clusterUIJobName)
        .setTargetHdfsDirectory(targetHdfsDirectory)
        .setTargetHdfsDirExistsStrategy(targetHdfsDirExistsStrategy)
        .setTargetExtractDataFormat(targetExtractDataFormat)
        .setTargetHdfsFileFieldDelimiter(targetHdfsFileFieldDelimiter)
        .setTargetHdfsFileRecordDelimiter(targetHdfsFileRecordDelimiter)
        .setTargetHiveDelimStrategy(targetHiveDelimStrategy)
        .setTargetHiveReplaceDelim(targetHiveReplaceDelim)
        .setTargetCompressionAlgorithm(targetCompressionAlgorithm)
        .setTargetColumnTypeMapping(targetColumnTypeMapping)
        .setSqoopCodeGenDirectory(sqoopCodeGenDirectory)
        .setSourceSpecificSqlServerSchema(sourceSpecificSqlServerSchema)
        .setSystemProperties(systemProperties)
        .setAdditionalArguments(additionalArguments)
        .build();
    List<String> sqoopExecutionCommand = new ArrayList<>();
    sqoopExecutionCommand.add(COMMAND_SHELL);
    sqoopExecutionCommand.add(COMMAND_SHELL_FLAGS);
    sqoopExecutionCommand.add(sqoopCommand);
logger.info("Starting execution of Sqoop command");
stopWatch.start();
SqoopProcessResult sqoopProcessResult = sqoopProcessRunner.execute();
long jobDurationSeconds = stopWatch.getElapsed(TimeUnit.SECONDS);
stopWatch.stop();
logger.info("Finished execution of Sqoop command");
int resultStatus = sqoopProcessResult.getExitValue();
SqoopUtils sqoopUtils = new SqoopUtils();
long recordsCount = sqoopUtils.getSqoopRecordCount(sqoopProcessResult, logger);
String sqoopCommandWithCredentialsMasked = sqoopUtils.maskCredentials(sqoopCommand, sqoopUtils.getCredentialsToMask());
flowFile = session.putAttribute(flowFile, "sqoop.command.text", sqoopCommandWithCredentialsMasked);
flowFile = session.putAttribute(flowFile, "sqoop.result.code", String.valueOf(resultStatus));
flowFile = session.putAttribute(flowFile, "sqoop.run.seconds", String.valueOf(jobDurationSeconds));
flowFile = session.putAttribute(flowFile, "sqoop.record.count", String.valueOf(recordsCount));
flowFile = session.putAttribute(flowFile, "sqoop.output.hdfs", targetHdfsDirectory);
logger.info("Wrote result attributes to flow file");
if (resultStatus == 0) {
logger.info("Sqoop Import OK [Code {}]", new Object[] { resultStatus });
if (sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_APPEND || sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_LASTMODIFIED) {
if ((sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_APPEND) && (recordsCount == 0)) {
flowFile = session.putAttribute(flowFile, sourcePropertyWatermark, sourceCheckColumnLastValue);
} else {
String newHighWaterMark = sqoopUtils.getNewHighWatermark(sqoopProcessResult);
if ((newHighWaterMark == null) || (newHighWaterMark.equals("NO_UPDATE")) || (newHighWaterMark.equals(""))) {
flowFile = session.putAttribute(flowFile, sourcePropertyWatermark, sourceCheckColumnLastValue);
} else {
flowFile = session.putAttribute(flowFile, sourcePropertyWatermark, newHighWaterMark);
}
}
}
session.transfer(flowFile, REL_SUCCESS);
} else {
logger.error("Sqoop Import FAIL [Code {}]", new Object[] { resultStatus });
session.transfer(flowFile, REL_FAILURE);
}
}
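ImportSqoop is the only processor here that constructs the watch without auto-starting it (new StopWatch(false)), so that only the external Sqoop invocation is timed rather than the property evaluation and command building. A minimal, standalone sketch of that deferred-start variant (class name and the Thread.sleep stand-in are illustrative only):

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class StopWatchDeferredStartSketch {

    public static void main(String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(false);  // false = construct without starting

        stopWatch.start();                                 // begin timing just before the external call
        Thread.sleep(50);                                  // stand-in for sqoopProcessRunner.execute()
        final long jobDurationSeconds = stopWatch.getElapsed(TimeUnit.SECONDS);
        stopWatch.stop();

        System.out.println("sqoop.run.seconds = " + jobDurationSeconds);
    }
}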
Use of org.apache.nifi.util.StopWatch in project kylo by Teradata.
Class GetTableData, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = null;
    if (context.hasIncomingConnection()) {
        flowFile = session.get();
        // we know that we should run only if we have a FlowFile.
        if (flowFile == null && context.hasNonLoopConnection()) {
            return;
        }
    }
    final FlowFile incoming = flowFile;
    final ComponentLog logger = getLog();
    final DBCPService dbcpService = context.getProperty(JDBC_SERVICE).asControllerService(DBCPService.class);
    final MetadataProviderService metadataService = context.getProperty(METADATA_SERVICE).asControllerService(MetadataProviderService.class);
    final String loadStrategy = context.getProperty(LOAD_STRATEGY).getValue();
    final String categoryName = context.getProperty(FEED_CATEGORY).evaluateAttributeExpressions(incoming).getValue();
    final String feedName = context.getProperty(FEED_NAME).evaluateAttributeExpressions(incoming).getValue();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(incoming).getValue();
    final String fieldSpecs = context.getProperty(TABLE_SPECS).evaluateAttributeExpressions(incoming).getValue();
    final String dateField = context.getProperty(DATE_FIELD).evaluateAttributeExpressions(incoming).getValue();
    final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).asTimePeriod(TimeUnit.SECONDS).intValue();
    final Integer overlapTime = context.getProperty(OVERLAP_TIME).evaluateAttributeExpressions(incoming).asTimePeriod(TimeUnit.SECONDS).intValue();
    final Integer backoffTime = context.getProperty(BACKOFF_PERIOD).asTimePeriod(TimeUnit.SECONDS).intValue();
    final String unitSize = context.getProperty(UNIT_SIZE).getValue();
    final String outputType = context.getProperty(OUTPUT_TYPE).getValue();
    String outputDelimiter = context.getProperty(OUTPUT_DELIMITER).evaluateAttributeExpressions(incoming).getValue();
    final String delimiter = StringUtils.isBlank(outputDelimiter) ? "," : outputDelimiter;
    final PropertyValue waterMarkPropName = context.getProperty(HIGH_WATER_MARK_PROP).evaluateAttributeExpressions(incoming);
    final String[] selectFields = parseFields(fieldSpecs);
    final LoadStrategy strategy = LoadStrategy.valueOf(loadStrategy);
    final StopWatch stopWatch = new StopWatch(true);
    try (final Connection conn = dbcpService.getConnection()) {
        FlowFile outgoing = (incoming == null ? session.create() : incoming);
        final AtomicLong nrOfRows = new AtomicLong(0L);
        final LastFieldVisitor visitor = new LastFieldVisitor(dateField, null);
        final FlowFile current = outgoing;
        outgoing = session.write(outgoing, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                ResultSet rs = null;
                try {
                    GetTableDataSupport support = new GetTableDataSupport(conn, queryTimeout);
                    if (strategy == LoadStrategy.FULL_LOAD) {
                        rs = support.selectFullLoad(tableName, selectFields);
                    } else if (strategy == LoadStrategy.INCREMENTAL) {
                        String waterMarkValue = getIncrementalWaterMarkValue(current, waterMarkPropName);
                        LocalDateTime waterMarkTime = LocalDateTime.parse(waterMarkValue, DATE_TIME_FORMAT);
                        Date lastLoadDate = toDate(waterMarkTime);
                        visitor.setLastModifyDate(lastLoadDate);
                        rs = support.selectIncremental(tableName, selectFields, dateField, overlapTime, lastLoadDate, backoffTime, GetTableDataSupport.UnitSizes.valueOf(unitSize));
                    } else {
                        throw new RuntimeException("Unsupported loadStrategy [" + loadStrategy + "]");
                    }
                    if (GetTableDataSupport.OutputType.DELIMITED.equals(GetTableDataSupport.OutputType.valueOf(outputType))) {
                        nrOfRows.set(JdbcCommon.convertToDelimitedStream(rs, out, (strategy == LoadStrategy.INCREMENTAL ? visitor : null), delimiter));
                    } else if (GetTableDataSupport.OutputType.AVRO.equals(GetTableDataSupport.OutputType.valueOf(outputType))) {
                        avroSchema = JdbcCommon.createSchema(rs);
                        nrOfRows.set(JdbcCommon.convertToAvroStream(rs, out, (strategy == LoadStrategy.INCREMENTAL ? visitor : null), avroSchema));
                    } else {
                        throw new RuntimeException("Unsupported output format type [" + outputType + "]");
                    }
                } catch (final SQLException e) {
                    throw new IOException("SQL execution failure", e);
                } finally {
                    if (rs != null) {
                        try {
                            if (rs.getStatement() != null) {
                                rs.getStatement().close();
                            }
                            rs.close();
                        } catch (SQLException e) {
                            getLog().error("Error closing sql statement and resultset");
                        }
                    }
                }
            }
        });
        // set attribute how many rows were selected
        outgoing = session.putAttribute(outgoing, RESULT_ROW_COUNT, Long.toString(nrOfRows.get()));
        // set output format type and avro schema for feed setup, if available
        outgoing = session.putAttribute(outgoing, "db.table.output.format", outputType);
        String avroSchemaForFeedSetup = (avroSchema != null) ? JdbcCommon.getAvroSchemaForFeedSetup(avroSchema) : EMPTY_STRING;
        outgoing = session.putAttribute(outgoing, "db.table.avro.schema", avroSchemaForFeedSetup);
        session.getProvenanceReporter().modifyContent(outgoing, "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        // Terminate flow file if no work
        Long rowcount = nrOfRows.get();
        outgoing = session.putAttribute(outgoing, ComponentAttributes.NUM_SOURCE_RECORDS.key(), String.valueOf(rowcount));
        if (nrOfRows.get() == 0L) {
            logger.info("{} contains no data; transferring to 'nodata'", new Object[] { outgoing });
            session.transfer(outgoing, REL_NO_DATA);
        } else {
            logger.info("{} contains {} records; transferring to 'success'", new Object[] { outgoing, nrOfRows.get() });
            if (strategy == LoadStrategy.INCREMENTAL) {
                String newWaterMarkStr = format(visitor.getLastModifyDate());
                outgoing = setIncrementalWaterMarkValue(session, outgoing, waterMarkPropName, newWaterMarkStr);
                logger.info("Recorded load status feed {} date {}", new Object[] { feedName, newWaterMarkStr });
            }
            session.transfer(outgoing, REL_SUCCESS);
        }
    } catch (final Exception e) {
        if (incoming == null) {
            logger.error("Unable to execute SQL select from table due to {}. No incoming flow file to route to failure", new Object[] { e });
        } else {
            logger.error("Unable to execute SQL select from table for {} due to {}; routing to failure", new Object[] { incoming, e });
            session.transfer(incoming, REL_FAILURE);
        }
    }
}
Use of org.apache.nifi.util.StopWatch in project kylo by Teradata.
Class ExecuteHQLStatement, method executeStatements:
public void executeStatements(ProcessContext context, ProcessSession session, FlowFile flowFile, String[] hiveStatements, ThriftService thriftService) {
    final ComponentLog logger = getLog();
    String EXCEPTION_STATUS_KEY = "HQLStmt Status ";
    final StopWatch stopWatch = new StopWatch(true);
    try (final Connection con = thriftService.getConnection();
         final Statement st = con.createStatement()) {
        boolean result = false;
        EXCEPTION_STATUS_KEY = context.getName() + EXCEPTION_STATUS_KEY;
        for (String statement : hiveStatements) {
            // leading whitespace will cause Hive statement to fail
            statement = statement.trim();
            logger.debug("Executing statement: '{}'", new Object[] { statement });
            result = st.execute(statement);
        }
        session.getProvenanceReporter().modifyContent(flowFile, "Execution result " + result, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, IngestProperties.REL_SUCCESS);
    } catch (final Exception e) {
        logger.error("Unable to execute SQL DDL {} for {} due to {}; routing to failure", new Object[] { hiveStatements, flowFile, e });
        logger.error(e.getMessage());
        // add the exception to the flow file
        flowFile = session.putAttribute(flowFile, EXCEPTION_STATUS_KEY, "Failed With Exception: " + (e.getMessage().length() > 300 ? e.getMessage().substring(0, 300) : e.getMessage()));
        session.transfer(flowFile, IngestProperties.REL_FAILURE);
    }
}