Use of com.thinkbiganalytics.nifi.v2.sqoop.enums.TargetHdfsDirExistsStrategy in project kylo by Teradata.
The method onTrigger of the class ImportSqoop.
/**
 * Builds a Sqoop import command from the processor configuration, hands it to a
 * {@code SqoopProcessRunner}, and routes the flow file according to the exit value.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Take the next flow file from the session, or create one when none is queued.</li>
 *   <li>Read every processor property (most are evaluated against the flow file's attributes).</li>
 *   <li>Assemble the Sqoop command with {@code SqoopBuilder} and wrap it as
 *       {@code /bin/bash -c <command>}.</li>
 *   <li>Run it and record the credential-masked command text, exit value, elapsed seconds,
 *       record count and target HDFS directory as {@code sqoop.*} attributes.</li>
 *   <li>On exit value 0, for the two incremental load strategies, also write the watermark
 *       attribute, then transfer to {@code REL_SUCCESS}; on any other exit value transfer
 *       to {@code REL_FAILURE}.</li>
 * </ol>
 *
 * @param context provides the configured property values and the Sqoop connection service
 * @param session used to obtain/create the flow file, set attributes and transfer it
 * @throws ProcessException declared by the overridden method; not thrown explicitly here
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog log = getLog();

    // Reuse the queued flow file when there is one; otherwise start from a new one.
    FlowFile flowFile = session.get();
    final boolean createdHere = (flowFile == null);
    if (createdHere) {
        flowFile = session.create();
    }
    final String uuidMessage = createdHere
        ? "Created a flow file having uuid: {}"
        : "Using an existing flow file having uuid: {}";
    log.info(uuidMessage, new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });

    // --- Kerberos and connection settings ---
    final String principal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
    final String keytab = context.getProperty(KERBEROS_KEYTAB).getValue();
    final SqoopConnectionService connection =
        context.getProperty(SQOOP_CONNECTION_SERVICE).asControllerService(SqoopConnectionService.class);

    // --- Source settings ---
    final String tableName = context.getProperty(SOURCE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String tableFields = context.getProperty(SOURCE_TABLE_FIELDS).evaluateAttributeExpressions(flowFile).getValue();
    final String whereClause = context.getProperty(SOURCE_TABLE_WHERE_CLAUSE).evaluateAttributeExpressions(flowFile).getValue();
    final SqoopLoadStrategy loadStrategy =
        SqoopLoadStrategy.valueOf(context.getProperty(SOURCE_LOAD_STRATEGY).getValue());
    final String checkColumn = context.getProperty(SOURCE_CHECK_COLUMN_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String watermarkAttribute = context.getProperty(SOURCE_PROPERTY_WATERMARK).evaluateAttributeExpressions(flowFile).getValue();
    final String previousWatermark = context.getProperty(SOURCE_CHECK_COLUMN_LAST_VALUE).evaluateAttributeExpressions(flowFile).getValue();
    final String splitByField = context.getProperty(SOURCE_SPLIT_BY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
    final String boundaryQuery = context.getProperty(SOURCE_BOUNDARY_QUERY).evaluateAttributeExpressions(flowFile).getValue();

    // --- Cluster settings ---
    final Integer mapTasks = context.getProperty(CLUSTER_MAP_TASKS).evaluateAttributeExpressions(flowFile).asInteger();
    final String jobName = context.getProperty(CLUSTER_UI_JOB_NAME).evaluateAttributeExpressions(flowFile).getValue();

    // --- Target settings ---
    final String hdfsDirectory = context.getProperty(TARGET_HDFS_DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
    final TargetHdfsDirExistsStrategy dirExistsStrategy =
        TargetHdfsDirExistsStrategy.valueOf(context.getProperty(TARGET_HDFS_DIRECTORY_EXISTS_STRATEGY).getValue());
    final ExtractDataFormat extractFormat =
        ExtractDataFormat.valueOf(context.getProperty(TARGET_EXTRACT_DATA_FORMAT).getValue());
    final String fieldDelimiter = context.getProperty(TARGET_HDFS_FILE_FIELD_DELIMITER).evaluateAttributeExpressions(flowFile).getValue();
    final String recordDelimiter = context.getProperty(TARGET_HDFS_FILE_RECORD_DELIMITER).evaluateAttributeExpressions(flowFile).getValue();
    final HiveDelimStrategy hiveDelimStrategy =
        HiveDelimStrategy.valueOf(context.getProperty(TARGET_HIVE_DELIM_STRATEGY).getValue());
    final String hiveReplaceDelim = context.getProperty(TARGET_HIVE_REPLACE_DELIM).evaluateAttributeExpressions(flowFile).getValue();
    final CompressionAlgorithm compression =
        CompressionAlgorithm.valueOf(context.getProperty(TARGET_COMPRESSION_ALGORITHM).getValue());
    final String columnTypeMapping = context.getProperty(TARGET_COLUMN_TYPE_MAPPING).evaluateAttributeExpressions(flowFile).getValue();

    // --- Miscellaneous Sqoop settings ---
    final String codeGenDirectory = context.getProperty(SQOOP_CODEGEN_DIR).evaluateAttributeExpressions(flowFile).getValue();
    final String sqlServerSchema = context.getProperty(SOURCESPECIFIC_SQLSERVER_SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final String systemProps = context.getProperty(SQOOP_SYSTEM_PROPERTIES).evaluateAttributeExpressions(flowFile).getValue();
    final String extraArguments = context.getProperty(SQOOP_ADDITIONAL_ARGUMENTS).evaluateAttributeExpressions(flowFile).getValue();

    final KerberosConfig kerberos = new KerberosConfig()
        .setLogger(log)
        .setKerberosPrincipal(principal)
        .setKerberosKeytab(keytab);

    // Kept as a single fluent chain: whether the setters return the same builder instance
    // is not visible from here, so the chain must not be split into separate statements.
    final String commandText = new SqoopBuilder()
        .setLogger(log)
        .setSourceConnectionString(connection.getConnectionString())
        .setSourceUserName(connection.getUserName())
        .setPasswordMode(connection.getPasswordMode())
        .setSourcePasswordHdfsFile(connection.getPasswordHdfsFile())
        .setSourcePasswordPassphrase(connection.getPasswordPassphrase())
        .setSourceEnteredPassword(connection.getEnteredPassword())
        .setSourceConnectionManager(connection.getConnectionManager())
        .setSourceDriver(connection.getDriver())
        .setSourceTableName(tableName)
        .setSourceTableFields(tableFields)
        .setSourceTableWhereClause(whereClause)
        .setSourceLoadStrategy(loadStrategy)
        .setSourceCheckColumnName(checkColumn)
        .setSourceCheckColumnLastValue(previousWatermark)
        .setSourceSplitByField(splitByField)
        .setSourceBoundaryQuery(boundaryQuery)
        .setClusterMapTasks(mapTasks)
        .setClusterUIJobName(jobName)
        .setTargetHdfsDirectory(hdfsDirectory)
        .setTargetHdfsDirExistsStrategy(dirExistsStrategy)
        .setTargetExtractDataFormat(extractFormat)
        .setTargetHdfsFileFieldDelimiter(fieldDelimiter)
        .setTargetHdfsFileRecordDelimiter(recordDelimiter)
        .setTargetHiveDelimStrategy(hiveDelimStrategy)
        .setTargetHiveReplaceDelim(hiveReplaceDelim)
        .setTargetCompressionAlgorithm(compression)
        .setTargetColumnTypeMapping(columnTypeMapping)
        .setSqoopCodeGenDirectory(codeGenDirectory)
        .setSourceSpecificSqlServerSchema(sqlServerSchema)
        .setSystemProperties(systemProps)
        .setAdditionalArguments(extraArguments)
        .build();

    // The generated command is passed as the single argument of "bash -c".
    final List<String> shellInvocation = new ArrayList<>();
    shellInvocation.add("/bin/bash");
    shellInvocation.add("-c");
    shellInvocation.add(commandText);

    final SqoopProcessRunner runner = new SqoopProcessRunner(kerberos, shellInvocation, log, loadStrategy);
    final StopWatch timer = new StopWatch(false);

    log.info("Starting execution of Sqoop command");
    timer.start();
    final SqoopProcessResult result = runner.execute();
    // Elapsed time is sampled while the watch is still running, then the watch is stopped.
    final long elapsedSeconds = timer.getElapsed(TimeUnit.SECONDS);
    timer.stop();
    log.info("Finished execution of Sqoop command");

    final int exitCode = result.getExitValue();
    final SqoopUtils utils = new SqoopUtils();
    final long importedRecords = utils.getSqoopRecordCount(result, log);
    // Only the masked form of the command is stored on the flow file.
    final String maskedCommand = utils.maskCredentials(commandText, utils.getCredentialsToMask());

    flowFile = session.putAttribute(flowFile, "sqoop.command.text", maskedCommand);
    flowFile = session.putAttribute(flowFile, "sqoop.result.code", String.valueOf(exitCode));
    flowFile = session.putAttribute(flowFile, "sqoop.run.seconds", String.valueOf(elapsedSeconds));
    flowFile = session.putAttribute(flowFile, "sqoop.record.count", String.valueOf(importedRecords));
    flowFile = session.putAttribute(flowFile, "sqoop.output.hdfs", hdfsDirectory);
    log.info("Wrote result attributes to flow file");

    // Any non-zero exit value is a failure; nothing else is written in that case.
    if (exitCode != 0) {
        log.error("Sqoop Import FAIL [Code {}]", new Object[] { exitCode });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    log.info("Sqoop Import OK [Code {}]", new Object[] { exitCode });

    final boolean appendLoad = (loadStrategy == SqoopLoadStrategy.INCREMENTAL_APPEND);
    final boolean incrementalLoad = appendLoad || (loadStrategy == SqoopLoadStrategy.INCREMENTAL_LASTMODIFIED);
    if (incrementalLoad) {
        // Default: carry the previous watermark forward unchanged.
        String watermarkValue = previousWatermark;

        // An append load that imported zero records never consults the process output.
        final boolean appendWithoutRecords = appendLoad && (importedRecords == 0);
        if (!appendWithoutRecords) {
            final String candidate = utils.getNewHighWatermark(result);
            final boolean usable = (candidate != null)
                && !candidate.equals("NO_UPDATE")
                && !candidate.equals("");
            if (usable) {
                watermarkValue = candidate;
            }
        }
        flowFile = session.putAttribute(flowFile, watermarkAttribute, watermarkValue);
    }

    session.transfer(flowFile, REL_SUCCESS);
}
Aggregations