Search in sources:

Example 1 with HiveDelimStrategy

Use of com.thinkbiganalytics.nifi.v2.sqoop.enums.HiveDelimStrategy in project kylo by Teradata.

In the class ImportSqoop, the method onTrigger:

/**
 * Executes the Sqoop import: reads the processor properties (most of them
 * evaluated against the incoming flow file), builds the full sqoop command
 * string, runs it via {@code /bin/bash -c}, and writes the result attributes
 * (masked command text, exit code, duration, record count, target HDFS
 * directory) onto the flow file before routing it to success or failure.
 * For incremental load strategies, the new high watermark is also published
 * as a flow file attribute.
 *
 * @param context provides the configured processor property values
 * @param session provides flow file access and transfer
 * @throws ProcessException declared by the NiFi processor contract
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    // Use the queued flow file if one exists; otherwise create a fresh one
    // so this processor can also act as a flow source.
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }
    // --- Gather all property values up front. Properties read with
    // evaluateAttributeExpressions(flowFile) support NiFi expression language
    // against the flow file's attributes; enum-valued properties are parsed
    // with valueOf on the raw property string.
    final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
    final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
    final SqoopConnectionService sqoopConnectionService = context.getProperty(SQOOP_CONNECTION_SERVICE).asControllerService(SqoopConnectionService.class);
    final String sourceTableName = context.getProperty(SOURCE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceTableFields = context.getProperty(SOURCE_TABLE_FIELDS).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceTableWhereClause = context.getProperty(SOURCE_TABLE_WHERE_CLAUSE).evaluateAttributeExpressions(flowFile).getValue();
    final SqoopLoadStrategy sourceLoadStrategy = SqoopLoadStrategy.valueOf(context.getProperty(SOURCE_LOAD_STRATEGY).getValue());
    final String sourceCheckColumnName = context.getProperty(SOURCE_CHECK_COLUMN_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String sourcePropertyWatermark = context.getProperty(SOURCE_PROPERTY_WATERMARK).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceCheckColumnLastValue = context.getProperty(SOURCE_CHECK_COLUMN_LAST_VALUE).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceSplitByField = context.getProperty(SOURCE_SPLIT_BY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceBoundaryQuery = context.getProperty(SOURCE_BOUNDARY_QUERY).evaluateAttributeExpressions(flowFile).getValue();
    final Integer clusterMapTasks = context.getProperty(CLUSTER_MAP_TASKS).evaluateAttributeExpressions(flowFile).asInteger();
    final String clusterUIJobName = context.getProperty(CLUSTER_UI_JOB_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String targetHdfsDirectory = context.getProperty(TARGET_HDFS_DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
    final TargetHdfsDirExistsStrategy targetHdfsDirExistsStrategy = TargetHdfsDirExistsStrategy.valueOf(context.getProperty(TARGET_HDFS_DIRECTORY_EXISTS_STRATEGY).getValue());
    final ExtractDataFormat targetExtractDataFormat = ExtractDataFormat.valueOf(context.getProperty(TARGET_EXTRACT_DATA_FORMAT).getValue());
    final String targetHdfsFileFieldDelimiter = context.getProperty(TARGET_HDFS_FILE_FIELD_DELIMITER).evaluateAttributeExpressions(flowFile).getValue();
    final String targetHdfsFileRecordDelimiter = context.getProperty(TARGET_HDFS_FILE_RECORD_DELIMITER).evaluateAttributeExpressions(flowFile).getValue();
    final HiveDelimStrategy targetHiveDelimStrategy = HiveDelimStrategy.valueOf(context.getProperty(TARGET_HIVE_DELIM_STRATEGY).getValue());
    final String targetHiveReplaceDelim = context.getProperty(TARGET_HIVE_REPLACE_DELIM).evaluateAttributeExpressions(flowFile).getValue();
    final CompressionAlgorithm targetCompressionAlgorithm = CompressionAlgorithm.valueOf(context.getProperty(TARGET_COMPRESSION_ALGORITHM).getValue());
    final String targetColumnTypeMapping = context.getProperty(TARGET_COLUMN_TYPE_MAPPING).evaluateAttributeExpressions(flowFile).getValue();
    final String sqoopCodeGenDirectory = context.getProperty(SQOOP_CODEGEN_DIR).evaluateAttributeExpressions(flowFile).getValue();
    final String sourceSpecificSqlServerSchema = context.getProperty(SOURCESPECIFIC_SQLSERVER_SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final String systemProperties = context.getProperty(SQOOP_SYSTEM_PROPERTIES).evaluateAttributeExpressions(flowFile).getValue();
    final String additionalArguments = context.getProperty(SQOOP_ADDITIONAL_ARGUMENTS).evaluateAttributeExpressions(flowFile).getValue();
    // The sqoop command is executed via a bash shell ("/bin/bash -c <command>").
    final String COMMAND_SHELL = "/bin/bash";
    final String COMMAND_SHELL_FLAGS = "-c";
    final StopWatch stopWatch = new StopWatch(false);
    // Kerberos credentials are passed through to the process runner for authentication.
    KerberosConfig kerberosConfig = new KerberosConfig().setLogger(logger).setKerberosPrincipal(kerberosPrincipal).setKerberosKeytab(kerberosKeyTab);
    // Assemble the complete sqoop command line from the connection service
    // settings and all property values gathered above.
    SqoopBuilder sqoopBuilder = new SqoopBuilder();
    String sqoopCommand = sqoopBuilder.setLogger(logger).setSourceConnectionString(sqoopConnectionService.getConnectionString()).setSourceUserName(sqoopConnectionService.getUserName()).setPasswordMode(sqoopConnectionService.getPasswordMode()).setSourcePasswordHdfsFile(sqoopConnectionService.getPasswordHdfsFile()).setSourcePasswordPassphrase(sqoopConnectionService.getPasswordPassphrase()).setSourceEnteredPassword(sqoopConnectionService.getEnteredPassword()).setSourceConnectionManager(sqoopConnectionService.getConnectionManager()).setSourceDriver(sqoopConnectionService.getDriver()).setSourceTableName(sourceTableName).setSourceTableFields(sourceTableFields).setSourceTableWhereClause(sourceTableWhereClause).setSourceLoadStrategy(sourceLoadStrategy).setSourceCheckColumnName(sourceCheckColumnName).setSourceCheckColumnLastValue(sourceCheckColumnLastValue).setSourceSplitByField(sourceSplitByField).setSourceBoundaryQuery(sourceBoundaryQuery).setClusterMapTasks(clusterMapTasks).setClusterUIJobName(clusterUIJobName).setTargetHdfsDirectory(targetHdfsDirectory).setTargetHdfsDirExistsStrategy(targetHdfsDirExistsStrategy).setTargetExtractDataFormat(targetExtractDataFormat).setTargetHdfsFileFieldDelimiter(targetHdfsFileFieldDelimiter).setTargetHdfsFileRecordDelimiter(targetHdfsFileRecordDelimiter).setTargetHiveDelimStrategy(targetHiveDelimStrategy).setTargetHiveReplaceDelim(targetHiveReplaceDelim).setTargetCompressionAlgorithm(targetCompressionAlgorithm).setTargetColumnTypeMapping(targetColumnTypeMapping).setSqoopCodeGenDirectory(sqoopCodeGenDirectory).setSourceSpecificSqlServerSchema(sourceSpecificSqlServerSchema).setSystemProperties(systemProperties).setAdditionalArguments(additionalArguments).build();
    List<String> sqoopExecutionCommand = new ArrayList<>();
    sqoopExecutionCommand.add(COMMAND_SHELL);
    sqoopExecutionCommand.add(COMMAND_SHELL_FLAGS);
    sqoopExecutionCommand.add(sqoopCommand);
    SqoopProcessRunner sqoopProcessRunner = new SqoopProcessRunner(kerberosConfig, sqoopExecutionCommand, logger, sourceLoadStrategy);
    logger.info("Starting execution of Sqoop command");
    stopWatch.start();
    SqoopProcessResult sqoopProcessResult = sqoopProcessRunner.execute();
    // NOTE(review): elapsed time is read before stop(); presumably NiFi's
    // StopWatch supports reading while running — confirm.
    long jobDurationSeconds = stopWatch.getElapsed(TimeUnit.SECONDS);
    stopWatch.stop();
    logger.info("Finished execution of Sqoop command");
    int resultStatus = sqoopProcessResult.getExitValue();
    SqoopUtils sqoopUtils = new SqoopUtils();
    long recordsCount = sqoopUtils.getSqoopRecordCount(sqoopProcessResult, logger);
    // Credentials are masked before the command text is exposed as an attribute.
    String sqoopCommandWithCredentialsMasked = sqoopUtils.maskCredentials(sqoopCommand, sqoopUtils.getCredentialsToMask());
    // Publish execution results as flow file attributes for downstream processors.
    flowFile = session.putAttribute(flowFile, "sqoop.command.text", sqoopCommandWithCredentialsMasked);
    flowFile = session.putAttribute(flowFile, "sqoop.result.code", String.valueOf(resultStatus));
    flowFile = session.putAttribute(flowFile, "sqoop.run.seconds", String.valueOf(jobDurationSeconds));
    flowFile = session.putAttribute(flowFile, "sqoop.record.count", String.valueOf(recordsCount));
    flowFile = session.putAttribute(flowFile, "sqoop.output.hdfs", targetHdfsDirectory);
    logger.info("Wrote result attributes to flow file");
    if (resultStatus == 0) {
        logger.info("Sqoop Import OK [Code {}]", new Object[] { resultStatus });
        if (sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_APPEND || sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_LASTMODIFIED) {
            // For incremental loads, publish the watermark attribute:
            // an append load that imported zero records keeps the previous
            // last value as the watermark.
            if ((sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_APPEND) && (recordsCount == 0)) {
                flowFile = session.putAttribute(flowFile, sourcePropertyWatermark, sourceCheckColumnLastValue);
            } else {
                String newHighWaterMark = sqoopUtils.getNewHighWatermark(sqoopProcessResult);
                // "NO_UPDATE", null, or empty means no new watermark was
                // reported — retain the previous last value.
                if ((newHighWaterMark == null) || (newHighWaterMark.equals("NO_UPDATE")) || (newHighWaterMark.equals(""))) {
                    flowFile = session.putAttribute(flowFile, sourcePropertyWatermark, sourceCheckColumnLastValue);
                } else {
                    flowFile = session.putAttribute(flowFile, sourcePropertyWatermark, newHighWaterMark);
                }
            }
        }
        session.transfer(flowFile, REL_SUCCESS);
    } else {
        logger.error("Sqoop Import FAIL [Code {}]", new Object[] { resultStatus });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) SqoopUtils(com.thinkbiganalytics.nifi.v2.sqoop.utils.SqoopUtils) SqoopBuilder(com.thinkbiganalytics.nifi.v2.sqoop.utils.SqoopBuilder) KerberosConfig(com.thinkbiganalytics.nifi.v2.sqoop.security.KerberosConfig) TargetHdfsDirExistsStrategy(com.thinkbiganalytics.nifi.v2.sqoop.enums.TargetHdfsDirExistsStrategy) ArrayList(java.util.ArrayList) ExtractDataFormat(com.thinkbiganalytics.nifi.v2.sqoop.enums.ExtractDataFormat) ComponentLog(org.apache.nifi.logging.ComponentLog) CompressionAlgorithm(com.thinkbiganalytics.nifi.v2.sqoop.enums.CompressionAlgorithm) StopWatch(org.apache.nifi.util.StopWatch) SqoopProcessResult(com.thinkbiganalytics.nifi.v2.sqoop.process.SqoopProcessResult) SqoopConnectionService(com.thinkbiganalytics.nifi.v2.sqoop.SqoopConnectionService) HiveDelimStrategy(com.thinkbiganalytics.nifi.v2.sqoop.enums.HiveDelimStrategy) SqoopProcessRunner(com.thinkbiganalytics.nifi.v2.sqoop.process.SqoopProcessRunner) SqoopLoadStrategy(com.thinkbiganalytics.nifi.v2.sqoop.enums.SqoopLoadStrategy)

Example 2 with HiveDelimStrategy

Use of com.thinkbiganalytics.nifi.v2.sqoop.enums.HiveDelimStrategy in project kylo by Teradata.

In the class ImportSqoop, the method customValidate:

/**
 * Called by the framework; this method performs additional cross-property
 * validation that single-property validators cannot express.
 *
 * @param validationContext used to retrieve the property values to check
 * @return a collection of {@link ValidationResult} which will be checked by the framework
 */
@Override
protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
    final List<ValidationResult> results = new ArrayList<>();
    final SqoopLoadStrategy sourceLoadStrategy = SqoopLoadStrategy.valueOf(validationContext.getProperty(SOURCE_LOAD_STRATEGY).getValue());
    final String sourceCheckColumnName = validationContext.getProperty(SOURCE_CHECK_COLUMN_NAME).evaluateAttributeExpressions().getValue();
    final String sourceCheckColumnLastValue = validationContext.getProperty(SOURCE_CHECK_COLUMN_LAST_VALUE).evaluateAttributeExpressions().getValue();
    final HiveDelimStrategy targetHiveDelimStrategy = HiveDelimStrategy.valueOf(validationContext.getProperty(TARGET_HIVE_DELIM_STRATEGY).getValue());
    final String targetHiveReplaceDelim = validationContext.getProperty(TARGET_HIVE_REPLACE_DELIM).evaluateAttributeExpressions().getValue();
    final SqoopConnectionService sqoopConnectionService = validationContext.getProperty(SQOOP_CONNECTION_SERVICE).asControllerService(SqoopConnectionService.class);
    final ExtractDataFormat targetExtractDataFormat = ExtractDataFormat.valueOf(validationContext.getProperty(TARGET_EXTRACT_DATA_FORMAT).getValue());
    final CompressionAlgorithm targetCompressionAlgorithm = CompressionAlgorithm.valueOf(validationContext.getProperty(TARGET_COMPRESSION_ALGORITHM).getValue());
    final String targetColumnTypeMapping = validationContext.getProperty(TARGET_COLUMN_TYPE_MAPPING).evaluateAttributeExpressions().getValue();
    final SqoopUtils sqoopUtils = new SqoopUtils();
    // Incremental loads require both the check column and its last value.
    if (sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_LASTMODIFIED || sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_APPEND) {
        if ((sourceCheckColumnName == null) || (sourceCheckColumnLastValue == null)) {
            results.add(invalidResult("Both 'Check Column Name' and 'Check Column Last Value' are required for incremental load."));
        }
    }
    // The REPLACE strategy is meaningless without a replacement delimiter.
    if (targetHiveDelimStrategy == HiveDelimStrategy.REPLACE && targetHiveReplaceDelim == null) {
        results.add(invalidResult("Replacement delimiter must be specified for Hive Delimiter REPLACE Strategy."));
    }
    // asControllerService returns null when the property is unset; guard so the
    // user gets a validation error instead of a NullPointerException.
    if (sqoopConnectionService == null) {
        results.add(invalidResult("A Sqoop connection service must be configured."));
    } else {
        final PasswordMode passwordMode = sqoopConnectionService.getPasswordMode();
        // Each password mode has its own set of mandatory companion settings.
        if (passwordMode == PasswordMode.ENCRYPTED_ON_HDFS_FILE) {
            if (sqoopConnectionService.getPasswordHdfsFile() == null || sqoopConnectionService.getPasswordPassphrase() == null) {
                results.add(invalidResult("For encrypted password on HDFS, both encrypted HDFS file location and passphrase are required."));
            }
        } else if (passwordMode == PasswordMode.ENCRYPTED_TEXT_ENTRY) {
            if (sqoopConnectionService.getEnteredPassword() == null || sqoopConnectionService.getPasswordPassphrase() == null) {
                results.add(invalidResult("For encrypted password entry mode, both the encrypted password and passphrase are required."));
            }
        } else if (passwordMode == PasswordMode.CLEAR_TEXT_ENTRY) {
            if (sqoopConnectionService.getEnteredPassword() == null) {
                results.add(invalidResult("For clear text password entry mode, the password must be provided."));
            }
        }
        if (sqoopUtils.isTeradataDatabase(sqoopConnectionService.getConnectionString()) && (sourceLoadStrategy == SqoopLoadStrategy.INCREMENTAL_LASTMODIFIED)) {
            results.add(invalidResult("For Teradata source system, INCREMENTAL_LASTMODIFIED mode of load is not supported. This is due to a known issue with Sqoop (SQOOP-2402)."));
        }
    }
    if ((targetExtractDataFormat == ExtractDataFormat.PARQUET) && (targetCompressionAlgorithm != CompressionAlgorithm.NONE)) {
        results.add(invalidResult("For PARQUET data format, compression is not supported."));
    }
    if ((targetColumnTypeMapping != null) && (!targetColumnTypeMapping.isEmpty())) {
        if (!sqoopUtils.checkMappingInput(targetColumnTypeMapping)) {
            results.add(invalidResult("For Target Column Type Mapping, ensure that mappings are provided as COLUMN=Type pairs separated by comma. " + "Ensure no spaces in entry. " + "Example: PO_ID=Integer,PO_DETAILS=String"));
        }
    }
    return results;
}

/**
 * Builds a failed {@link ValidationResult} with the given explanation,
 * subjected to this processor's simple class name.
 */
private ValidationResult invalidResult(final String explanation) {
    return new ValidationResult.Builder().subject(this.getClass().getSimpleName()).valid(false).explanation(explanation).build();
}
Also used : SqoopUtils(com.thinkbiganalytics.nifi.v2.sqoop.utils.SqoopUtils) SqoopBuilder(com.thinkbiganalytics.nifi.v2.sqoop.utils.SqoopBuilder) ArrayList(java.util.ArrayList) ExtractDataFormat(com.thinkbiganalytics.nifi.v2.sqoop.enums.ExtractDataFormat) PasswordMode(com.thinkbiganalytics.nifi.v2.sqoop.PasswordMode) ValidationResult(org.apache.nifi.components.ValidationResult) CompressionAlgorithm(com.thinkbiganalytics.nifi.v2.sqoop.enums.CompressionAlgorithm) SqoopConnectionService(com.thinkbiganalytics.nifi.v2.sqoop.SqoopConnectionService) HiveDelimStrategy(com.thinkbiganalytics.nifi.v2.sqoop.enums.HiveDelimStrategy) SqoopLoadStrategy(com.thinkbiganalytics.nifi.v2.sqoop.enums.SqoopLoadStrategy)

Aggregations

SqoopConnectionService (com.thinkbiganalytics.nifi.v2.sqoop.SqoopConnectionService)2 CompressionAlgorithm (com.thinkbiganalytics.nifi.v2.sqoop.enums.CompressionAlgorithm)2 ExtractDataFormat (com.thinkbiganalytics.nifi.v2.sqoop.enums.ExtractDataFormat)2 HiveDelimStrategy (com.thinkbiganalytics.nifi.v2.sqoop.enums.HiveDelimStrategy)2 SqoopLoadStrategy (com.thinkbiganalytics.nifi.v2.sqoop.enums.SqoopLoadStrategy)2 SqoopBuilder (com.thinkbiganalytics.nifi.v2.sqoop.utils.SqoopBuilder)2 SqoopUtils (com.thinkbiganalytics.nifi.v2.sqoop.utils.SqoopUtils)2 ArrayList (java.util.ArrayList)2 PasswordMode (com.thinkbiganalytics.nifi.v2.sqoop.PasswordMode)1 TargetHdfsDirExistsStrategy (com.thinkbiganalytics.nifi.v2.sqoop.enums.TargetHdfsDirExistsStrategy)1 SqoopProcessResult (com.thinkbiganalytics.nifi.v2.sqoop.process.SqoopProcessResult)1 SqoopProcessRunner (com.thinkbiganalytics.nifi.v2.sqoop.process.SqoopProcessRunner)1 KerberosConfig (com.thinkbiganalytics.nifi.v2.sqoop.security.KerberosConfig)1 ValidationResult (org.apache.nifi.components.ValidationResult)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ComponentLog (org.apache.nifi.logging.ComponentLog)1 StopWatch (org.apache.nifi.util.StopWatch)1