
Example 1 with TdchBuilder

Use of com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.export.utils.TdchBuilder in project kylo by Teradata.

In the class TdchExportHiveToTeradata, the onTrigger method:

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
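    // Reuse the incoming flow file if one is available, otherwise create a new one to carry the result attributes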
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        flowFile = session.create();
        logger.info("[Hive to Teradata Export via TDCH] Created a flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("[Hive to Teradata Export via TDCH] Using an existing flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }
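    // Kerberos credentials and the controller service that supplies the Teradata connection details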
    final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
    final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
    final TdchConnectionService tdchConnectionService = context.getProperty(TDCH_CONNECTION_SERVICE).asControllerService(TdchConnectionService.class);
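    // Common export settings; blank properties fall back to the processor defaults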
    final String commonExportToolMethod = StringUtils.isEmpty(context.getProperty(EXPORT_TOOL_METHOD).evaluateAttributeExpressions(flowFile).getValue()) ? DEFAULT_EXPORT_TOOL_METHOD : context.getProperty(EXPORT_TOOL_METHOD).evaluateAttributeExpressions(flowFile).getValue();
    final Integer commonNumberOfMappers = StringUtils.isEmpty(context.getProperty(NUMBER_OF_MAPPERS).evaluateAttributeExpressions(flowFile).getValue()) ? Integer.valueOf(DEFAULT_NUMBER_OF_MAPPERS) : context.getProperty(NUMBER_OF_MAPPERS).evaluateAttributeExpressions(flowFile).asInteger();
    final Boolean commonThrottleMappersFlag = StringUtils.isEmpty(context.getProperty(THROTTLE_MAPPERS_FLAG).evaluateAttributeExpressions(flowFile).getValue()) ? Boolean.valueOf(DEFAULT_THROTTLE_MAPPERS_FLAG) : context.getProperty(THROTTLE_MAPPERS_FLAG).evaluateAttributeExpressions(flowFile).asBoolean();
    final Integer commonMinimumMappers = StringUtils.isEmpty(context.getProperty(MINIMUM_MAPPERS).evaluateAttributeExpressions(flowFile).getValue()) ? null : context.getProperty(MINIMUM_MAPPERS).evaluateAttributeExpressions(flowFile).asInteger();
    final String commonSourceDateFormat = context.getProperty(SOURCE_DATE_FORMAT).evaluateAttributeExpressions(flowFile).getValue();
    final String commonSourceTimeFormat = context.getProperty(SOURCE_TIME_FORMAT).evaluateAttributeExpressions(flowFile).getValue();
    final String commonSourceTimestampFormat = context.getProperty(SOURCE_TIMESTAMP_FORMAT).evaluateAttributeExpressions(flowFile).getValue();
    final String commonSourceTimezoneId = context.getProperty(SOURCE_TIMEZONE_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String commonTargetDateFormat = context.getProperty(TARGET_DATE_FORMAT).evaluateAttributeExpressions(flowFile).getValue();
    final String commonTargetTimeFormat = context.getProperty(TARGET_TIME_FORMAT).evaluateAttributeExpressions(flowFile).getValue();
    final String commonTargetTimestampFormat = context.getProperty(TARGET_TIMESTAMP_FORMAT).evaluateAttributeExpressions(flowFile).getValue();
    final String commonTargetTimezoneId = context.getProperty(TARGET_TIMEZONE_ID).evaluateAttributeExpressions(flowFile).getValue();
    final Boolean commonStringTruncateFlag = StringUtils.isEmpty(context.getProperty(STRING_TRUNCATE_FLAG).evaluateAttributeExpressions(flowFile).getValue()) ? Boolean.valueOf(DEFAULT_STRING_TRUNCATE_FLAG) : context.getProperty(STRING_TRUNCATE_FLAG).evaluateAttributeExpressions(flowFile).asBoolean();
    final String commonCharset = context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue();
    final String commonExportToolJobType = StringUtils.isEmpty(context.getProperty(EXPORT_TOOL_JOB_TYPE).evaluateAttributeExpressions(flowFile).getValue()) ? DEFAULT_EXPORT_TOOL_JOB_TYPE : context.getProperty(EXPORT_TOOL_JOB_TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final String commonExportToolFileFormat = StringUtils.isEmpty(context.getProperty(EXPORT_TOOL_FILEFORMAT).evaluateAttributeExpressions(flowFile).getValue()) ? DEFAULT_EXPORT_TOOL_FILEFORMAT : context.getProperty(EXPORT_TOOL_FILEFORMAT).evaluateAttributeExpressions(flowFile).getValue();
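    // Hive source settings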
    final String hiveConfigurationFileHdfsPath = context.getProperty(HIVE_CONFIGURATION_FILE_HDFS_PATH).evaluateAttributeExpressions(flowFile).getValue();
    final String hiveDatabase = context.getProperty(HIVE_DATABASE).evaluateAttributeExpressions(flowFile).getValue();
    final String hiveTable = context.getProperty(HIVE_TABLE).evaluateAttributeExpressions(flowFile).getValue();
    final String hiveFieldNames = context.getProperty(HIVE_FIELD_NAMES).evaluateAttributeExpressions(flowFile).getValue();
    final String hiveFieldSeparator = context.getProperty(HIVE_FIELD_SEPARATOR).evaluateAttributeExpressions(flowFile).getValue();
    final String hiveLineSeparator = context.getProperty(HIVE_LINE_SEPARATOR).evaluateAttributeExpressions(flowFile).getValue();
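    // Teradata target settings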
    final String teradataDatabaseTable = context.getProperty(TERADATA_DATABASE_TABLE).evaluateAttributeExpressions(flowFile).getValue();
    final String teradataFieldNames = context.getProperty(TERADATA_FIELD_NAMES).evaluateAttributeExpressions(flowFile).getValue();
    final Boolean teradataTruncateTable = StringUtils.isEmpty(context.getProperty(TERADATA_TRUNCATE_TABLE).evaluateAttributeExpressions(flowFile).getValue()) ? Boolean.valueOf(DEFAULT_TERADATA_TRUNCATE_TABLE) : context.getProperty(TERADATA_TRUNCATE_TABLE).evaluateAttributeExpressions(flowFile).asBoolean();
    final Boolean teradataUseXviews = StringUtils.isEmpty(context.getProperty(TERADATA_USE_XVIEWS).evaluateAttributeExpressions(flowFile).getValue()) ? Boolean.valueOf(DEFAULT_TERADATA_USE_XVIEWS) : context.getProperty(TERADATA_USE_XVIEWS).evaluateAttributeExpressions(flowFile).asBoolean();
    final String teradataQueryBand = context.getProperty(TERADATA_QUERY_BAND).evaluateAttributeExpressions(flowFile).getValue();
    final Integer teradataBatchSize = StringUtils.isEmpty(context.getProperty(TERADATA_BATCH_SIZE).evaluateAttributeExpressions(flowFile).getValue()) ? Integer.valueOf(DEFAULT_TERADATA_BATCH_SIZE) : context.getProperty(TERADATA_BATCH_SIZE).evaluateAttributeExpressions(flowFile).asInteger();
    final String teradataStagingDatabase = context.getProperty(TERADATA_STAGING_DATABASE).evaluateAttributeExpressions(flowFile).getValue();
    final String teradataStagingTable = context.getProperty(TERADATA_STAGING_TABLE).evaluateAttributeExpressions(flowFile).getValue();
    final Boolean teradataForceStage = StringUtils.isEmpty(context.getProperty(TERADATA_FORCE_STAGE).evaluateAttributeExpressions(flowFile).getValue()) ? Boolean.valueOf(DEFAULT_TERADATA_FORCE_STAGE) : context.getProperty(TERADATA_FORCE_STAGE).evaluateAttributeExpressions(flowFile).asBoolean();
    final Boolean teradataKeepStagingTable = StringUtils.isEmpty(context.getProperty(TERADATA_KEEP_STAGE_TABLE).evaluateAttributeExpressions(flowFile).getValue()) ? Boolean.valueOf(DEFAULT_TERADATA_KEEP_STAGE) : context.getProperty(TERADATA_KEEP_STAGE_TABLE).evaluateAttributeExpressions(flowFile).asBoolean();
    final String teradataFastLoadErrorDatabase = context.getProperty(TERADATA_FAST_LOAD_ERROR_DATABASE).evaluateAttributeExpressions(flowFile).getValue();
    final String teradataFastLoadErrorTable = context.getProperty(TERADATA_FAST_LOAD_ERROR_TABLE).evaluateAttributeExpressions(flowFile).getValue();
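    // Assemble the Kerberos configuration and the full TDCH export command from the resolved settings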
    final StopWatch stopWatch = new StopWatch(false);
    KerberosConfig kerberosConfig = new KerberosConfig().setLogger(logger).setKerberosPrincipal(kerberosPrincipal).setKerberosKeytab(kerberosKeyTab);
    TdchBuilder tdchBuilder = new TdchBuilder();
    String tdchCommand = tdchBuilder.setLogger(logger)
        .setTdchJarEnvironmentVariable(TDCH_JAR_PATH_ENV_VAR_NAME)
        .setTdchLibraryJarsVariable(TDCH_LIB_JARS_ENV_VAR_NAME)
        .setTdchHadoopClassPathVariable(TDCH_HADOOP_CLASSPATH_ENV_VAR_NAME)
        .setTdchOperationType(TdchOperationType.TDCH_EXPORT)
        .setCommonTeradataUrl(tdchConnectionService.getJdbcConnectionUrl(), teradataDatabaseTable, commonCharset)
        .setCommonTeradataClassname(tdchConnectionService.getJdbcDriverClassName())
        .setCommonTeradataUsername(tdchConnectionService.getUserName())
        .setCommonTeradataPassword(tdchConnectionService.getPassword())
        .setCommonExportToolMethod(commonExportToolMethod)
        .setCommonExportToolJobType(commonExportToolJobType)
        .setCommonExportToolFileFormat(commonExportToolFileFormat)
        .setCommonNumMappers(commonNumberOfMappers)
        .setCommonThrottleMappers(commonThrottleMappersFlag)
        .setCommonMinMappers(commonMinimumMappers)
        .setCommonSourceDateFormat(commonSourceDateFormat)
        .setCommonSourceTimeFormat(commonSourceTimeFormat)
        .setCommonSourceTimestampFormat(commonSourceTimestampFormat)
        .setCommonSourceTimezoneId(commonSourceTimezoneId)
        .setCommonTargetDateFormat(commonTargetDateFormat)
        .setCommonTargetTimeFormat(commonTargetTimeFormat)
        .setCommonTargetTimestampFormat(commonTargetTimestampFormat)
        .setCommonTargetTimezoneId(commonTargetTimezoneId)
        .setCommonStringTruncate(commonStringTruncateFlag)
        .setCommonCharset(commonCharset)
        .setSourceHiveConfigurationFileHdfsPath(hiveConfigurationFileHdfsPath)
        .setSourceHiveSourceDatabase(hiveDatabase)
        .setSourceHiveSourceTable(hiveTable)
        .setSourceHiveSourceFieldNames(hiveFieldNames)
        .setSourceHiveFieldSeparator(hiveFieldSeparator)
        .setSourceHiveLineSeparator(hiveLineSeparator)
        .setTargetTeradataDatabaseTable(teradataDatabaseTable)
        .setTargetTeradataTargetFieldNames(teradataFieldNames)
        .setTargetTeradataTruncateTable(teradataTruncateTable)
        .setTargetTeradataUseXviews(teradataUseXviews)
        .setTargetTeradataQueryBand(teradataQueryBand)
        .setTargetTeradataBatchSize(teradataBatchSize)
        .setTargetTeradataStagingDatabase(teradataStagingDatabase)
        .setTargetTeradataStagingTableName(teradataStagingTable)
        .setTargetTeradataForceStage(teradataForceStage)
        .setTargetTeradataKeepStageTable(teradataKeepStagingTable)
        .setTargetTeradataFastLoadErrorTableDatabase(teradataFastLoadErrorDatabase)
        .setTargetTeradataFastLoadErrorTableName(teradataFastLoadErrorTable)
        .build();
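    // Wrap the command for shell execution and expose the TDCH jar locations via environment variables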
    List<String> tdchExecutionCommand = new ArrayList<>();
    tdchExecutionCommand.add(COMMAND_SHELL);
    tdchExecutionCommand.add(COMMAND_SHELL_FLAGS);
    tdchExecutionCommand.add(tdchCommand);
    Map<String, String> tdchEnvironmentVariables = new HashMap<>();
    tdchEnvironmentVariables.put(TDCH_JAR_PATH_ENV_VAR_NAME, tdchConnectionService.getTdchJarPath());
    tdchEnvironmentVariables.put(TDCH_LIB_JARS_ENV_VAR_NAME, tdchConnectionService.getTdchLibraryJarsPath());
    tdchEnvironmentVariables.put(TDCH_HADOOP_CLASSPATH_ENV_VAR_NAME, tdchConnectionService.getTdchHadoopClassPath());
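    // Run the TDCH command via the process runner and time the execution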
    TdchProcessRunner tdchProcessRunner = new TdchProcessRunner(kerberosConfig, tdchExecutionCommand, logger, TdchOperationType.TDCH_EXPORT, tdchEnvironmentVariables);
    logger.info("Starting execution of TDCH command (Hive to Teradata export)");
    stopWatch.start();
    TdchProcessResult tdchProcessResult = tdchProcessRunner.execute();
    stopWatch.stop();
    logger.info("Finished execution of TDCH command (Hive to Teradata export)");
    int resultStatus = tdchProcessResult.getExitValue();
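    // Mask credentials in the command, then record it and the parsed TDCH results as flow file attributes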
    TdchUtils tdchUtils = new TdchUtils();
    String tdchCommandWithCredentialsMasked = tdchUtils.maskTdchCredentials(tdchCommand);
    flowFile = session.putAttribute(flowFile, "tdch.export.hive.to.teradata.command", tdchCommandWithCredentialsMasked);
    flowFile = session.putAttribute(flowFile, "tdch.export.hive.to.teradata.kylo.result.code", String.valueOf(resultStatus));
    flowFile = session.putAttribute(flowFile, "tdch.export.hive.to.teradata.input.record.count", String.valueOf(tdchUtils.getExportHiveToTeradataInputRecordsCount(tdchProcessResult, logger)));
    flowFile = session.putAttribute(flowFile, "tdch.export.hive.to.teradata.output.record.count", String.valueOf(tdchUtils.getExportHiveToTeradataOutputRecordsCount(tdchProcessResult, logger)));
    flowFile = session.putAttribute(flowFile, "tdch.export.hive.to.teradata.tdch.exit.code", String.valueOf(tdchUtils.getExportHiveToTeradataJobExitCode(tdchProcessResult, logger)));
    flowFile = session.putAttribute(flowFile, "tdch.export.hive.to.teradata.tdch.time.taken", tdchUtils.getExportHiveToTeradataJobTimeTaken(tdchProcessResult, logger));
    logger.info("Wrote result attributes to flow file");
    if (resultStatus == 0) {
        logger.info("TDCH Hive to Teradata export OK [Code {}]", new Object[] { resultStatus });
        session.transfer(flowFile, REL_SUCCESS);
    } else {
        logger.error("TDCH Hive to Teradata export FAIL [Code {}]", new Object[] { resultStatus });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) TdchProcessRunner(com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.base.TdchProcessRunner) TdchConnectionService(com.thinkbiganalytics.kylo.nifi.teradata.tdch.api.TdchConnectionService) HashMap(java.util.HashMap) KerberosConfig(com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.common.KerberosConfig) ArrayList(java.util.ArrayList) ComponentLog(org.apache.nifi.logging.ComponentLog) TdchBuilder(com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.export.utils.TdchBuilder) StopWatch(org.apache.nifi.util.StopWatch) TdchUtils(com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.export.utils.TdchUtils) TdchProcessResult(com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.base.TdchProcessResult)
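
The property resolution block near the top of onTrigger repeats the same pattern for every optional property: evaluate the Expression Language value against the flow file, then fall back to a default when the result is empty. A minimal sketch of a helper that captures this pattern, assuming NiFi's ProcessContext/PropertyDescriptor API and the StringUtils class already used above; the helper name is hypothetical and not part of the Kylo source:

// Hypothetical helper illustrating the repeated default-fallback pattern in onTrigger (not part of the Kylo source)
private static String evaluateOrDefault(ProcessContext context, PropertyDescriptor property, FlowFile flowFile, String defaultValue) {
    // Evaluate Expression Language against the flow file, then substitute the default when the value is blank
    final String value = context.getProperty(property).evaluateAttributeExpressions(flowFile).getValue();
    return StringUtils.isEmpty(value) ? defaultValue : value;
}

// Example usage for one of the properties shown above:
// final String commonExportToolMethod = evaluateOrDefault(context, EXPORT_TOOL_METHOD, flowFile, DEFAULT_EXPORT_TOOL_METHOD);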

Example 2 with TdchBuilder

Use of com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.export.utils.TdchBuilder in project kylo by Teradata.

In the class TdchBuilderTest, the getBaseTdchBuilder method:

private static TdchBuilder getBaseTdchBuilder() {
    TdchBuilder tdchBuilder = new TdchBuilder();
    TestRunner runner = TestRunners.newTestRunner(TestAbstractTdchProcessor.class);
    MockComponentLog componentLog = runner.getLogger();
    return tdchBuilder.setLogger(componentLog).setTdchJarEnvironmentVariable("USERLIBTDCH").setTdchLibraryJarsVariable("LIBJARS").setTdchHadoopClassPathVariable("HADOOP_CLASSPATH").setTdchOperationType(TdchOperationType.TDCH_EXPORT).setCommonExportToolJobType("hive");
}
Also used : TestRunner(org.apache.nifi.util.TestRunner) MockComponentLog(org.apache.nifi.util.MockComponentLog) TdchBuilder(com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.export.utils.TdchBuilder)
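
A hypothetical companion test could complete the base builder with the Teradata connection and table setters seen in Example 1 and then call build(); a minimal sketch, with illustrative literal values and a deliberately loose assertion, since the exact command text is defined by TdchBuilder.build():

// Hypothetical test (not from the Kylo source); all literal values are illustrative only
@Test
public void testBaseBuilderCompletedWithExportSettings() {
    String command = getBaseTdchBuilder()
        .setCommonTeradataUrl("jdbc:teradata://localhost/database=testdb", "testdb.example_table", "UTF8")
        .setCommonTeradataClassname("com.teradata.jdbc.TeraDriver")
        .setCommonTeradataUsername("dbc")
        .setCommonTeradataPassword("dbc")
        .setSourceHiveSourceDatabase("default")
        .setSourceHiveSourceTable("example_table")
        .setTargetTeradataDatabaseTable("testdb.example_table")
        .build();
    // Only assumes that a valid export configuration yields a non-empty command
    Assert.assertFalse(command.isEmpty());
}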

Example 3 with TdchBuilder

Use of com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.export.utils.TdchBuilder in project kylo by Teradata.

In the class TdchBuilderTest, the testSetting_Tdch_Import_OperationType method:

@Test
public void testSetting_Tdch_Import_OperationType() {
    TdchBuilder tdchBuilder = new TdchBuilder();
    TestRunner runner = TestRunners.newTestRunner(TestAbstractTdchProcessor.class);
    MockComponentLog componentLog = runner.getLogger();
    tdchBuilder.setLogger(componentLog).setTdchJarEnvironmentVariable("USERLIBTDCH").setTdchLibraryJarsVariable("LIBJARS").setTdchHadoopClassPathVariable("HADOOP_CLASSPATH").setTdchOperationType(TdchOperationType.TDCH_IMPORT).setCommonExportToolJobType("hive");
    String command = tdchBuilder.build();
    Assert.assertEquals("", command);
    List<LogMessage> warnMessages = componentLog.getWarnMessages();
    Assert.assertEquals(1, warnMessages.size());
    Assert.assertTrue(warnMessages.get(0).getMsg().endsWith("TDCH Import not yet implemented"));
}
Also used : LogMessage(org.apache.nifi.util.LogMessage) TestRunner(org.apache.nifi.util.TestRunner) MockComponentLog(org.apache.nifi.util.MockComponentLog) TdchBuilder(com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.export.utils.TdchBuilder) Test(org.junit.Test)

Aggregations

TdchBuilder (com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.export.utils.TdchBuilder)3 MockComponentLog (org.apache.nifi.util.MockComponentLog)2 TestRunner (org.apache.nifi.util.TestRunner)2 TdchConnectionService (com.thinkbiganalytics.kylo.nifi.teradata.tdch.api.TdchConnectionService)1 KerberosConfig (com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.common.KerberosConfig)1 TdchProcessResult (com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.base.TdchProcessResult)1 TdchProcessRunner (com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.base.TdchProcessRunner)1 TdchUtils (com.thinkbiganalytics.kylo.nifi.teradata.tdch.core.processor.export.utils.TdchUtils)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ComponentLog (org.apache.nifi.logging.ComponentLog)1 LogMessage (org.apache.nifi.util.LogMessage)1 StopWatch (org.apache.nifi.util.StopWatch)1 Test (org.junit.Test)1