Use of com.thinkbiganalytics.kylo.nifi.teradata.tdch.api.TdchConnectionService in the kylo project, by Teradata.
Example from the class TdchExportHiveToTeradataTest, method testTeradataUseQueryBand.
/**
 * Validates the Teradata query band property of {@link TdchExportHiveToTeradata}:
 * descriptor metadata (optional, expression-language enabled) and acceptance of
 * only well-formed "key=value;" strings.
 */
@Test
public void testTeradataUseQueryBand() throws InitializationException {
    final TestRunner testRunner = TestRunners.newTestRunner(TdchExportHiveToTeradata.class);

    // Minimal valid configuration: a dummy TDCH connection plus the required source/target tables.
    final TdchConnectionService connectionService = new DummyTdchConnectionService();
    testRunner.addControllerService(CONNECTION_SERVICE_ID, connectionService);
    testRunner.assertValid(connectionService);
    testRunner.enableControllerService(connectionService);
    testRunner.setProperty(TdchExportHiveToTeradata.TDCH_CONNECTION_SERVICE, CONNECTION_SERVICE_ID);
    testRunner.setProperty(TdchExportHiveToTeradata.HIVE_DATABASE, "hive_db");
    testRunner.setProperty(TdchExportHiveToTeradata.HIVE_TABLE, "hive_table");
    testRunner.setProperty(TdchExportHiveToTeradata.TERADATA_DATABASE_TABLE, "teradata_db.teradata_table");
    testRunner.assertValid();

    // Descriptor metadata of the query band property.
    // NOTE(review): running the default value through Boolean.valueOf looks copied from a
    // boolean-flag property test — a query band is a "key=value;" string, and
    // Boolean.valueOf(null) is simply false, so this assertion passes for any non-"true"
    // default. Confirm whether an assertNull/assertEquals on the default was intended.
    Assert.assertFalse(Boolean.valueOf(testRunner.getProcessor().getPropertyDescriptor(TdchExportHiveToTeradata.TERADATA_QUERY_BAND_NAME).getDefaultValue()));
    Assert.assertFalse(testRunner.getProcessor().getPropertyDescriptor(TdchExportHiveToTeradata.TERADATA_QUERY_BAND_NAME).isRequired());
    Assert.assertTrue(testRunner.getProcessor().getPropertyDescriptor(TdchExportHiveToTeradata.TERADATA_QUERY_BAND_NAME).isExpressionLanguageSupported());

    // Malformed query band strings must all be rejected by the property validator.
    for (final String malformed : new String[] {"key;", "=key;", "key=;", "key=value=;", ";"}) {
        Assert.assertFalse(testRunner.setProperty(TdchExportHiveToTeradata.TERADATA_QUERY_BAND, malformed).isValid());
    }

    // A well-formed key/value pair is accepted; an empty string is not.
    Assert.assertTrue(testRunner.setProperty(TdchExportHiveToTeradata.TERADATA_QUERY_BAND, "key=value;").isValid());
    Assert.assertFalse(testRunner.setProperty(TdchExportHiveToTeradata.TERADATA_QUERY_BAND, "").isValid());

    // The property is optional: removing it leaves the processor valid.
    testRunner.removeProperty(TdchExportHiveToTeradata.TERADATA_QUERY_BAND);
    testRunner.assertValid();
}
Use of com.thinkbiganalytics.kylo.nifi.teradata.tdch.api.TdchConnectionService in the kylo project, by Teradata.
Example from the class TdchExportHiveToTeradataTest, method testTeradataBatchSize.
/**
 * Validates the Teradata batch size property of {@link TdchExportHiveToTeradata}:
 * descriptor metadata (defaults to "10000", optional, expression-language enabled)
 * and acceptance of positive integer values only.
 */
@Test
public void testTeradataBatchSize() throws InitializationException {
    final TestRunner testRunner = TestRunners.newTestRunner(TdchExportHiveToTeradata.class);

    // Minimal valid configuration: a dummy TDCH connection plus the required source/target tables.
    final TdchConnectionService connectionService = new DummyTdchConnectionService();
    testRunner.addControllerService(CONNECTION_SERVICE_ID, connectionService);
    testRunner.assertValid(connectionService);
    testRunner.enableControllerService(connectionService);
    testRunner.setProperty(TdchExportHiveToTeradata.TDCH_CONNECTION_SERVICE, CONNECTION_SERVICE_ID);
    testRunner.setProperty(TdchExportHiveToTeradata.HIVE_DATABASE, "hive_db");
    testRunner.setProperty(TdchExportHiveToTeradata.HIVE_TABLE, "hive_table");
    testRunner.setProperty(TdchExportHiveToTeradata.TERADATA_DATABASE_TABLE, "teradata_db.teradata_table");
    testRunner.assertValid();

    // Descriptor metadata of the batch size property.
    Assert.assertEquals("10000", testRunner.getProcessor().getPropertyDescriptor(TdchExportHiveToTeradata.TERADATA_BATCH_SIZE_NAME).getDefaultValue());
    Assert.assertFalse(testRunner.getProcessor().getPropertyDescriptor(TdchExportHiveToTeradata.TERADATA_BATCH_SIZE_NAME).isRequired());
    Assert.assertTrue(testRunner.getProcessor().getPropertyDescriptor(TdchExportHiveToTeradata.TERADATA_BATCH_SIZE_NAME).isExpressionLanguageSupported());

    // A positive integer is accepted.
    Assert.assertTrue(testRunner.setProperty(TdchExportHiveToTeradata.TERADATA_BATCH_SIZE, "10000").isValid());

    // Empty, negative, and non-numeric values are all rejected.
    for (final String badBatchSize : new String[] {"", "-1", "not-an-integer"}) {
        Assert.assertFalse(testRunner.setProperty(TdchExportHiveToTeradata.TERADATA_BATCH_SIZE, badBatchSize).isValid());
    }

    // The property is optional: removing it leaves the processor valid.
    testRunner.removeProperty(TdchExportHiveToTeradata.TERADATA_BATCH_SIZE);
    testRunner.assertValid();
}
Use of com.thinkbiganalytics.kylo.nifi.teradata.tdch.api.TdchConnectionService in the kylo project, by Teradata.
Example from the class TdchExportHiveToTeradataTest, method testExport_HiveTextToTeradataInternalFastload_5_6_SetAllPropertiesViaExpressionLanguageToValidValues.
/**
 * End-to-end check that every processor property (34 in total) can be supplied via
 * NiFi expression language, with the actual values arriving as flowfile attributes.
 * The run is expected to route the flowfile to REL_FAILURE (see note below), and the
 * test then verifies the exact TDCH export command the processor assembled and wrote
 * to the "tdch.export.hive.to.teradata.command" attribute.
 */
@Test
public void testExport_HiveTextToTeradataInternalFastload_5_6_SetAllPropertiesViaExpressionLanguageToValidValues() throws InitializationException {
/*
From actual run:
Key: 'tdch.export.hive.to.teradata.command'
Value: 'hadoop jar $USERLIBTDCH com.teradata.connector.common.tool.ConnectorExportTool -libjars $LIB_JARS -Dtdch.output.teradata.truncate=true -classname "com.teradata.jdbc.TeraDriver" -url "jdbc:teradata://localhost/database=perf" -username "dbc" -password ***** -method "internal.fastload" -jobtype "hive" -fileformat "textfile" -nummappers "2" -throttlemappers "true" -minmappers "1" -sourcedateformat "yyyy-MM-dd" -sourcetimeformat "HH:mm:ss" -sourcetimestampformat "yyyy-MM-dd HH:mm:ss.SSS" -sourcetimezoneid "UTC" -targetdateformat "yyyy-MM-dd" -targettimeformat "HH:mm:ss" -targettimestampformat "yyyy-MM-dd HH:mm:ss.SSS" -targettimezoneid "PST" -stringtruncate "true" -hiveconf "/tdch/hive-config/hive-site.xml" -sourcedatabase "tdch" -sourcetable "tdch.perf_hive_text_10k" -sourcefieldnames "yelp_text,yelp_date,yelp_likes,yelp_business_id,user_id" -separator , -lineseparator \\n -targettable "perf.perf_td_allstr_10_to_1000k" -targetfieldnames "yelp_text,yelp_date,yelp_likes,yelp_business_id,user_id" -usexviews "false" -queryband "org=finance;" -batchsize "10000" -stagedatabase "finance_scratchpad" -stagetablename "yelp_stg" -forcestage "true" -keepstagetable "true" -errortabledatabase "finance_scratchpad" -errortablename "yelp_err" '
*/
// This test covers assigning all values (34) via expression language. It ensures all of them can be set via expression variables.
final TestRunner runner = TestRunners.newTestRunner(TdchExportHiveToTeradata.class);
// Dev (not Dummy) connection service: supplies realistic connection settings such as
// the JDBC URL and charset that end up in the generated command.
TdchConnectionService tdchConnectionService = new DevTdchConnectionService();
runner.addControllerService(CONNECTION_SERVICE_ID, tdchConnectionService);
runner.assertValid(tdchConnectionService);
runner.enableControllerService(tdchConnectionService);
runner.setProperty(TdchExportHiveToTeradata.TDCH_CONNECTION_SERVICE, CONNECTION_SERVICE_ID);
// These are required, and support arbitrary expression variable
runner.setProperty(TdchExportHiveToTeradata.HIVE_DATABASE, "${required_config_hive_db}");
runner.setProperty(TdchExportHiveToTeradata.HIVE_TABLE, "${required_config_hive_table}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_DATABASE_TABLE, "${required_config_teradata_db}.${required_config_teradata_table}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_TRUNCATE_TABLE, "${config_teradata_truncate_table}");
runner.assertValid();
// These are optional (but get defaults when processor is instantiated), and support arbitrary expression variable
runner.setProperty(TdchExportHiveToTeradata.HIVE_FIELD_NAMES, "${config_hive_field_names}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_FIELD_NAMES, "${config_teradata_field_names}");
runner.setProperty(TdchExportHiveToTeradata.NUMBER_OF_MAPPERS, "${config_num_mappers}");
runner.setProperty(TdchExportHiveToTeradata.THROTTLE_MAPPERS_FLAG, "${config_throttle_mappers_flag}");
runner.setProperty(TdchExportHiveToTeradata.HIVE_SOURCE_DATE_FORMAT, "${config_hive_source_date_format}");
runner.setProperty(TdchExportHiveToTeradata.HIVE_SOURCE_TIME_FORMAT, "${config_hive_source_time_format}");
runner.setProperty(TdchExportHiveToTeradata.HIVE_SOURCE_TIMESTAMP_FORMAT, "${config_hive_source_timestamp_format}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_TARGET_DATE_FORMAT, "${config_teradata_target_date_format}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_TARGET_TIME_FORMAT, "${config_teradata_target_time_format}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_TARGET_TIMESTAMP_FORMAT, "${config_teradata_target_timestamp_format}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_STRING_TRUNCATE_FLAG, "${config_teradata_string_truncate_flag}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_CHARSET, "${config_teradata_charset}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_USE_XVIEWS, "${config_teradata_use_xviews}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_BATCH_SIZE, "${config_teradata_batch_size}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_FORCE_STAGE, "${config_teradata_force_stage}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_KEEP_STAGE_TABLE, "${config_teradata_keep_stage_table}");
runner.assertValid();
// These need the specific expression language variable since processor checks them. The test verifies this.
// An arbitrary variable name renders the processor invalid; only the expected one is accepted.
runner.setProperty(TdchExportHiveToTeradata.TERADATA_EXPORT_TOOL_METHOD, "${my.custom.var.export.tool.method}");
runner.assertNotValid();
runner.setProperty(TdchExportHiveToTeradata.TERADATA_EXPORT_TOOL_METHOD, "${tdch.export.tool.method}");
runner.assertValid();
runner.setProperty(TdchExportHiveToTeradata.HIVE_EXPORT_TOOL_JOB_TYPE, "${my.custom.var.export.tool.job.type}");
runner.assertNotValid();
runner.setProperty(TdchExportHiveToTeradata.HIVE_EXPORT_TOOL_JOB_TYPE, "${tdch.export.tool.job.type}");
runner.assertValid();
runner.setProperty(TdchExportHiveToTeradata.HIVE_EXPORT_TOOL_FILEFORMAT, "${my.custom.var.export.tool.file.format}");
runner.assertNotValid();
runner.setProperty(TdchExportHiveToTeradata.HIVE_EXPORT_TOOL_FILEFORMAT, "${tdch.export.tool.file.format}");
runner.assertValid();
// These are optional (do not get defaults when processor is instantiated), and support arbitrary expression variable
runner.setProperty(TdchExportHiveToTeradata.HIVE_SOURCE_TIMEZONE_ID, "${config_hive_source_timezone_id}");
runner.setProperty(TdchExportHiveToTeradata.HIVE_CONFIGURATION_FILE_HDFS_PATH, "${config_hive_configuration_file_hdfs_path}");
runner.setProperty(TdchExportHiveToTeradata.HIVE_FIELD_SEPARATOR, "${config_hive_field_separator}");
runner.setProperty(TdchExportHiveToTeradata.HIVE_LINE_SEPARATOR, "${config_hive_line_separator}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_TARGET_TIMEZONE_ID, "${config_teradata_target_timezone_id}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_QUERY_BAND, "${config_teradata_query_band}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_STAGING_DATABASE, "${config_teradata_staging_database}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_STAGING_TABLE, "${config_teradata_staging_table}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_FAST_LOAD_ERROR_DATABASE, "${config_teradata_fast_load_error_database}");
runner.setProperty(TdchExportHiveToTeradata.TERADATA_FAST_LOAD_ERROR_TABLE, "${config_teradata_fast_load_error_table}");
runner.setProperty(TdchExportHiveToTeradata.MINIMUM_MAPPERS, "${config_minimum_mappers}");
// Assign values to the expression variables upstream in flowfile
MockFlowFile mockFlowFile = new MockFlowFile(1L);
Map<String, String> attributes = new HashMap<>();
attributes.put("required_config_hive_db", "tdch");
attributes.put("required_config_hive_table", "tdch.perf_hive_text_10k");
attributes.put("required_config_teradata_db", "perf");
attributes.put("required_config_teradata_table", "perf_td_allstr_10_to_1000k");
attributes.put("config_teradata_truncate_table", "true");
attributes.put("config_hive_field_names", "yelp_text,yelp_date,yelp_likes,yelp_business_id,user_id");
// NOTE(review): this value deliberately contains a space after "yelp_likes," while the
// expected command below has none — presumably the processor trims whitespace in the
// field-name list. Confirm that the trimming is intentional behavior under test.
attributes.put("config_teradata_field_names", "yelp_text,yelp_date,yelp_likes, yelp_business_id,user_id");
attributes.put("config_num_mappers", "2");
attributes.put("config_throttle_mappers_flag", "true");
attributes.put("config_hive_source_date_format", "yyyy-MM-dd");
attributes.put("config_hive_source_time_format", "HH:mm:ss");
attributes.put("config_hive_source_timestamp_format", "yyyy-MM-dd HH:mm:ss.SSS");
attributes.put("config_teradata_target_date_format", "yyyy-MM-dd");
attributes.put("config_teradata_target_time_format", "HH:mm:ss");
attributes.put("config_teradata_target_timestamp_format", "yyyy-MM-dd HH:mm:ss.SSS");
attributes.put("config_teradata_string_truncate_flag", "true");
attributes.put("config_teradata_charset", "UTF16");
attributes.put("config_teradata_use_xviews", "false");
attributes.put("config_teradata_batch_size", "10000");
attributes.put("config_teradata_force_stage", "true");
attributes.put("config_teradata_keep_stage_table", "true");
attributes.put("tdch.export.tool.method", "internal.fastload");
attributes.put("tdch.export.tool.job.type", "hive");
attributes.put("tdch.export.tool.file.format", "textfile");
attributes.put("config_hive_source_timezone_id", "UTC");
attributes.put("config_hive_configuration_file_hdfs_path", "/tdch/hive-config/hive-site.xml");
attributes.put("config_hive_field_separator", ",");
attributes.put("config_hive_line_separator", "\\n");
attributes.put("config_teradata_target_timezone_id", "PST");
attributes.put("config_teradata_query_band", "org=finance;");
attributes.put("config_teradata_staging_database", "finance_scratchpad");
attributes.put("config_teradata_staging_table", "yelp_stg");
attributes.put("config_teradata_fast_load_error_database", "finance_scratchpad");
attributes.put("config_teradata_fast_load_error_table", "yelp_err");
attributes.put("config_minimum_mappers", "1");
mockFlowFile.putAttributes(attributes);
runner.enqueue(mockFlowFile);
runner.run(1);
// NOTE(review): the flowfile is expected on REL_FAILURE — presumably because no real
// TDCH/Hadoop environment exists in the unit-test sandbox, so the launched command
// fails. The test's real subject is the command attribute asserted below; confirm.
List<MockFlowFile> failedFlowFiles = runner.getFlowFilesForRelationship(TdchExportHiveToTeradata.REL_FAILURE);
Assert.assertEquals(1, failedFlowFiles.size());
runner.assertQueueEmpty();
// Expected command differs from the header sample in one spot: the JDBC URL gains
// ",CHARSET=UTF16" because TERADATA_CHARSET was set via ${config_teradata_charset}.
String expectedCommand = "hadoop jar $USERLIBTDCH com.teradata.connector.common.tool.ConnectorExportTool -libjars $LIB_JARS -Dtdch.output.teradata.truncate=true -classname \"com.teradata.jdbc.TeraDriver\" -url \"jdbc:teradata://localhost/database=perf,CHARSET=UTF16\" -username \"dbc\" -password ***** -method \"internal.fastload\" -jobtype \"hive\" -fileformat \"textfile\" -nummappers \"2\" -throttlemappers \"true\" -minmappers \"1\" -sourcedateformat \"yyyy-MM-dd\" -sourcetimeformat \"HH:mm:ss\" -sourcetimestampformat \"yyyy-MM-dd HH:mm:ss.SSS\" -sourcetimezoneid \"UTC\" -targetdateformat \"yyyy-MM-dd\" -targettimeformat \"HH:mm:ss\" -targettimestampformat \"yyyy-MM-dd HH:mm:ss.SSS\" -targettimezoneid \"PST\" -stringtruncate \"true\" -hiveconf \"/tdch/hive-config/hive-site.xml\" -sourcedatabase \"tdch\" -sourcetable \"tdch.perf_hive_text_10k\" -sourcefieldnames \"yelp_text,yelp_date,yelp_likes,yelp_business_id,user_id\" -separator , -lineseparator \\\\n -targettable \"perf.perf_td_allstr_10_to_1000k\" -targetfieldnames \"yelp_text,yelp_date,yelp_likes,yelp_business_id,user_id\" -usexviews \"false\" -queryband \"org=finance;\" -batchsize \"10000\" -stagedatabase \"finance_scratchpad\" -stagetablename \"yelp_stg\" -forcestage \"true\" -keepstagetable \"true\" -errortabledatabase \"finance_scratchpad\" -errortablename \"yelp_err\" ";
MockFlowFile failedFlowFile = failedFlowFiles.get(0);
Assert.assertEquals(expectedCommand, failedFlowFile.getAttribute("tdch.export.hive.to.teradata.command"));
}
Use of com.thinkbiganalytics.kylo.nifi.teradata.tdch.api.TdchConnectionService in the kylo project, by Teradata.
Example from the class TdchExportHiveToTeradataTest, method testHiveConfigurationFileHdfsPath.
/**
 * Validates the Hive configuration file HDFS path property of
 * {@link TdchExportHiveToTeradata}: descriptor metadata (no default, optional,
 * expression-language enabled) and acceptance of paths ending in "hive-site.xml" only.
 */
@Test
public void testHiveConfigurationFileHdfsPath() throws InitializationException {
    final TestRunner testRunner = TestRunners.newTestRunner(TdchExportHiveToTeradata.class);

    // Minimal valid configuration: a dummy TDCH connection plus the required source/target tables.
    final TdchConnectionService connectionService = new DummyTdchConnectionService();
    testRunner.addControllerService(CONNECTION_SERVICE_ID, connectionService);
    testRunner.assertValid(connectionService);
    testRunner.enableControllerService(connectionService);
    testRunner.setProperty(TdchExportHiveToTeradata.TDCH_CONNECTION_SERVICE, CONNECTION_SERVICE_ID);
    testRunner.setProperty(TdchExportHiveToTeradata.HIVE_DATABASE, "hive_db");
    testRunner.setProperty(TdchExportHiveToTeradata.HIVE_TABLE, "hive_table");
    testRunner.setProperty(TdchExportHiveToTeradata.TERADATA_DATABASE_TABLE, "teradata_db.teradata_table");
    testRunner.assertValid();

    // Descriptor metadata of the hive-site.xml path property.
    Assert.assertNull(testRunner.getProcessor().getPropertyDescriptor(TdchExportHiveToTeradata.HIVE_CONFIGURATION_FILE_HDFS_PATH_NAME).getDefaultValue());
    Assert.assertFalse(testRunner.getProcessor().getPropertyDescriptor(TdchExportHiveToTeradata.HIVE_CONFIGURATION_FILE_HDFS_PATH_NAME).isRequired());
    Assert.assertTrue(testRunner.getProcessor().getPropertyDescriptor(TdchExportHiveToTeradata.HIVE_CONFIGURATION_FILE_HDFS_PATH_NAME).isExpressionLanguageSupported());

    // Paths whose file name is not exactly "hive-site.xml" are rejected.
    for (final String badPath : new String[] {"/abc/xyz/hive-site-invalid.xml", "/abc/xyz/invalid-hive-site.xml"}) {
        Assert.assertFalse(testRunner.setProperty(TdchExportHiveToTeradata.HIVE_CONFIGURATION_FILE_HDFS_PATH, badPath).isValid());
    }

    // A path ending in the exact file name "hive-site.xml" is accepted.
    Assert.assertTrue(testRunner.setProperty(TdchExportHiveToTeradata.HIVE_CONFIGURATION_FILE_HDFS_PATH, "/abc/xyz/hive-site.xml").isValid());

    // The property is optional: removing it leaves the processor valid.
    testRunner.removeProperty(TdchExportHiveToTeradata.HIVE_CONFIGURATION_FILE_HDFS_PATH);
    testRunner.assertValid();
}
Use of com.thinkbiganalytics.kylo.nifi.teradata.tdch.api.TdchConnectionService in the kylo project, by Teradata.
Example from the class StandardTdchConnectionService_Set2_Test, method testHdpHiveConf_WithValidDirectory.
/**
 * Verifies that {@link StandardTdchConnectionService#HIVE_CONF_PATH} accepts an
 * existing directory (the JUnit temporary folder) and that the validation result
 * is reported against the property's display name.
 */
@Test
public void testHdpHiveConf_WithValidDirectory() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(TestTdchProcessorForTestingTdchConnectionService.class);
    final TdchConnectionService connectionService = new DummyTdchConnectionService();
    testRunner.addControllerService(CONNECTION_SERVICE_ID, connectionService);
    testRunner.assertValid(connectionService);

    // Point HIVE_CONF_PATH at a directory that definitely exists.
    final ValidationResult hiveConfResult =
        testRunner.setProperty(connectionService, StandardTdchConnectionService.HIVE_CONF_PATH, tempFolder.getRoot().getAbsolutePath());
    Assert.assertEquals(StandardTdchConnectionService.HIVE_CONF_PATH.getDisplayName(), hiveConfResult.getSubject());
    Assert.assertTrue(hiveConfResult.isValid());

    // The controller service as a whole remains valid with the path set.
    testRunner.assertValid(connectionService);
}
Aggregated usage examples of TdchConnectionService (end of listing).