use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class BusinessObjectDataServiceGenerateBusinessObjectDataDdlTest method testGenerateBusinessObjectDataDdlNoCustomDdlEscapeSingleQuoteInRowFormat.
@Test
public void testGenerateBusinessObjectDataDdlNoCustomDdlEscapeSingleQuoteInRowFormat() {
// Prepare test data without custom ddl.
List<SchemaColumn> partitionColumns = schemaColumnDaoTestHelper.getTestPartitionColumns();
String partitionKey = partitionColumns.get(0).getName();
businessObjectDataServiceTestHelper.createDatabaseEntitiesForBusinessObjectDataDdlTesting(FileTypeEntity.TXT_FILE_TYPE, partitionKey, null, BusinessObjectDataEntity.FIRST_PARTITION_COLUMN_POSITION, UNSORTED_PARTITION_VALUES, SUBPARTITION_VALUES, SINGLE_QUOTE, SINGLE_QUOTE, SINGLE_QUOTE, schemaColumnDaoTestHelper.getTestSchemaColumns(), partitionColumns, false, null, true, ALLOW_DUPLICATE_BUSINESS_OBJECT_DATA);
// Retrieve business object data ddl.
BusinessObjectDataDdlRequest request = businessObjectDataServiceTestHelper.getTestBusinessObjectDataDdlRequest(UNSORTED_PARTITION_VALUES);
BusinessObjectDataDdl resultDdl = businessObjectDataService.generateBusinessObjectDataDdl(request);
// Validate the results.
String expectedRowFormat = "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\'' ESCAPED BY '\\'' NULL DEFINED AS '\\''";
String expectedDdl = businessObjectDataServiceTestHelper.getExpectedBusinessObjectDataDdl(PARTITION_COLUMNS.length, FIRST_COLUMN_NAME, FIRST_COLUMN_DATA_TYPE, expectedRowFormat, Hive13DdlGenerator.TEXT_HIVE_FILE_FORMAT, FileTypeEntity.TXT_FILE_TYPE, BusinessObjectDataEntity.FIRST_PARTITION_COLUMN_POSITION, STORAGE_1_AVAILABLE_PARTITION_VALUES, SUBPARTITION_VALUES, false, true, true);
businessObjectDataServiceTestHelper.validateBusinessObjectDataDdl(request, expectedDdl, resultDdl);
}
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class BusinessObjectDataServiceGenerateBusinessObjectDataDdlTest method testGenerateBusinessObjectDataDdlNoCustomDdlSingleLevelPartitioningPartitionValueList.
@Test
public void testGenerateBusinessObjectDataDdlNoCustomDdlSingleLevelPartitioningPartitionValueList() {
// Prepare test data without custom ddl.
List<SchemaColumn> partitionColumns = schemaColumnDaoTestHelper.getTestPartitionColumns().subList(0, 1);
String partitionKey = partitionColumns.get(0).getName();
businessObjectDataServiceTestHelper.createDatabaseEntitiesForBusinessObjectDataDdlTesting(FileTypeEntity.TXT_FILE_TYPE, partitionKey, null, BusinessObjectDataEntity.FIRST_PARTITION_COLUMN_POSITION, UNSORTED_PARTITION_VALUES, NO_SUBPARTITION_VALUES, SCHEMA_DELIMITER_PIPE, SCHEMA_ESCAPE_CHARACTER_BACKSLASH, SCHEMA_NULL_VALUE_BACKSLASH_N, schemaColumnDaoTestHelper.getTestSchemaColumns(), partitionColumns, false, null, true, ALLOW_DUPLICATE_BUSINESS_OBJECT_DATA);
// Retrieve business object data ddl without specifying custom ddl name.
BusinessObjectDataDdlRequest request = businessObjectDataServiceTestHelper.getTestBusinessObjectDataDdlRequest(UNSORTED_PARTITION_VALUES);
BusinessObjectDataDdl resultDdl = businessObjectDataService.generateBusinessObjectDataDdl(request);
// Validate the results.
String expectedDdl = businessObjectDataServiceTestHelper.getExpectedBusinessObjectDataDdl(partitionColumns.size(), FIRST_COLUMN_NAME, FIRST_COLUMN_DATA_TYPE, ROW_FORMAT, Hive13DdlGenerator.TEXT_HIVE_FILE_FORMAT, FileTypeEntity.TXT_FILE_TYPE, BusinessObjectDataEntity.FIRST_PARTITION_COLUMN_POSITION, STORAGE_1_AVAILABLE_PARTITION_VALUES, SUBPARTITION_VALUES, false, true, true);
businessObjectDataServiceTestHelper.validateBusinessObjectDataDdl(request, expectedDdl, resultDdl);
// Retrieve business object data ddl when request partition value list has duplicate values.
request = businessObjectDataServiceTestHelper.getTestBusinessObjectDataDdlRequest(UNSORTED_PARTITION_VALUES);
request.getPartitionValueFilters().get(0).getPartitionValues().add(UNSORTED_PARTITION_VALUES.get(0));
resultDdl = businessObjectDataService.generateBusinessObjectDataDdl(request);
// Validate the results.
businessObjectDataServiceTestHelper.validateBusinessObjectDataDdl(request, expectedDdl, resultDdl);
}
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class BusinessObjectDataServiceGenerateBusinessObjectDataDdlTest method testGenerateBusinessObjectDataDdlLargePartitionValueListWithAutoDiscovery.
@Test
@Ignore
public void testGenerateBusinessObjectDataDdlLargePartitionValueListWithAutoDiscovery() {
final int PRIMARY_PARTITION_VALUE_LIST_SIZE = 10000;
final int SECOND_LEVEL_PARTITION_VALUES_PER_BUSINESS_OBJECT_DATA = 1;
final int STORAGE_FILES_PER_PARTITION = 1;
// Prepare test data and build a list of partition values to generate business object data DDL for.
// Build a list of schema columns.
List<SchemaColumn> schemaColumns = new ArrayList<>();
schemaColumns.add(new SchemaColumn(PARTITION_KEY, "DATE", NO_COLUMN_SIZE, COLUMN_REQUIRED, NO_COLUMN_DEFAULT_VALUE, NO_COLUMN_DESCRIPTION));
schemaColumns.add(new SchemaColumn(COLUMN_NAME, "NUMBER", COLUMN_SIZE, NO_COLUMN_REQUIRED, NO_COLUMN_DEFAULT_VALUE, COLUMN_DESCRIPTION));
schemaColumns.add(new SchemaColumn(COLUMN_NAME_2, "STRING", NO_COLUMN_SIZE, NO_COLUMN_REQUIRED, NO_COLUMN_DEFAULT_VALUE, NO_COLUMN_DESCRIPTION));
// Use the first two columns as partition columns.
List<SchemaColumn> partitionColumns = schemaColumns.subList(0, 2);
// Create a business object format entity with the schema.
BusinessObjectFormatEntity businessObjectFormatEntity = businessObjectFormatDaoTestHelper.createBusinessObjectFormatEntity(NAMESPACE, BDEF_NAME, FORMAT_USAGE_CODE, FileTypeEntity.TXT_FILE_TYPE, FORMAT_VERSION, FORMAT_DESCRIPTION, LATEST_VERSION_FLAG_SET, PARTITION_KEY, NO_PARTITION_KEY_GROUP, NO_ATTRIBUTES, SCHEMA_DELIMITER_PIPE, SCHEMA_ESCAPE_CHARACTER_BACKSLASH, SCHEMA_NULL_VALUE_BACKSLASH_N, schemaColumns, partitionColumns);
// Create an S3 storage entity.
StorageEntity storageEntity = storageDaoTestHelper.createStorageEntity(STORAGE_NAME, StoragePlatformEntity.S3, configurationHelper.getProperty(ConfigurationValue.S3_ATTRIBUTE_NAME_BUCKET_NAME), S3_BUCKET_NAME);
// Create relative business object data, storage unit, and storage file entities.
List<String> partitionValues = new ArrayList<>();
for (int i = 0; i < PRIMARY_PARTITION_VALUE_LIST_SIZE; i++) {
String partitionValue = String.format("%s-%03d", PARTITION_VALUE, i);
partitionValues.add(partitionValue);
// Create a business object data entity.
BusinessObjectDataEntity businessObjectDataEntity = businessObjectDataDaoTestHelper.createBusinessObjectDataEntity(businessObjectFormatEntity, partitionValue, NO_SUBPARTITION_VALUES, DATA_VERSION, true, BusinessObjectDataStatusEntity.VALID);
// Build an S3 key prefix according to the herd S3 naming convention.
String s3KeyPrefix = getExpectedS3KeyPrefix(NAMESPACE, DATA_PROVIDER_NAME, BDEF_NAME, FORMAT_USAGE_CODE, FileTypeEntity.TXT_FILE_TYPE, FORMAT_VERSION, PARTITION_KEY, partitionValue, null, null, DATA_VERSION);
// Create a storage unit with a storage directory path.
StorageUnitEntity storageUnitEntity = storageUnitDaoTestHelper.createStorageUnitEntity(storageEntity, businessObjectDataEntity, StorageUnitStatusEntity.ENABLED, s3KeyPrefix);
// Create storage file entities.
for (int j = 0; j < SECOND_LEVEL_PARTITION_VALUES_PER_BUSINESS_OBJECT_DATA; j++) {
// Build a storage file directory path that includes the relative second level partition value - needed for auto discovery.
String storageFileDirectoryPath = String.format("%s/%s=%s-%03d", s3KeyPrefix, COLUMN_NAME, PARTITION_VALUE_2, j);
for (int k = 0; k < STORAGE_FILES_PER_PARTITION; k++) {
String storageFilePath = String.format("%s/%03d.data", storageFileDirectoryPath, k);
storageFileDaoTestHelper.createStorageFileEntity(storageUnitEntity, storageFilePath, FILE_SIZE_1_KB, ROW_COUNT_1000);
}
}
herdDao.saveAndRefresh(storageUnitEntity);
}
// Retrieve business object data ddl for the entire list of partition values.
BusinessObjectDataDdl businessObjectDataDdl = businessObjectDataService.generateBusinessObjectDataDdl(new BusinessObjectDataDdlRequest(NAMESPACE, BDEF_NAME, FORMAT_USAGE_CODE, FileTypeEntity.TXT_FILE_TYPE, FORMAT_VERSION, Arrays.asList(new PartitionValueFilter(PARTITION_KEY, partitionValues, NO_PARTITION_VALUE_RANGE, NO_LATEST_BEFORE_PARTITION_VALUE, NO_LATEST_AFTER_PARTITION_VALUE)), NO_STANDALONE_PARTITION_VALUE_FILTER, DATA_VERSION, NO_STORAGE_NAMES, STORAGE_NAME, BusinessObjectDataDdlOutputFormatEnum.HIVE_13_DDL, TABLE_NAME, NO_CUSTOM_DDL_NAME, INCLUDE_DROP_TABLE_STATEMENT, INCLUDE_IF_NOT_EXISTS_OPTION, NO_INCLUDE_DROP_PARTITIONS, NO_ALLOW_MISSING_DATA, NO_INCLUDE_ALL_REGISTERED_SUBPARTITIONS, NO_SUPPRESS_SCAN_FOR_UNREGISTERED_SUBPARTITIONS));
// Validate the results.
assertNotNull(businessObjectDataDdl);
}
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class BusinessObjectDataServiceGenerateBusinessObjectDataDdlTest method testGenerateBusinessObjectDataDdlMissingBusinessObjectDataAllowMissingDataAllDataNoExists.
@Test
public void testGenerateBusinessObjectDataDdlMissingBusinessObjectDataAllowMissingDataAllDataNoExists() {
// Prepare test data.
List<SchemaColumn> partitionColumns = schemaColumnDaoTestHelper.getTestPartitionColumns();
businessObjectDataServiceTestHelper.createDatabaseEntitiesForBusinessObjectDataDdlTesting(FileTypeEntity.TXT_FILE_TYPE, FIRST_PARTITION_COLUMN_NAME, PARTITION_KEY_GROUP, BusinessObjectDataEntity.FIRST_PARTITION_COLUMN_POSITION, UNSORTED_PARTITION_VALUES, SUBPARTITION_VALUES, SCHEMA_DELIMITER_PIPE, SCHEMA_ESCAPE_CHARACTER_BACKSLASH, SCHEMA_NULL_VALUE_BACKSLASH_N, schemaColumnDaoTestHelper.getTestSchemaColumns(), partitionColumns, false, CUSTOM_DDL_NAME, true, ALLOW_DUPLICATE_BUSINESS_OBJECT_DATA);
// Retrieve business object data ddl when all of the business object data is not available and "allow missing data" flag is set to "true".
BusinessObjectDataDdlRequest request = businessObjectDataServiceTestHelper.getTestBusinessObjectDataDdlRequest(Arrays.asList("I_DO_NOT_EXIST"), CUSTOM_DDL_NAME);
assertTrue(request.isAllowMissingData());
BusinessObjectDataDdl resultDdl = businessObjectDataService.generateBusinessObjectDataDdl(request);
// Validate the results.
String expectedDdl = businessObjectDataServiceTestHelper.getExpectedBusinessObjectDataDdl(PARTITION_COLUMNS.length, FIRST_COLUMN_NAME, FIRST_COLUMN_DATA_TYPE, ROW_FORMAT, Hive13DdlGenerator.TEXT_HIVE_FILE_FORMAT, FileTypeEntity.TXT_FILE_TYPE, BusinessObjectDataEntity.FIRST_PARTITION_COLUMN_POSITION, null, null, false, true, true);
businessObjectDataServiceTestHelper.validateBusinessObjectDataDdl(request, expectedDdl, resultDdl);
}
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class BusinessObjectDataServiceGenerateBusinessObjectDataDdlTest method testGenerateBusinessObjectDataDdlNoCustomDdlSingleLevelPartitioningPartitionValueRange.
@Test
public void testGenerateBusinessObjectDataDdlNoCustomDdlSingleLevelPartitioningPartitionValueRange() {
// Prepare test data without custom ddl and with partition key using NO_PARTITIONING_PARTITION_KEY.
List<SchemaColumn> partitionColumns = schemaColumnDaoTestHelper.getTestPartitionColumns().subList(0, 1);
String partitionKey = Hive13DdlGenerator.NO_PARTITIONING_PARTITION_KEY;
partitionColumns.get(0).setName(partitionKey);
businessObjectDataServiceTestHelper.createDatabaseEntitiesForBusinessObjectDataDdlTesting(FileTypeEntity.TXT_FILE_TYPE, partitionKey, PARTITION_KEY_GROUP, BusinessObjectDataEntity.FIRST_PARTITION_COLUMN_POSITION, UNSORTED_PARTITION_VALUES, NO_SUBPARTITION_VALUES, SCHEMA_DELIMITER_PIPE, SCHEMA_ESCAPE_CHARACTER_BACKSLASH, SCHEMA_NULL_VALUE_BACKSLASH_N, schemaColumnDaoTestHelper.getTestSchemaColumns(), partitionColumns, false, null, true, ALLOW_DUPLICATE_BUSINESS_OBJECT_DATA);
expectedPartitionValueDaoTestHelper.createExpectedPartitionValueProcessDatesForApril2014(PARTITION_KEY_GROUP);
// Retrieve business object data ddl without specifying custom ddl name.
BusinessObjectDataDdlRequest request = businessObjectDataServiceTestHelper.getTestBusinessObjectDataDdlRequest(START_PARTITION_VALUE, END_PARTITION_VALUE);
request.getPartitionValueFilters().get(0).setPartitionKey(partitionKey);
businessObjectDataService.generateBusinessObjectDataDdl(request);
// TODO: Validate the results.
businessObjectDataServiceTestHelper.getExpectedBusinessObjectDataDdl(partitionColumns.size(), FIRST_COLUMN_NAME, FIRST_COLUMN_DATA_TYPE, ROW_FORMAT, Hive13DdlGenerator.TEXT_HIVE_FILE_FORMAT, FileTypeEntity.TXT_FILE_TYPE, BusinessObjectDataEntity.FIRST_PARTITION_COLUMN_POSITION, PROCESS_DATE_PARTITION_VALUES, NO_SUBPARTITION_VALUES, false, true, true);
}
Aggregations