use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class Hive13DdlGenerator method generateCreateTableDdlHelper.
/**
 * Generates the create table Hive 13 DDL as per the specified parameters, building it in a string builder.
 *
 * @param generateDdlRequest the generate DDL request
 *
 * @return the generated DDL
 */
private String generateCreateTableDdlHelper(GenerateDdlRequest generateDdlRequest) {
// TODO: We might want to consider using a template engine such as Velocity to generate this DDL so we don't wind up just doing string manipulation.
StringBuilder sb = new StringBuilder();
// For custom DDL, we need to substitute the custom DDL tokens with their actual values.
HashMap<String, String> replacements = new HashMap<>();
// Validate that partition values passed in the list of partition filters do not contain a '/' character.
if (generateDdlRequest.isPartitioned && !CollectionUtils.isEmpty(generateDdlRequest.partitionFilters)) {
// Validate that partition values do not contain '/' characters.
for (List<String> partitionFilter : generateDdlRequest.partitionFilters) {
for (String partitionValue : partitionFilter) {
Assert.doesNotContain(partitionValue, "/", String.format("Partition value \"%s\" can not contain a '/' character.", partitionValue));
}
}
}
// Get business object format model object to directly access schema columns and partitions.
BusinessObjectFormat businessObjectFormat = businessObjectFormatHelper.createBusinessObjectFormatFromEntity(generateDdlRequest.businessObjectFormatEntity);
// Validate that we have at least one column specified in the business object format schema.
assertSchemaColumnsNotEmpty(businessObjectFormat, generateDdlRequest.businessObjectFormatEntity);
if (generateDdlRequest.isPartitioned) {
// Validate that we have at least one partition column specified in the business object format schema.
Assert.notEmpty(businessObjectFormat.getSchema().getPartitions(), String.format("No schema partitions specified for business object format {%s}.", businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(generateDdlRequest.businessObjectFormatEntity)));
// Validate that partition column names do not contain '/' characters.
for (SchemaColumn partitionColumn : businessObjectFormat.getSchema().getPartitions()) {
Assert.doesNotContain(partitionColumn.getName(), "/", String.format("Partition column name \"%s\" can not contain a '/' character. Business object format: {%s}", partitionColumn.getName(), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(generateDdlRequest.businessObjectFormatEntity)));
}
}
// Add drop table if requested.
if (BooleanUtils.isTrue(generateDdlRequest.includeDropTableStatement)) {
sb.append(String.format("DROP TABLE IF EXISTS `%s`;\n\n", generateDdlRequest.tableName));
}
// Depending on the flag, prepare the "IF NOT EXISTS" option text or leave it as an empty string.
String ifNotExistsOption = BooleanUtils.isTrue(generateDdlRequest.includeIfNotExistsOption) ? "IF NOT EXISTS " : "";
// Only generate the create table DDL statement, if custom DDL was not specified.
if (generateDdlRequest.customDdlEntity == null) {
generateStandardBaseDdl(generateDdlRequest, sb, businessObjectFormat, ifNotExistsOption);
} else {
// Use the custom DDL in place of the create table statement.
sb.append(String.format("%s\n\n", generateDdlRequest.customDdlEntity.getDdl()));
// We need to substitute the corresponding custom DDL token with the actual table name.
replacements.put(TABLE_NAME_CUSTOM_DDL_TOKEN, generateDdlRequest.tableName);
}
// Add alter table statements only if the list of partition filters is not empty - this is applicable to generating DDL for business object data only.
if (!CollectionUtils.isEmpty(generateDdlRequest.partitionFilters)) {
processPartitionFiltersForGenerateDdl(generateDdlRequest, sb, replacements, generateDdlRequest.businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption);
} else // Add a location statement with a token if this is format DDL that does not use custom DDL.
if (!generateDdlRequest.isPartitioned && generateDdlRequest.customDdlEntity == null) {
// Since custom DDL is not specified, there are no partition values, and this table is not partitioned, add a LOCATION clause with a token.
sb.append(String.format("LOCATION '%s';", NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN));
}
// Trim to remove unnecessary end-of-line characters, if any, from the end of the generated DDL.
String resultDdl = sb.toString().trim();
// For custom DDL, substitute the custom DDL tokens with their actual values.
if (generateDdlRequest.customDdlEntity != null) {
for (Map.Entry<String, String> entry : replacements.entrySet()) {
String token = entry.getKey();
String value = entry.getValue();
resultDdl = resultDdl.replaceAll(Pattern.quote(token), value);
}
}
return resultDdl;
}
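For illustration, here is a minimal, self-contained sketch of the token substitution performed at the end of this helper. The token literal and table name below are made-up placeholders (the actual TABLE_NAME_CUSTOM_DDL_TOKEN constant may differ); the point is that Pattern.quote ensures any regex metacharacters in the token are treated literally by replaceAll.
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

public class CustomDdlTokenSubstitutionSketch {
    public static void main(String[] args) {
        // Hypothetical token literal - the real TABLE_NAME_CUSTOM_DDL_TOKEN constant may differ.
        String tableNameToken = "${table.name}";
        // Custom DDL as it might be stored, with the table name represented by the token.
        String customDdl = "CREATE EXTERNAL TABLE IF NOT EXISTS `" + tableNameToken + "` (`col1` STRING);";
        // Collect token-to-value replacements, mirroring the "replacements" map in the helper.
        Map<String, String> replacements = new HashMap<>();
        replacements.put(tableNameToken, "my_business_object_table");
        // Apply the same substitution approach: quote each token so characters like '$' and '.'
        // are matched literally by replaceAll.
        String resultDdl = customDdl.trim();
        for (Map.Entry<String, String> entry : replacements.entrySet()) {
            resultDdl = resultDdl.replaceAll(Pattern.quote(entry.getKey()), entry.getValue());
        }
        // Prints: CREATE EXTERNAL TABLE IF NOT EXISTS `my_business_object_table` (`col1` STRING);
        System.out.println(resultDdl);
    }
}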
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class Hive13DdlGenerator method generateStandardBaseDdl.
private void generateStandardBaseDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption) {
// Please note that we escape the table name and all column names to avoid clashes with Hive reserved words in DDL statement generation.
sb.append(String.format("CREATE EXTERNAL TABLE %s`%s` (\n", ifNotExistsOption, generateDdlRequest.tableName));
// Add schema columns.
sb.append(generateDdlColumns(generateDdlRequest.businessObjectFormatEntity, businessObjectFormat));
if (generateDdlRequest.isPartitioned) {
// Add a partitioned by clause.
sb.append("PARTITIONED BY (");
// List all partition columns.
List<String> partitionColumnDeclarations = new ArrayList<>();
for (SchemaColumn partitionColumn : businessObjectFormat.getSchema().getPartitions()) {
partitionColumnDeclarations.add(String.format("`%s` %s", partitionColumn.getName(), getHiveDataType(partitionColumn, generateDdlRequest.businessObjectFormatEntity)));
}
sb.append(StringUtils.join(partitionColumnDeclarations, ", "));
sb.append(")\n");
}
// We output the delimiter character, escape character, and null value only when they are defined in the business object format schema.
sb.append("ROW FORMAT DELIMITED");
if (!StringUtils.isEmpty(generateDdlRequest.businessObjectFormatEntity.getDelimiter())) {
// Note that the escape character is only output when the delimiter is present.
sb.append(String.format(" FIELDS TERMINATED BY '%s'%s", escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getDelimiter(), true)), StringUtils.isEmpty(generateDdlRequest.businessObjectFormatEntity.getEscapeCharacter()) ? "" : String.format(" ESCAPED BY '%s'", escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getEscapeCharacter(), true)))));
}
sb.append(String.format(" NULL DEFINED AS '%s'\n", escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getNullValue()))));
// If this table is not partitioned, the STORED AS clause will be followed by a LOCATION clause. Otherwise, the CREATE TABLE statement is complete.
sb.append(String.format("STORED AS %s%s\n", getHiveFileFormat(generateDdlRequest.businessObjectFormatEntity), generateDdlRequest.isPartitioned ? ";\n" : ""));
}
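To make the structure of the generated statement concrete, the output of this method for a hypothetical partitioned format might look roughly as follows. This is only a sample: the table name, columns, '|' delimiter, '\' escape character, empty null value, and TEXTFILE format are assumptions, and the exact column block layout comes from generateDdlColumns, which is not shown here.
CREATE EXTERNAL TABLE IF NOT EXISTS `my_business_object_table` (
    `col1` STRING,
    `col2` DECIMAL(10,2))
PARTITIONED BY (`business_date` DATE, `region_cd` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' ESCAPED BY '\\' NULL DEFINED AS ''
STORED AS TEXTFILE;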
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class Hive13DdlGenerator method getHivePathPattern.
/**
* Gets a pattern to match Hive partition sub-directories.
*
* @param partitionColumns the list of partition columns
*
* @return the newly created pattern to match Hive partition sub-directories.
*/
public Pattern getHivePathPattern(List<SchemaColumn> partitionColumns) {
StringBuilder sb = new StringBuilder(26);
// For each partition column, add a regular expression to match "<COLUMN_NAME|COLUMN-NAME>=<VALUE>" sub-directory.
for (SchemaColumn partitionColumn : partitionColumns) {
String partitionColumnName = partitionColumn.getName();
// We are using a non-capturing group for the partition column names here - this is done by adding "?:" to the beginning of a capture group.
sb.append("\\/(?:");
sb.append(Matcher.quoteReplacement(partitionColumnName));
// Please note that for sub-partition folders, we also support partition column names with all underscores replaced by hyphens.
sb.append('|');
sb.append(Matcher.quoteReplacement(partitionColumnName.replace("_", "-")));
sb.append(")=([^/]+)");
}
// Add a regular expression for a trailing "/" and an optional file name.
sb.append("\\/[^/]*");
// We do a case-insensitive match for partition column names.
return Pattern.compile(sb.toString(), Pattern.CASE_INSENSITIVE);
}
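A minimal, self-contained usage sketch follows: the partition column names and the storage path are made up, and the pattern-building loop is rebuilt inline (mirroring the method body) so the example does not depend on constructing SchemaColumn objects. It shows the hyphen-for-underscore matching and the case-insensitive match, with each capturing group yielding one partition value.
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HivePathPatternUsageSketch {
    public static void main(String[] args) {
        // Hypothetical partition column names: a top-level partition plus one sub-partition.
        List<String> partitionColumnNames = Arrays.asList("business_date", "region_cd");
        // Rebuild the same expression the generator produces: accept either the original column name
        // or the name with underscores replaced by hyphens, and capture the value that follows '='.
        StringBuilder sb = new StringBuilder();
        for (String partitionColumnName : partitionColumnNames) {
            sb.append("\\/(?:");
            sb.append(Matcher.quoteReplacement(partitionColumnName));
            sb.append('|');
            sb.append(Matcher.quoteReplacement(partitionColumnName.replace("_", "-")));
            sb.append(")=([^/]+)");
        }
        sb.append("\\/[^/]*");
        Pattern pattern = Pattern.compile(sb.toString(), Pattern.CASE_INSENSITIVE);
        // A storage path using an upper case top-level folder and a hyphenated sub-partition folder.
        Matcher matcher = pattern.matcher("/BUSINESS_DATE=2017-01-31/region-cd=USA/data.txt");
        if (matcher.find()) {
            // Prints "2017-01-31, USA" - the capturing groups hold the partition values in column order.
            System.out.println(matcher.group(1) + ", " + matcher.group(2));
        }
    }
}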
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class StorageUnitServiceGetS3KeyPrefixTest method testGetS3KeyPrefixTrimParameters.
@Test
public void testGetS3KeyPrefixTrimParameters() {
// Create database entities required for testing. Please note that we are not passing the flag to create a business object data entity.
businessObjectDataServiceTestHelper.createDatabaseEntitiesForGetS3KeyPrefixTesting(false);
// Get the test partition columns.
List<SchemaColumn> testPartitionColumns = schemaColumnDaoTestHelper.getTestPartitionColumns();
String testPartitionKey = testPartitionColumns.get(0).getName();
List<SchemaColumn> testSubPartitionColumns = testPartitionColumns.subList(1, SUBPARTITION_VALUES.size() + 1);
// Get an S3 key prefix by using input parameters with leading and trailing empty spaces.
S3KeyPrefixInformation resultS3KeyPrefixInformation = storageUnitService.getS3KeyPrefix(new BusinessObjectDataKey(addWhitespace(NAMESPACE), addWhitespace(BDEF_NAME), addWhitespace(FORMAT_USAGE_CODE), addWhitespace(FORMAT_FILE_TYPE_CODE), FORMAT_VERSION, addWhitespace(PARTITION_VALUE), addWhitespace(SUBPARTITION_VALUES), DATA_VERSION), addWhitespace(testPartitionKey), addWhitespace(STORAGE_NAME), false);
// Get the expected S3 key prefix value using the business object data version.
String expectedS3KeyPrefix = getExpectedS3KeyPrefix(NAMESPACE, DATA_PROVIDER_NAME, BDEF_NAME, FORMAT_USAGE_CODE, FORMAT_FILE_TYPE_CODE, FORMAT_VERSION, testPartitionKey, PARTITION_VALUE, testSubPartitionColumns.toArray(new SchemaColumn[testSubPartitionColumns.size()]), SUBPARTITION_VALUES.toArray(new String[SUBPARTITION_VALUES.size()]), DATA_VERSION);
// Validate the results.
assertEquals(new S3KeyPrefixInformation(expectedS3KeyPrefix), resultS3KeyPrefixInformation);
}
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class StorageUnitServiceGetS3KeyPrefixTest method testGetS3KeyPrefixLowerCaseParameters.
@Test
public void testGetS3KeyPrefixLowerCaseParameters() {
// Create database entities required for testing. Please note that we are not passing the flag to create a business object data entity.
businessObjectDataServiceTestHelper.createDatabaseEntitiesForGetS3KeyPrefixTesting(false);
// Get the test partition columns.
List<SchemaColumn> testPartitionColumns = schemaColumnDaoTestHelper.getTestPartitionColumns();
String testPartitionKey = testPartitionColumns.get(0).getName();
List<SchemaColumn> testSubPartitionColumns = testPartitionColumns.subList(1, SUBPARTITION_VALUES.size() + 1);
// Get an S3 key prefix using lower case input parameters (except for case-sensitive partition values).
S3KeyPrefixInformation resultS3KeyPrefixInformation = storageUnitService.getS3KeyPrefix(new BusinessObjectDataKey(NAMESPACE.toLowerCase(), BDEF_NAME.toLowerCase(), FORMAT_USAGE_CODE.toLowerCase(), FORMAT_FILE_TYPE_CODE.toLowerCase(), FORMAT_VERSION, PARTITION_VALUE, SUBPARTITION_VALUES, DATA_VERSION), testPartitionKey.toLowerCase(), STORAGE_NAME.toLowerCase(), false);
// Get the expected S3 key prefix value using the business object data version.
String expectedS3KeyPrefix = getExpectedS3KeyPrefix(NAMESPACE, DATA_PROVIDER_NAME, BDEF_NAME, FORMAT_USAGE_CODE, FORMAT_FILE_TYPE_CODE, FORMAT_VERSION, testPartitionKey, PARTITION_VALUE, testSubPartitionColumns.toArray(new SchemaColumn[testSubPartitionColumns.size()]), SUBPARTITION_VALUES.toArray(new String[SUBPARTITION_VALUES.size()]), DATA_VERSION);
// Validate the results.
assertEquals(new S3KeyPrefixInformation(expectedS3KeyPrefix), resultS3KeyPrefixInformation);
}