Search in sources :

Example 71 with SchemaColumn

use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.

the class Hive13DdlGenerator method generateCreateTableDdlHelper.

/**
 * Generates the create table Hive 13 DDL as per specified parameters.
 * <p>
 * The request is validated first (partition values and partition column names must not contain a '/' character, and the business object format schema
 * must have columns). The DDL is then assembled from an optional DROP TABLE statement, either the standard CREATE TABLE statement or the custom DDL,
 * and the partition or location statements. For custom DDL, the collected custom DDL tokens are substituted with their values as the last step.
 *
 * @param generateDdlRequest the parameters that drive the DDL generation
 *
 * @return the generated DDL with leading and trailing whitespace trimmed
 */
private String generateCreateTableDdlHelper(GenerateDdlRequest generateDdlRequest) {
    // TODO: We might want to consider using a template engine such as Velocity to generate this DDL so we don't wind up just doing string manipulation.
    StringBuilder sb = new StringBuilder();
    // For custom DDL, we would need to substitute the custom DDL tokens with their relative values.
    HashMap<String, String> replacements = new HashMap<>();
    // Validate that partition values passed in the list of partition filters do not contain '/' character.
    if (generateDdlRequest.isPartitioned && !CollectionUtils.isEmpty(generateDdlRequest.partitionFilters)) {
        for (List<String> partitionFilter : generateDdlRequest.partitionFilters) {
            for (String partitionValue : partitionFilter) {
                Assert.doesNotContain(partitionValue, "/", String.format("Partition value \"%s\" can not contain a '/' character.", partitionValue));
            }
        }
    }
    // Get business object format model object to directly access schema columns and partitions.
    BusinessObjectFormat businessObjectFormat = businessObjectFormatHelper.createBusinessObjectFormatFromEntity(generateDdlRequest.businessObjectFormatEntity);
    // Validate that we have at least one column specified in the business object format schema.
    assertSchemaColumnsNotEmpty(businessObjectFormat, generateDdlRequest.businessObjectFormatEntity);
    if (generateDdlRequest.isPartitioned) {
        // Validate that we have at least one partition column specified in the business object format schema.
        Assert.notEmpty(businessObjectFormat.getSchema().getPartitions(), String.format("No schema partitions specified for business object format {%s}.", businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(generateDdlRequest.businessObjectFormatEntity)));
        // Validate that partition column names do not contain '/' characters.
        for (SchemaColumn partitionColumn : businessObjectFormat.getSchema().getPartitions()) {
            Assert.doesNotContain(partitionColumn.getName(), "/", String.format("Partition column name \"%s\" can not contain a '/' character. Business object format: {%s}", partitionColumn.getName(), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(generateDdlRequest.businessObjectFormatEntity)));
        }
    }
    // Add drop table if requested.
    if (BooleanUtils.isTrue(generateDdlRequest.includeDropTableStatement)) {
        sb.append(String.format("DROP TABLE IF EXISTS `%s`;\n\n", generateDdlRequest.tableName));
    }
    // Depending on the flag, prepare "if not exists" option text or leave it an empty string.
    String ifNotExistsOption = BooleanUtils.isTrue(generateDdlRequest.includeIfNotExistsOption) ? "IF NOT EXISTS " : "";
    // Only generate the create table DDL statement, if custom DDL was not specified.
    if (generateDdlRequest.customDdlEntity == null) {
        generateStandardBaseDdl(generateDdlRequest, sb, businessObjectFormat, ifNotExistsOption);
    } else {
        // Use the custom DDL in place of the create table statement.
        sb.append(String.format("%s\n\n", generateDdlRequest.customDdlEntity.getDdl()));
        // We need to substitute the relative custom DDL token with an actual table name.
        replacements.put(TABLE_NAME_CUSTOM_DDL_TOKEN, generateDdlRequest.tableName);
    }
    if (!CollectionUtils.isEmpty(generateDdlRequest.partitionFilters)) {
        // Add alter table statements only if the list of partition filters is not empty - this is applicable to generating DDL for business object data only.
        processPartitionFiltersForGenerateDdl(generateDdlRequest, sb, replacements, generateDdlRequest.businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption);
    } else if (!generateDdlRequest.isPartitioned && generateDdlRequest.customDdlEntity == null) {
        // This is format DDL that does not use custom DDL: there are no partition values and the table is not partitioned,
        // so add a LOCATION clause with a token.
        sb.append(String.format("LOCATION '%s';", NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN));
    }
    // Trim to remove unnecessary end-of-line characters, if any, from the end of the generated DDL.
    String resultDdl = sb.toString().trim();
    // For custom DDL, substitute the relative custom DDL tokens with their values.
    if (generateDdlRequest.customDdlEntity != null) {
        for (Map.Entry<String, String> entry : replacements.entrySet()) {
            // Use a literal replacement: with a regex-based replaceAll, a '$' or '\' in the value would be treated as a group reference or an escape.
            resultDdl = resultDdl.replace(entry.getKey(), entry.getValue());
        }
    }
    return resultDdl;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayListValuedHashMap(org.apache.commons.collections4.multimap.ArrayListValuedHashMap) SchemaColumn(org.finra.herd.model.api.xml.SchemaColumn) BusinessObjectFormat(org.finra.herd.model.api.xml.BusinessObjectFormat) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayListValuedHashMap(org.apache.commons.collections4.multimap.ArrayListValuedHashMap) Map(java.util.Map) MultiValuedMap(org.apache.commons.collections4.MultiValuedMap)

Example 72 with SchemaColumn

use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.

the class Hive13DdlGenerator method generateStandardBaseDdl.

/**
 * Appends the standard (non-custom) CREATE EXTERNAL TABLE statement to the specified string builder.
 *
 * @param generateDdlRequest the parameters that drive the DDL generation
 * @param sb the string builder to append the statement to
 * @param businessObjectFormat the business object format whose schema partitions are listed in the PARTITIONED BY clause
 * @param ifNotExistsOption the "if not exists" option text placed in front of the table name
 */
private void generateStandardBaseDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption) {
    // The table name and all column names are wrapped in backticks so that Hive reserved words can be used as identifiers.
    String createTableClause = String.format("CREATE EXTERNAL TABLE %s`%s` (\n", ifNotExistsOption, generateDdlRequest.tableName);
    sb.append(createTableClause);

    // Schema columns come next.
    sb.append(generateDdlColumns(generateDdlRequest.businessObjectFormatEntity, businessObjectFormat));

    // The partitioned by clause is only emitted for partitioned tables.
    if (generateDdlRequest.isPartitioned) {
        sb.append("PARTITIONED BY (");
        List<String> declarations = new ArrayList<>();
        for (SchemaColumn column : businessObjectFormat.getSchema().getPartitions()) {
            String hiveDataType = getHiveDataType(column, generateDdlRequest.businessObjectFormatEntity);
            declarations.add(String.format("`%s` %s", column.getName(), hiveDataType));
        }
        sb.append(StringUtils.join(declarations, ", ")).append(")\n");
    }

    sb.append("ROW FORMAT DELIMITED");

    // The delimiter is optional; the escape character is considered only when a delimiter is present.
    if (!StringUtils.isEmpty(generateDdlRequest.businessObjectFormatEntity.getDelimiter())) {
        String delimiterValue = escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getDelimiter(), true));
        String escapedByClause;
        if (StringUtils.isEmpty(generateDdlRequest.businessObjectFormatEntity.getEscapeCharacter())) {
            escapedByClause = "";
        } else {
            escapedByClause =
                String.format(" ESCAPED BY '%s'", escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getEscapeCharacter(), true)));
        }
        sb.append(String.format(" FIELDS TERMINATED BY '%s'%s", delimiterValue, escapedByClause));
    }

    // The null value clause is always appended.
    String nullValue = escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getNullValue()));
    sb.append(String.format(" NULL DEFINED AS '%s'\n", nullValue));

    // For a partitioned table the CREATE TABLE statement ends right after STORED AS. Otherwise, the statement is left open so a LOCATION clause can follow.
    String statementTerminator;
    if (generateDdlRequest.isPartitioned) {
        statementTerminator = ";\n";
    } else {
        statementTerminator = "";
    }
    sb.append(String.format("STORED AS %s%s\n", getHiveFileFormat(generateDdlRequest.businessObjectFormatEntity), statementTerminator));
}
Also used : ArrayList(java.util.ArrayList) SchemaColumn(org.finra.herd.model.api.xml.SchemaColumn)

Example 73 with SchemaColumn

use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.

the class Hive13DdlGenerator method getHivePathPattern.

/**
 * Gets a pattern to match Hive partition sub-directories.
 * <p>
 * For each partition column, the pattern expects a "/&lt;COLUMN_NAME&gt;=&lt;VALUE&gt;" path segment, where the column name may also be spelled with all
 * underscores replaced with hyphens, and captures the value in its own group. The segments are followed by a trailing "/" and an optional file name.
 * Column names are matched literally and case-insensitively.
 *
 * @param partitionColumns the list of partition columns
 *
 * @return the newly created pattern to match Hive partition sub-directories.
 */
public Pattern getHivePathPattern(List<SchemaColumn> partitionColumns) {
    StringBuilder sb = new StringBuilder(26);
    // For each partition column, add a regular expression to match "<COLUMN_NAME|COLUMN-NAME>=<VALUE>" sub-directory.
    for (SchemaColumn partitionColumn : partitionColumns) {
        String partitionColumnName = partitionColumn.getName();
        // We are using a non-capturing group for the partition column names here - this is done by adding "?:" to the beginning of a capture group.
        sb.append("\\/(?:");
        // Column names are embedded in a regular expression, so they are quoted with Pattern.quote(). Matcher.quoteReplacement() is intended
        // for replacement strings and would leave regular expression metacharacters (e.g. '.', '+', '(') unescaped.
        sb.append(Pattern.quote(partitionColumnName));
        // Please note that for subpartition folder, we do support partition column names having all underscores replaced with hyphens.
        sb.append('|');
        sb.append(Pattern.quote(partitionColumnName.replace("_", "-")));
        // The partition value is everything up to the next "/" and is captured in its own group.
        sb.append(")=([^/]+)");
    }
    // Add a regular expression for a trailing "/" and an optional file name.
    sb.append("\\/[^/]*");
    // We do a case-insensitive match for partition column names.
    return Pattern.compile(sb.toString(), Pattern.CASE_INSENSITIVE);
}
Also used : SchemaColumn(org.finra.herd.model.api.xml.SchemaColumn)

Example 74 with SchemaColumn

use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.

the class StorageUnitServiceGetS3KeyPrefixTest method testGetS3KeyPrefixTrimParameters.

@Test
public void testGetS3KeyPrefixTrimParameters() {
    // Set up the database entities needed by the test. The flag is false, so no business object data entity gets created.
    businessObjectDataServiceTestHelper.createDatabaseEntitiesForGetS3KeyPrefixTesting(false);

    // The first test partition column supplies the partition key, the columns after it serve as the sub-partition columns.
    List<SchemaColumn> partitionColumns = schemaColumnDaoTestHelper.getTestPartitionColumns();
    String partitionKey = partitionColumns.get(0).getName();
    List<SchemaColumn> subPartitionColumns = partitionColumns.subList(1, SUBPARTITION_VALUES.size() + 1);

    // Build a business object data key whose string fields are padded with whitespace.
    BusinessObjectDataKey paddedBusinessObjectDataKey =
        new BusinessObjectDataKey(addWhitespace(NAMESPACE), addWhitespace(BDEF_NAME), addWhitespace(FORMAT_USAGE_CODE), addWhitespace(FORMAT_FILE_TYPE_CODE),
            FORMAT_VERSION, addWhitespace(PARTITION_VALUE), addWhitespace(SUBPARTITION_VALUES), DATA_VERSION);

    // Request the S3 key prefix, padding the partition key and the storage name as well.
    S3KeyPrefixInformation actualS3KeyPrefixInformation =
        storageUnitService.getS3KeyPrefix(paddedBusinessObjectDataKey, addWhitespace(partitionKey), addWhitespace(STORAGE_NAME), false);

    // Compute the expected S3 key prefix from the original, non-padded values.
    SchemaColumn[] subPartitionColumnArray = subPartitionColumns.toArray(new SchemaColumn[subPartitionColumns.size()]);
    String[] subPartitionValueArray = SUBPARTITION_VALUES.toArray(new String[SUBPARTITION_VALUES.size()]);
    String expectedS3KeyPrefix =
        getExpectedS3KeyPrefix(NAMESPACE, DATA_PROVIDER_NAME, BDEF_NAME, FORMAT_USAGE_CODE, FORMAT_FILE_TYPE_CODE, FORMAT_VERSION, partitionKey,
            PARTITION_VALUE, subPartitionColumnArray, subPartitionValueArray, DATA_VERSION);

    // The service result must match the expected S3 key prefix.
    assertEquals(new S3KeyPrefixInformation(expectedS3KeyPrefix), actualS3KeyPrefixInformation);
}
Also used : SchemaColumn(org.finra.herd.model.api.xml.SchemaColumn) S3KeyPrefixInformation(org.finra.herd.model.api.xml.S3KeyPrefixInformation) BusinessObjectDataKey(org.finra.herd.model.api.xml.BusinessObjectDataKey) Test(org.junit.Test)

Example 75 with SchemaColumn

use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.

the class StorageUnitServiceGetS3KeyPrefixTest method testGetS3KeyPrefixLowerCaseParameters.

@Test
public void testGetS3KeyPrefixLowerCaseParameters() {
    // Set up the database entities needed by the test. The flag is false, so no business object data entity gets created.
    businessObjectDataServiceTestHelper.createDatabaseEntitiesForGetS3KeyPrefixTesting(false);

    // The first test partition column supplies the partition key, the columns after it serve as the sub-partition columns.
    List<SchemaColumn> partitionColumns = schemaColumnDaoTestHelper.getTestPartitionColumns();
    String partitionKey = partitionColumns.get(0).getName();
    List<SchemaColumn> subPartitionColumns = partitionColumns.subList(1, SUBPARTITION_VALUES.size() + 1);

    // Build a business object data key with lower case string fields. Partition values are passed as is, since they are case-sensitive.
    BusinessObjectDataKey lowerCaseBusinessObjectDataKey =
        new BusinessObjectDataKey(NAMESPACE.toLowerCase(), BDEF_NAME.toLowerCase(), FORMAT_USAGE_CODE.toLowerCase(), FORMAT_FILE_TYPE_CODE.toLowerCase(),
            FORMAT_VERSION, PARTITION_VALUE, SUBPARTITION_VALUES, DATA_VERSION);

    // Request the S3 key prefix, lower-casing the partition key and the storage name as well.
    S3KeyPrefixInformation actualS3KeyPrefixInformation =
        storageUnitService.getS3KeyPrefix(lowerCaseBusinessObjectDataKey, partitionKey.toLowerCase(), STORAGE_NAME.toLowerCase(), false);

    // Compute the expected S3 key prefix from the original values.
    SchemaColumn[] subPartitionColumnArray = subPartitionColumns.toArray(new SchemaColumn[subPartitionColumns.size()]);
    String[] subPartitionValueArray = SUBPARTITION_VALUES.toArray(new String[SUBPARTITION_VALUES.size()]);
    String expectedS3KeyPrefix =
        getExpectedS3KeyPrefix(NAMESPACE, DATA_PROVIDER_NAME, BDEF_NAME, FORMAT_USAGE_CODE, FORMAT_FILE_TYPE_CODE, FORMAT_VERSION, partitionKey,
            PARTITION_VALUE, subPartitionColumnArray, subPartitionValueArray, DATA_VERSION);

    // The service result must match the expected S3 key prefix.
    assertEquals(new S3KeyPrefixInformation(expectedS3KeyPrefix), actualS3KeyPrefixInformation);
}
Also used : SchemaColumn(org.finra.herd.model.api.xml.SchemaColumn) S3KeyPrefixInformation(org.finra.herd.model.api.xml.S3KeyPrefixInformation) BusinessObjectDataKey(org.finra.herd.model.api.xml.BusinessObjectDataKey) Test(org.junit.Test)

Aggregations

SchemaColumn (org.finra.herd.model.api.xml.SchemaColumn)98 Test (org.junit.Test)68 ArrayList (java.util.ArrayList)23 BusinessObjectDataDdlRequest (org.finra.herd.model.api.xml.BusinessObjectDataDdlRequest)22 BusinessObjectDataKey (org.finra.herd.model.api.xml.BusinessObjectDataKey)18 BusinessObjectFormatEntity (org.finra.herd.model.jpa.BusinessObjectFormatEntity)17 BusinessObjectDataDdl (org.finra.herd.model.api.xml.BusinessObjectDataDdl)16 BusinessObjectDataEntity (org.finra.herd.model.jpa.BusinessObjectDataEntity)15 BusinessObjectFormatCreateRequest (org.finra.herd.model.api.xml.BusinessObjectFormatCreateRequest)14 Attribute (org.finra.herd.model.api.xml.Attribute)12 StorageEntity (org.finra.herd.model.jpa.StorageEntity)12 BusinessObjectFormatDdlRequest (org.finra.herd.model.api.xml.BusinessObjectFormatDdlRequest)11 BusinessObjectFormatDdl (org.finra.herd.model.api.xml.BusinessObjectFormatDdl)9 StorageUnitEntity (org.finra.herd.model.jpa.StorageUnitEntity)9 S3KeyPrefixInformation (org.finra.herd.model.api.xml.S3KeyPrefixInformation)8 Schema (org.finra.herd.model.api.xml.Schema)8 BusinessObjectFormat (org.finra.herd.model.api.xml.BusinessObjectFormat)7 PartitionValueFilter (org.finra.herd.model.api.xml.PartitionValueFilter)5 HashMap (java.util.HashMap)3 LinkedHashMap (java.util.LinkedHashMap)3