use of org.finra.herd.model.api.xml.BusinessObjectDataKey in project herd by FINRAOS.
the class BusinessObjectDataHelper method createBusinessObjectDataKey.
/**
 * Builds a business object data key out of the key fields carried by a business object data DTO.
 *
 * @param businessObjectData the business object data DTO to read the key fields from
 *
 * @return the newly created business object data key
 */
public BusinessObjectDataKey createBusinessObjectDataKey(BusinessObjectData businessObjectData) {
    final BusinessObjectDataKey key = new BusinessObjectDataKey();

    // Business object definition part of the key.
    key.setNamespace(businessObjectData.getNamespace());
    key.setBusinessObjectDefinitionName(businessObjectData.getBusinessObjectDefinitionName());

    // Business object format part of the key.
    key.setBusinessObjectFormatUsage(businessObjectData.getBusinessObjectFormatUsage());
    key.setBusinessObjectFormatFileType(businessObjectData.getBusinessObjectFormatFileType());
    key.setBusinessObjectFormatVersion(businessObjectData.getBusinessObjectFormatVersion());

    // Business object data part of the key: partition values and the data version.
    key.setPartitionValue(businessObjectData.getPartitionValue());
    key.setSubPartitionValues(businessObjectData.getSubPartitionValues());
    key.setBusinessObjectDataVersion(businessObjectData.getVersion());

    return key;
}
use of org.finra.herd.model.api.xml.BusinessObjectDataKey in project herd by FINRAOS.
the class BusinessObjectDataHelper method createBusinessObjectDataKeyFromEntity.
/**
 * Builds a business object data key out of a business object data entity. The definition and format related fields are read through the business
 * object format that the entity refers to.
 *
 * @param businessObjectDataEntity the business object data entity to read the key fields from
 *
 * @return the newly created business object data key
 */
public BusinessObjectDataKey createBusinessObjectDataKeyFromEntity(BusinessObjectDataEntity businessObjectDataEntity) {
    final BusinessObjectDataKey key = new BusinessObjectDataKey();

    // The namespace code and the definition name are reached through the format's business object definition.
    key.setNamespace(businessObjectDataEntity.getBusinessObjectFormat().getBusinessObjectDefinition().getNamespace().getCode());
    key.setBusinessObjectDefinitionName(businessObjectDataEntity.getBusinessObjectFormat().getBusinessObjectDefinition().getName());

    // Usage, file type code and format version are read from the business object format itself.
    key.setBusinessObjectFormatUsage(businessObjectDataEntity.getBusinessObjectFormat().getUsage());
    key.setBusinessObjectFormatFileType(businessObjectDataEntity.getBusinessObjectFormat().getFileType().getCode());
    key.setBusinessObjectFormatVersion(businessObjectDataEntity.getBusinessObjectFormat().getBusinessObjectFormatVersion());

    // Partition values and the data version are read from the business object data entity.
    key.setPartitionValue(businessObjectDataEntity.getPartitionValue());
    key.setSubPartitionValues(getSubPartitionValues(businessObjectDataEntity));
    key.setBusinessObjectDataVersion(businessObjectDataEntity.getVersion());

    return key;
}
use of org.finra.herd.model.api.xml.BusinessObjectDataKey in project herd by FINRAOS.
the class Hive13DdlGenerator method processPartitionFiltersForGenerateDdl.
/**
 * Generates the partition filter driven part of the DDL as per the generate DDL request: validates availability of the requested business object
 * data, optionally emits "drop partition" statements, and delegates generation of the storage unit specific statements.
 *
 * @param generateDdlRequest the generate DDL request
 * @param sb the string builder that receives the generated DDL statements
 * @param replacements the hash map of string values to be used to substitute the custom DDL tokens with their actual values
 * @param businessObjectFormatEntity the business object format entity
 * @param businessObjectFormat the business object format
 * @param ifNotExistsOption specifies if generated DDL contains "if not exists" option
 */
private void processPartitionFiltersForGenerateDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, HashMap<String, String> replacements,
    BusinessObjectFormatEntity businessObjectFormatEntity, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption) {
    // Build the format key from the entity carried by the request, then put back the original (optional) format version value from the request.
    BusinessObjectFormatKey formatKey = businessObjectFormatHelper.getBusinessObjectFormatKey(generateDdlRequest.businessObjectFormatEntity);
    formatKey.setBusinessObjectFormatVersion(generateDdlRequest.businessObjectFormatVersion);

    // Look up the storage units matching the requested partition filters. The entities come back sorted by partition values and storages; a
    // non-partitioned table should yield a single business object data entity (with partitionValue equal to "none"). All specified storages are
    // validated to be of the "S3" storage platform type, so S3 is passed to the DAO in order to select from all S3 storages when no storage names
    // are specified. Only "available" storage units are selected.
    List<StorageUnitEntity> selectedStorageUnits = storageUnitDao
        .getStorageUnitsByPartitionFiltersAndStorages(formatKey, generateDdlRequest.partitionFilters, generateDdlRequest.businessObjectDataVersion,
            BusinessObjectDataStatusEntity.VALID, generateDdlRequest.storageNames, StoragePlatformEntity.S3, null, true);

    // Drop duplicate business object data per the specified list of storage names. When storage names are not specified,
    // this step fails on business object data instances registered with multiple storages.
    List<StorageUnitEntity> storageUnitEntities =
        excludeDuplicateBusinessObjectData(selectedStorageUnits, generateDdlRequest.storageNames, generateDdlRequest.storageEntities);

    // Collect the matched partition filters along with the discovered partitions.
    // A single request partition filter might match several available business object data entities.
    List<List<String>> matchedFilters = new ArrayList<>();
    List<List<String>> availablePartitions = new ArrayList<>();
    for (StorageUnitEntity storageUnit : storageUnitEntities) {
        BusinessObjectDataKey dataKey = businessObjectDataHelper.getBusinessObjectDataKey(storageUnit.getBusinessObjectData());
        matchedFilters.add(businessObjectDataHelper.getPartitionFilter(dataKey, generateDdlRequest.partitionFilters.get(0)));
        availablePartitions.add(businessObjectDataHelper.getPrimaryAndSubPartitionValues(dataKey));
    }

    // When all registered sub-partitions must be included, fail if any "non-available" registered sub-partition is found.
    if (generateDdlRequest.businessObjectDataVersion == null
        && BooleanUtils.isTrue(generateDdlRequest.includeAllRegisteredSubPartitions)
        && !CollectionUtils.isEmpty(matchedFilters)) {
        notAllowNonAvailableRegisteredSubPartitions(formatKey, matchedFilters, availablePartitions, generateDdlRequest.storageNames);
    }

    // Unless missing business object data is explicitly allowed, every requested partition filter has to be matched.
    if (!BooleanUtils.isTrue(generateDdlRequest.allowMissingData)) {
        List<List<String>> unmatchedFilters = new ArrayList<>(generateDdlRequest.partitionFilters);
        unmatchedFilters.removeAll(matchedFilters);

        if (!unmatchedFilters.isEmpty()) {
            // Report the first unmatched partition filter.
            List<String> firstUnmatched = getFirstUnmatchedPartitionFilter(unmatchedFilters);
            String primaryPartitionValue = firstUnmatched.get(0);
            String subPartitionValues = StringUtils.join(firstUnmatched.subList(1, firstUnmatched.size()), ",");
            String storageNames = StringUtils.join(generateDdlRequest.storageNames, ",");
            String message = String.format(
                "Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " +
                    "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " +
                    "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).",
                formatKey.getNamespace(), formatKey.getBusinessObjectDefinitionName(), formatKey.getBusinessObjectFormatUsage(),
                formatKey.getBusinessObjectFormatFileType(), formatKey.getBusinessObjectFormatVersion(), primaryPartitionValue, subPartitionValues,
                generateDdlRequest.businessObjectDataVersion, storageNames);
            throw new ObjectNotFoundException(message);
        }
    }

    // No custom DDL, a non-partitioned table, and no business object data found:
    // the only thing left to add is a LOCATION clause that carries a token, so we are done after that.
    if (generateDdlRequest.customDdlEntity == null && !generateDdlRequest.isPartitioned && CollectionUtils.isEmpty(storageUnitEntities)) {
        sb.append(String.format("LOCATION '%s';", NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN));
        return;
    }

    // From here on: the table is partitioned, custom DDL is specified, or there is at least one business object data instance found.

    // For a partitioned table with the drop partitions flag set, emit one drop statement per partition filter entry.
    if (generateDdlRequest.isPartitioned && BooleanUtils.isTrue(generateDdlRequest.includeDropPartitions)) {
        for (List<String> partitionFilter : generateDdlRequest.partitionFilters) {
            sb.append(String.format("ALTER TABLE `%s` DROP IF EXISTS PARTITION (", generateDdlRequest.tableName));

            // List every partition column that has a value in this partition filter.
            List<String> columnValuePairs = new ArrayList<>();
            int columnIndex = 0;
            for (String partitionValue : partitionFilter) {
                if (StringUtils.isNotBlank(partitionValue)) {
                    // The lookup by index relies on a partition filter never having a value set at an index
                    // that is greater than or equal to the number of partitions in the schema.
                    String columnName = businessObjectFormat.getSchema().getPartitions().get(columnIndex).getName();
                    columnValuePairs.add(String.format("`%s`='%s'", columnName, partitionValue));
                }
                columnIndex++;
            }

            sb.append(StringUtils.join(columnValuePairs, ", ")).append(");\n");
        }
        sb.append('\n');
    }

    // Generate the storage unit specific statements, if there is anything to process.
    if (!CollectionUtils.isEmpty(storageUnitEntities)) {
        processStorageUnitsForGenerateDdl(generateDdlRequest, sb, replacements, businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption,
            storageUnitEntities);
    }
}
use of org.finra.herd.model.api.xml.BusinessObjectDataKey in project herd by FINRAOS.
the class Hive13DdlGenerator method notAllowNonAvailableRegisteredSubPartitions.
/**
 * Looks for registered sub-partitions that are "non-available" under the given "matched" partition filters and throws if at least one is found.
 *
 * @param businessObjectFormatKey the business object format key
 * @param matchedAvailablePartitionFilters the list of "matched" partition filters
 * @param availablePartitions the list of already discovered "available" partitions, where each partition consists of primary and optional sub-partition
 * values
 * @param storageNames the list of storage names
 */
protected void notAllowNonAvailableRegisteredSubPartitions(BusinessObjectFormatKey businessObjectFormatKey,
    List<List<String>> matchedAvailablePartitionFilters, List<List<String>> availablePartitions, List<String> storageNames) {
    // Step 1: select the latest business object data per matched filter, regardless of business object data and/or storage unit statuses, so
    // that every registered sub-partition gets discovered. All specified storages are validated to be of the "S3" storage platform type, hence S3
    // is passed to the DAO in order to select from all S3 storages when no storage names are specified. The trailing "false" is the
    // selectOnlyAvailableStorageUnits argument: existing storage units are wanted regardless of their status.
    List<StorageUnitEntity> registeredStorageUnits = storageUnitDao
        .getStorageUnitsByPartitionFiltersAndStorages(businessObjectFormatKey, matchedAvailablePartitionFilters, null, null, storageNames,
            StoragePlatformEntity.S3, null, false);

    // Step 2: ignore storage units whose business object data has the "DELETED" status.
    List<StorageUnitEntity> notDeletedStorageUnits =
        storageUnitHelper.excludeBusinessObjectDataStatus(registeredStorageUnits, BusinessObjectDataStatusEntity.DELETED);

    // Step 3: ignore the partitions already known to be "available".
    // Please note that, since we got here, the list of matched partitions can not be empty.
    List<StorageUnitEntity> nonAvailableStorageUnits = storageUnitHelper.excludePartitions(notDeletedStorageUnits, availablePartitions);

    // Nothing left means there are no "non-available" registered sub-partitions to complain about.
    if (CollectionUtils.isEmpty(nonAvailableStorageUnits)) {
        return;
    }

    // Report the first "non-available" registered sub-partition.
    BusinessObjectDataKey firstNonAvailableKey = businessObjectDataHelper.getBusinessObjectDataKey(nonAvailableStorageUnits.get(0).getBusinessObjectData());
    String message = String.format(
        "Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " +
            "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " +
            "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).",
        businessObjectFormatKey.getNamespace(), businessObjectFormatKey.getBusinessObjectDefinitionName(),
        businessObjectFormatKey.getBusinessObjectFormatUsage(), businessObjectFormatKey.getBusinessObjectFormatFileType(),
        businessObjectFormatKey.getBusinessObjectFormatVersion(), firstNonAvailableKey.getPartitionValue(),
        StringUtils.join(firstNonAvailableKey.getSubPartitionValues(), ","), firstNonAvailableKey.getBusinessObjectDataVersion(),
        StringUtils.join(storageNames, ","));
    throw new ObjectNotFoundException(message);
}
use of org.finra.herd.model.api.xml.BusinessObjectDataKey in project herd by FINRAOS.
the class Hive13DdlGenerator method processStorageUnitsForGenerateDdl.
/**
 * Adds the relative "alter table add partition" statements for each storage unit entity. Please note that each request partition value might result in
 * multiple available storage unit entities (subpartitions). For a non-partitioned table, no "add partition" statements are generated: the table
 * location is either appended as a LOCATION clause (no custom DDL) or stored in the replacements map under the non-partitioned table location
 * custom DDL token (custom DDL is used).
 *
 * @param generateDdlRequest the generate DDL request
 * @param sb the string builder to be updated with the "alter table add partition" statements
 * @param replacements the hash map of string values to be used to substitute the custom DDL tokens with their actual values
 * @param businessObjectFormatEntity the business object format entity
 * @param businessObjectFormat the business object format
 * @param ifNotExistsOption specifies if generated DDL contains "if not exists" option
 * @param storageUnitEntities the list of storage unit entities
 */
private void processStorageUnitsForGenerateDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, HashMap<String, String> replacements, BusinessObjectFormatEntity businessObjectFormatEntity, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption, List<StorageUnitEntity> storageUnitEntities) {
// If flag is not set to suppress scan for unregistered sub-partitions, retrieve all storage
// file paths for the relative storage units loaded in a multi-valued map for easy access.
// When the flag is set, an empty map is used instead and no storage file lookup is performed.
MultiValuedMap<Integer, String> storageUnitIdToStorageFilePathsMap = BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions) ? new ArrayListValuedHashMap<>() : storageFileDao.getStorageFilePathsByStorageUnitIds(storageUnitHelper.getStorageUnitIds(storageUnitEntities));
// Process all available business object data instances.
for (StorageUnitEntity storageUnitEntity : storageUnitEntities) {
// Get business object data key and S3 key prefix for this business object data.
BusinessObjectDataKey businessObjectDataKey = businessObjectDataHelper.getBusinessObjectDataKey(storageUnitEntity.getBusinessObjectData());
String s3KeyPrefix = s3KeyPrefixHelper.buildS3KeyPrefix(storageUnitEntity.getStorage(), storageUnitEntity.getBusinessObjectData().getBusinessObjectFormat(), businessObjectDataKey);
// If flag is set to suppress scan for unregistered sub-partitions, use the directory path or the S3 key prefix
// as the partition's location, otherwise, use storage files to discover all unregistered sub-partitions.
Collection<String> storageFilePaths = new ArrayList<>();
if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions)) {
// Validate the directory path value if it is present. In this branch the directory path must be equal to the S3 key prefix.
if (storageUnitEntity.getDirectoryPath() != null) {
Assert.isTrue(storageUnitEntity.getDirectoryPath().equals(s3KeyPrefix), String.format("Storage directory path \"%s\" registered with business object data {%s} " + "in \"%s\" storage does not match the expected S3 key prefix \"%s\".", storageUnitEntity.getDirectoryPath(), businessObjectDataHelper.businessObjectDataEntityAltKeyToString(storageUnitEntity.getBusinessObjectData()), storageUnitEntity.getStorage().getName(), s3KeyPrefix));
}
// Add the S3 key prefix to the list of storage files.
// We add a trailing '/' character to the prefix, since it represents a directory.
storageFilePaths.add(StringUtils.appendIfMissing(s3KeyPrefix, "/"));
} else {
// Retrieve storage file paths registered with this business object data in the specified storage.
// A fresh empty list is used when the map has no entry for this storage unit.
storageFilePaths = storageUnitIdToStorageFilePathsMap.containsKey(storageUnitEntity.getId()) ? storageUnitIdToStorageFilePathsMap.get(storageUnitEntity.getId()) : new ArrayList<>();
// Validate storage file paths registered with this business object data in the specified storage.
// The validation check below is required even if we have no storage files registered.
storageFileHelper.validateStorageFilePaths(storageFilePaths, s3KeyPrefix, storageUnitEntity.getBusinessObjectData(), storageUnitEntity.getStorage().getName());
// If there are no storage files registered for this storage unit, we should use the storage directory path value.
if (storageFilePaths.isEmpty()) {
// Validate that directory path value is present and that it starts with the S3 key prefix.
Assert.isTrue(storageUnitEntity.getDirectoryPath() != null && storageUnitEntity.getDirectoryPath().startsWith(s3KeyPrefix), String.format("Storage directory path \"%s\" registered with business object data {%s} " + "in \"%s\" storage does not match the expected S3 key prefix \"%s\".", storageUnitEntity.getDirectoryPath(), businessObjectDataHelper.businessObjectDataEntityAltKeyToString(storageUnitEntity.getBusinessObjectData()), storageUnitEntity.getStorage().getName(), s3KeyPrefix));
// Add the storage directory path to the empty storage files list.
// We add a trailing '/' character to the path, since it represents a directory.
// NOTE(review): unlike the suppress-scan branch above, which uses appendIfMissing, the '/' is appended unconditionally here, so a
// directory path that already ends with '/' would end up with "//" - confirm directory paths are never registered with a trailing '/'.
storageFilePaths.add(storageUnitEntity.getDirectoryPath() + "/");
}
}
// Retrieve the s3 bucket name.
String s3BucketName = getS3BucketName(storageUnitEntity.getStorage(), generateDdlRequest.s3BucketNames);
// For partitioned table, add the relative partitions to the generated DDL.
if (generateDdlRequest.isPartitioned) {
// If flag is set to suppress scan for unregistered sub-partitions, validate that the number of primary and sub-partition values specified for
// the business object data equals to the number of partition columns defined in schema for the format selected for DDL generation.
if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions)) {
int businessObjectDataRegisteredPartitions = 1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues());
Assert.isTrue(businessObjectFormat.getSchema().getPartitions().size() == businessObjectDataRegisteredPartitions, String.format("Number of primary and sub-partition values (%d) specified for the business object data is not equal to " + "the number of partition columns (%d) defined in the schema of the business object format selected for DDL generation. " + "Business object data: {%s}, business object format: {%s}", businessObjectDataRegisteredPartitions, businessObjectFormat.getSchema().getPartitions().size(), businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity)));
} else // Otherwise, since the format version selected for DDL generation might not match the relative business object format version that business
// object data is registered against, validate that the number of sub-partition values specified for the business object data is less than
// the number of partition columns defined in schema for the format selected for DDL generation.
{
Assert.isTrue(businessObjectFormat.getSchema().getPartitions().size() > CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()), String.format("Number of subpartition values specified for the business object data is greater than or equal to " + "the number of partition columns defined in the schema of the business object format selected for DDL generation. " + "Business object data: {%s}, business object format: {%s}", businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity)));
}
// Get partition information. For multiple level partitioning, auto-discover subpartitions (subdirectories) not already included into the S3 key
// prefix. Each discovered partition requires a standalone "add partition" clause. Please note that due to the above validation check, there
// should be no auto discoverable sub-partition columns, when flag is set to suppress scan for unregistered sub-partitions.
// The auto-discoverable columns are the schema partition columns that follow the primary and the registered sub-partition columns.
List<SchemaColumn> autoDiscoverableSubPartitionColumns = businessObjectFormat.getSchema().getPartitions().subList(1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()), businessObjectFormat.getSchema().getPartitions().size());
for (HivePartitionDto hivePartition : getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, s3KeyPrefix, storageFilePaths, storageUnitEntity.getBusinessObjectData(), storageUnitEntity.getStorage().getName())) {
sb.append(String.format("ALTER TABLE `%s` ADD %sPARTITION (", generateDdlRequest.tableName, ifNotExistsOption));
// Specify all partition column values.
List<String> partitionKeyValuePairs = new ArrayList<>();
for (int i = 0; i < businessObjectFormat.getSchema().getPartitions().size(); i++) {
String partitionColumnName = businessObjectFormat.getSchema().getPartitions().get(i).getName();
String partitionValue = hivePartition.getPartitionValues().get(i);
partitionKeyValuePairs.add(String.format("`%s`='%s'", partitionColumnName, partitionValue));
}
sb.append(StringUtils.join(partitionKeyValuePairs, ", "));
// The partition location is the S3 key prefix followed by the hive partition path, when that path is not blank.
sb.append(String.format(") LOCATION 's3n://%s/%s%s';\n", s3BucketName, s3KeyPrefix, StringUtils.isNotBlank(hivePartition.getPath()) ? hivePartition.getPath() : ""));
}
} else // This is a non-partitioned table.
{
// Get location for this non-partitioned table.
String tableLocation = String.format("s3n://%s/%s", s3BucketName, s3KeyPrefix);
if (generateDdlRequest.customDdlEntity == null) {
// Since custom DDL was not specified and this table is not partitioned, add a LOCATION clause.
// This is the last line in the non-partitioned table DDL.
sb.append(String.format("LOCATION '%s';", tableLocation));
} else {
// Since custom DDL was used for a non-partitioned table, substitute the relative custom DDL token with the actual table location.
replacements.put(NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN, tableLocation);
}
}
}
}
Aggregations