use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.
the class BusinessObjectDataDaoHelper method createStorageUnitEntitiesFromStorageUnits.
/**
 * Builds one storage unit entity per storage unit create request, preserving the order of the requests.
 *
 * @param storageUnitCreateRequests the storage unit create requests
 * @param businessObjectDataEntity the business object data entity that each new storage unit is created for
 *
 * @return the list of storage unit entities, one for each create request
 */
private List<StorageUnitEntity> createStorageUnitEntitiesFromStorageUnits(List<StorageUnitCreateRequest> storageUnitCreateRequests,
    BusinessObjectDataEntity businessObjectDataEntity) {
    List<StorageUnitEntity> createdStorageUnits = new ArrayList<>();

    for (StorageUnitCreateRequest createRequest : storageUnitCreateRequests) {
        // Resolve the storage entity for the storage name given in this request.
        StorageEntity targetStorage = storageDaoHelper.getStorageEntity(createRequest.getStorageName());

        // Build the storage unit from the request's directory, files and discovery flag, then collect it.
        StorageUnitEntity createdStorageUnit =
            createStorageUnitEntity(businessObjectDataEntity, targetStorage, createRequest.getStorageDirectory(), createRequest.getStorageFiles(),
                createRequest.isDiscoverStorageFiles());
        createdStorageUnits.add(createdStorageUnit);
    }

    return createdStorageUnits;
}
use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.
the class BusinessObjectDataRetryStoragePolicyTransitionHelper method retryStoragePolicyTransition.
/**
 * Validates a request to retry a storage policy transition for a business object data and, when all checks pass, sends the corresponding storage policy
 * selection message to the configured SQS queue.
 *
 * @param businessObjectDataKey the business object data key
 * @param request the information needed to retry a storage policy transition
 *
 * @return the business object data information built from the business object data entity
 * @throws IllegalArgumentException if the number of storage files registered for the storage unit differs from the number of registered storage files
 * matching the S3 key prefix
 * @throws IllegalStateException if the SQS queue name configuration value is blank
 */
public BusinessObjectData retryStoragePolicyTransition(BusinessObjectDataKey businessObjectDataKey,
    BusinessObjectDataRetryStoragePolicyTransitionRequest request) {
    // Input validation: the key first, then the request.
    businessObjectDataHelper.validateBusinessObjectDataKey(businessObjectDataKey, true, true);
    validateBusinessObjectDataRetryStoragePolicyTransitionRequest(request);

    // Load the business object data and the storage policy referenced by the request.
    BusinessObjectDataEntity businessObjectDataEntity = businessObjectDataDaoHelper.getBusinessObjectDataEntity(businessObjectDataKey);
    StoragePolicyEntity storagePolicyEntity = storagePolicyDaoHelper.getStoragePolicyEntityByKey(request.getStoragePolicyKey());

    // The storage policy filter must match this business object data on definition, usage and file type. A filter field that is not set
    // (null definition/file type, blank usage) matches anything. The storage part of the filter is checked separately below.
    boolean matchesStoragePolicyFilter =
        (storagePolicyEntity.getBusinessObjectDefinition() == null ||
            storagePolicyEntity.getBusinessObjectDefinition().equals(businessObjectDataEntity.getBusinessObjectFormat().getBusinessObjectDefinition())) &&
            (StringUtils.isBlank(storagePolicyEntity.getUsage()) ||
                storagePolicyEntity.getUsage().equalsIgnoreCase(businessObjectDataEntity.getBusinessObjectFormat().getUsage())) &&
            (storagePolicyEntity.getFileType() == null ||
                storagePolicyEntity.getFileType().equals(businessObjectDataEntity.getBusinessObjectFormat().getFileType()));
    String filterMismatchMessage = String.format("Business object data does not match storage policy filter. " +
            "Storage policy: {%s}, business object data: {%s}",
        storagePolicyHelper.storagePolicyKeyAndVersionToString(request.getStoragePolicyKey(), storagePolicyEntity.getVersion()),
        businessObjectDataHelper.businessObjectDataEntityAltKeyToString(businessObjectDataEntity));
    Assert.isTrue(matchesStoragePolicyFilter, filterMismatchMessage);

    // Check the storage referenced by the storage policy filter.
    storagePolicyDaoHelper.validateStoragePolicyFilterStorage(storagePolicyEntity.getStorage());

    // Locate the storage unit of this business object data in the storage policy's storage.
    StorageUnitEntity storageUnitEntity = getStorageUnit(businessObjectDataEntity, storagePolicyEntity.getStorage());

    // Build the S3 key prefix for this business object data.
    String s3KeyPrefix = s3KeyPrefixHelper
        .buildS3KeyPrefix(storagePolicyEntity.getStorage(), storageUnitEntity.getBusinessObjectData().getBusinessObjectFormat(), businessObjectDataKey);

    // At least one storage file must be registered with the storage unit.
    int registeredStorageFilesCount = storageUnitEntity.getStorageFiles().size();
    String storageName = storageUnitEntity.getStorage().getName();
    Assert.isTrue(registeredStorageFilesCount > 0, String
        .format("Business object data has no storage files registered in \"%s\" storage. Business object data: {%s}", storageName,
            businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey)));

    // Count the registered storage files in the storage that start with the S3 key prefix.
    // The prefix stands for a directory, so it is matched with a trailing '/' character.
    String s3KeyPrefixWithTrailingSlash = StringUtils.appendIfMissing(s3KeyPrefix, "/");
    Long prefixMatchingStorageFilesCount = storageFileDao.getStorageFileCount(storageName, s3KeyPrefixWithTrailingSlash);

    // Sanity check for the S3 key prefix: both counts must agree.
    if (prefixMatchingStorageFilesCount.intValue() != registeredStorageFilesCount) {
        throw new IllegalArgumentException(String.format(
            "Number of storage files (%d) registered for the business object data in \"%s\" storage is not equal to " +
                "the number of registered storage files (%d) matching \"%s\" S3 key prefix in the same storage. Business object data: {%s}",
            registeredStorageFilesCount, storageName, prefixMatchingStorageFilesCount, s3KeyPrefixWithTrailingSlash,
            businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey)));
    }

    // The SQS queue name comes from the system configuration and must not be blank.
    String sqsQueueName = configurationHelper.getProperty(ConfigurationValue.STORAGE_POLICY_SELECTOR_JOB_SQS_QUEUE_NAME);
    if (StringUtils.isBlank(sqsQueueName)) {
        throw new IllegalStateException(String.format("SQS queue name not found. Ensure the \"%s\" configuration entry is configured.",
            ConfigurationValue.STORAGE_POLICY_SELECTOR_JOB_SQS_QUEUE_NAME.getKey()));
    }

    // Build the storage policy selection and hand it to the SQS specific step of the retry.
    sendStoragePolicySelectionSqsMessage(sqsQueueName,
        new StoragePolicySelection(businessObjectDataKey, storagePolicyHelper.getStoragePolicyKey(storagePolicyEntity),
            storagePolicyEntity.getVersion()));

    // Convert the entity into the business object data object returned to the caller.
    return businessObjectDataHelper.createBusinessObjectDataFromEntity(businessObjectDataEntity);
}
use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.
the class Hive13DdlGenerator method processPartitionFiltersForGenerateDdl.
/**
 * Processes the partition filters of a generate DDL request: selects the matching storage units, validates them against the request options and appends
 * the resulting DDL statements to the supplied string builder.
 *
 * @param generateDdlRequest the generate DDL request
 * @param sb the string builder that receives the generated statements
 * @param replacements the hash map of string values to be used to substitute the custom DDL tokens with their actual values
 * @param businessObjectFormatEntity the business object format entity
 * @param businessObjectFormat the business object format
 * @param ifNotExistsOption specifies if generated DDL contains "if not exists" option
 */
private void processPartitionFiltersForGenerateDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, HashMap<String, String> replacements,
    BusinessObjectFormatEntity businessObjectFormatEntity, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption) {
    // Build the format key from the format entity carried by the request, then replace its version with the
    // original (optional) version value from the request.
    BusinessObjectFormatKey formatKey = businessObjectFormatHelper.getBusinessObjectFormatKey(generateDdlRequest.businessObjectFormatEntity);
    formatKey.setBusinessObjectFormatVersion(generateDdlRequest.businessObjectFormatVersion);

    // Select the storage units for the requested partition filters. The query is restricted to VALID business object data and to the S3
    // storage platform; the trailing "true" asks for "available" storage units only.
    List<StorageUnitEntity> queriedStorageUnits = storageUnitDao
        .getStorageUnitsByPartitionFiltersAndStorages(formatKey, generateDdlRequest.partitionFilters, generateDdlRequest.businessObjectDataVersion,
            BusinessObjectDataStatusEntity.VALID, generateDdlRequest.storageNames, StoragePlatformEntity.S3, null, true);

    // Keep a single storage unit per business object data, resolved by the storage names listed in the request.
    // When no storage names are given, business object data registered with multiple storages makes this call fail.
    List<StorageUnitEntity> selectedStorageUnits =
        excludeDuplicateBusinessObjectData(queriedStorageUnits, generateDdlRequest.storageNames, generateDdlRequest.storageEntities);

    // Collect, per selected storage unit, the partition filter it matches and its primary and sub-partition values.
    // A single request partition filter may be matched by several business object data entities.
    List<List<String>> matchedFilters = new ArrayList<>();
    List<List<String>> availablePartitionValues = new ArrayList<>();
    for (StorageUnitEntity selectedStorageUnit : selectedStorageUnits) {
        BusinessObjectDataKey dataKey = businessObjectDataHelper.getBusinessObjectDataKey(selectedStorageUnit.getBusinessObjectData());
        // The first request partition filter is passed along with the key to build the matched filter.
        matchedFilters.add(businessObjectDataHelper.getPartitionFilter(dataKey, generateDdlRequest.partitionFilters.get(0)));
        availablePartitionValues.add(businessObjectDataHelper.getPrimaryAndSubPartitionValues(dataKey));
    }

    // When all registered sub-partitions are requested (and no explicit data version is given), fail on any registered
    // sub-partition that is not "available".
    if (generateDdlRequest.businessObjectDataVersion == null && BooleanUtils.isTrue(generateDdlRequest.includeAllRegisteredSubPartitions) &&
        !CollectionUtils.isEmpty(matchedFilters)) {
        notAllowNonAvailableRegisteredSubPartitions(formatKey, matchedFilters, availablePartitionValues, generateDdlRequest.storageNames);
    }

    // Unless missing data is explicitly allowed, every requested partition filter must have been matched.
    if (!BooleanUtils.isTrue(generateDdlRequest.allowMissingData)) {
        List<List<String>> unmatchedFilters = new ArrayList<>(generateDdlRequest.partitionFilters);
        unmatchedFilters.removeAll(matchedFilters);

        if (!unmatchedFilters.isEmpty()) {
            // Report the first unmatched partition filter.
            List<String> firstUnmatchedFilter = getFirstUnmatchedPartitionFilter(unmatchedFilters);
            throw new ObjectNotFoundException(String.format(
                "Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " +
                    "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " +
                    "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).", formatKey.getNamespace(),
                formatKey.getBusinessObjectDefinitionName(), formatKey.getBusinessObjectFormatUsage(), formatKey.getBusinessObjectFormatFileType(),
                formatKey.getBusinessObjectFormatVersion(), firstUnmatchedFilter.get(0),
                StringUtils.join(firstUnmatchedFilter.subList(1, firstUnmatchedFilter.size()), ","), generateDdlRequest.businessObjectDataVersion,
                StringUtils.join(generateDdlRequest.storageNames, ",")));
        }
    }

    // No custom DDL, a non-partitioned table and no business object data found: emit only a LOCATION clause with a token and stop.
    if (generateDdlRequest.customDdlEntity == null && !generateDdlRequest.isPartitioned && CollectionUtils.isEmpty(selectedStorageUnits)) {
        sb.append(String.format("LOCATION '%s';", NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN));
        return;
    }

    // From here on: the table is partitioned, custom DDL is specified, or at least one business object data instance was found.

    // For a partitioned table with the drop partitions flag set, emit one drop statement per requested partition filter.
    if (generateDdlRequest.isPartitioned && BooleanUtils.isTrue(generateDdlRequest.includeDropPartitions)) {
        for (List<String> partitionFilter : generateDdlRequest.partitionFilters) {
            sb.append(String.format("ALTER TABLE `%s` DROP IF EXISTS PARTITION (", generateDdlRequest.tableName));

            // List every partition column that has a non-blank value in this partition filter.
            List<String> columnValueAssignments = new ArrayList<>();
            for (int columnIndex = 0; columnIndex < partitionFilter.size(); columnIndex++) {
                String partitionValue = partitionFilter.get(columnIndex);
                if (StringUtils.isBlank(partitionValue)) {
                    continue;
                }

                // The schema partition at the same index supplies the column name. This relies on a partition filter never
                // having a value set at an index beyond the number of partitions in the schema.
                String partitionColumnName = businessObjectFormat.getSchema().getPartitions().get(columnIndex).getName();
                columnValueAssignments.add(String.format("`%s`='%s'", partitionColumnName, partitionValue));
            }

            sb.append(StringUtils.join(columnValueAssignments, ", ")).append(");\n");
        }
        sb.append('\n');
    }

    // Generate the statements for the selected storage units, if there are any.
    if (!CollectionUtils.isEmpty(selectedStorageUnits)) {
        processStorageUnitsForGenerateDdl(generateDdlRequest, sb, replacements, businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption,
            selectedStorageUnits);
    }
}
use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.
the class Hive13DdlGenerator method excludeDuplicateBusinessObjectData.
/**
 * Reduces a list of storage units to at most one storage unit per business object data. When the same business object data appears with several
 * storage units, the one whose storage comes first in the list of storage entities wins. If no storage names are specified, a business object data
 * that appears with more than one storage unit is rejected instead.
 *
 * @param storageUnitEntities the list of storage unit entities
 * @param storageNames the list of storage names; only checked for being empty
 * @param storageEntities the list of storage entities whose order defines the storage priority
 *
 * @return a new list holding the retained storage unit entities, ordered by the first occurrence of each business object data
 * @throws IllegalArgumentException on business object data being registered in multiple storages and storage names are not specified to resolve this
 */
protected List<StorageUnitEntity> excludeDuplicateBusinessObjectData(List<StorageUnitEntity> storageUnitEntities, List<String> storageNames,
    List<StorageEntity> storageEntities) throws IllegalArgumentException {
    // Insertion-ordered map from business object data to the storage unit currently selected for it.
    Map<BusinessObjectDataEntity, StorageUnitEntity> selectedStorageUnits = new LinkedHashMap<>();

    for (StorageUnitEntity candidate : storageUnitEntities) {
        BusinessObjectDataEntity businessObjectDataEntity = candidate.getBusinessObjectData();
        StorageUnitEntity selected = selectedStorageUnits.get(businessObjectDataEntity);

        // First storage unit seen for this business object data: select it. Stored values are never null, so a null lookup
        // means the business object data has not been seen yet.
        if (selected == null) {
            selectedStorageUnits.put(businessObjectDataEntity, candidate);
            continue;
        }

        // A duplicate can only be resolved when storage names were specified in the request.
        if (CollectionUtils.isEmpty(storageNames)) {
            throw new IllegalArgumentException(String.format("Found business object data registered in more than one storage. " +
                    "Please specify storage(s) in the request to resolve this. Business object data {%s}",
                businessObjectDataHelper.businessObjectDataEntityAltKeyToString(businessObjectDataEntity)));
        }

        // Switch to the candidate only if its storage is positioned earlier in the list of storage entities.
        int selectedStoragePosition = storageEntities.indexOf(selected.getStorage());
        int candidateStoragePosition = storageEntities.indexOf(candidate.getStorage());
        if (candidateStoragePosition < selectedStoragePosition) {
            selectedStorageUnits.put(businessObjectDataEntity, candidate);
        }
    }

    return new ArrayList<>(selectedStorageUnits.values());
}
use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.
the class Hive13DdlGenerator method notAllowNonAvailableRegisteredSubPartitions.
/**
 * Looks for registered sub-partitions that are not "available" among the "matched" partition filters and fails on the first one found.
 *
 * @param businessObjectFormatKey the business object format key
 * @param matchedAvailablePartitionFilters the list of "matched" partition filters
 * @param availablePartitions the list of already discovered "available" partitions, where each partition consists of primary and optional sub-partition
 * values
 * @param storageNames the list of storage names
 *
 * @throws ObjectNotFoundException if any storage unit remains after excluding "DELETED" business object data and the already available partitions
 */
protected void notAllowNonAvailableRegisteredSubPartitions(BusinessObjectFormatKey businessObjectFormatKey,
    List<List<String>> matchedAvailablePartitionFilters, List<List<String>> availablePartitions, List<String> storageNames) {
    // Query the storage units for the matched partition filters without restricting the business object data version or status (both passed
    // as null) and with "false" for the select-only-available-storage-units flag, so that every registered sub-partition is discovered.
    // The S3 storage platform is still specified, which limits the selection to S3 storages when the list of storage names is empty.
    List<StorageUnitEntity> registeredStorageUnits = storageUnitDao
        .getStorageUnitsByPartitionFiltersAndStorages(businessObjectFormatKey, matchedAvailablePartitionFilters, null, null, storageNames,
            StoragePlatformEntity.S3, null, false);

    // Drop the storage units whose business object data has "DELETED" status.
    List<StorageUnitEntity> notDeletedStorageUnits =
        storageUnitHelper.excludeBusinessObjectDataStatus(registeredStorageUnits, BusinessObjectDataStatusEntity.DELETED);

    // Drop the partitions already known to be "available"; whatever remains is registered but not available.
    List<StorageUnitEntity> nonAvailableStorageUnits = storageUnitHelper.excludePartitions(notDeletedStorageUnits, availablePartitions);

    // Nothing left means every registered sub-partition is available.
    if (CollectionUtils.isEmpty(nonAvailableStorageUnits)) {
        return;
    }

    // Report the first "non-available" registered sub-partition.
    BusinessObjectDataKey firstNonAvailableKey = businessObjectDataHelper.getBusinessObjectDataKey(nonAvailableStorageUnits.get(0).getBusinessObjectData());
    throw new ObjectNotFoundException(String.format(
        "Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " +
            "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " +
            "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).", businessObjectFormatKey.getNamespace(),
        businessObjectFormatKey.getBusinessObjectDefinitionName(), businessObjectFormatKey.getBusinessObjectFormatUsage(),
        businessObjectFormatKey.getBusinessObjectFormatFileType(), businessObjectFormatKey.getBusinessObjectFormatVersion(),
        firstNonAvailableKey.getPartitionValue(), StringUtils.join(firstNonAvailableKey.getSubPartitionValues(), ","),
        firstNonAvailableKey.getBusinessObjectDataVersion(), StringUtils.join(storageNames, ",")));
}
Aggregations