Search in sources :

Example 1 with StorageUnitEntity

use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.

The method createStorageUnitEntitiesFromStorageUnits of the class BusinessObjectDataDaoHelper.

/**
 * Builds one storage unit entity for every storage unit create request supplied.
 *
 * @param storageUnitCreateRequests the storage unit create requests to process
 * @param businessObjectDataEntity the business object data entity handed to every storage unit creation call
 *
 * @return the created storage unit entities, in the iteration order of the requests
 */
private List<StorageUnitEntity> createStorageUnitEntitiesFromStorageUnits(List<StorageUnitCreateRequest> storageUnitCreateRequests, BusinessObjectDataEntity businessObjectDataEntity) {
    List<StorageUnitEntity> result = new ArrayList<>();
    for (StorageUnitCreateRequest request : storageUnitCreateRequests) {
        // Look up the storage named in the request; the DAO helper is relied upon to verify that it exists.
        String storageName = request.getStorageName();
        StorageEntity storage = storageDaoHelper.getStorageEntity(storageName);
        // Build the storage unit for this request and collect it.
        StorageUnitEntity created =
            createStorageUnitEntity(businessObjectDataEntity, storage, request.getStorageDirectory(), request.getStorageFiles(), request.isDiscoverStorageFiles());
        result.add(created);
    }
    return result;
}
Also used : StorageUnitEntity(org.finra.herd.model.jpa.StorageUnitEntity) ArrayList(java.util.ArrayList) StorageEntity(org.finra.herd.model.jpa.StorageEntity) StorageUnitCreateRequest(org.finra.herd.model.api.xml.StorageUnitCreateRequest)

Example 2 with StorageUnitEntity

use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.

The method retryStoragePolicyTransition of the class BusinessObjectDataRetryStoragePolicyTransitionHelper.

/**
 * Retries a storage policy transition for the given business object data and returns the business object data information.
 * <p>
 * The inputs are validated first, then the business object data is checked against the storage policy filter, the registered storage files are
 * cross-checked against the S3 key prefix, and finally a storage policy selection message is sent to the configured SQS queue.
 *
 * @param businessObjectDataKey the business object data key
 * @param request the information needed to retry a storage policy transition
 *
 * @return the business object data information
 * @throws IllegalArgumentException if the number of storage files registered with the storage unit differs from the number of registered storage
 * files matching the S3 key prefix
 * @throws IllegalStateException if the SQS queue name configuration value is blank
 */
public BusinessObjectData retryStoragePolicyTransition(BusinessObjectDataKey businessObjectDataKey, BusinessObjectDataRetryStoragePolicyTransitionRequest request) {
    // Validate (and trim) both inputs before touching the database.
    businessObjectDataHelper.validateBusinessObjectDataKey(businessObjectDataKey, true, true);
    validateBusinessObjectDataRetryStoragePolicyTransitionRequest(request);

    // Load the business object data and the storage policy; both lookups are expected to fail when the target does not exist.
    BusinessObjectDataEntity dataEntity = businessObjectDataDaoHelper.getBusinessObjectDataEntity(businessObjectDataKey);
    StoragePolicyEntity policyEntity = storagePolicyDaoHelper.getStoragePolicyEntityByKey(request.getStoragePolicyKey());

    // The policy filter must match the business object data on definition, usage and file type.
    // An unset filter field matches anything. The storage is validated separately below.
    boolean matchesPolicyFilter =
        (policyEntity.getBusinessObjectDefinition() == null ||
            policyEntity.getBusinessObjectDefinition().equals(dataEntity.getBusinessObjectFormat().getBusinessObjectDefinition())) &&
            (StringUtils.isBlank(policyEntity.getUsage()) || policyEntity.getUsage().equalsIgnoreCase(dataEntity.getBusinessObjectFormat().getUsage())) &&
            (policyEntity.getFileType() == null || policyEntity.getFileType().equals(dataEntity.getBusinessObjectFormat().getFileType()));
    String filterMismatchMessage = String.format("Business object data does not match storage policy filter. " + "Storage policy: {%s}, business object data: {%s}",
        storagePolicyHelper.storagePolicyKeyAndVersionToString(request.getStoragePolicyKey(), policyEntity.getVersion()),
        businessObjectDataHelper.businessObjectDataEntityAltKeyToString(dataEntity));
    Assert.isTrue(matchesPolicyFilter, filterMismatchMessage);

    // Validate the storage referenced by the policy filter.
    storagePolicyDaoHelper.validateStoragePolicyFilterStorage(policyEntity.getStorage());

    // Locate (and validate) the storage unit of this business object data in the policy's storage.
    StorageUnitEntity storageUnit = getStorageUnit(dataEntity, policyEntity.getStorage());

    // Build the S3 key prefix for this business object data.
    String keyPrefix = s3KeyPrefixHelper.buildS3KeyPrefix(policyEntity.getStorage(), storageUnit.getBusinessObjectData().getBusinessObjectFormat(), businessObjectDataKey);

    // At least one storage file must be registered with the storage unit.
    int registeredFileCount = storageUnit.getStorageFiles().size();
    String storageName = storageUnit.getStorage().getName();
    String noFilesMessage = String.format("Business object data has no storage files registered in \"%s\" storage. Business object data: {%s}", storageName,
        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey));
    Assert.isTrue(registeredFileCount > 0, noFilesMessage);

    // The S3 key prefix denotes a directory, so it is matched with a trailing '/' character.
    String directoryPrefix = StringUtils.appendIfMissing(keyPrefix, "/");
    Long prefixMatchCount = storageFileDao.getStorageFileCount(storageName, directoryPrefix);

    // Sanity check: every storage file registered with the storage unit must be accounted for under the S3 key prefix.
    if (registeredFileCount != prefixMatchCount.intValue()) {
        String countMismatchMessage = String.format(
            "Number of storage files (%d) registered for the business object data in \"%s\" storage is not equal to " + "the number of registered storage files (%d) matching \"%s\" S3 key prefix in the same storage. Business object data: {%s}",
            registeredFileCount, storageName, prefixMatchCount, directoryPrefix, businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey));
        throw new IllegalArgumentException(countMismatchMessage);
    }

    // The target SQS queue name comes from the system configuration and must be present.
    String queueName = configurationHelper.getProperty(ConfigurationValue.STORAGE_POLICY_SELECTOR_JOB_SQS_QUEUE_NAME);
    if (StringUtils.isBlank(queueName)) {
        String missingQueueMessage = String.format("SQS queue name not found. Ensure the \"%s\" configuration entry is configured.",
            ConfigurationValue.STORAGE_POLICY_SELECTOR_JOB_SQS_QUEUE_NAME.getKey());
        throw new IllegalStateException(missingQueueMessage);
    }

    // Describe the selection and send it to the queue to retry the transition.
    StoragePolicySelection selection = new StoragePolicySelection(businessObjectDataKey, storagePolicyHelper.getStoragePolicyKey(policyEntity), policyEntity.getVersion());
    sendStoragePolicySelectionSqsMessage(queueName, selection);

    // Convert the entity into the business object data returned to the caller.
    return businessObjectDataHelper.createBusinessObjectDataFromEntity(dataEntity);
}
Also used : StorageUnitEntity(org.finra.herd.model.jpa.StorageUnitEntity) StoragePolicyEntity(org.finra.herd.model.jpa.StoragePolicyEntity) BusinessObjectDataEntity(org.finra.herd.model.jpa.BusinessObjectDataEntity) StoragePolicySelection(org.finra.herd.model.dto.StoragePolicySelection)

Example 3 with StorageUnitEntity

use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.

The method processPartitionFiltersForGenerateDdl of the class Hive13DdlGenerator.

/**
 * Processes the partition filters of a generate DDL request and appends the resulting statements to the supplied string builder.
 * <p>
 * The method looks up the storage units matching the partition filters, optionally fails on non-available registered sub-partitions or on partition
 * filters without matching data, and then appends either a LOCATION clause with a token, or the optional drop-partition statements followed by
 * whatever the storage unit processing step appends.
 *
 * @param generateDdlRequest the generate DDL request
 * @param sb the string builder to be updated with the generated statements
 * @param replacements the hash map of string values to be used to substitute the custom DDL tokens with their actual values
 * @param businessObjectFormatEntity the business object format entity
 * @param businessObjectFormat the business object format
 * @param ifNotExistsOption specifies if generated DDL contains "if not exists" option
 */
private void processPartitionFiltersForGenerateDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, HashMap<String, String> replacements, BusinessObjectFormatEntity businessObjectFormatEntity, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption) {
    // Build the format key from the request's entity, then restore the (optional) format version originally given in the request.
    BusinessObjectFormatKey formatKey = businessObjectFormatHelper.getBusinessObjectFormatKey(generateDdlRequest.businessObjectFormatEntity);
    formatKey.setBusinessObjectFormatVersion(generateDdlRequest.businessObjectFormatVersion);

    // Select storage units for the partition filters: VALID business object data status, S3 storage platform, and only "available"
    // storage units (the trailing "true"). Restricting the platform to S3 keeps the selection within S3 storages when no storage names are given.
    List<StorageUnitEntity> storageUnits = storageUnitDao
        .getStorageUnitsByPartitionFiltersAndStorages(formatKey, generateDdlRequest.partitionFilters, generateDdlRequest.businessObjectDataVersion,
            BusinessObjectDataStatusEntity.VALID, generateDdlRequest.storageNames, StoragePlatformEntity.S3, null, true);

    // Keep a single storage unit per business object data. Without storage names this step fails on data registered in multiple storages.
    storageUnits = excludeDuplicateBusinessObjectData(storageUnits, generateDdlRequest.storageNames, generateDdlRequest.storageEntities);

    // Collect, per storage unit, the partition filter it matched and its full list of partition values.
    // A single request partition filter may be matched by several business object data instances.
    List<List<String>> matchedFilters = new ArrayList<>();
    List<List<String>> availablePartitions = new ArrayList<>();
    for (StorageUnitEntity storageUnit : storageUnits) {
        BusinessObjectDataKey dataKey = businessObjectDataHelper.getBusinessObjectDataKey(storageUnit.getBusinessObjectData());
        // The first request partition filter is passed along as the sample filter.
        matchedFilters.add(businessObjectDataHelper.getPartitionFilter(dataKey, generateDdlRequest.partitionFilters.get(0)));
        availablePartitions.add(businessObjectDataHelper.getPrimaryAndSubPartitionValues(dataKey));
    }

    // When all registered sub-partitions must be included (and no explicit data version is requested),
    // fail if any registered sub-partition is not available.
    boolean mustCheckRegisteredSubPartitions =
        generateDdlRequest.businessObjectDataVersion == null && BooleanUtils.isTrue(generateDdlRequest.includeAllRegisteredSubPartitions) &&
            !matchedFilters.isEmpty();
    if (mustCheckRegisteredSubPartitions) {
        notAllowNonAvailableRegisteredSubPartitions(formatKey, matchedFilters, availablePartitions, generateDdlRequest.storageNames);
    }

    // Unless missing data is explicitly allowed, every request partition filter must have been matched.
    if (!BooleanUtils.isTrue(generateDdlRequest.allowMissingData)) {
        List<List<String>> unmatchedFilters = new ArrayList<>(generateDdlRequest.partitionFilters);
        unmatchedFilters.removeAll(matchedFilters);
        if (!unmatchedFilters.isEmpty()) {
            // Report the first unmatched partition filter.
            List<String> firstUnmatched = getFirstUnmatchedPartitionFilter(unmatchedFilters);
            String notAvailableMessage = String.format(
                "Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " + "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " + "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).",
                formatKey.getNamespace(), formatKey.getBusinessObjectDefinitionName(), formatKey.getBusinessObjectFormatUsage(),
                formatKey.getBusinessObjectFormatFileType(), formatKey.getBusinessObjectFormatVersion(), firstUnmatched.get(0),
                StringUtils.join(firstUnmatched.subList(1, firstUnmatched.size()), ","), generateDdlRequest.businessObjectDataVersion,
                StringUtils.join(generateDdlRequest.storageNames, ","));
            throw new ObjectNotFoundException(notAvailableMessage);
        }
    }

    // No custom DDL, a non-partitioned table and no storage units found: emit a LOCATION clause carrying the location token and stop.
    if (generateDdlRequest.customDdlEntity == null && !generateDdlRequest.isPartitioned && CollectionUtils.isEmpty(storageUnits)) {
        sb.append(String.format("LOCATION '%s';", NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN));
        return;
    }

    // From here on: the table is partitioned, custom DDL is specified, or at least one storage unit was found.

    // For a partitioned table with the drop partitions flag set, emit one drop statement per request partition filter.
    if (generateDdlRequest.isPartitioned && BooleanUtils.isTrue(generateDdlRequest.includeDropPartitions)) {
        for (List<String> partitionFilter : generateDdlRequest.partitionFilters) {
            sb.append(String.format("ALTER TABLE `%s` DROP IF EXISTS PARTITION (", generateDdlRequest.tableName));

            // Only non-blank filter values contribute a `column`='value' pair.
            List<String> columnValuePairs = new ArrayList<>();
            for (int i = 0; i < partitionFilter.size(); i++) {
                String partitionValue = partitionFilter.get(i);
                if (StringUtils.isBlank(partitionValue)) {
                    continue;
                }
                // Assumes a partition filter never carries a value at an index beyond the schema's partition columns,
                // so the positional lookup below stays within bounds.
                String columnName = businessObjectFormat.getSchema().getPartitions().get(i).getName();
                columnValuePairs.add(String.format("`%s`='%s'", columnName, partitionValue));
            }

            sb.append(StringUtils.join(columnValuePairs, ", ")).append(");\n");
        }
        sb.append('\n');
    }

    // Hand the selected storage units over to the storage unit processing step.
    if (!CollectionUtils.isEmpty(storageUnits)) {
        processStorageUnitsForGenerateDdl(generateDdlRequest, sb, replacements, businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption, storageUnits);
    }
}
Also used : StorageUnitEntity(org.finra.herd.model.jpa.StorageUnitEntity) ObjectNotFoundException(org.finra.herd.model.ObjectNotFoundException) BusinessObjectFormatKey(org.finra.herd.model.api.xml.BusinessObjectFormatKey) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) BusinessObjectDataKey(org.finra.herd.model.api.xml.BusinessObjectDataKey)

Example 4 with StorageUnitEntity

use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.

The method excludeDuplicateBusinessObjectData of the class Hive13DdlGenerator.

/**
 * Reduces the storage units to at most one per business object data.
 * <p>
 * When the same business object data appears more than once and no storage names were specified, the method fails. Otherwise the storage unit whose
 * storage has the smaller index in {@code storageEntities} is kept; presumably that list follows the order of the storage names in the request
 * (verify against the caller). The result preserves the order in which each business object data was first encountered.
 *
 * @param storageUnitEntities the list of storage unit entities
 * @param storageNames the list of storage names; only checked for being empty
 * @param storageEntities the list of storage entities used to rank storages by position
 *
 * @return the updated list of storage unit entities
 * @throws IllegalArgumentException on business object data being registered in multiple storages and storage names are not specified to resolve this
 */
protected List<StorageUnitEntity> excludeDuplicateBusinessObjectData(List<StorageUnitEntity> storageUnitEntities, List<String> storageNames, List<StorageEntity> storageEntities) throws IllegalArgumentException {
    // Insertion-ordered, so the output follows the first appearance of each business object data.
    Map<BusinessObjectDataEntity, StorageUnitEntity> chosenByData = new LinkedHashMap<>();

    for (StorageUnitEntity candidate : storageUnitEntities) {
        BusinessObjectDataEntity dataEntity = candidate.getBusinessObjectData();

        // First storage unit seen for this business object data: take it as is.
        if (!chosenByData.containsKey(dataEntity)) {
            chosenByData.put(dataEntity, candidate);
            continue;
        }

        // A duplicate can only be resolved when storage names were specified.
        if (CollectionUtils.isEmpty(storageNames)) {
            throw new IllegalArgumentException(String.format(
                "Found business object data registered in more than one storage. " + "Please specify storage(s) in the request to resolve this. Business object data {%s}",
                businessObjectDataHelper.businessObjectDataEntityAltKeyToString(dataEntity)));
        }

        // Keep whichever storage unit lives in the storage listed earlier; ties leave the current choice in place.
        int chosenStorageIndex = storageEntities.indexOf(chosenByData.get(dataEntity).getStorage());
        int candidateStorageIndex = storageEntities.indexOf(candidate.getStorage());
        if (candidateStorageIndex < chosenStorageIndex) {
            chosenByData.put(dataEntity, candidate);
        }
    }

    return new ArrayList<>(chosenByData.values());
}
Also used : StorageUnitEntity(org.finra.herd.model.jpa.StorageUnitEntity) ArrayList(java.util.ArrayList) StorageEntity(org.finra.herd.model.jpa.StorageEntity) BusinessObjectDataEntity(org.finra.herd.model.jpa.BusinessObjectDataEntity) LinkedHashMap(java.util.LinkedHashMap)

Example 5 with StorageUnitEntity

use of org.finra.herd.model.jpa.StorageUnitEntity in project herd by FINRAOS.

The method notAllowNonAvailableRegisteredSubPartitions of the class Hive13DdlGenerator.

/**
 * Fails when any registered sub-partition matching the given partition filters is not among the already discovered "available" partitions.
 *
 * @param businessObjectFormatKey the business object format key
 * @param matchedAvailablePartitionFilters the list of "matched" partition filters
 * @param availablePartitions the list of already discovered "available" partitions, where each partition consists of primary and optional sub-partition
 * values
 * @param storageNames the list of storage names
 *
 * @throws ObjectNotFoundException if at least one "non-available" registered sub-partition remains after filtering; the message describes the first one
 */
protected void notAllowNonAvailableRegisteredSubPartitions(BusinessObjectFormatKey businessObjectFormatKey, List<List<String>> matchedAvailablePartitionFilters, List<List<String>> availablePartitions, List<String> storageNames) {
    // Select every storage unit registered for the matched filters, whatever the business object data or storage unit status:
    // no data version, no data status, and "false" for the select-only-available flag. The S3 storage platform is specified so that
    // only S3 storages are considered when the list of storage names is empty.
    List<StorageUnitEntity> registeredStorageUnits = storageUnitDao
        .getStorageUnitsByPartitionFiltersAndStorages(businessObjectFormatKey, matchedAvailablePartitionFilters, null, null, storageNames, StoragePlatformEntity.S3,
            null, false);

    // Drop storage units whose business object data has "DELETED" status.
    List<StorageUnitEntity> notDeletedStorageUnits = storageUnitHelper.excludeBusinessObjectDataStatus(registeredStorageUnits, BusinessObjectDataStatusEntity.DELETED);

    // Drop the partitions already known to be available; callers only get here with a non-empty list of matched partitions.
    List<StorageUnitEntity> nonAvailableStorageUnits = storageUnitHelper.excludePartitions(notDeletedStorageUnits, availablePartitions);

    // Nothing left means every registered sub-partition is available.
    if (CollectionUtils.isEmpty(nonAvailableStorageUnits)) {
        return;
    }

    // Report the first "non-available" registered sub-partition.
    BusinessObjectDataKey firstNonAvailableKey = businessObjectDataHelper.getBusinessObjectDataKey(nonAvailableStorageUnits.get(0).getBusinessObjectData());
    String notAvailableMessage = String.format(
        "Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " + "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " + "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).",
        businessObjectFormatKey.getNamespace(), businessObjectFormatKey.getBusinessObjectDefinitionName(), businessObjectFormatKey.getBusinessObjectFormatUsage(),
        businessObjectFormatKey.getBusinessObjectFormatFileType(), businessObjectFormatKey.getBusinessObjectFormatVersion(),
        firstNonAvailableKey.getPartitionValue(), StringUtils.join(firstNonAvailableKey.getSubPartitionValues(), ","),
        firstNonAvailableKey.getBusinessObjectDataVersion(), StringUtils.join(storageNames, ","));
    throw new ObjectNotFoundException(notAvailableMessage);
}
Also used : StorageUnitEntity(org.finra.herd.model.jpa.StorageUnitEntity) ObjectNotFoundException(org.finra.herd.model.ObjectNotFoundException) BusinessObjectDataKey(org.finra.herd.model.api.xml.BusinessObjectDataKey)

Aggregations

StorageUnitEntity (org.finra.herd.model.jpa.StorageUnitEntity)196 Test (org.junit.Test)124 BusinessObjectDataEntity (org.finra.herd.model.jpa.BusinessObjectDataEntity)105 BusinessObjectDataKey (org.finra.herd.model.api.xml.BusinessObjectDataKey)78 StorageEntity (org.finra.herd.model.jpa.StorageEntity)57 ArrayList (java.util.ArrayList)42 AbstractServiceTest (org.finra.herd.service.AbstractServiceTest)39 StorageUnitStatusEntity (org.finra.herd.model.jpa.StorageUnitStatusEntity)36 BusinessObjectDataStorageUnitKey (org.finra.herd.model.api.xml.BusinessObjectDataStorageUnitKey)30 StorageFileEntity (org.finra.herd.model.jpa.StorageFileEntity)30 StoragePolicyKey (org.finra.herd.model.api.xml.StoragePolicyKey)24 BusinessObjectFormatEntity (org.finra.herd.model.jpa.BusinessObjectFormatEntity)23 StorageFile (org.finra.herd.model.api.xml.StorageFile)22 Predicate (javax.persistence.criteria.Predicate)19 BusinessObjectDataStatusEntity (org.finra.herd.model.jpa.BusinessObjectDataStatusEntity)19 HashMap (java.util.HashMap)18 CriteriaBuilder (javax.persistence.criteria.CriteriaBuilder)17 StoragePolicySelection (org.finra.herd.model.dto.StoragePolicySelection)16 S3FileTransferRequestParamsDto (org.finra.herd.model.dto.S3FileTransferRequestParamsDto)15 PutObjectRequest (com.amazonaws.services.s3.model.PutObjectRequest)14