use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class BusinessObjectFormatHelper method createBusinessObjectFormatFromEntity.
/**
 * Creates the business object format from the persisted entity.
 *
 * @param businessObjectFormatEntity the newly persisted business object format entity.
 * @param checkLatestVersion when {@code true}, the parents, children, and retention information are read from the entity returned by an alternate key
 * lookup with the format version cleared (intended to resolve the latest version) instead of from the passed-in entity. A {@code null} value is
 * treated as {@code false}.
 *
 * @return the business object format.
 */
public BusinessObjectFormat createBusinessObjectFormatFromEntity(BusinessObjectFormatEntity businessObjectFormatEntity, Boolean checkLatestVersion) {
    BusinessObjectFormat businessObjectFormat = new BusinessObjectFormat();
    businessObjectFormat.setId(businessObjectFormatEntity.getId());
    businessObjectFormat.setNamespace(businessObjectFormatEntity.getBusinessObjectDefinition().getNamespace().getCode());
    businessObjectFormat.setBusinessObjectDefinitionName(businessObjectFormatEntity.getBusinessObjectDefinition().getName());
    businessObjectFormat.setBusinessObjectFormatUsage(businessObjectFormatEntity.getUsage());
    businessObjectFormat.setBusinessObjectFormatFileType(businessObjectFormatEntity.getFileType().getCode());
    businessObjectFormat.setBusinessObjectFormatVersion(businessObjectFormatEntity.getBusinessObjectFormatVersion());
    businessObjectFormat.setLatestVersion(businessObjectFormatEntity.getLatestVersion());
    businessObjectFormat.setPartitionKey(businessObjectFormatEntity.getPartitionKey());
    businessObjectFormat.setDescription(businessObjectFormatEntity.getDescription());

    // Add in the attributes.
    List<Attribute> attributes = new ArrayList<>();
    businessObjectFormat.setAttributes(attributes);
    for (BusinessObjectFormatAttributeEntity attributeEntity : businessObjectFormatEntity.getAttributes()) {
        Attribute attribute = new Attribute();
        attributes.add(attribute);
        attribute.setName(attributeEntity.getName());
        attribute.setValue(attributeEntity.getValue());
    }

    // Add in the attribute definitions.
    List<AttributeDefinition> attributeDefinitions = new ArrayList<>();
    businessObjectFormat.setAttributeDefinitions(attributeDefinitions);
    for (BusinessObjectDataAttributeDefinitionEntity attributeDefinitionEntity : businessObjectFormatEntity.getAttributeDefinitions()) {
        AttributeDefinition attributeDefinition = new AttributeDefinition();
        attributeDefinitions.add(attributeDefinition);
        attributeDefinition.setName(attributeDefinitionEntity.getName());
        attributeDefinition.setPublish(attributeDefinitionEntity.getPublish());
    }

    // Only add schema information if this format has any schema columns defined.
    if (!businessObjectFormatEntity.getSchemaColumns().isEmpty()) {
        Schema schema = new Schema();
        businessObjectFormat.setSchema(schema);
        schema.setNullValue(businessObjectFormatEntity.getNullValue());
        schema.setDelimiter(businessObjectFormatEntity.getDelimiter());
        schema.setEscapeCharacter(businessObjectFormatEntity.getEscapeCharacter());
        schema.setPartitionKeyGroup(
            businessObjectFormatEntity.getPartitionKeyGroup() != null ? businessObjectFormatEntity.getPartitionKeyGroup().getPartitionKeyGroupName() : null);

        // Create two lists of schema column entities: one for the data columns and one for the partition columns.
        List<SchemaColumnEntity> dataSchemaColumns = new ArrayList<>();
        List<SchemaColumnEntity> partitionSchemaColumns = new ArrayList<>();
        for (SchemaColumnEntity schemaColumnEntity : businessObjectFormatEntity.getSchemaColumns()) {
            // We can determine which list (or both) a column entity belongs to depending on whether it has a position and/or partition level set.
            if (schemaColumnEntity.getPosition() != null) {
                dataSchemaColumns.add(schemaColumnEntity);
            }
            if (schemaColumnEntity.getPartitionLevel() != null) {
                partitionSchemaColumns.add(schemaColumnEntity);
            }
        }

        // Sort the data schema columns on the position.
        Collections.sort(dataSchemaColumns, new SchemaColumnPositionComparator());

        // Sort the partition schema columns on the partition level.
        Collections.sort(partitionSchemaColumns, new SchemaColumnPartitionLevelComparator());

        // Add in the data schema columns.
        List<SchemaColumn> schemaColumns = new ArrayList<>();
        schema.setColumns(schemaColumns);
        for (SchemaColumnEntity schemaColumnEntity : dataSchemaColumns) {
            schemaColumns.add(createSchemaColumn(schemaColumnEntity));
        }

        // Add in the partition columns, but only when there is at least one of them.
        // NOTE(review): the original comment here was truncated; it indicates that setting an empty partitions list would result in
        // output with no columns, which isn't valid from an XSD standpoint.
        if (!partitionSchemaColumns.isEmpty()) {
            List<SchemaColumn> partitionColumns = new ArrayList<>();
            schema.setPartitions(partitionColumns);
            for (SchemaColumnEntity schemaColumnEntity : partitionSchemaColumns) {
                partitionColumns.add(createSchemaColumn(schemaColumnEntity));
            }
        }
    }

    // By default, the parents, children, and retention information come from the passed-in entity.
    BusinessObjectFormatEntity latestVersionBusinessObjectFormatEntity = businessObjectFormatEntity;

    // When requested, look the entity up again by its alternate key with the version cleared.
    // Boolean.TRUE.equals() is used so that a null flag is treated as "false" instead of failing on auto-unboxing.
    if (Boolean.TRUE.equals(checkLatestVersion)) {
        BusinessObjectFormatKey businessObjectFormatKey = getBusinessObjectFormatKey(businessObjectFormatEntity);
        businessObjectFormatKey.setBusinessObjectFormatVersion(null);
        latestVersionBusinessObjectFormatEntity = businessObjectFormatDao.getBusinessObjectFormatByAltKey(businessObjectFormatKey);
    }

    // Add business object format parents. The version is cleared on each reported key.
    List<BusinessObjectFormatKey> businessObjectFormatParents = new ArrayList<>();
    businessObjectFormat.setBusinessObjectFormatParents(businessObjectFormatParents);
    for (BusinessObjectFormatEntity businessObjectFormatEntityParent : latestVersionBusinessObjectFormatEntity.getBusinessObjectFormatParents()) {
        BusinessObjectFormatKey businessObjectFormatParent = getBusinessObjectFormatKey(businessObjectFormatEntityParent);
        businessObjectFormatParent.setBusinessObjectFormatVersion(null);
        businessObjectFormatParents.add(businessObjectFormatParent);
    }

    // Add business object format children. The version is cleared on each reported key.
    List<BusinessObjectFormatKey> businessObjectFormatChildren = new ArrayList<>();
    businessObjectFormat.setBusinessObjectFormatChildren(businessObjectFormatChildren);
    for (BusinessObjectFormatEntity businessObjectFormatEntityChild : latestVersionBusinessObjectFormatEntity.getBusinessObjectFormatChildren()) {
        BusinessObjectFormatKey businessObjectFormatChild = getBusinessObjectFormatKey(businessObjectFormatEntityChild);
        businessObjectFormatChild.setBusinessObjectFormatVersion(null);
        businessObjectFormatChildren.add(businessObjectFormatChild);
    }

    // Add retention information. The retention type is only set when the entity has one.
    businessObjectFormat.setRecordFlag(latestVersionBusinessObjectFormatEntity.isRecordFlag());
    businessObjectFormat.setRetentionPeriodInDays(latestVersionBusinessObjectFormatEntity.getRetentionPeriodInDays());
    if (latestVersionBusinessObjectFormatEntity.getRetentionType() != null) {
        businessObjectFormat.setRetentionType(latestVersionBusinessObjectFormatEntity.getRetentionType().getCode());
    }

    return businessObjectFormat;
}
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class BusinessObjectFormatHelper method createSchemaColumn.
/**
 * Builds a schema column that carries the same name, type, size, required flag, default value, and description as the given schema column entity.
 *
 * @param schemaColumnEntity the schema column entity to copy the values from.
 *
 * @return the newly created schema column.
 */
private SchemaColumn createSchemaColumn(SchemaColumnEntity schemaColumnEntity) {
    final SchemaColumn column = new SchemaColumn();

    // Column identity and data type.
    column.setName(schemaColumnEntity.getName());
    column.setType(schemaColumnEntity.getType());
    column.setSize(schemaColumnEntity.getSize());

    // Constraints and descriptive information.
    column.setRequired(schemaColumnEntity.getRequired());
    column.setDefaultValue(schemaColumnEntity.getDefaultValue());
    column.setDescription(schemaColumnEntity.getDescription());

    return column;
}
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class Hive13DdlGenerator method generateDdlColumns.
/**
 * Generates the DDL column definitions based on the given business object format. The generated column definitions look like:
 * <p>
 * <pre>
 * `COL_NAME1` VARCHAR(2) COMMENT 'some comment',
 * `COL_NAME2` VARCHAR(2),
 * `ORGNL_COL_NAME3` DATE
 * )
 * </pre>
 * <p>
 * Column definitions are separated by a comma followed by a new line. If a column is also a partition column, the text 'ORGNL_' is prefixed to the
 * column name. A comment clause is only added for columns that have a non-blank description. Note the closing parenthesis at the end of the
 * statement.
 * <p>
 * NOTE(review): earlier documentation stated that each column definition is indented using 4 spaces, while the format string used here starts
 * with a single space - confirm which one is intended.
 *
 * @param businessObjectFormatEntity The persistent entity of business object format
 * @param businessObjectFormat The {@link BusinessObjectFormat}
 *
 * @return String containing the generated column definitions.
 */
private String generateDdlColumns(BusinessObjectFormatEntity businessObjectFormatEntity, BusinessObjectFormat businessObjectFormat) {
    StringBuilder sb = new StringBuilder();

    // The list of partition columns does not change while iterating, so look it up once.
    List<SchemaColumn> partitionColumns = businessObjectFormat.getSchema().getPartitions();

    // Add schema columns.
    boolean firstRow = true;
    for (SchemaColumn schemaColumn : businessObjectFormat.getSchema().getColumns()) {
        // Every column definition except for the first one is preceded by a separator.
        if (firstRow) {
            firstRow = false;
        } else {
            sb.append(",\n");
        }

        // Check if a schema column is also a partition column and prepend "ORGNL_" prefix if this is the case.
        boolean isPartitionColumn = !CollectionUtils.isEmpty(partitionColumns) && partitionColumns.contains(schemaColumn);
        String columnNamePrefix = isPartitionColumn ? "ORGNL_" : "";

        // Only add a comment clause when the column has a description.
        String commentClause =
            StringUtils.isNotBlank(schemaColumn.getDescription()) ? String.format(" COMMENT '%s'", escapeSingleQuotes(schemaColumn.getDescription())) : "";

        // Add a schema column declaration.
        sb.append(String.format(" `%s%s` %s%s", columnNamePrefix, schemaColumn.getName(), getHiveDataType(schemaColumn, businessObjectFormatEntity),
            commentClause));
    }

    sb.append(")\n");

    return sb.toString();
}
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class Hive13DdlGenerator method processStorageUnitsForGenerateDdl.
/**
* Adds the relative "alter table add partition" statements for each storage unit entity. Please note that each request partition value might result in
* multiple available storage unit entities (subpartitions). For a non-partitioned table, either a LOCATION clause is appended to the string builder
* (when no custom DDL is specified) or the table location is added to the replacements map under the non-partitioned table location custom DDL token.
*
* @param generateDdlRequest the generate DDL request; this method reads its suppressScanForUnregisteredSubPartitions, s3BucketNames, isPartitioned,
* tableName, and customDdlEntity fields
* @param sb the string builder to be updated with the "alter table add partition" statements
* @param replacements the hash map of string values to be used to substitute the custom DDL tokens with their actual values
* @param businessObjectFormatEntity the business object format entity
* @param businessObjectFormat the business object format
* @param ifNotExistsOption specifies if generated DDL contains "if not exists" option
* @param storageUnitEntities the list of storage unit entities
*/
private void processStorageUnitsForGenerateDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, HashMap<String, String> replacements, BusinessObjectFormatEntity businessObjectFormatEntity, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption, List<StorageUnitEntity> storageUnitEntities) {
// If flag is not set to suppress scan for unregistered sub-partitions, retrieve all storage
// file paths for the relative storage units loaded in a multi-valued map for easy access.
// When the flag is set, an empty map is used and no storage file paths are retrieved.
MultiValuedMap<Integer, String> storageUnitIdToStorageFilePathsMap = BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions) ? new ArrayListValuedHashMap<>() : storageFileDao.getStorageFilePathsByStorageUnitIds(storageUnitHelper.getStorageUnitIds(storageUnitEntities));
// Process all available business object data instances.
for (StorageUnitEntity storageUnitEntity : storageUnitEntities) {
// Get business object data key and S3 key prefix for this business object data.
BusinessObjectDataKey businessObjectDataKey = businessObjectDataHelper.getBusinessObjectDataKey(storageUnitEntity.getBusinessObjectData());
String s3KeyPrefix = s3KeyPrefixHelper.buildS3KeyPrefix(storageUnitEntity.getStorage(), storageUnitEntity.getBusinessObjectData().getBusinessObjectFormat(), businessObjectDataKey);
// If flag is set to suppress scan for unregistered sub-partitions, use the directory path or the S3 key prefix
// as the partition's location, otherwise, use storage files to discover all unregistered sub-partitions.
Collection<String> storageFilePaths = new ArrayList<>();
if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions)) {
// Validate the directory path value if it is present. Here the directory path must be equal to the S3 key prefix.
if (storageUnitEntity.getDirectoryPath() != null) {
Assert.isTrue(storageUnitEntity.getDirectoryPath().equals(s3KeyPrefix), String.format("Storage directory path \"%s\" registered with business object data {%s} " + "in \"%s\" storage does not match the expected S3 key prefix \"%s\".", storageUnitEntity.getDirectoryPath(), businessObjectDataHelper.businessObjectDataEntityAltKeyToString(storageUnitEntity.getBusinessObjectData()), storageUnitEntity.getStorage().getName(), s3KeyPrefix));
}
// Add the S3 key prefix to the list of storage files.
// We add a trailing '/' character to the prefix, since it represents a directory.
storageFilePaths.add(StringUtils.appendIfMissing(s3KeyPrefix, "/"));
} else {
// Retrieve storage file paths registered with this business object data in the specified storage.
// A new empty list is used when the map has no entry for this storage unit.
storageFilePaths = storageUnitIdToStorageFilePathsMap.containsKey(storageUnitEntity.getId()) ? storageUnitIdToStorageFilePathsMap.get(storageUnitEntity.getId()) : new ArrayList<>();
// Validate storage file paths registered with this business object data in the specified storage.
// The validation check below is required even if we have no storage files registered.
storageFileHelper.validateStorageFilePaths(storageFilePaths, s3KeyPrefix, storageUnitEntity.getBusinessObjectData(), storageUnitEntity.getStorage().getName());
// If there are no storage files registered for this storage unit, we should use the storage directory path value.
if (storageFilePaths.isEmpty()) {
// Validate that directory path value is present and it starts with the S3 key prefix.
Assert.isTrue(storageUnitEntity.getDirectoryPath() != null && storageUnitEntity.getDirectoryPath().startsWith(s3KeyPrefix), String.format("Storage directory path \"%s\" registered with business object data {%s} " + "in \"%s\" storage does not match the expected S3 key prefix \"%s\".", storageUnitEntity.getDirectoryPath(), businessObjectDataHelper.businessObjectDataEntityAltKeyToString(storageUnitEntity.getBusinessObjectData()), storageUnitEntity.getStorage().getName(), s3KeyPrefix));
// Add storage directory path to the empty storage files list.
// We add a trailing '/' character to the path, since it represents a directory.
// NOTE(review): unlike the suppress-scan branch above, the '/' is appended here unconditionally rather than via
// StringUtils.appendIfMissing - confirm that a registered directory path never ends with '/'.
storageFilePaths.add(storageUnitEntity.getDirectoryPath() + "/");
}
}
// Retrieve the s3 bucket name.
String s3BucketName = getS3BucketName(storageUnitEntity.getStorage(), generateDdlRequest.s3BucketNames);
// For partitioned table, add the relative partitions to the generated DDL.
if (generateDdlRequest.isPartitioned) {
// If flag is set to suppress scan for unregistered sub-partitions, validate that the number of primary and sub-partition values specified for
// the business object data equals to the number of partition columns defined in schema for the format selected for DDL generation.
if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions)) {
int businessObjectDataRegisteredPartitions = 1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues());
Assert.isTrue(businessObjectFormat.getSchema().getPartitions().size() == businessObjectDataRegisteredPartitions, String.format("Number of primary and sub-partition values (%d) specified for the business object data is not equal to " + "the number of partition columns (%d) defined in the schema of the business object format selected for DDL generation. " + "Business object data: {%s}, business object format: {%s}", businessObjectDataRegisteredPartitions, businessObjectFormat.getSchema().getPartitions().size(), businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity)));
} else // Otherwise, since the format version selected for DDL generation might not match the relative business object format version that business
// object data is registered against, validate that the number of sub-partition values specified for the business object data is less than
// the number of partition columns defined in schema for the format selected for DDL generation.
{
Assert.isTrue(businessObjectFormat.getSchema().getPartitions().size() > CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()), String.format("Number of subpartition values specified for the business object data is greater than or equal to " + "the number of partition columns defined in the schema of the business object format selected for DDL generation. " + "Business object data: {%s}, business object format: {%s}", businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity)));
}
// Get partition information. For multiple level partitioning, auto-discover subpartitions (subdirectories) not already included into the S3 key
// prefix. Each discovered partition requires a standalone "add partition" clause. Please note that due to the above validation check, there
// should be no auto discoverable sub-partition columns, when flag is set to suppress scan for unregistered sub-partitions.
// The sub-list below starts right after the primary partition column and the registered sub-partition columns.
List<SchemaColumn> autoDiscoverableSubPartitionColumns = businessObjectFormat.getSchema().getPartitions().subList(1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()), businessObjectFormat.getSchema().getPartitions().size());
for (HivePartitionDto hivePartition : getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, s3KeyPrefix, storageFilePaths, storageUnitEntity.getBusinessObjectData(), storageUnitEntity.getStorage().getName())) {
sb.append(String.format("ALTER TABLE `%s` ADD %sPARTITION (", generateDdlRequest.tableName, ifNotExistsOption));
// Specify all partition column values.
// NOTE(review): this assumes that each Hive partition carries one value per partition column of the schema - confirm against getHivePartitions.
List<String> partitionKeyValuePairs = new ArrayList<>();
for (int i = 0; i < businessObjectFormat.getSchema().getPartitions().size(); i++) {
String partitionColumnName = businessObjectFormat.getSchema().getPartitions().get(i).getName();
String partitionValue = hivePartition.getPartitionValues().get(i);
partitionKeyValuePairs.add(String.format("`%s`='%s'", partitionColumnName, partitionValue));
}
sb.append(StringUtils.join(partitionKeyValuePairs, ", "));
// The partition location is the S3 key prefix followed by the Hive partition path, when that path is not blank.
sb.append(String.format(") LOCATION 's3n://%s/%s%s';\n", s3BucketName, s3KeyPrefix, StringUtils.isNotBlank(hivePartition.getPath()) ? hivePartition.getPath() : ""));
}
} else // This is a non-partitioned table.
{
// Get location for this non-partitioned table.
String tableLocation = String.format("s3n://%s/%s", s3BucketName, s3KeyPrefix);
if (generateDdlRequest.customDdlEntity == null) {
// Since custom DDL was not specified and this table is not partitioned, add a LOCATION clause.
// This is the last line in the non-partitioned table DDL.
sb.append(String.format("LOCATION '%s';", tableLocation));
} else {
// Since custom DDL was used for a non-partitioned table, substitute the relative custom DDL token with the actual table location.
replacements.put(NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN, tableLocation);
}
}
}
}
use of org.finra.herd.model.api.xml.SchemaColumn in project herd by FINRAOS.
the class NotificationEventServiceImpl method processBusinessObjectDataNotifications.
/**
 * Triggers the job notification actions of the given notification registrations for a business object data event.
 *
 * @param notificationEventType the notification event type
 * @param businessObjectDataNotificationRegistrationEntities the notification registrations whose actions are to be processed
 * @param businessObjectData the business object data that the event relates to
 * @param newBusinessObjectDataStatus the new business object data status passed on to each triggered action
 * @param oldBusinessObjectDataStatus the old business object data status passed on to each triggered action
 *
 * @return the list of results returned by the triggered notification actions
 */
private List<Object> processBusinessObjectDataNotifications(String notificationEventType, List<BusinessObjectDataNotificationRegistrationEntity> businessObjectDataNotificationRegistrationEntities, BusinessObjectData businessObjectData, String newBusinessObjectDataStatus, String oldBusinessObjectDataStatus) {
    // Primary and sub-partition values, if any are specified, of the business object data.
    List<String> partitionValues = businessObjectDataHelper.getPrimaryAndSubPartitionValues(businessObjectData);

    // Look up the business object format that this business object data is associated with.
    BusinessObjectFormatKey formatKey =
        new BusinessObjectFormatKey(businessObjectData.getNamespace(), businessObjectData.getBusinessObjectDefinitionName(),
            businessObjectData.getBusinessObjectFormatUsage(), businessObjectData.getBusinessObjectFormatFileType(),
            businessObjectData.getBusinessObjectFormatVersion());
    BusinessObjectFormatEntity formatEntity = businessObjectFormatDao.getBusinessObjectFormatByAltKey(formatKey);

    // The partition column names stay null unless the format exists and has a schema with partition columns specified.
    List<String> partitionColumnNames = null;
    if (formatEntity != null) {
        // The model object gives direct access to the schema columns and partitions.
        BusinessObjectFormat format = businessObjectFormatHelper.createBusinessObjectFormatFromEntity(formatEntity);

        if (format.getSchema() != null && !CollectionUtils.isEmpty(format.getSchema().getPartitions())) {
            List<SchemaColumn> partitionColumns = format.getSchema().getPartitions();

            // Never report more partition column names than there are primary and sub-partition values registered.
            int nameCount = Math.min(partitionValues.size(), partitionColumns.size());

            partitionColumnNames = new ArrayList<>();
            for (SchemaColumn partitionColumn : partitionColumns.subList(0, nameCount)) {
                partitionColumnNames.add(partitionColumn.getName());
            }
        }
    }

    List<Object> triggeredActions = new ArrayList<>();

    for (BusinessObjectDataNotificationRegistrationEntity registration : businessObjectDataNotificationRegistrationEntities) {
        // The storage name is optional on a registration.
        String storageName = null;
        if (registration.getStorage() != null) {
            storageName = registration.getStorage().getName();
        }

        for (NotificationActionEntity action : registration.getNotificationActions()) {
            // Only job actions get triggered.
            if (!(action instanceof NotificationJobActionEntity)) {
                continue;
            }

            BusinessObjectDataNotificationEventParamsDto eventParams = new BusinessObjectDataNotificationEventParamsDto();
            eventParams.setBusinessObjectDataNotificationRegistration(registration);
            eventParams.setNotificationJobAction((NotificationJobActionEntity) action);
            eventParams.setEventType(notificationEventType);
            eventParams.setBusinessObjectData(businessObjectData);
            eventParams.setPartitionColumnNames(partitionColumnNames);
            eventParams.setStorageName(storageName);
            eventParams.setPartitionValues(partitionValues);
            eventParams.setNewBusinessObjectDataStatus(newBusinessObjectDataStatus);
            eventParams.setOldBusinessObjectDataStatus(oldBusinessObjectDataStatus);

            triggeredActions.add(triggerNotificationAction(NotificationTypeEntity.NOTIFICATION_TYPE_BDATA, notificationEventType, eventParams));
        }
    }

    return triggeredActions;
}
Aggregations