Search in sources :

Example 1 with StandardizationPolicy

use of com.thinkbiganalytics.policy.standardization.StandardizationPolicy in project kylo by Teradata.

In the class CleanseAndValidateRow, method standardizeAndValidateField:

/**
 * Runs every policy attached to a field, in the order returned by
 * {@code fieldPolicy.getAllPolicies()}, against a single value: standardization policies may
 * replace the value, validation policies may add a failure to the result.
 *
 * <p>Processing of the policy list stops at the first validation policy that returns something
 * other than {@code VALID_RESULT}. A final validation check is always executed afterwards on
 * whatever value the result holds at that point.
 *
 * @param fieldPolicy        holder of the standardization/validation policies for the field
 * @param value              the raw field value used to seed the result; may be {@code null}
 * @param dataType           target column type, used to check/convert the standardized value
 * @param validatorParamType passed through unchanged to {@code validateValue}
 *                           (NOTE(review): exact meaning is not visible in this method — confirm)
 * @return the result carrying the (possibly standardized) value and any validation failures
 */
StandardizationAndValidationResult standardizeAndValidateField(FieldPolicy fieldPolicy, Object value, HCatDataType dataType, Map<Class, Class> validatorParamType) {
    StandardizationAndValidationResult result = new StandardizationAndValidationResult(value);
    List<BaseFieldPolicy> fieldPolicies = fieldPolicy.getAllPolicies();
    // Count the standardizers up front so the loop below can tell when it is running the last one.
    int standardizerCount = 0;
    for (BaseFieldPolicy p : fieldPolicies) {
        if (p instanceof StandardizationPolicy) {
            standardizerCount++;
        }
    }
    // When true, the next validator is run even if the current value is empty.
    boolean validateNullValues = false;
    int processedStandardizers = 0;
    for (BaseFieldPolicy p : fieldPolicies) {
        // Snapshot of emptiness taken once per policy, BEFORE that policy is applied; it reflects
        // the value as left by the previous policies.
        boolean isEmpty = ((result.getFieldValue() == null) || (StringUtils.isEmpty(result.getFieldValue().toString())));
        if (p instanceof StandardizationPolicy) {
            processedStandardizers++;
            StandardizationPolicy standardizationPolicy = (StandardizationPolicy) p;
            boolean shouldStandardize = true;
            // Empty values are only handed to standardizers that opt in via AcceptsEmptyValues.
            if (isEmpty && !(standardizationPolicy instanceof AcceptsEmptyValues)) {
                shouldStandardize = false;
            }
            // The standardizer itself may also decline the value.
            if (!standardizationPolicy.accepts(result.getFieldValue())) {
                shouldStandardize = false;
            }
            if (shouldStandardize) {
                // Start from the current value so that a failing standardizer leaves it untouched.
                Object newValue = result.getFieldValue();
                try {
                    newValue = standardizationPolicy.convertRawValue(result.getFieldValue());
                } catch (Exception e) {
                    // Best effort: a throwing standardizer is logged and skipped, not propagated.
                    log.error("Standardizer '{}' threw exception while attempting to standardize value, original value will be kept. Exception: {}", standardizationPolicy.getClass(), e);
                }
                // If this is the last standardizer for this field and the standardized value is returned as a String, and target column is not String, then validate and convert it to correct type
                if (newValue != null && dataType.getConvertibleType() != newValue.getClass() && standardizerCount == processedStandardizers) {
                    try {
                        // Date and timestamp fields can be valid as strings
                        boolean isValueOk = dataType.isStringValueValidForHiveType(newValue.toString());
                        if (!isValueOk) {
                            // if the current string is not in a correct format attempt to convert it
                            try {
                                newValue = dataType.toNativeValue(newValue.toString());
                            } catch (RuntimeException e) {
                                // Conversion failed: record a field failure; newValue keeps its unconverted form.
                                result.addValidationResult(ValidationResult.failField("incompatible", dataType.getName(), "Not convertible to " + dataType.getNativeType()));
                            }
                        }
                    } catch (InvalidFormatException e) {
                        // Only logged; no validation failure is added here and the value is still stored below.
                        log.warn("Could not convert value {} to correct type {}", newValue.toString(), dataType.getConvertibleType().getName());
                    }
                }
                result.setFieldValue(newValue);
            }
        }
        // Not an else-branch: a policy that is both a standardizer and a validator gets both treatments.
        if (p instanceof ValidationPolicy) {
            ValidationPolicy validationPolicy = (ValidationPolicy) p;
            // not null validator
            // A validator runs when the value is non-empty, when the flag set by a preceding
            // NotNullValidator is on, or when the validator is itself a NotNullValidator.
            if (!isEmpty || validateNullValues || validationPolicy instanceof NotNullValidator) {
                ValidationResult validationResult = validateValue(validationPolicy, dataType, result.getFieldValue(), validatorParamType);
                if (isEmpty && validationPolicy instanceof NotNullValidator) {
                    // NOTE(review): this compares the policy object (validationPolicy) with VALID_RESULT,
                    // not the validationResult computed above. Assuming VALID_RESULT is a ValidationResult
                    // constant, the comparison is presumably always true — confirm the intended operand
                    // before changing it, since it controls whether later validators see empty values.
                    validateNullValues = validationPolicy != VALID_RESULT;
                }
                // only need to add those that are invalid
                if (validationResult != VALID_RESULT) {
                    result.addValidationResult(validationResult);
                    // exit out of processing if invalid records found.
                    break;
                }
            }
            // reset the failOnEmpty flag back to false
            // (the flag therefore only survives across consecutive NotNullValidators and applies
            // to the first other validator that follows them)
            if (!(validationPolicy instanceof NotNullValidator)) {
                validateNullValues = false;
            }
        }
    }
    // Runs even when the loop above exited early via break.
    ValidationResult finalValidationCheck = finalValidationCheck(fieldPolicy, dataType, result.getFieldValue());
    if (finalValidationCheck != VALID_RESULT) {
        result.addValidationResult(finalValidationCheck);
    }
    return result;
}
Also used : NotNullValidator(com.thinkbiganalytics.policy.validation.NotNullValidator) AcceptsEmptyValues(com.thinkbiganalytics.policy.standardization.AcceptsEmptyValues) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) ValidationResult(com.thinkbiganalytics.policy.validation.ValidationResult) InvalidFormatException(com.thinkbiganalytics.spark.util.InvalidFormatException) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) InvalidFormatException(com.thinkbiganalytics.spark.util.InvalidFormatException) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) ValidationPolicy(com.thinkbiganalytics.policy.validation.ValidationPolicy) StandardizationPolicy(com.thinkbiganalytics.policy.standardization.StandardizationPolicy)

Example 2 with StandardizationPolicy

use of com.thinkbiganalytics.policy.standardization.StandardizationPolicy in project kylo by Teradata.

In the class FieldPolicyTransformer, method getStandardizationAndValidationPolicies:

/**
 * Converts the UI standardization and validation rules of {@code uiFieldPolicy} into domain
 * policies, ordered by each rule's sequence number.
 *
 * <p>Standardization rules are collected first, then validation rules. Any rule without a
 * sequence is assigned its position in that combined list before sorting. {@code null} rule
 * entries are tolerated: they are skipped when defaulting sequences, sorted to the end, and
 * produce no policy. When a listener is set, it is notified of every policy added.
 *
 * @return the converted policies in sequence order; never {@code null}
 * @throws RuntimeException wrapping the {@code PolicyTransformException} if a rule cannot be converted
 */
public List<BaseFieldPolicy> getStandardizationAndValidationPolicies() {
    List<BaseFieldPolicy> policies = new ArrayList<>();
    List<FieldStandardizationRule> standardization = uiFieldPolicy.getStandardization();
    List<FieldValidationRule> validation = uiFieldPolicy.getValidation();
    List<BaseUiPolicyRule> allUiPolicies = new ArrayList<>();
    if (standardization != null) {
        allUiPolicies.addAll(standardization);
    }
    if (validation != null) {
        allUiPolicies.addAll(validation);
    }
    // ensure the sequence is set
    int idx = 0;
    for (BaseUiPolicyRule rule : allUiPolicies) {
        // Guard against null entries: the comparator and the conversion loop below already
        // tolerate them, so this loop must not be the one place that throws on them.
        if (rule != null && rule.getSequence() == null) {
            rule.setSequence(idx);
        }
        // idx advances for every entry so a defaulted sequence equals the list position.
        idx++;
    }
    Collections.sort(allUiPolicies, new Comparator<BaseUiPolicyRule>() {

        @Override
        public int compare(BaseUiPolicyRule o1, BaseUiPolicyRule o2) {
            // Null entries sort after all non-null entries.
            if (o1 == null) {
                return (o2 == null) ? 0 : 1;
            }
            if (o2 == null) {
                return -1;
            }
            // Sequences are non-null here: the loop above defaulted every missing one.
            Integer sq1 = o1.getSequence();
            Integer sq2 = o2.getSequence();
            return sq1.compareTo(sq2);
        }
    });
    for (BaseUiPolicyRule rule : allUiPolicies) {
        try {
            if (rule instanceof FieldStandardizationRule) {
                StandardizationPolicy policy = StandardizationAnnotationTransformer.instance().fromUiModel((FieldStandardizationRule) rule);
                policies.add(policy);
                if (listener != null) {
                    listener.onAddStandardizationPolicy(policy);
                }
            } else if (rule instanceof FieldValidationRule) {
                ValidationPolicy policy = ValidatorAnnotationTransformer.instance().fromUiModel((FieldValidationRule) rule);
                policies.add(policy);
                if (listener != null) {
                    listener.onAddValidationPolicy(policy);
                }
            }
            // Entries of any other type (including null) are skipped.
        } catch (PolicyTransformException e) {
            // Surface conversion failures unchecked, keeping the original exception as the cause.
            throw new RuntimeException(e);
        }
    }
    return policies;
}
Also used : FieldValidationRule(com.thinkbiganalytics.policy.rest.model.FieldValidationRule) ArrayList(java.util.ArrayList) ValidationPolicy(com.thinkbiganalytics.policy.validation.ValidationPolicy) BaseUiPolicyRule(com.thinkbiganalytics.policy.rest.model.BaseUiPolicyRule) StandardizationPolicy(com.thinkbiganalytics.policy.standardization.StandardizationPolicy) FieldStandardizationRule(com.thinkbiganalytics.policy.rest.model.FieldStandardizationRule)

Example 3 with StandardizationPolicy

use of com.thinkbiganalytics.policy.standardization.StandardizationPolicy in project kylo by Teradata.

In the class CleanseAndValidateRowTest, method exceptionsShouldNotStopStandardization:

/**
 * Verifies that the input value is returned unchanged when the field's only policy is
 * {@code EXCEPTION_POLICY} (presumably a standardizer whose conversion throws — confirm against
 * its definition).
 */
@Test
public void exceptionsShouldNotStopStandardization() {
    StandardizationPolicy standardizer = EXCEPTION_POLICY;
    String fieldName = "field1";
    List<BaseFieldPolicy> policies = new ArrayList<>();
    policies.add(standardizer);
    FieldPolicy fieldPolicy = FieldPolicyBuilder.newBuilder().addPolicies(policies).tableName("emp").fieldName(fieldName).feedFieldName(fieldName).build();
    HCatDataType fieldDataType = HCatDataType.createFromDataType(fieldName, "string");
    StandardizationAndValidationResult result = validator.standardizeAndValidateField(fieldPolicy, "aafooaa", fieldDataType, new HashMap<Class, Class>());
    // JUnit's assertEquals takes (expected, actual); keeping that order makes a failure
    // message label the two values correctly.
    assertEquals("aafooaa", result.getFieldValue());
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) StandardizationPolicy(com.thinkbiganalytics.policy.standardization.StandardizationPolicy) HCatDataType(com.thinkbiganalytics.spark.validation.HCatDataType) ArrayList(java.util.ArrayList) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) Test(org.junit.Test)

Aggregations

StandardizationPolicy (com.thinkbiganalytics.policy.standardization.StandardizationPolicy): 3, BaseFieldPolicy (com.thinkbiganalytics.policy.BaseFieldPolicy): 2, ValidationPolicy (com.thinkbiganalytics.policy.validation.ValidationPolicy): 2, StandardizationAndValidationResult (com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult): 2, ArrayList (java.util.ArrayList): 2, FieldPolicy (com.thinkbiganalytics.policy.FieldPolicy): 1, BaseUiPolicyRule (com.thinkbiganalytics.policy.rest.model.BaseUiPolicyRule): 1, FieldStandardizationRule (com.thinkbiganalytics.policy.rest.model.FieldStandardizationRule): 1, FieldValidationRule (com.thinkbiganalytics.policy.rest.model.FieldValidationRule): 1, AcceptsEmptyValues (com.thinkbiganalytics.policy.standardization.AcceptsEmptyValues): 1, NotNullValidator (com.thinkbiganalytics.policy.validation.NotNullValidator): 1, ValidationResult (com.thinkbiganalytics.policy.validation.ValidationResult): 1, InvalidFormatException (com.thinkbiganalytics.spark.util.InvalidFormatException): 1, HCatDataType (com.thinkbiganalytics.spark.validation.HCatDataType): 1, Test (org.junit.Test): 1