Use of com.thinkbiganalytics.policy.standardization.StandardizationPolicy in project kylo by Teradata.
From the class CleanseAndValidateRow, method standardizeAndValidateField.
/**
 * Applies every policy attached to a field, in the order returned by
 * {@code fieldPolicy.getAllPolicies()}, standardizing and validating the value as it goes.
 *
 * <p>Standardizers may replace the working value; validators may attach a failure to the result.
 * The first validator that returns something other than {@code VALID_RESULT} stops the policy loop.
 * A final check ({@code finalValidationCheck}) always runs on whatever value is left at the end.
 *
 * @param fieldPolicy        source of the ordered policy list, also handed to the final check
 * @param value              the incoming field value, may be {@code null}
 * @param dataType           target column type used for type conversion and validation
 * @param validatorParamType passed through unchanged to {@code validateValue}
 * @return the result holding the final field value and any validation failures collected
 */
StandardizationAndValidationResult standardizeAndValidateField(FieldPolicy fieldPolicy, Object value, HCatDataType dataType, Map<Class, Class> validatorParamType) {
    final StandardizationAndValidationResult outcome = new StandardizationAndValidationResult(value);
    final List<BaseFieldPolicy> allPolicies = fieldPolicy.getAllPolicies();

    // Count the standardizers up front so the last one can be recognised inside the main loop.
    int totalStandardizers = 0;
    for (BaseFieldPolicy candidate : allPolicies) {
        if (candidate instanceof StandardizationPolicy) {
            totalStandardizers += 1;
        }
    }

    boolean validateNullValues = false;
    int standardizersSeen = 0;

    for (BaseFieldPolicy policy : allPolicies) {
        // Emptiness is sampled once per policy, before this policy gets a chance to standardize;
        // the validation branch below deliberately sees this same (possibly stale) flag.
        final Object valueAtStart = outcome.getFieldValue();
        final boolean isEmpty = valueAtStart == null || StringUtils.isEmpty(valueAtStart.toString());

        if (policy instanceof StandardizationPolicy) {
            standardizersSeen += 1;
            final StandardizationPolicy standardizer = (StandardizationPolicy) policy;

            // accepts() is consulted unconditionally, even when the emptiness rule alone would veto.
            final boolean accepted = standardizer.accepts(outcome.getFieldValue());
            final boolean emptinessPermits = !isEmpty || standardizer instanceof AcceptsEmptyValues;

            if (accepted && emptinessPermits) {
                Object standardized = outcome.getFieldValue();
                try {
                    standardized = standardizer.convertRawValue(outcome.getFieldValue());
                } catch (Exception e) {
                    // Best effort: a failing standardizer leaves the current value untouched.
                    log.error("Standardizer '{}' threw exception while attempting to standardize value, original value will be kept. Exception: {}", standardizer.getClass(), e);
                }

                // Only after the last standardizer, and only when the value's class differs from the
                // column's convertible type, is the value checked against / converted to the target type.
                final boolean isLastStandardizer = (totalStandardizers == standardizersSeen);
                if (standardized != null && dataType.getConvertibleType() != standardized.getClass() && isLastStandardizer) {
                    try {
                        // Some values (the original comment cites dates and timestamps) are acceptable as strings.
                        final boolean validAsString = dataType.isStringValueValidForHiveType(standardized.toString());
                        if (!validAsString) {
                            // The string form is not acceptable as-is, so attempt a native conversion.
                            try {
                                standardized = dataType.toNativeValue(standardized.toString());
                            } catch (RuntimeException e) {
                                outcome.addValidationResult(ValidationResult.failField("incompatible", dataType.getName(), "Not convertible to " + dataType.getNativeType()));
                            }
                        }
                    } catch (InvalidFormatException e) {
                        log.warn("Could not convert value {} to correct type {}", standardized.toString(), dataType.getConvertibleType().getName());
                    }
                }
                outcome.setFieldValue(standardized);
            }
        }

        // Not an else-branch: the same policy object is tested against both interfaces.
        if (policy instanceof ValidationPolicy) {
            final ValidationPolicy validationPolicy = (ValidationPolicy) policy;
            final boolean isNotNullCheck = validationPolicy instanceof NotNullValidator;

            // Empty values are validated only by a not-null validator, or when the flag below is set.
            if (!isEmpty || validateNullValues || isNotNullCheck) {
                final ValidationResult verdict = validateValue(validationPolicy, dataType, outcome.getFieldValue(), validatorParamType);
                if (isEmpty && isNotNullCheck) {
                    // NOTE(review): this compares the policy object (not the validation result) with
                    // VALID_RESULT, so it is presumably always true -- confirm the intended operand.
                    validateNullValues = validationPolicy != VALID_RESULT;
                }
                // Only failures are recorded, and the first failure ends policy processing.
                if (verdict != VALID_RESULT) {
                    outcome.addValidationResult(verdict);
                    break;
                }
            }

            // Any validator other than the not-null one clears the flag again.
            if (!isNotNullCheck) {
                validateNullValues = false;
            }
        }
    }

    final ValidationResult closingCheck = finalValidationCheck(fieldPolicy, dataType, outcome.getFieldValue());
    if (closingCheck != VALID_RESULT) {
        outcome.addValidationResult(closingCheck);
    }
    return outcome;
}
Use of com.thinkbiganalytics.policy.standardization.StandardizationPolicy in project kylo by Teradata.
From the class FieldPolicyTransformer, method getStandardizationAndValidationPolicies.
/**
 * Converts the UI standardization and validation rules of {@code uiFieldPolicy} into domain
 * policies, ordered by each rule's sequence.
 *
 * <p>Standardization rules are gathered first, then validation rules. A rule with no sequence is
 * given its position in that combined list (this writes back to the rule object) before the
 * list is sorted by sequence. If a {@code listener} is set, it is notified of every policy added.
 *
 * @return the transformed policies in sequence order; empty when there are no rules
 * @throws RuntimeException wrapping the {@code PolicyTransformException} if a rule cannot be transformed
 */
public List<BaseFieldPolicy> getStandardizationAndValidationPolicies() {
    final List<FieldStandardizationRule> standardizationRules = uiFieldPolicy.getStandardization();
    final List<FieldValidationRule> validationRules = uiFieldPolicy.getValidation();

    final List<BaseUiPolicyRule> uiRules = new ArrayList<>();
    if (standardizationRules != null) {
        uiRules.addAll(standardizationRules);
    }
    if (validationRules != null) {
        uiRules.addAll(validationRules);
    }

    // Give every rule a sequence; rules without one fall back to their position in the combined list.
    for (int position = 0; position < uiRules.size(); position++) {
        final BaseUiPolicyRule rule = uiRules.get(position);
        if (rule.getSequence() == null) {
            rule.setSequence(position);
        }
    }

    Collections.sort(uiRules, new Comparator<BaseUiPolicyRule>() {
        @Override
        public int compare(BaseUiPolicyRule left, BaseUiPolicyRule right) {
            // Null entries sort after non-null ones; two nulls are equal.
            if (left == null) {
                return (right == null) ? 0 : 1;
            }
            if (right == null) {
                return -1;
            }
            return left.getSequence().compareTo(right.getSequence());
        }
    });

    final List<BaseFieldPolicy> policies = new ArrayList<>();
    for (BaseUiPolicyRule rule : uiRules) {
        try {
            if (rule instanceof FieldStandardizationRule) {
                final StandardizationPolicy standardizer = StandardizationAnnotationTransformer.instance().fromUiModel((FieldStandardizationRule) rule);
                policies.add(standardizer);
                if (listener != null) {
                    listener.onAddStandardizationPolicy(standardizer);
                }
            } else if (rule instanceof FieldValidationRule) {
                final ValidationPolicy validator = ValidatorAnnotationTransformer.instance().fromUiModel((FieldValidationRule) rule);
                policies.add(validator);
                if (listener != null) {
                    listener.onAddValidationPolicy(validator);
                }
            }
        } catch (PolicyTransformException e) {
            // Surface transformation failures as unchecked, keeping the original cause.
            throw new RuntimeException(e);
        }
    }
    return policies;
}
Use of com.thinkbiganalytics.policy.standardization.StandardizationPolicy in project kylo by Teradata.
From the class CleanseAndValidateRowTest, method exceptionsShouldNotStopStandardization.
/**
 * Checks that a field whose only policy is {@code EXCEPTION_POLICY} comes back from
 * {@code standardizeAndValidateField} with its original value.
 *
 * <p>{@code EXCEPTION_POLICY} is defined outside this view; presumably it is a standardizer
 * that throws when asked to convert a value -- confirm against its declaration.
 */
@Test
public void exceptionsShouldNotStopStandardization() {
    final String column = "field1";

    final List<BaseFieldPolicy> onlyThrowingPolicy = new ArrayList<>();
    onlyThrowingPolicy.add(EXCEPTION_POLICY);

    final FieldPolicy policyForColumn = FieldPolicyBuilder.newBuilder()
        .addPolicies(onlyThrowingPolicy)
        .tableName("emp")
        .fieldName(column)
        .feedFieldName(column)
        .build();
    final HCatDataType stringColumnType = HCatDataType.createFromDataType(column, "string");

    final StandardizationAndValidationResult outcome =
        validator.standardizeAndValidateField(policyForColumn, "aafooaa", stringColumnType, new HashMap<Class, Class>());

    // NOTE(review): if this is JUnit's assertEquals the arguments are in (actual, expected) order;
    // that does not affect pass/fail, only the failure message -- confirm the framework.
    assertEquals(outcome.getFieldValue(), "aafooaa");
}
Aggregations