use of com.thinkbiganalytics.policy.BaseFieldPolicy in project kylo by Teradata.
the class CleanseAndValidateRow method standardizeAndValidateField.
/**
 * Applies every policy configured for a field to a single value, in the order returned by
 * {@code fieldPolicy.getAllPolicies()}.
 *
 * <p>Standardization policies may replace the running value held in the result; validation policies
 * check the running value. The policy loop stops at the first validation failure. After the loop,
 * {@code finalValidationCheck} is always applied to the resulting value.
 *
 * @param fieldPolicy        supplies the ordered policy list and is passed to {@code finalValidationCheck}
 * @param value              the raw field value; may be {@code null}
 * @param dataType           the target column type, used for type conversion and passed to the validators
 * @param validatorParamType passed through unchanged to {@code validateValue}
 * @return the result holding the final field value and any validation failures that were recorded
 */
StandardizationAndValidationResult standardizeAndValidateField(FieldPolicy fieldPolicy, Object value, HCatDataType dataType, Map<Class, Class> validatorParamType) {
StandardizationAndValidationResult result = new StandardizationAndValidationResult(value);
List<BaseFieldPolicy> fieldPolicies = fieldPolicy.getAllPolicies();
// Count the standardizers up front so the main loop can tell when it has reached the last one.
int standardizerCount = 0;
for (BaseFieldPolicy p : fieldPolicies) {
if (p instanceof StandardizationPolicy) {
standardizerCount++;
}
}
// When true, the next validator is run even though the current value is empty.
boolean validateNullValues = false;
int processedStandardizers = 0;
for (BaseFieldPolicy p : fieldPolicies) {
// Emptiness is re-evaluated for every policy because an earlier standardizer may have replaced the value.
boolean isEmpty = ((result.getFieldValue() == null) || (StringUtils.isEmpty(result.getFieldValue().toString())));
if (p instanceof StandardizationPolicy) {
// Counted even when the standardizer ends up being skipped below.
processedStandardizers++;
StandardizationPolicy standardizationPolicy = (StandardizationPolicy) p;
boolean shouldStandardize = true;
// Empty values are only handed to standardizers that are marked with AcceptsEmptyValues.
if (isEmpty && !(standardizationPolicy instanceof AcceptsEmptyValues)) {
shouldStandardize = false;
}
// The standardizer itself may also decline the value.
if (!standardizationPolicy.accepts(result.getFieldValue())) {
shouldStandardize = false;
}
if (shouldStandardize) {
// Starts as the current value so that a failing standardizer leaves the value unchanged.
Object newValue = result.getFieldValue();
try {
newValue = standardizationPolicy.convertRawValue(result.getFieldValue());
} catch (Exception e) {
log.error("Standardizer '{}' threw exception while attempting to standardize value, original value will be kept. Exception: {}", standardizationPolicy.getClass(), e);
}
// If this is the last standardizer for this field and the class of the standardized value differs from the
// data type's convertible type, then validate it and convert it to the correct type.
if (newValue != null && dataType.getConvertibleType() != newValue.getClass() && standardizerCount == processedStandardizers) {
try {
// Date and timestamp fields can be valid as strings
boolean isValueOk = dataType.isStringValueValidForHiveType(newValue.toString());
if (!isValueOk) {
// if the current string is not in a correct format attempt to convert it
try {
newValue = dataType.toNativeValue(newValue.toString());
} catch (RuntimeException e) {
// Conversion failed: record a field failure; newValue stays unconverted and is still stored below.
result.addValidationResult(ValidationResult.failField("incompatible", dataType.getName(), "Not convertible to " + dataType.getNativeType()));
}
}
} catch (InvalidFormatException e) {
// Only logged; no validation failure is recorded for this case.
log.warn("Could not convert value {} to correct type {}", newValue.toString(), dataType.getConvertibleType().getName());
}
}
result.setFieldValue(newValue);
}
}
if (p instanceof ValidationPolicy) {
ValidationPolicy validationPolicy = (ValidationPolicy) p;
// Non-empty values are always validated. Empty values are validated only by a NotNullValidator,
// or when validateNullValues has been set by an earlier NotNullValidator.
if (!isEmpty || validateNullValues || validationPolicy instanceof NotNullValidator) {
ValidationResult validationResult = validateValue(validationPolicy, dataType, result.getFieldValue(), validatorParamType);
if (isEmpty && validationPolicy instanceof NotNullValidator) {
// NOTE(review): this compares the policy object (validationPolicy) with VALID_RESULT rather than
// validationResult, so the flag appears to be set to true unconditionally here. This looks unintended;
// confirm the intent before changing it, since the flag decides whether later validators see empty values.
validateNullValues = validationPolicy != VALID_RESULT;
}
// only need to add those that are invalid
if (validationResult != VALID_RESULT) {
result.addValidationResult(validationResult);
// Stop processing the remaining policies at the first failure; the final check below still runs.
break;
}
}
// Any validator other than a NotNullValidator clears validateNullValues, so the flag
// carries no further than the next such validator.
if (!(validationPolicy instanceof NotNullValidator)) {
validateNullValues = false;
}
}
}
// Always run the final check against whatever value the policies produced.
ValidationResult finalValidationCheck = finalValidationCheck(fieldPolicy, dataType, result.getFieldValue());
if (finalValidationCheck != VALID_RESULT) {
result.addValidationResult(finalValidationCheck);
}
return result;
}
use of com.thinkbiganalytics.policy.BaseFieldPolicy in project kylo by Teradata.
the class CleanseAndValidateRowTest method rangeValidate.
/**
 * Validates {@code value} against a single {@code RangeValidator} built from the given bounds.
 *
 * @param min      lower bound handed to the {@code RangeValidator}
 * @param max      upper bound handed to the {@code RangeValidator}
 * @param dataType data type name used to create the {@code HCatDataType} for the field
 * @param value    the raw value to validate
 * @return the final validation result reported for the field
 */
private ValidationResult rangeValidate(Number min, Number max, String dataType, String value) {
    RangeValidator validatorPolicy = new RangeValidator(min, max);
    List<BaseFieldPolicy> policies = new ArrayList<>();
    policies.add(validatorPolicy);
    // Register the policy list exactly once on the builder.
    FieldPolicy fieldPolicy = FieldPolicyBuilder.newBuilder().addPolicies(policies).tableName("emp").fieldName("field1").feedFieldName("field1").build();
    StandardizationAndValidationResult result = validator.standardizeAndValidateField(fieldPolicy, value, HCatDataType.createFromDataType("field1", dataType), new HashMap<Class, Class>());
    return result.getFinalValidationResult();
}
use of com.thinkbiganalytics.policy.BaseFieldPolicy in project kylo by Teradata.
the class CleanseAndValidateRowTest method mixedStandardizeAndValidate.
/**
 * Interleaves upper-/lower-case standardizers with the matching character validators (two full rounds)
 * and expects the field to pass validation and end up lower-cased.
 */
@Test
public void mixedStandardizeAndValidate() {
    final String rawValue = "TeSt_fiELd";
    final String column = "field1";

    // Each round: upper-case + check, then lower-case + check.
    List<BaseFieldPolicy> chain = new ArrayList<>();
    for (int round = 0; round < 2; round++) {
        chain.add(UppercaseStandardizer.instance());
        chain.add(new CharacterValidator("UPPERCASE"));
        chain.add(LowercaseStandardizer.instance());
        chain.add(new CharacterValidator("LOWERCASE"));
    }

    FieldPolicy policy = FieldPolicyBuilder.newBuilder()
        .addPolicies(chain)
        .tableName("emp")
        .fieldName(column)
        .feedFieldName(column)
        .build();
    HCatDataType stringType = HCatDataType.createFromDataType(column, "string");

    StandardizationAndValidationResult outcome =
        validator.standardizeAndValidateField(policy, rawValue, stringType, new HashMap<Class, Class>());

    assertEquals(StandardDataValidator.VALID_RESULT, outcome.getFinalValidationResult());
    assertEquals("test_field", outcome.getFieldValue());
}
use of com.thinkbiganalytics.policy.BaseFieldPolicy in project kylo by Teradata.
the class CleanseAndValidateRowTest method invalidStandardizeAndValidate.
/**
 * Runs a replacer/lookup-validator chain on a value and expects the value produced by the first
 * replacer to be kept while the overall validation result is not valid.
 */
@Test
public void invalidStandardizeAndValidate() {
    final String column = "field1";

    // Policies are evaluated in insertion order.
    List<BaseFieldPolicy> chain = new ArrayList<>();
    chain.add(new SimpleRegexReplacer("(?i)foo", "bar"));
    chain.add(new LookupValidator("blah"));
    chain.add(new SimpleRegexReplacer("(?i)bar", "test"));
    chain.add(new LookupValidator("aatestaa"));

    FieldPolicy policy = FieldPolicyBuilder.newBuilder()
        .addPolicies(chain)
        .tableName("emp")
        .fieldName(column)
        .feedFieldName(column)
        .build();
    HCatDataType stringType = HCatDataType.createFromDataType(column, "string");

    StandardizationAndValidationResult outcome =
        validator.standardizeAndValidateField(policy, "aafooaa", stringType, new HashMap<Class, Class>());

    assertEquals("aabaraa", outcome.getFieldValue());
    assertNotEquals(StandardDataValidator.VALID_RESULT, outcome.getFinalValidationResult());
}
use of com.thinkbiganalytics.policy.BaseFieldPolicy in project kylo by Teradata.
the class CleanseAndValidateRowTest method standardizeRegex.
/**
 * Checks a single {@code SimpleRegexReplacer} policy: matching text is replaced, while {@code null}
 * and empty input come back unchanged.
 */
@Test
public void standardizeRegex() {
    SimpleRegexReplacer standardizer = new SimpleRegexReplacer("(?i)foo", "bar");
    String fieldName = "field1";
    List<BaseFieldPolicy> policies = new ArrayList<>();
    policies.add(standardizer);
    FieldPolicy fieldPolicy = FieldPolicyBuilder.newBuilder().addPolicies(policies).tableName("emp").fieldName(fieldName).feedFieldName(fieldName).build();
    HCatDataType fieldDataType = HCatDataType.createFromDataType(fieldName, "string");

    // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
    StandardizationAndValidationResult result = validator.standardizeAndValidateField(fieldPolicy, "aafooaa", fieldDataType, new HashMap<Class, Class>());
    assertEquals("aabaraa", result.getFieldValue());

    // A null value stays null.
    result = validator.standardizeAndValidateField(fieldPolicy, null, fieldDataType, new HashMap<Class, Class>());
    assertNull(result.getFieldValue());

    // An empty string stays empty.
    result = validator.standardizeAndValidateField(fieldPolicy, "", fieldDataType, new HashMap<Class, Class>());
    assertEquals("", result.getFieldValue());
}
Aggregations