Search in sources :

Example 11 with FieldPolicy

use of com.thinkbiganalytics.policy.FieldPolicy in project kylo by Teradata.

Class CleanseAndValidateRowTest, method standardizeAndValidate.

/**
 * Runs the value {@code "aafooaa"} through a policy list made of two regex replacers
 * interleaved with two lookup validators, then expects the resulting field value to be
 * {@code "aatestaa"} and the final validation result to be
 * {@code StandardDataValidator.VALID_RESULT}.
 */
@Test
public void standardizeAndValidate() {
    String fieldName = "field1";
    List<BaseFieldPolicy> policies = new ArrayList<>();
    // Policies are registered in this exact order: replacer, validator, replacer, validator.
    policies.add(new SimpleRegexReplacer("(?i)foo", "bar"));
    policies.add(new LookupValidator("aabaraa"));
    policies.add(new SimpleRegexReplacer("(?i)bar", "test"));
    policies.add(new LookupValidator("aatestaa"));
    FieldPolicy fieldPolicy = FieldPolicyBuilder.newBuilder().addPolicies(policies).tableName("emp").fieldName(fieldName).feedFieldName(fieldName).build();
    HCatDataType fieldDataType = HCatDataType.createFromDataType(fieldName, "string");
    StandardizationAndValidationResult result = validator.standardizeAndValidateField(fieldPolicy, "aafooaa", fieldDataType, new HashMap<Class, Class>());
    // Expected value goes first so a failure reports "expected aatestaa but was ..." the right way round.
    assertEquals("aatestaa", result.getFieldValue());
    assertEquals(StandardDataValidator.VALID_RESULT, result.getFinalValidationResult());
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) HCatDataType(com.thinkbiganalytics.spark.validation.HCatDataType) ArrayList(java.util.ArrayList) LookupValidator(com.thinkbiganalytics.policy.validation.LookupValidator) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) SimpleRegexReplacer(com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) Test(org.junit.Test)

Example 12 with FieldPolicy

use of com.thinkbiganalytics.policy.FieldPolicy in project kylo by Teradata.

Class CleanseAndValidateRowTest, method nullValueStandardizeAndValidate.

/**
 * Passes a {@code null} field value through a policy list of two regex replacers and two
 * lookup validators and expects the final validation result to be
 * {@code StandardDataValidator.VALID_RESULT}.
 */
@Test
public void nullValueStandardizeAndValidate() {
    final String column = "field1";

    final List<BaseFieldPolicy> chain = new ArrayList<>();
    chain.add(new SimpleRegexReplacer("(?i)foo", "bar"));
    chain.add(new LookupValidator("blah"));
    chain.add(new SimpleRegexReplacer("(?i)bar", "test"));
    chain.add(new LookupValidator("aatestaa"));

    final FieldPolicy policy = FieldPolicyBuilder.newBuilder()
        .addPolicies(chain)
        .tableName("emp")
        .fieldName(column)
        .feedFieldName(column)
        .build();
    final HCatDataType stringType = HCatDataType.createFromDataType(column, "string");

    // The value under test is null; only the final validation outcome is checked.
    final StandardizationAndValidationResult outcome =
        validator.standardizeAndValidateField(policy, null, stringType, new HashMap<Class, Class>());

    assertEquals(StandardDataValidator.VALID_RESULT, outcome.getFinalValidationResult());
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) HCatDataType(com.thinkbiganalytics.spark.validation.HCatDataType) ArrayList(java.util.ArrayList) LookupValidator(com.thinkbiganalytics.policy.validation.LookupValidator) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) SimpleRegexReplacer(com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) Test(org.junit.Test)

Example 13 with FieldPolicy

use of com.thinkbiganalytics.policy.FieldPolicy in project kylo by Teradata.

Class CleanseAndValidateRowTest, method exceptionsShouldNotStopStandardization.

/**
 * Runs the value {@code "aafooaa"} through a policy list containing only
 * {@code EXCEPTION_POLICY} and expects the field value to come back unchanged.
 *
 * NOTE(review): {@code EXCEPTION_POLICY} is declared outside this method; presumably it
 * throws when applied -- confirm against its definition.
 */
@Test
public void exceptionsShouldNotStopStandardization() {
    String fieldName = "field1";
    List<BaseFieldPolicy> policies = new ArrayList<>();
    policies.add(EXCEPTION_POLICY);
    FieldPolicy fieldPolicy = FieldPolicyBuilder.newBuilder().addPolicies(policies).tableName("emp").fieldName(fieldName).feedFieldName(fieldName).build();
    HCatDataType fieldDataType = HCatDataType.createFromDataType(fieldName, "string");
    StandardizationAndValidationResult result = validator.standardizeAndValidateField(fieldPolicy, "aafooaa", fieldDataType, new HashMap<Class, Class>());
    // Expected value goes first so a failure message is reported the right way round.
    assertEquals("aafooaa", result.getFieldValue());
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) StandardizationPolicy(com.thinkbiganalytics.policy.standardization.StandardizationPolicy) HCatDataType(com.thinkbiganalytics.spark.validation.HCatDataType) ArrayList(java.util.ArrayList) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) Test(org.junit.Test)

Example 14 with FieldPolicy

use of com.thinkbiganalytics.policy.FieldPolicy in project kylo by Teradata.

Class CleanseAndValidateRowTest, method standardizeShouldNotChangeType.

/**
 * Applies {@code ADD_ONE_STANDARDISATION_POLICY} twice to the integer value {@code 0} on an
 * {@code "int"} column and expects the field value {@code 2} together with
 * {@code StandardDataValidator.VALID_RESULT}.
 */
@Test
public void standardizeShouldNotChangeType() {
    final String column = "field1";

    // The same policy instance is registered twice.
    final List<BaseFieldPolicy> chain = new ArrayList<>();
    for (int i = 0; i < 2; i++) {
        chain.add(ADD_ONE_STANDARDISATION_POLICY);
    }

    final FieldPolicy policy = FieldPolicyBuilder.newBuilder()
        .addPolicies(chain)
        .tableName("temp")
        .fieldName(column)
        .feedFieldName(column)
        .build();
    final HCatDataType intType = HCatDataType.createFromDataType(column, "int");

    final StandardizationAndValidationResult outcome =
        validator.standardizeAndValidateField(policy, 0, intType, new HashMap<Class, Class>());

    assertEquals(2, outcome.getFieldValue());
    assertEquals(StandardDataValidator.VALID_RESULT, outcome.getFinalValidationResult());
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) HCatDataType(com.thinkbiganalytics.spark.validation.HCatDataType) ArrayList(java.util.ArrayList) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) Test(org.junit.Test)

Example 15 with FieldPolicy

use of com.thinkbiganalytics.policy.FieldPolicy in project kylo by Teradata.

Class CleanseAndValidateRowTest, method notNullValidate.

/**
 * Validates a single value against a {@link NotNullValidator} and returns the final result.
 *
 * @param dataType         type name handed to {@code HCatDataType.createFromDataType}
 * @param value            the field value to validate
 * @param allowEmptyString first argument forwarded to the {@code NotNullValidator} constructor
 * @param trimString       second argument forwarded to the {@code NotNullValidator} constructor
 * @return the final validation result reported for the value
 */
private ValidationResult notNullValidate(String dataType, String value, boolean allowEmptyString, boolean trimString) {
    final String column = "field1";

    final List<BaseFieldPolicy> chain = new ArrayList<>();
    chain.add(new NotNullValidator(allowEmptyString, trimString));

    final FieldPolicy policy = FieldPolicyBuilder.newBuilder()
        .addPolicies(chain)
        .tableName("emp")
        .fieldName(column)
        .feedFieldName(column)
        .build();
    final HCatDataType columnType = HCatDataType.createFromDataType(column, dataType);

    return validator
        .standardizeAndValidateField(policy, value, columnType, new HashMap<Class, Class>())
        .getFinalValidationResult();
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) ArrayList(java.util.ArrayList) NotNullValidator(com.thinkbiganalytics.policy.validation.NotNullValidator) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult)

Aggregations

FieldPolicy (com.thinkbiganalytics.policy.FieldPolicy)16 ArrayList (java.util.ArrayList)12 BaseFieldPolicy (com.thinkbiganalytics.policy.BaseFieldPolicy)10 StandardizationAndValidationResult (com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult)10 HCatDataType (com.thinkbiganalytics.spark.validation.HCatDataType)9 Test (org.junit.Test)7 SimpleRegexReplacer (com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer)4 LookupValidator (com.thinkbiganalytics.policy.validation.LookupValidator)3 HashMap (java.util.HashMap)3 StructField (org.apache.spark.sql.types.StructField)3 Nonnull (javax.annotation.Nonnull)2 FieldPoliciesJsonTransformer (com.thinkbiganalytics.policy.FieldPoliciesJsonTransformer)1 StandardizationPolicy (com.thinkbiganalytics.policy.standardization.StandardizationPolicy)1 CharacterValidator (com.thinkbiganalytics.policy.validation.CharacterValidator)1 NotNullValidator (com.thinkbiganalytics.policy.validation.NotNullValidator)1 RangeValidator (com.thinkbiganalytics.policy.validation.RangeValidator)1 ValidationResult (com.thinkbiganalytics.policy.validation.ValidationResult)1 DataSet (com.thinkbiganalytics.spark.DataSet)1 CleansedRowResult (com.thinkbiganalytics.spark.datavalidator.CleansedRowResult)1 FieldPolicyLoader (com.thinkbiganalytics.spark.policy.FieldPolicyLoader)1