Search in sources:

Example 1 with SimpleRegexReplacer

Use of com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer in project kylo by Teradata.

From the class CleanseAndValidateRowTest, method invalidStandardizeAndValidate.

/**
 * Applies two regex replacers interleaved with two lookup validators to the input
 * {@code "aafooaa"} and expects the resulting field value to be {@code "aabaraa"} with a
 * final validation result that differs from {@code StandardDataValidator.VALID_RESULT}.
 */
@Test
public void invalidStandardizeAndValidate() {
    final String column = "field1";
    final HCatDataType stringType = HCatDataType.createFromDataType(column, "string");

    // Policies are registered in this order: replace, lookup, replace, lookup.
    List<BaseFieldPolicy> chain = new ArrayList<>();
    chain.add(new SimpleRegexReplacer("(?i)foo", "bar"));
    chain.add(new LookupValidator("blah"));
    chain.add(new SimpleRegexReplacer("(?i)bar", "test"));
    chain.add(new LookupValidator("aatestaa"));

    FieldPolicy policy = FieldPolicyBuilder.newBuilder()
        .addPolicies(chain)
        .tableName("emp")
        .fieldName(column)
        .feedFieldName(column)
        .build();

    StandardizationAndValidationResult outcome =
        validator.standardizeAndValidateField(policy, "aafooaa", stringType, new HashMap<Class, Class>());

    assertEquals("aabaraa", outcome.getFieldValue());
    assertNotEquals(StandardDataValidator.VALID_RESULT, outcome.getFinalValidationResult());
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) HCatDataType(com.thinkbiganalytics.spark.validation.HCatDataType) ArrayList(java.util.ArrayList) LookupValidator(com.thinkbiganalytics.policy.validation.LookupValidator) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) SimpleRegexReplacer(com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) Test(org.junit.Test)

Example 2 with SimpleRegexReplacer

Use of com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer in project kylo by Teradata.

From the class CleanseAndValidateRowTest, method standardizeRegex.

/**
 * Runs a single case-insensitive {@code foo -> bar} regex replacer through
 * {@code validator.standardizeAndValidateField} for three inputs and checks the
 * resulting field value: {@code "aafooaa"} becomes {@code "aabaraa"}, {@code null}
 * stays {@code null}, and the empty string stays empty.
 */
@Test
public void standardizeRegex() {
    SimpleRegexReplacer standardizer = new SimpleRegexReplacer("(?i)foo", "bar");
    String fieldName = "field1";
    List<BaseFieldPolicy> policies = new ArrayList<>();
    policies.add(standardizer);
    FieldPolicy fieldPolicy = FieldPolicyBuilder.newBuilder().addPolicies(policies).tableName("emp").fieldName(fieldName).feedFieldName(fieldName).build();
    HCatDataType fieldDataType = HCatDataType.createFromDataType(fieldName, "string");

    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".
    StandardizationAndValidationResult result = validator.standardizeAndValidateField(fieldPolicy, "aafooaa", fieldDataType, new HashMap<Class, Class>());
    assertEquals("aabaraa", result.getFieldValue());

    result = validator.standardizeAndValidateField(fieldPolicy, null, fieldDataType, new HashMap<Class, Class>());
    assertNull(result.getFieldValue());

    result = validator.standardizeAndValidateField(fieldPolicy, "", fieldDataType, new HashMap<Class, Class>());
    assertEquals("", result.getFieldValue());
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) HashMap(java.util.HashMap) HCatDataType(com.thinkbiganalytics.spark.validation.HCatDataType) ArrayList(java.util.ArrayList) SimpleRegexReplacer(com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) Test(org.junit.Test)

Example 3 with SimpleRegexReplacer

Use of com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer in project kylo by Teradata.

From the class TestStandardizationTransform, method testSimpleRegexReplacer.

/**
 * Converts a {@code SimpleRegexReplacer} to its UI model via
 * {@code StandardizationAnnotationTransformer.instance().toUIModel} and back again via
 * {@code fromUI}, then checks that the pattern, the replacement, and the validity flag
 * of the converted policy match the original inputs.
 *
 * @throws IOException declared by the test signature; NOTE(review): presumably raised
 *     by the {@code fromUI} conversion — confirm against that helper.
 */
@Test
public void testSimpleRegexReplacer() throws IOException {
    String regex = "\\p{Cc}";
    String replace = "REPLACE";
    SimpleRegexReplacer standardizer = new SimpleRegexReplacer(regex, replace);
    FieldStandardizationRule uiModel = StandardizationAnnotationTransformer.instance().toUIModel(standardizer);
    SimpleRegexReplacer convertedPolicy = fromUI(uiModel, SimpleRegexReplacer.class);
    Assert.assertEquals(regex, convertedPolicy.getPattern().pattern());
    Assert.assertEquals(replace, convertedPolicy.getReplacement());
    // assertTrue states the intent directly instead of comparing against a boolean literal.
    Assert.assertTrue(convertedPolicy.isValid());
}
Also used : SimpleRegexReplacer(com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer) FieldStandardizationRule(com.thinkbiganalytics.policy.rest.model.FieldStandardizationRule) Test(org.junit.Test)

Example 4 with SimpleRegexReplacer

Use of com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer in project kylo by Teradata.

From the class CleanseAndValidateRowTest, method nullValueStandardizeAndValidate.

/**
 * Feeds a {@code null} field value through two regex replacers interleaved with two
 * lookup validators and expects the final validation result to equal
 * {@code StandardDataValidator.VALID_RESULT}.
 */
@Test
public void nullValueStandardizeAndValidate() {
    final String column = "field1";
    final HCatDataType stringType = HCatDataType.createFromDataType(column, "string");

    // Policies are registered in this order: replace, lookup, replace, lookup.
    List<BaseFieldPolicy> chain = new ArrayList<>();
    chain.add(new SimpleRegexReplacer("(?i)foo", "bar"));
    chain.add(new LookupValidator("blah"));
    chain.add(new SimpleRegexReplacer("(?i)bar", "test"));
    chain.add(new LookupValidator("aatestaa"));

    FieldPolicy policy = FieldPolicyBuilder.newBuilder()
        .addPolicies(chain)
        .tableName("emp")
        .fieldName(column)
        .feedFieldName(column)
        .build();

    StandardizationAndValidationResult outcome =
        validator.standardizeAndValidateField(policy, null, stringType, new HashMap<Class, Class>());

    assertEquals(StandardDataValidator.VALID_RESULT, outcome.getFinalValidationResult());
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) HCatDataType(com.thinkbiganalytics.spark.validation.HCatDataType) ArrayList(java.util.ArrayList) LookupValidator(com.thinkbiganalytics.policy.validation.LookupValidator) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) SimpleRegexReplacer(com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) Test(org.junit.Test)

Example 5 with SimpleRegexReplacer

Use of com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer in project kylo by Teradata.

From the class CleanseAndValidateRowTest, method standardizeAndValidate.

/**
 * Applies two regex replacers interleaved with two lookup validators to the input
 * {@code "aafooaa"} and expects the resulting field value to be {@code "aatestaa"} with a
 * final validation result equal to {@code StandardDataValidator.VALID_RESULT}.
 */
@Test
public void standardizeAndValidate() {
    String fieldName = "field1";
    // Policies are registered in this order: replace, lookup, replace, lookup.
    List<BaseFieldPolicy> policies = new ArrayList<>();
    policies.add(new SimpleRegexReplacer("(?i)foo", "bar"));
    policies.add(new LookupValidator("aabaraa"));
    policies.add(new SimpleRegexReplacer("(?i)bar", "test"));
    policies.add(new LookupValidator("aatestaa"));
    FieldPolicy fieldPolicy = FieldPolicyBuilder.newBuilder().addPolicies(policies).tableName("emp").fieldName(fieldName).feedFieldName(fieldName).build();
    HCatDataType fieldDataType = HCatDataType.createFromDataType(fieldName, "string");
    StandardizationAndValidationResult result = validator.standardizeAndValidateField(fieldPolicy, "aafooaa", fieldDataType, new HashMap<Class, Class>());
    // Expected value goes first so a failure reports "expected aatestaa but was ...".
    assertEquals("aatestaa", result.getFieldValue());
    assertEquals(StandardDataValidator.VALID_RESULT, result.getFinalValidationResult());
}
Also used : FieldPolicy(com.thinkbiganalytics.policy.FieldPolicy) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) HCatDataType(com.thinkbiganalytics.spark.validation.HCatDataType) ArrayList(java.util.ArrayList) LookupValidator(com.thinkbiganalytics.policy.validation.LookupValidator) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) SimpleRegexReplacer(com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) Test(org.junit.Test)

Aggregations

SimpleRegexReplacer (com.thinkbiganalytics.policy.standardization.SimpleRegexReplacer): 5, Test (org.junit.Test): 5, BaseFieldPolicy (com.thinkbiganalytics.policy.BaseFieldPolicy): 4, FieldPolicy (com.thinkbiganalytics.policy.FieldPolicy): 4, StandardizationAndValidationResult (com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult): 4, HCatDataType (com.thinkbiganalytics.spark.validation.HCatDataType): 4, ArrayList (java.util.ArrayList): 4, LookupValidator (com.thinkbiganalytics.policy.validation.LookupValidator): 3, FieldStandardizationRule (com.thinkbiganalytics.policy.rest.model.FieldStandardizationRule): 1, HashMap (java.util.HashMap): 1