Search in sources:

Example 1 with InvalidFormatException

Use of com.thinkbiganalytics.spark.util.InvalidFormatException in project kylo by Teradata.

From the class HCatDataType, method isValueConvertibleToType:

public boolean isValueConvertibleToType(Object val, boolean enforcePrecision) {
    try {
        if (val instanceof String) {
            String strVal = (String) val;
            if (strVal != null && !isnumeric) {
                if (convertibleType == Timestamp.class) {
                    return new TimestampValidator(true).validate(strVal);
                } else if (convertibleType == Date.class) {
                    return DateValidator.instance().validate(strVal);
                } else if (convertibleType == byte[].class) {
                    return true;
                }
            }
            Comparable nativeValue = toNativeValue(strVal);
            if (nativeValue != null) {
                if (isnumeric) {
                    if (min != null && min.compareTo(nativeValue) > 0) {
                        return false;
                    }
                    if (max != null && max.compareTo(nativeValue) < 0) {
                        return false;
                    }
                    if (digits != null && !(!enforcePrecision || validatePrecision(nativeValue))) {
                        return false;
                    }
                } else if (isstring && strVal.length() > maxlength) {
                    return false;
                }
            }
        } else {
            return val == null || val.getClass() == convertibleType || val instanceof Number && Number.class.isAssignableFrom(convertibleType);
        }
    } catch (InvalidFormatException | ClassCastException | IllegalArgumentException e) {
        return false;
    }
    return true;
}
Also used : TimestampValidator(com.thinkbiganalytics.policy.validation.TimestampValidator) InvalidFormatException(com.thinkbiganalytics.spark.util.InvalidFormatException) Date(java.sql.Date)

Example 2 with InvalidFormatException

Use of com.thinkbiganalytics.spark.util.InvalidFormatException in project kylo by Teradata.

From the class CleanseAndValidateRow, method standardizeAndValidateField:

StandardizationAndValidationResult standardizeAndValidateField(FieldPolicy fieldPolicy, Object value, HCatDataType dataType, Map<Class, Class> validatorParamType) {
    StandardizationAndValidationResult result = new StandardizationAndValidationResult(value);
    List<BaseFieldPolicy> fieldPolicies = fieldPolicy.getAllPolicies();
    int standardizerCount = 0;
    for (BaseFieldPolicy p : fieldPolicies) {
        if (p instanceof StandardizationPolicy) {
            standardizerCount++;
        }
    }
    boolean validateNullValues = false;
    int processedStandardizers = 0;
    for (BaseFieldPolicy p : fieldPolicies) {
        boolean isEmpty = ((result.getFieldValue() == null) || (StringUtils.isEmpty(result.getFieldValue().toString())));
        if (p instanceof StandardizationPolicy) {
            processedStandardizers++;
            StandardizationPolicy standardizationPolicy = (StandardizationPolicy) p;
            boolean shouldStandardize = true;
            if (isEmpty && !(standardizationPolicy instanceof AcceptsEmptyValues)) {
                shouldStandardize = false;
            }
            if (!standardizationPolicy.accepts(result.getFieldValue())) {
                shouldStandardize = false;
            }
            if (shouldStandardize) {
                Object newValue = result.getFieldValue();
                try {
                    newValue = standardizationPolicy.convertRawValue(result.getFieldValue());
                } catch (Exception e) {
                    log.error("Standardizer '{}' threw exception while attempting to standardize value, original value will be kept. Exception: {}", standardizationPolicy.getClass(), e);
                }
                // If this is the last standardizer for this field and the standardized value is returned as a String, and target column is not String, then validate and convert it to correct type
                if (newValue != null && dataType.getConvertibleType() != newValue.getClass() && standardizerCount == processedStandardizers) {
                    try {
                        // Date and timestamp fields can be valid as strings
                        boolean isValueOk = dataType.isStringValueValidForHiveType(newValue.toString());
                        if (!isValueOk) {
                            // if the current string is not in a correct format attempt to convert it
                            try {
                                newValue = dataType.toNativeValue(newValue.toString());
                            } catch (RuntimeException e) {
                                result.addValidationResult(ValidationResult.failField("incompatible", dataType.getName(), "Not convertible to " + dataType.getNativeType()));
                            }
                        }
                    } catch (InvalidFormatException e) {
                        log.warn("Could not convert value {} to correct type {}", newValue.toString(), dataType.getConvertibleType().getName());
                    }
                }
                result.setFieldValue(newValue);
            }
        }
        if (p instanceof ValidationPolicy) {
            ValidationPolicy validationPolicy = (ValidationPolicy) p;
            // not null validator
            if (!isEmpty || validateNullValues || validationPolicy instanceof NotNullValidator) {
                ValidationResult validationResult = validateValue(validationPolicy, dataType, result.getFieldValue(), validatorParamType);
                if (isEmpty && validationPolicy instanceof NotNullValidator) {
                    validateNullValues = validationPolicy != VALID_RESULT;
                }
                // only need to add those that are invalid
                if (validationResult != VALID_RESULT) {
                    result.addValidationResult(validationResult);
                    // exit out of processing if invalid records found.
                    break;
                }
            }
            // reset the failOnEmpty flag back to false
            if (!(validationPolicy instanceof NotNullValidator)) {
                validateNullValues = false;
            }
        }
    }
    ValidationResult finalValidationCheck = finalValidationCheck(fieldPolicy, dataType, result.getFieldValue());
    if (finalValidationCheck != VALID_RESULT) {
        result.addValidationResult(finalValidationCheck);
    }
    return result;
}
Also used : NotNullValidator(com.thinkbiganalytics.policy.validation.NotNullValidator) AcceptsEmptyValues(com.thinkbiganalytics.policy.standardization.AcceptsEmptyValues) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) ValidationResult(com.thinkbiganalytics.policy.validation.ValidationResult) InvalidFormatException(com.thinkbiganalytics.spark.util.InvalidFormatException) BaseFieldPolicy(com.thinkbiganalytics.policy.BaseFieldPolicy) InvalidFormatException(com.thinkbiganalytics.spark.util.InvalidFormatException) StandardizationAndValidationResult(com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult) ValidationPolicy(com.thinkbiganalytics.policy.validation.ValidationPolicy) StandardizationPolicy(com.thinkbiganalytics.policy.standardization.StandardizationPolicy)

Aggregations

InvalidFormatException (com.thinkbiganalytics.spark.util.InvalidFormatException)2 BaseFieldPolicy (com.thinkbiganalytics.policy.BaseFieldPolicy)1 AcceptsEmptyValues (com.thinkbiganalytics.policy.standardization.AcceptsEmptyValues)1 StandardizationPolicy (com.thinkbiganalytics.policy.standardization.StandardizationPolicy)1 NotNullValidator (com.thinkbiganalytics.policy.validation.NotNullValidator)1 TimestampValidator (com.thinkbiganalytics.policy.validation.TimestampValidator)1 ValidationPolicy (com.thinkbiganalytics.policy.validation.ValidationPolicy)1 ValidationResult (com.thinkbiganalytics.policy.validation.ValidationResult)1 StandardizationAndValidationResult (com.thinkbiganalytics.spark.datavalidator.StandardizationAndValidationResult)1 Date (java.sql.Date)1