use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.
the class DbmsLanguage method getRegexPatternString.
/**
 * Method "getRegexPatternString".
 *
 * Resolves the regular expression body for an indicator. Only pattern matching indicators, or user defined
 * indicators whose definition category is reported as "user defined matching", are considered. Only the first
 * pattern of the indicator's data-valid domain is looked at.
 *
 * @param indicator the indicator whose pattern should be resolved
 * @return the regular expression or null if none was found
 */
public String getRegexPatternString(Indicator indicator) {
    boolean supportsPatterns = indicator instanceof PatternMatchingIndicator
            || (indicator instanceof UserDefIndicator && IndicatorCategoryHelper
                    .isUserDefMatching(IndicatorCategoryHelper.getCategory(indicator.getIndicatorDefinition())));
    if (!supportsPatterns) {
        return null;
    }

    IndicatorParameters indicatorParams = indicator.getParameters();
    Domain validDomain = indicatorParams == null ? null : indicatorParams.getDataValidDomain();
    if (validDomain == null) {
        return null;
    }

    EList<Pattern> candidates = validDomain.getPatterns();
    if (candidates.isEmpty()) {
        return null;
    }

    // Only the first pattern is ever consulted; any further patterns are ignored.
    Expression regexp = this.getRegexp(candidates.get(0));
    if (regexp == null) {
        return null;
    }
    return regexp.getBody();
}
use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.
the class AnalysisExecutorHelper method checkMatchingIndicator.
/**
 * Checks that a matching indicator has patterns it can run with.
 *
 * If the data-valid domain already holds built-in patterns the check succeeds immediately. Otherwise, when the
 * domain references patterns and {@code isDependentFileExist} accepts them, they are passed to
 * {@code hotCopyPatterns}. In every other case the built-in patterns are read again: if there are none the
 * returned code is marked as failed with the "AnalysisExecutor.BuiltInNoPatterns" message, else the referenced
 * pattern list is cleared.
 *
 * @param indicator the matching indicator to check; its parameters and data-valid domain are dereferenced
 * without a null check
 * @return a return code that is OK unless no usable pattern was found
 */
private static ReturnCode checkMatchingIndicator(Indicator indicator) {
    ReturnCode result = new ReturnCode(Boolean.TRUE);
    Domain validDomain = indicator.getParameters().getDataValidDomain();
    if (!validDomain.getBuiltInPatterns().isEmpty()) {
        return result;
    }

    List<Pattern> referencedPatterns = validDomain.getPatterns();
    // check pattern matching indicator files' existence.
    boolean filesPresent = !referencedPatterns.isEmpty()
            && isDependentFileExist(referencedPatterns.toArray(new Pattern[referencedPatterns.size()]));
    if (filesPresent) {
        // Hot copy the pattern from separate file into built in.
        hotCopyPatterns(indicator, referencedPatterns);
        return result;
    }

    List<Pattern> builtIn = indicator.getParameters().getDataValidDomain().getBuiltInPatterns();
    if (!builtIn.isEmpty()) {
        // Use built-in pattern instead.
        referencedPatterns.clear();
        return result;
    }

    result.setMessage(Messages.getString("AnalysisExecutor.BuiltInNoPatterns")); // $NON-NLS-1$
    result.setOk(false);
    return result;
}
use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.
the class ModelElementIndicatorRule method patternRule.
/**
 * Evaluates a per-indicator-type rule for the given model element and execution language.
 *
 * The decision is based on the element's JDBC type (taken from the SQL data type of a {@code TdColumn}, or
 * converted from the Talend type of a {@code MetadataColumn} that is not a {@code TdColumn}), its data mining
 * type, the kind of connection the element belongs to, and the execution language.
 * NOTE(review): the boolean presumably tells the caller whether the indicator may be enabled for the element;
 * confirm against the callers.
 *
 * Several cases of the switch have no {@code break}/{@code return} at their end and continue into the next case;
 * those places are marked below.
 *
 * @param indicatorType the indicator type to evaluate
 * @param me the model element; only {@code TdColumn} and {@code MetadataColumn} are inspected for type and
 * connection information
 * @param language the execution language, compared against {@code ExecutionLanguage.SQL} and
 * {@code ExecutionLanguage.JAVA}
 * @param node the indicator node supplying the indicator instance; may be null, in which case the SQL pattern,
 * regexp and user defined indicator cases return false
 * @return true when the rule for {@code indicatorType} is satisfied, false otherwise (including unknown types)
 */
public static boolean patternRule(IndicatorEnum indicatorType, ModelElement me, ExecutionLanguage language, IIndicatorNode node) {
// stays 0 when me is neither a TdColumn nor a MetadataColumn
int javaType = 0;
boolean isDeliFileColumn = !(me instanceof TdColumn) && me instanceof MetadataColumn;
// stays -1 unless me is a TdColumn
int isTeradataInterval = -1;
if (me instanceof TdColumn) {
javaType = ((TdColumn) me).getSqlDataType().getJavaDataType();
// Added yyin 20121211 TDQ-6099:
isTeradataInterval = Java2SqlType.isTeradataIntervalType(((TdColumn) me).getSqlDataType().getName());
// ~
} else if (isDeliFileColumn) {
javaType = TalendTypeConvert.convertToJDBCType(((MetadataColumn) me).getTalendType());
}
DataminingType dataminingType = MetadataHelper.getDataminingType(me);
// fall back to the default data mining type of the java type when none is set or for file columns
if (dataminingType == null || isDeliFileColumn) {
dataminingType = MetadataHelper.getDefaultDataminingType(javaType);
}
// MOD qiongli 2012-4-25 TDQ-2699
Connection connection = null;
if (me instanceof TdColumn) {
connection = ConnectionHelper.getTdDataProvider((TdColumn) me);
} else if (me instanceof MetadataColumn) {
connection = ConnectionHelper.getTdDataProvider((MetadataColumn) me);
}
// indicator stays null when no node was given
Indicator indicator = null;
if (node != null) {
indicator = node.getIndicatorInstance();
}
boolean isSQLEngine = ExecutionLanguage.SQL.equals(language);
boolean isJavaEngine = ExecutionLanguage.JAVA.equals(language);
DbmsLanguage dbmsLanguage = DbmsLanguageFactory.createDbmsLanguage(connection, language);
// LONGVARCHAR on DB2 with the SQL engine is decided entirely by enableLongVarchar
if (javaType == Types.LONGVARCHAR && isSQLEngine) {
if (connection != null && ConnectionHelper.isDb2(connection)) {
return enableLongVarchar(indicatorType, dataminingType, me);
}
}
// Each of the flags below is false when there is no connection or the engine is not SQL.
// MOD qiongli 2012-8-10 TDQ-5907 need to disabled indicators for hive with sql engine.
boolean isHiveSQL = connection == null ? false : ConnectionHelper.isHive(connection) && isSQLEngine;
// MOD msjian 2013-5-15 TDQ-7275 need to disabled indicators for teradata with sql engine.
boolean isTeradataSQL = connection == null ? false : ConnectionHelper.isTeradata(connection) && isSQLEngine;
// MOD msjian 2016-8-25 TDQ-12349 need to disabled indicators for ingres with sql engine.
boolean isIngres = connection == null ? false : ConnectionHelper.isIngress(connection) && isSQLEngine;
// MOD msjian 2016-8-25 TDQ-12464 need to disabled indicators for ingres with sql engine.
// NOTE(review): the comment above mentions ingres, but this flag checks for a Sybase connection.
boolean isSybase = connection == null ? false : ConnectionHelper.isSybase(connection) && isSQLEngine;
// MOD qiongli 2013-8-27 TDQ-2104 disabled soundex indicators for hive with sql engine.
// NOTE(review): the comment above mentions hive, but this flag checks for a Vertica connection.
boolean isVerticaSQL = connection == null ? false : ConnectionHelper.isVertica(connection) && isSQLEngine;
switch(indicatorType) {
case CountsIndicatorEnum:
case RowCountIndicatorEnum:
case NullCountIndicatorEnum:
case DistinctCountIndicatorEnum:
case UniqueIndicatorEnum:
case DuplicateCountIndicatorEnum:
// if (dataminingType == DataminingType.NOMINAL) {
// these indicator types are accepted unconditionally
return true;
case DefValueCountIndicatorEnum:
Expression initialValue = null;
if (me instanceof TdColumn) {
initialValue = ((TdColumn) me).getInitialValue();
}
if (initialValue != null && initialValue.getBody() != null) {
// non nullable numeric column give a non null default value as ''
return initialValue.getBody().length() != 0 || Java2SqlType.isTextInSQL(javaType);
}
// no initial value body: leave the switch and return false
break;
case BlankCountIndicatorEnum:
// MOD xwang 2011-07-29 bug TDQ-1731 disable blank count checkable for other data type but Text
if (!Java2SqlType.isTextInSQL(javaType)) {
return false;
} else if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
return false;
} else {
return true;
}
case TextIndicatorEnum:
case MinLengthIndicatorEnum:
case MinLengthWithNullIndicatorEnum:
case MinLengthWithBlankIndicatorEnum:
case MinLengthWithBlankNullIndicatorEnum:
case MaxLengthIndicatorEnum:
case MaxLengthWithNullIndicatorEnum:
case MaxLengthWithBlankIndicatorEnum:
case MaxLengthWithBlankNullIndicatorEnum:
case AverageLengthIndicatorEnum:
case AverageLengthWithNullIndicatorEnum:
case AverageLengthWithBlankIndicatorEnum:
case AverageLengthWithNullBlankIndicatorEnum:
if (Java2SqlType.isTextInSQL(javaType)) {
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
if (isTeradataInterval > 0) {
return false;
}
if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.UNSTRUCTURED_TEXT) {
return true;
}
}
break;
case EastAsiaPatternFreqIndicatorEnum:
case EastAsiaPatternLowFreqIndicatorEnum:
if (isSQLEngine && isEmpryExpression(indicator, dbmsLanguage)) {
return false;
} else if (isJavaEngine) {
return true;
}
// No break here: when neither branch above returned, execution continues into the Benford case below.
// NOTE(review): confirm this fall-through is intentional.
case BenfordLawFrequencyIndicatorEnum:
// disable the benford for interval type: both sql and java
if (isTeradataInterval > 0) {
return false;
}
// No break here: continues into the pattern frequency checks below.
case PatternFreqIndicatorEnum:
case PatternLowFreqIndicatorEnum:
if (isTeradataSQL || isIngres || isSybase) {
return false;
}
// No break here: continues into the mode check below.
case ModeIndicatorEnum:
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
return false;
}
// No break here: continues into the data mining type check shared with the frequency indicators.
case FrequencyIndicatorEnum:
case LowFrequencyIndicatorEnum:
if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL) {
return true;
}
break;
// MOD zshen 2010-01-27 Date Pattern frequency indicator
case DatePatternFreqIndicatorEnum:
// only accepted with the Java engine, on date or text columns
if (isJavaEngine && (Java2SqlType.isDateInSQL(javaType) || Java2SqlType.isTextInSQL(javaType))) {
return true;
}
break;
// MOD mzhao 2009-03-05 Soundex frequency indicator
case SoundexIndicatorEnum:
case SoundexLowIndicatorEnum:
if (!Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
if (isHiveSQL || isVerticaSQL) {
return false;
}
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
if (isTeradataInterval > 0 && isSQLEngine) {
return false;
}
return true;
}
break;
case MeanIndicatorEnum:
case MedianIndicatorEnum:
case IQRIndicatorEnum:
case LowerQuartileIndicatorEnum:
case UpperQuartileIndicatorEnum:
// graphics and database yet.
if (Java2SqlType.isNumbericInSQL(javaType)) /* || Java2SqlType.isDateInSQL(javaType) */
{
if (dataminingType == DataminingType.INTERVAL) {
// with Hive and the SQL engine, only the mean indicator of this group is accepted
if (isHiveSQL && !(indicatorType == IndicatorEnum.MeanIndicatorEnum)) {
return false;
}
return true;
}
}
break;
case BoxIIndicatorEnum:
case RangeIndicatorEnum:
case MinValueIndicatorEnum:
case MaxValueIndicatorEnum:
// the graphics and database yet.
if (Java2SqlType.isNumbericInSQL(javaType) || Java2SqlType.isDateInSQL(javaType)) {
if (dataminingType == DataminingType.INTERVAL) {
return true;
}
}
break;
case DateFrequencyIndicatorEnum:
case WeekFrequencyIndicatorEnum:
case MonthFrequencyIndicatorEnum:
case QuarterFrequencyIndicatorEnum:
case YearFrequencyIndicatorEnum:
case DateLowFrequencyIndicatorEnum:
case WeekLowFrequencyIndicatorEnum:
case MonthLowFrequencyIndicatorEnum:
case QuarterLowFrequencyIndicatorEnum:
case YearLowFrequencyIndicatorEnum:
// Mod yyin 20120511 TDQ-5241
if (Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isTimeSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
return true;
}
break;
case BinFrequencyIndicatorEnum:
case BinLowFrequencyIndicatorEnum:
if (Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
return true;
}
break;
case ValidPhoneCountIndicatorEnum:
case PossiblePhoneCountIndicatorEnum:
case ValidRegCodeCountIndicatorEnum:
case InvalidRegCodeCountIndicatorEnum:
case WellFormE164PhoneCountIndicatorEnum:
case WellFormIntePhoneCountIndicatorEnum:
case WellFormNationalPhoneCountIndicatorEnum:
case PhoneNumbStatisticsIndicatorEnum:
case FormatFreqPieIndictorEnum:
// this group is only accepted with the Java engine
if (isJavaEngine && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
return true;
}
break;
case SqlPatternMatchingIndicatorEnum:
if (node == null) {
return false;
}
if (!isSQLEngine) {
return false;
}
// accepted only when the DBMS language yields an expression for the indicator's pattern
Pattern pattern = IndicatorHelper.getPattern(indicator);
Expression returnExpression = dbmsLanguage.getRegexp(pattern);
if (returnExpression != null) {
return true;
}
break;
case RegexpMatchingIndicatorEnum:
if (node == null) {
return false;
}
// same check as the SQL pattern case, but without the SQL engine restriction;
// pattern and returnExpression are the locals declared in the case above
pattern = IndicatorHelper.getPattern(indicator);
returnExpression = dbmsLanguage.getRegexp(pattern);
if (returnExpression != null) {
return true;
}
break;
case UserDefinedIndicatorEnum:
// judge language
if (node == null) {
return false;
}
Indicator judi = null;
try {
judi = UDIHelper.adaptToJavaUDI(indicator);
} catch (Throwable e) {
// any failure while adapting the indicator rejects it
return false;
}
// when an adapted indicator was returned, use it for the expression lookup
if (judi != null) {
indicator = judi;
}
returnExpression = dbmsLanguage.getExpression(indicator);
// rejected when the Java engine has no adapted indicator, or the SQL engine has no expression
if (isJavaEngine && judi == null || isSQLEngine && returnExpression == null) {
return false;
}
return true;
default:
return false;
}
// reached by every case that left the switch with break
return false;
}
use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.
the class FileSystemImportWriter method updatePatternInAnaParams.
/**
 * If there is a same name pattern in current workspace, update the pattern in the imported analysis
 * IndicatorParameters.
 *
 * Walks the indicators of the analysis results. For an {@code AllMatchIndicator} (used in column set analysis)
 * every composite regex matching indicator is processed; for a {@code PatternMatchingIndicator} the indicator
 * itself is processed. Other indicators are skipped, and nothing happens when the analysis has no results.
 *
 * @param systemSupplyModelElement the workspace element; it is cast to {@code Pattern} when a matching indicator
 * is found
 * @param analysis the imported analysis
 */
private void updatePatternInAnaParams(ModelElement systemSupplyModelElement, Analysis analysis) {
    if (analysis.getResults() == null) {
        return;
    }
    for (Indicator current : analysis.getResults().getIndicators()) {
        if (current instanceof AllMatchIndicator) {
            // AllMatchIndicator is in column set analysis.
            for (RegexpMatchingIndicator nested : ((AllMatchIndicator) current).getCompositeRegexMatchingIndicators()) {
                removOldAddSysPatternInAnaParams(nested.getParameters(), (Pattern) systemSupplyModelElement, analysis);
            }
        } else if (current instanceof PatternMatchingIndicator) {
            removOldAddSysPatternInAnaParams(((PatternMatchingIndicator) current).getParameters(),
                    (Pattern) systemSupplyModelElement, analysis);
        }
    }
}
use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.
the class FileSystemImportWriter method removOldAddSysPatternInAnaParams.
/**
 * Remove the old pattern from the IndicatorParameters of the imported analysis, then add the current workspace
 * pattern into the IndicatorParameters.
 *
 * Only patterns whose {@code eResource()} is null are candidates. A candidate is replaced when the last segment
 * of its URI equals the last segment of the workspace pattern's URI. At most one pattern is replaced; the
 * workspace pattern is appended to the list and the replacement is logged.
 *
 * @param indParameters the indicator parameters to update; nothing is done when they, or their data-valid
 * domain, are null
 * @param sysPattern the pattern from the current workspace
 * @param analysis the imported analysis, used for the log message only
 */
private void removOldAddSysPatternInAnaParams(IndicatorParameters indParameters, Pattern sysPattern, Analysis analysis) {
    if (indParameters == null || indParameters.getDataValidDomain() == null) {
        return;
    }
    EList<Pattern> domainPatterns = indParameters.getDataValidDomain().getPatterns();
    for (Iterator<Pattern> cursor = domainPatterns.iterator(); cursor.hasNext();) {
        Pattern candidate = cursor.next();
        if (candidate.eResource() != null) {
            continue;
        }
        URI candidateUri = EObjectHelper.getURI(candidate);
        URI workspaceUri = EObjectHelper.getURI(sysPattern);
        if (candidateUri == null || workspaceUri == null) {
            continue;
        }
        if (!workspaceUri.lastSegment().equals(candidateUri.lastSegment())) {
            continue;
        }
        // Remove through the iterator, then stop iterating right away because the list is modified below.
        cursor.remove();
        indParameters.getDataValidDomain().getPatterns().add(sysPattern);
        log.info("Pattern '" + sysPattern.getName() + "' is updated in Analysis '" + analysis.getName() + "'");
        return;
    }
}
Aggregations