Search in sources :

Example 26 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class DbmsLanguage method getRegexPatternString.

/**
 * Looks up the regular expression text attached to a matching indicator.
 * <p>
 * Only {@code PatternMatchingIndicator}s, and {@code UserDefIndicator}s whose definition category is reported
 * as "user defined matching" by {@code IndicatorCategoryHelper}, are considered. Only the FIRST pattern of the
 * indicator's data-valid domain is inspected; any further patterns are ignored.
 *
 * @param indicator the indicator to inspect
 * @return the body of the expression resolved by {@code getRegexp} for the first pattern, or null when the
 * indicator is of another kind, has no parameters, no data-valid domain, no pattern, or no resolved expression
 */
public String getRegexPatternString(Indicator indicator) {
    boolean matchingIndicator = indicator instanceof PatternMatchingIndicator;
    if (!matchingIndicator) {
        // A user-defined indicator only qualifies when its definition belongs to the matching category.
        matchingIndicator = indicator instanceof UserDefIndicator
                && IndicatorCategoryHelper.isUserDefMatching(IndicatorCategoryHelper.getCategory(indicator.getIndicatorDefinition()));
    }
    if (!matchingIndicator) {
        return null;
    }

    IndicatorParameters indicatorParameters = indicator.getParameters();
    if (indicatorParameters == null) {
        return null;
    }
    Domain validDomain = indicatorParameters.getDataValidDomain();
    if (validDomain == null) {
        return null;
    }

    EList<Pattern> domainPatterns = validDomain.getPatterns();
    if (domainPatterns.isEmpty()) {
        return null;
    }
    // Only the first pattern is ever used.
    Expression regexp = this.getRegexp(domainPatterns.get(0));
    if (regexp == null) {
        return null;
    }
    return regexp.getBody();
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) TdExpression(org.talend.cwm.relational.TdExpression) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) Expression(orgomg.cwm.objectmodel.core.Expression) IndicatorParameters(org.talend.dataquality.indicators.IndicatorParameters) Domain(org.talend.dataquality.domain.Domain) UserDefIndicator(org.talend.dataquality.indicators.sql.UserDefIndicator)

Example 27 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class AnalysisExecutorHelper method checkMatchingIndicator.

/**
 * Verifies that a matching indicator has usable patterns.
 * <p>
 * The check succeeds immediately when the data-valid domain already holds built-in patterns. Otherwise, if the
 * domain references patterns and {@code isDependentFileExist} accepts them, they are passed to
 * {@code hotCopyPatterns}. Failing that, the built-in patterns are read again: when there are none the
 * result is marked as failed with the "AnalysisExecutor.BuiltInNoPatterns" message, otherwise the referenced
 * pattern list is cleared.
 *
 * @param indicator the matching indicator to check
 * @return an OK return code, or a failed one carrying a message when no pattern is available
 */
private static ReturnCode checkMatchingIndicator(Indicator indicator) {
    ReturnCode result = new ReturnCode(Boolean.TRUE);
    Domain validDomain = indicator.getParameters().getDataValidDomain();
    if (!validDomain.getBuiltInPatterns().isEmpty()) {
        // Built-in patterns are already present.
        return result;
    }

    List<Pattern> referencedPatterns = validDomain.getPatterns();
    // check pattern matching indicator files' existence.
    boolean patternFilesPresent = !referencedPatterns.isEmpty()
            && isDependentFileExist(referencedPatterns.toArray(new Pattern[referencedPatterns.size()]));
    if (patternFilesPresent) {
        // Hot copy the pattern from separate file into built in.
        hotCopyPatterns(indicator, referencedPatterns);
        return result;
    }

    List<Pattern> embeddedPatterns = indicator.getParameters().getDataValidDomain().getBuiltInPatterns();
    if (!embeddedPatterns.isEmpty()) {
        // Use built-in pattern instead.
        referencedPatterns.clear();
        return result;
    }

    // $NON-NLS-1$
    result.setMessage(Messages.getString("AnalysisExecutor.BuiltInNoPatterns"));
    result.setOk(false);
    return result;
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) ReturnCode(org.talend.utils.sugars.ReturnCode) Domain(org.talend.dataquality.domain.Domain)

Example 28 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class ModelElementIndicatorRule method patternRule.

/**
 * Decides whether the given indicator type is allowed for a model element under the given execution language.
 * <p>
 * The decision is based on the element's JDBC/java type, its data mining type, the kind of connection it belongs
 * to and the execution engine (SQL or Java). Several switch cases below deliberately fall through into the next
 * case; each such place is marked with a "falls through" comment.
 *
 * @param indicatorType the indicator type to test
 * @param me the model element; only {@code TdColumn} and {@code MetadataColumn} are handled specially
 * @param language the execution language (compared against {@code ExecutionLanguage.SQL} and {@code JAVA})
 * @param node the indicator node that supplies the indicator instance; may be null, in which case the pattern
 * and user-defined indicator types are rejected
 * @return true when the indicator type is accepted by the rules below, false otherwise
 */
public static boolean patternRule(IndicatorEnum indicatorType, ModelElement me, ExecutionLanguage language, IIndicatorNode node) {
    int javaType = 0;
    // A MetadataColumn that is not a TdColumn is treated as a (delimited) file column.
    boolean isDeliFileColumn = !(me instanceof TdColumn) && me instanceof MetadataColumn;
    // Stays -1 unless me is a TdColumn; then holds the result of Java2SqlType.isTeradataIntervalType.
    int isTeradataInterval = -1;
    if (me instanceof TdColumn) {
        javaType = ((TdColumn) me).getSqlDataType().getJavaDataType();
        // Added yyin 20121211 TDQ-6099:
        isTeradataInterval = Java2SqlType.isTeradataIntervalType(((TdColumn) me).getSqlDataType().getName());
    // ~
    } else if (isDeliFileColumn) {
        javaType = TalendTypeConvert.convertToJDBCType(((MetadataColumn) me).getTalendType());
    }
    DataminingType dataminingType = MetadataHelper.getDataminingType(me);
    // File columns always use the default data mining type derived from the java type.
    if (dataminingType == null || isDeliFileColumn) {
        dataminingType = MetadataHelper.getDefaultDataminingType(javaType);
    }
    // MOD qiongli 2012-4-25 TDQ-2699
    Connection connection = null;
    if (me instanceof TdColumn) {
        connection = ConnectionHelper.getTdDataProvider((TdColumn) me);
    } else if (me instanceof MetadataColumn) {
        connection = ConnectionHelper.getTdDataProvider((MetadataColumn) me);
    }
    Indicator indicator = null;
    if (node != null) {
        indicator = node.getIndicatorInstance();
    }
    boolean isSQLEngine = ExecutionLanguage.SQL.equals(language);
    boolean isJavaEngine = ExecutionLanguage.JAVA.equals(language);
    // NOTE(review): connection may still be null here; presumably the factory accepts that - confirm.
    DbmsLanguage dbmsLanguage = DbmsLanguageFactory.createDbmsLanguage(connection, language);
    // LONGVARCHAR on DB2 with the SQL engine is decided entirely by enableLongVarchar.
    if (javaType == Types.LONGVARCHAR && isSQLEngine) {
        if (connection != null && ConnectionHelper.isDb2(connection)) {
            return enableLongVarchar(indicatorType, dataminingType, me);
        }
    }
    // Each flag below is true only when a connection exists, it is of the named kind, and the SQL engine is used.
    // MOD qiongli 2012-8-10 TDQ-5907 need to disabled indicators for hive with sql engine.
    boolean isHiveSQL = connection == null ? false : ConnectionHelper.isHive(connection) && isSQLEngine;
    // MOD msjian 2013-5-15 TDQ-7275 need to disabled indicators for teradata with sql engine.
    boolean isTeradataSQL = connection == null ? false : ConnectionHelper.isTeradata(connection) && isSQLEngine;
    // MOD msjian 2016-8-25 TDQ-12349 need to disabled indicators for ingres with sql engine.
    boolean isIngres = connection == null ? false : ConnectionHelper.isIngress(connection) && isSQLEngine;
    // MOD msjian 2016-8-25 TDQ-12464 need to disabled indicators for sybase with sql engine.
    boolean isSybase = connection == null ? false : ConnectionHelper.isSybase(connection) && isSQLEngine;
    // MOD qiongli 2013-8-27 TDQ-2104 disabled soundex indicators for vertica with sql engine.
    boolean isVerticaSQL = connection == null ? false : ConnectionHelper.isVertica(connection) && isSQLEngine;
    switch(indicatorType) {
        case CountsIndicatorEnum:
        case RowCountIndicatorEnum:
        case NullCountIndicatorEnum:
        case DistinctCountIndicatorEnum:
        case UniqueIndicatorEnum:
        case DuplicateCountIndicatorEnum:
            // Simple count indicators are always accepted, whatever the data mining type.
            return true;
        case DefValueCountIndicatorEnum:
            // Only a TdColumn can supply an initial (default) value here.
            Expression initialValue = null;
            if (me instanceof TdColumn) {
                initialValue = ((TdColumn) me).getInitialValue();
            }
            if (initialValue != null && initialValue.getBody() != null) {
                // non nullable numeric column give a non null default value as ''
                return initialValue.getBody().length() != 0 || Java2SqlType.isTextInSQL(javaType);
            }
            break;
        case BlankCountIndicatorEnum:
            // MOD xwang 2011-07-29 bug TDQ-1731 disable blank count checkable for other data type but Text
            if (!Java2SqlType.isTextInSQL(javaType)) {
                return false;
            } else if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
                // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
                return false;
            } else {
                return true;
            }
        case TextIndicatorEnum:
        case MinLengthIndicatorEnum:
        case MinLengthWithNullIndicatorEnum:
        case MinLengthWithBlankIndicatorEnum:
        case MinLengthWithBlankNullIndicatorEnum:
        case MaxLengthIndicatorEnum:
        case MaxLengthWithNullIndicatorEnum:
        case MaxLengthWithBlankIndicatorEnum:
        case MaxLengthWithBlankNullIndicatorEnum:
        case AverageLengthIndicatorEnum:
        case AverageLengthWithNullIndicatorEnum:
        case AverageLengthWithBlankIndicatorEnum:
        case AverageLengthWithNullBlankIndicatorEnum:
            // Text statistics need a text column with a NOMINAL or UNSTRUCTURED_TEXT data mining type.
            if (Java2SqlType.isTextInSQL(javaType)) {
                // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
                if (isTeradataInterval > 0) {
                    return false;
                }
                if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.UNSTRUCTURED_TEXT) {
                    return true;
                }
            }
            break;
        case EastAsiaPatternFreqIndicatorEnum:
        case EastAsiaPatternLowFreqIndicatorEnum:
            if (isSQLEngine && isEmpryExpression(indicator, dbmsLanguage)) {
                return false;
            } else if (isJavaEngine) {
                return true;
            }
            // falls through: SQL engine with a non-empty expression (or neither engine) continues with the checks below
        case BenfordLawFrequencyIndicatorEnum:
            // disable the benford for interval type: both sql and java
            if (isTeradataInterval > 0) {
                return false;
            }
            // falls through
        case PatternFreqIndicatorEnum:
        case PatternLowFreqIndicatorEnum:
            // Rejected for Teradata, Ingres and Sybase when running with the SQL engine.
            if (isTeradataSQL || isIngres || isSybase) {
                return false;
            }
            // falls through
        case ModeIndicatorEnum:
            // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
            if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
                return false;
            }
            // falls through
        case FrequencyIndicatorEnum:
        case LowFrequencyIndicatorEnum:
            // Common final check for the whole fall-through chain above.
            if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL) {
                return true;
            }
            break;
        // MOD zshen 2010-01-27 Date Pattern frequency indicator
        case DatePatternFreqIndicatorEnum:
            // Java engine only, on date or text columns.
            if (isJavaEngine && (Java2SqlType.isDateInSQL(javaType) || Java2SqlType.isTextInSQL(javaType))) {
                return true;
            }
            break;
        // MOD mzhao 2009-03-05 Soundex frequency indicator
        case SoundexIndicatorEnum:
        case SoundexLowIndicatorEnum:
            if (!Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                if (isHiveSQL || isVerticaSQL) {
                    return false;
                }
                // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
                if (isTeradataInterval > 0 && isSQLEngine) {
                    return false;
                }
                return true;
            }
            break;
        case MeanIndicatorEnum:
        case MedianIndicatorEnum:
        case IQRIndicatorEnum:
        case LowerQuartileIndicatorEnum:
        case UpperQuartileIndicatorEnum:
            // Numeric columns with an INTERVAL data mining type only; date columns are excluded (see the
            // commented-out condition). On Hive with the SQL engine only the mean is accepted.
            if (Java2SqlType.isNumbericInSQL(javaType)) /* || Java2SqlType.isDateInSQL(javaType) */
            {
                if (dataminingType == DataminingType.INTERVAL) {
                    if (isHiveSQL && !(indicatorType == IndicatorEnum.MeanIndicatorEnum)) {
                        return false;
                    }
                    return true;
                }
            }
            break;
        case BoxIIndicatorEnum:
        case RangeIndicatorEnum:
        case MinValueIndicatorEnum:
        case MaxValueIndicatorEnum:
            // Numeric or date columns with an INTERVAL data mining type.
            if (Java2SqlType.isNumbericInSQL(javaType) || Java2SqlType.isDateInSQL(javaType)) {
                if (dataminingType == DataminingType.INTERVAL) {
                    return true;
                }
            }
            break;
        case DateFrequencyIndicatorEnum:
        case WeekFrequencyIndicatorEnum:
        case MonthFrequencyIndicatorEnum:
        case QuarterFrequencyIndicatorEnum:
        case YearFrequencyIndicatorEnum:
        case DateLowFrequencyIndicatorEnum:
        case WeekLowFrequencyIndicatorEnum:
        case MonthLowFrequencyIndicatorEnum:
        case QuarterLowFrequencyIndicatorEnum:
        case YearLowFrequencyIndicatorEnum:
            // Mod yyin 20120511 TDQ-5241
            // Date columns that are not time-only, with a NOMINAL or INTERVAL data mining type.
            if (Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isTimeSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                return true;
            }
            break;
        case BinFrequencyIndicatorEnum:
        case BinLowFrequencyIndicatorEnum:
            // Numeric columns with a NOMINAL or INTERVAL data mining type.
            if (Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                return true;
            }
            break;
        case ValidPhoneCountIndicatorEnum:
        case PossiblePhoneCountIndicatorEnum:
        case ValidRegCodeCountIndicatorEnum:
        case InvalidRegCodeCountIndicatorEnum:
        case WellFormE164PhoneCountIndicatorEnum:
        case WellFormIntePhoneCountIndicatorEnum:
        case WellFormNationalPhoneCountIndicatorEnum:
        case PhoneNumbStatisticsIndicatorEnum:
        case FormatFreqPieIndictorEnum:
            // Java engine only, with a NOMINAL or INTERVAL data mining type.
            if (isJavaEngine && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                return true;
            }
            break;
        case SqlPatternMatchingIndicatorEnum:
            // Needs an indicator node and the SQL engine, plus an expression for the indicator's pattern.
            if (node == null) {
                return false;
            }
            if (!isSQLEngine) {
                return false;
            }
            // 'pattern' and 'returnExpression' are declared here and reused by the following cases.
            Pattern pattern = IndicatorHelper.getPattern(indicator);
            Expression returnExpression = dbmsLanguage.getRegexp(pattern);
            if (returnExpression != null) {
                return true;
            }
            break;
        case RegexpMatchingIndicatorEnum:
            // Same as the SQL pattern case but without the SQL-engine restriction.
            if (node == null) {
                return false;
            }
            pattern = IndicatorHelper.getPattern(indicator);
            returnExpression = dbmsLanguage.getRegexp(pattern);
            if (returnExpression != null) {
                return true;
            }
            break;
        case UserDefinedIndicatorEnum:
            // judge language
            if (node == null) {
                return false;
            }
            Indicator judi = null;
            try {
                judi = UDIHelper.adaptToJavaUDI(indicator);
            } catch (Throwable e) {
                // Any failure while adapting the indicator rejects it; the error is not reported.
                return false;
            }
            if (judi != null) {
                indicator = judi;
            }
            returnExpression = dbmsLanguage.getExpression(indicator);
            // Java engine requires a successfully adapted indicator; SQL engine requires an expression.
            if (isJavaEngine && judi == null || isSQLEngine && returnExpression == null) {
                return false;
            }
            return true;
        default:
            return false;
    }
    // Reached by every case that ended with 'break' without accepting the indicator.
    return false;
}
Also used : DataminingType(org.talend.dataquality.indicators.DataminingType) DbmsLanguage(org.talend.dq.dbms.DbmsLanguage) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) Pattern(org.talend.dataquality.domain.pattern.Pattern) TdColumn(org.talend.cwm.relational.TdColumn) Expression(orgomg.cwm.objectmodel.core.Expression) Connection(org.talend.core.model.metadata.builder.connection.Connection) ModelElementIndicator(org.talend.dataprofiler.core.model.ModelElementIndicator) Indicator(org.talend.dataquality.indicators.Indicator)

Example 29 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class FileSystemImportWriter method updatePatternInAnaParams.

/**
 * Replaces, in the indicator parameters of an imported analysis, the pattern that corresponds to the given
 * workspace pattern (the matching itself is done by {@code removOldAddSysPatternInAnaParams}).
 * <p>
 * For an {@code AllMatchIndicator} (used in column set analysis) every composite regex matching indicator is
 * processed; for any other {@code PatternMatchingIndicator} the indicator's own parameters are processed.
 * Nothing happens when the analysis has no results.
 *
 * @param systemSupplyModelElement the pattern from the current workspace; cast to {@code Pattern} on use
 * @param analysis the imported analysis whose indicators are updated
 */
private void updatePatternInAnaParams(ModelElement systemSupplyModelElement, Analysis analysis) {
    if (analysis.getResults() == null) {
        return;
    }
    for (Indicator current : analysis.getResults().getIndicators()) {
        // AllMatchIndicator is in column set analysis.
        if (current instanceof AllMatchIndicator) {
            EList<RegexpMatchingIndicator> composites = ((AllMatchIndicator) current).getCompositeRegexMatchingIndicators();
            for (RegexpMatchingIndicator composite : composites) {
                removOldAddSysPatternInAnaParams(composite.getParameters(), (Pattern) systemSupplyModelElement, analysis);
            }
            continue;
        }
        if (current instanceof PatternMatchingIndicator) {
            removOldAddSysPatternInAnaParams(((PatternMatchingIndicator) current).getParameters(),
                    (Pattern) systemSupplyModelElement, analysis);
        }
    }
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) IndicatorParameters(org.talend.dataquality.indicators.IndicatorParameters) RegexpMatchingIndicator(org.talend.dataquality.indicators.RegexpMatchingIndicator) Indicator(org.talend.dataquality.indicators.Indicator)

Example 30 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class FileSystemImportWriter method removOldAddSysPatternInAnaParams.

/**
 * Swaps an old pattern referenced by the indicator parameters of an imported analysis for the pattern of the
 * current workspace.
 * <p>
 * Only patterns whose {@code eResource()} is null are candidates. A candidate is replaced when the last segment
 * of its URI equals the last segment of the workspace pattern's URI. At most one pattern is replaced; the
 * workspace pattern is appended to the list and the replacement is logged.
 *
 * @param indParameters the indicator parameters to update; ignored when null or without a data-valid domain
 * @param sysPattern the workspace pattern to put in place of the old one
 * @param analysis the imported analysis, used only for the log message
 */
private void removOldAddSysPatternInAnaParams(IndicatorParameters indParameters, Pattern sysPattern, Analysis analysis) {
    if (indParameters == null || indParameters.getDataValidDomain() == null) {
        return;
    }
    EList<Pattern> domainPatterns = indParameters.getDataValidDomain().getPatterns();
    for (Iterator<Pattern> cursor = domainPatterns.iterator(); cursor.hasNext();) {
        Pattern candidate = cursor.next();
        if (candidate.eResource() != null) {
            continue;
        }
        URI candidateUri = EObjectHelper.getURI(candidate);
        URI replacementUri = EObjectHelper.getURI(sysPattern);
        if (candidateUri == null || replacementUri == null) {
            continue;
        }
        if (!replacementUri.lastSegment().equals(candidateUri.lastSegment())) {
            continue;
        }
        // Remove through the iterator, then stop right after the add so the modified list is never iterated again.
        cursor.remove();
        indParameters.getDataValidDomain().getPatterns().add(sysPattern);
        log.info("Pattern '" + sysPattern.getName() + "' is updated in Analysis '" + analysis.getName() + "'");
        break;
    }
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) URI(org.eclipse.emf.common.util.URI)

Aggregations

Pattern (org.talend.dataquality.domain.pattern.Pattern)75 RegularExpression (org.talend.dataquality.domain.pattern.RegularExpression)27 Test (org.junit.Test)19 TdExpression (org.talend.cwm.relational.TdExpression)18 Domain (org.talend.dataquality.domain.Domain)16 Analysis (org.talend.dataquality.analysis.Analysis)15 PatternComponent (org.talend.dataquality.domain.pattern.PatternComponent)15 IndicatorParameters (org.talend.dataquality.indicators.IndicatorParameters)14 ArrayList (java.util.ArrayList)13 PatternMatchingIndicator (org.talend.dataquality.indicators.PatternMatchingIndicator)13 IFile (org.eclipse.core.resources.IFile)11 TDQPatternItem (org.talend.dataquality.properties.TDQPatternItem)10 Expression (orgomg.cwm.objectmodel.core.Expression)10 IFolder (org.eclipse.core.resources.IFolder)9 DatabaseConnection (org.talend.core.model.metadata.builder.connection.DatabaseConnection)9 Indicator (org.talend.dataquality.indicators.Indicator)9 Item (org.talend.core.model.properties.Item)7 TdColumn (org.talend.cwm.relational.TdColumn)7 RegexpMatchingIndicator (org.talend.dataquality.indicators.RegexpMatchingIndicator)6 IndicatorDefinition (org.talend.dataquality.indicators.definition.IndicatorDefinition)6