Search in sources :

Example 16 with Expression

use of orgomg.cwm.objectmodel.core.Expression in project tdq-studio-se by Talend.

the class DbmsLanguage method getRegexp.

/**
 * Method "getRegexp".
 *
 * When in SQL engine, retrieve the regex with the matching Dbms Language. If null, return the regex with default
 * SQL (even if it's null)
 *
 * @param pattern a pattern
 * @return the body of the regular expression applicable to this dbms or null
 */
public Expression getRegexp(Pattern pattern) {
    // MOD by zhsne for bug 17172 2010.12.10
    Expression expression = null;
    Expression defaultExpression = null;
    EList<PatternComponent> components = pattern.getComponents();
    for (PatternComponent patternComponent : components) {
        if (patternComponent != null) {
            expression = this.getExpression(patternComponent);
            if (expression != null && DbmsLanguageFactory.compareDbmsLanguage(this.dbmsName, expression.getLanguage())) {
                // return this db's expression
                return expression;
            } else if (expression != null && DbmsLanguageFactory.compareDbmsLanguage(ExecutionLanguage.SQL.getName(), expression.getLanguage())) {
                // remember the default expression
                defaultExpression = expression;
            }
        }
    }
    // if can not find itself, return the default one.
    return defaultExpression;
}
Also used : TdExpression(org.talend.cwm.relational.TdExpression) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) Expression(orgomg.cwm.objectmodel.core.Expression) PatternComponent(org.talend.dataquality.domain.pattern.PatternComponent)

Example 17 with Expression

use of orgomg.cwm.objectmodel.core.Expression in project tdq-studio-se by Talend.

the class DbmsLanguage method getRegexPatternString.

/**
 * Method "getRegexPatternString".
 *
 * @param indicator
 * @return the regular expression or null if none was found
 */
public String getRegexPatternString(Indicator indicator) {
    if (indicator instanceof PatternMatchingIndicator || (indicator instanceof UserDefIndicator && IndicatorCategoryHelper.isUserDefMatching(IndicatorCategoryHelper.getCategory(indicator.getIndicatorDefinition())))) {
        IndicatorParameters parameters = indicator.getParameters();
        if (parameters == null) {
            return null;
        }
        Domain dataValidDomain = parameters.getDataValidDomain();
        if (dataValidDomain == null) {
            return null;
        }
        EList<Pattern> patterns = dataValidDomain.getPatterns();
        for (Pattern pattern : patterns) {
            Expression expression = this.getRegexp(pattern);
            return expression == null ? null : expression.getBody();
        }
    }
    return null;
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) TdExpression(org.talend.cwm.relational.TdExpression) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) Expression(orgomg.cwm.objectmodel.core.Expression) IndicatorParameters(org.talend.dataquality.indicators.IndicatorParameters) Domain(org.talend.dataquality.domain.Domain) UserDefIndicator(org.talend.dataquality.indicators.sql.UserDefIndicator)

Example 18 with Expression

use of orgomg.cwm.objectmodel.core.Expression in project tdq-studio-se by Talend.

the class DbmsLanguage method getTableQueryExpression.

/**
 * Get the query Expression for one table
 *
 * @param column
 * @param where
 * @return
 */
public Expression getTableQueryExpression(TdColumn column, String where) {
    ModelElement columnSet = ColumnHelper.getColumnOwnerAsColumnSet(column);
    Schema parentSchema = SchemaHelper.getParentSchema(columnSet);
    Catalog parentCatalog = CatalogHelper.getParentCatalog(columnSet);
    if (parentSchema != null) {
        parentCatalog = CatalogHelper.getParentCatalog(parentSchema);
    }
    String schemaName = parentSchema == null ? null : parentSchema.getName();
    String catalogName = parentCatalog == null ? null : parentCatalog.getName();
    String qualifiedName = this.toQualifiedName(catalogName, schemaName, columnSet.getName());
    Expression queryExpression = CoreFactory.eINSTANCE.createExpression();
    String expressionBody = getQuerySql(ASTERISK, qualifiedName, where);
    queryExpression.setBody(expressionBody);
    queryExpression.setLanguage(this.getDbmsName());
    return queryExpression;
}
Also used : ModelElement(orgomg.cwm.objectmodel.core.ModelElement) TdExpression(org.talend.cwm.relational.TdExpression) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) Expression(orgomg.cwm.objectmodel.core.Expression) Schema(orgomg.cwm.resource.relational.Schema) Catalog(orgomg.cwm.resource.relational.Catalog)

Example 19 with Expression

use of orgomg.cwm.objectmodel.core.Expression in project tdq-studio-se by Talend.

the class DbmsLanguage method getTableQueryExpression.

/**
 * Get the query Expression for one table of column
 *
 * @param column
 * @param where
 * @return
 */
public Expression getTableQueryExpression(MetadataTable metadataTable, String where) {
    Schema parentSchema = SchemaHelper.getParentSchema(metadataTable);
    Catalog parentCatalog = CatalogHelper.getParentCatalog(metadataTable);
    if (parentSchema != null) {
        parentCatalog = CatalogHelper.getParentCatalog(parentSchema);
    }
    String schemaName = parentSchema == null ? null : parentSchema.getName();
    String catalogName = parentCatalog == null ? null : parentCatalog.getName();
    String qualifiedName = this.toQualifiedName(catalogName, schemaName, metadataTable.getName());
    Expression queryExpression = CoreFactory.eINSTANCE.createExpression();
    String expressionBody = getQuerySql(getSelectColumnsStr(metadataTable), qualifiedName, where);
    queryExpression.setBody(expressionBody);
    queryExpression.setLanguage(this.getDbmsName());
    return queryExpression;
}
Also used : TdExpression(org.talend.cwm.relational.TdExpression) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) Expression(orgomg.cwm.objectmodel.core.Expression) Schema(orgomg.cwm.resource.relational.Schema) Catalog(orgomg.cwm.resource.relational.Catalog)

Example 20 with Expression

use of orgomg.cwm.objectmodel.core.Expression in project tdq-studio-se by Talend.

the class ModelElementIndicatorRule method patternRule.

public static boolean patternRule(IndicatorEnum indicatorType, ModelElement me, ExecutionLanguage language, IIndicatorNode node) {
    int javaType = 0;
    boolean isDeliFileColumn = !(me instanceof TdColumn) && me instanceof MetadataColumn;
    int isTeradataInterval = -1;
    if (me instanceof TdColumn) {
        javaType = ((TdColumn) me).getSqlDataType().getJavaDataType();
        // Added yyin 20121211 TDQ-6099:
        isTeradataInterval = Java2SqlType.isTeradataIntervalType(((TdColumn) me).getSqlDataType().getName());
    // ~
    } else if (isDeliFileColumn) {
        javaType = TalendTypeConvert.convertToJDBCType(((MetadataColumn) me).getTalendType());
    }
    DataminingType dataminingType = MetadataHelper.getDataminingType(me);
    if (dataminingType == null || isDeliFileColumn) {
        dataminingType = MetadataHelper.getDefaultDataminingType(javaType);
    }
    // MOD qiongli 2012-4-25 TDQ-2699
    Connection connection = null;
    if (me instanceof TdColumn) {
        connection = ConnectionHelper.getTdDataProvider((TdColumn) me);
    } else if (me instanceof MetadataColumn) {
        connection = ConnectionHelper.getTdDataProvider((MetadataColumn) me);
    }
    Indicator indicator = null;
    if (node != null) {
        indicator = node.getIndicatorInstance();
    }
    boolean isSQLEngine = ExecutionLanguage.SQL.equals(language);
    boolean isJavaEngine = ExecutionLanguage.JAVA.equals(language);
    DbmsLanguage dbmsLanguage = DbmsLanguageFactory.createDbmsLanguage(connection, language);
    if (javaType == Types.LONGVARCHAR && isSQLEngine) {
        if (connection != null && ConnectionHelper.isDb2(connection)) {
            return enableLongVarchar(indicatorType, dataminingType, me);
        }
    }
    // MOD qiongli 2012-8-10 TDQ-5907 need to disabled indicators for hive with sql engine.
    boolean isHiveSQL = connection == null ? false : ConnectionHelper.isHive(connection) && isSQLEngine;
    // MOD msjian 2013-5-15 TDQ-7275 need to disabled indicators for teradata with sql engine.
    boolean isTeradataSQL = connection == null ? false : ConnectionHelper.isTeradata(connection) && isSQLEngine;
    // MOD msjian 2016-8-25 TDQ-12349 need to disabled indicators for ingres with sql engine.
    boolean isIngres = connection == null ? false : ConnectionHelper.isIngress(connection) && isSQLEngine;
    // MOD msjian 2016-8-25 TDQ-12464 need to disabled indicators for ingres with sql engine.
    boolean isSybase = connection == null ? false : ConnectionHelper.isSybase(connection) && isSQLEngine;
    // MOD qiongli 2013-8-27 TDQ-2104 disabled soundex indicators for hive with sql engine.
    boolean isVerticaSQL = connection == null ? false : ConnectionHelper.isVertica(connection) && isSQLEngine;
    switch(indicatorType) {
        case CountsIndicatorEnum:
        case RowCountIndicatorEnum:
        case NullCountIndicatorEnum:
        case DistinctCountIndicatorEnum:
        case UniqueIndicatorEnum:
        case DuplicateCountIndicatorEnum:
            // if (dataminingType == DataminingType.NOMINAL) {
            return true;
        case DefValueCountIndicatorEnum:
            Expression initialValue = null;
            if (me instanceof TdColumn) {
                initialValue = ((TdColumn) me).getInitialValue();
            }
            if (initialValue != null && initialValue.getBody() != null) {
                // non nullable numeric column give a non null default value as ''
                return initialValue.getBody().length() != 0 || Java2SqlType.isTextInSQL(javaType);
            }
            break;
        case BlankCountIndicatorEnum:
            // MOD xwang 2011-07-29 bug TDQ-1731 disable blank count checkable for other data type but Text
            if (!Java2SqlType.isTextInSQL(javaType)) {
                return false;
            } else if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
                // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
                return false;
            } else {
                return true;
            }
        case TextIndicatorEnum:
        case MinLengthIndicatorEnum:
        case MinLengthWithNullIndicatorEnum:
        case MinLengthWithBlankIndicatorEnum:
        case MinLengthWithBlankNullIndicatorEnum:
        case MaxLengthIndicatorEnum:
        case MaxLengthWithNullIndicatorEnum:
        case MaxLengthWithBlankIndicatorEnum:
        case MaxLengthWithBlankNullIndicatorEnum:
        case AverageLengthIndicatorEnum:
        case AverageLengthWithNullIndicatorEnum:
        case AverageLengthWithBlankIndicatorEnum:
        case AverageLengthWithNullBlankIndicatorEnum:
            if (Java2SqlType.isTextInSQL(javaType)) {
                // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
                if (isTeradataInterval > 0) {
                    return false;
                }
                if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.UNSTRUCTURED_TEXT) {
                    return true;
                }
            }
            break;
        case EastAsiaPatternFreqIndicatorEnum:
        case EastAsiaPatternLowFreqIndicatorEnum:
            if (isSQLEngine && isEmpryExpression(indicator, dbmsLanguage)) {
                return false;
            } else if (isJavaEngine) {
                return true;
            }
        case BenfordLawFrequencyIndicatorEnum:
            // disable the benford for interval type: both sql and java
            if (isTeradataInterval > 0) {
                return false;
            }
        case PatternFreqIndicatorEnum:
        case PatternLowFreqIndicatorEnum:
            if (isTeradataSQL || isIngres || isSybase) {
                return false;
            }
        case ModeIndicatorEnum:
            // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
            if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
                return false;
            }
        case FrequencyIndicatorEnum:
        case LowFrequencyIndicatorEnum:
            if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL) {
                return true;
            }
            break;
        // MOD zshen 2010-01-27 Date Pattern frequency indicator
        case DatePatternFreqIndicatorEnum:
            if (isJavaEngine && (Java2SqlType.isDateInSQL(javaType) || Java2SqlType.isTextInSQL(javaType))) {
                return true;
            }
            break;
        // MOD mzhao 2009-03-05 Soundex frequency indicator
        case SoundexIndicatorEnum:
        case SoundexLowIndicatorEnum:
            if (!Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                if (isHiveSQL || isVerticaSQL) {
                    return false;
                }
                // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
                if (isTeradataInterval > 0 && isSQLEngine) {
                    return false;
                }
                return true;
            }
            break;
        case MeanIndicatorEnum:
        case MedianIndicatorEnum:
        case IQRIndicatorEnum:
        case LowerQuartileIndicatorEnum:
        case UpperQuartileIndicatorEnum:
            // graphics and database yet.
            if (Java2SqlType.isNumbericInSQL(javaType)) /* || Java2SqlType.isDateInSQL(javaType) */
            {
                if (dataminingType == DataminingType.INTERVAL) {
                    if (isHiveSQL && !(indicatorType == IndicatorEnum.MeanIndicatorEnum)) {
                        return false;
                    }
                    return true;
                }
            }
            break;
        case BoxIIndicatorEnum:
        case RangeIndicatorEnum:
        case MinValueIndicatorEnum:
        case MaxValueIndicatorEnum:
            // the graphics and database yet.
            if (Java2SqlType.isNumbericInSQL(javaType) || Java2SqlType.isDateInSQL(javaType)) {
                if (dataminingType == DataminingType.INTERVAL) {
                    return true;
                }
            }
            break;
        case DateFrequencyIndicatorEnum:
        case WeekFrequencyIndicatorEnum:
        case MonthFrequencyIndicatorEnum:
        case QuarterFrequencyIndicatorEnum:
        case YearFrequencyIndicatorEnum:
        case DateLowFrequencyIndicatorEnum:
        case WeekLowFrequencyIndicatorEnum:
        case MonthLowFrequencyIndicatorEnum:
        case QuarterLowFrequencyIndicatorEnum:
        case YearLowFrequencyIndicatorEnum:
            // Mod yyin 20120511 TDQ-5241
            if (Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isTimeSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                return true;
            }
            break;
        case BinFrequencyIndicatorEnum:
        case BinLowFrequencyIndicatorEnum:
            if (Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                return true;
            }
            break;
        case ValidPhoneCountIndicatorEnum:
        case PossiblePhoneCountIndicatorEnum:
        case ValidRegCodeCountIndicatorEnum:
        case InvalidRegCodeCountIndicatorEnum:
        case WellFormE164PhoneCountIndicatorEnum:
        case WellFormIntePhoneCountIndicatorEnum:
        case WellFormNationalPhoneCountIndicatorEnum:
        case PhoneNumbStatisticsIndicatorEnum:
        case FormatFreqPieIndictorEnum:
            if (isJavaEngine && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                return true;
            }
            break;
        case SqlPatternMatchingIndicatorEnum:
            if (node == null) {
                return false;
            }
            if (!isSQLEngine) {
                return false;
            }
            Pattern pattern = IndicatorHelper.getPattern(indicator);
            Expression returnExpression = dbmsLanguage.getRegexp(pattern);
            if (returnExpression != null) {
                return true;
            }
            break;
        case RegexpMatchingIndicatorEnum:
            if (node == null) {
                return false;
            }
            pattern = IndicatorHelper.getPattern(indicator);
            returnExpression = dbmsLanguage.getRegexp(pattern);
            if (returnExpression != null) {
                return true;
            }
            break;
        case UserDefinedIndicatorEnum:
            // judge language
            if (node == null) {
                return false;
            }
            Indicator judi = null;
            try {
                judi = UDIHelper.adaptToJavaUDI(indicator);
            } catch (Throwable e) {
                return false;
            }
            if (judi != null) {
                indicator = judi;
            }
            returnExpression = dbmsLanguage.getExpression(indicator);
            if (isJavaEngine && judi == null || isSQLEngine && returnExpression == null) {
                return false;
            }
            return true;
        default:
            return false;
    }
    return false;
}
Also used : DataminingType(org.talend.dataquality.indicators.DataminingType) DbmsLanguage(org.talend.dq.dbms.DbmsLanguage) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) Pattern(org.talend.dataquality.domain.pattern.Pattern) TdColumn(org.talend.cwm.relational.TdColumn) Expression(orgomg.cwm.objectmodel.core.Expression) Connection(org.talend.core.model.metadata.builder.connection.Connection) ModelElementIndicator(org.talend.dataprofiler.core.model.ModelElementIndicator) Indicator(org.talend.dataquality.indicators.Indicator)

Aggregations

Expression (orgomg.cwm.objectmodel.core.Expression)71 TdExpression (org.talend.cwm.relational.TdExpression)42 RegularExpression (org.talend.dataquality.domain.pattern.RegularExpression)37 Test (org.junit.Test)25 Domain (org.talend.dataquality.domain.Domain)14 ArrayList (java.util.ArrayList)13 TdColumn (org.talend.cwm.relational.TdColumn)11 Pattern (org.talend.dataquality.domain.pattern.Pattern)10 Catalog (orgomg.cwm.resource.relational.Catalog)10 Indicator (org.talend.dataquality.indicators.Indicator)9 IndicatorParameters (org.talend.dataquality.indicators.IndicatorParameters)8 Analysis (org.talend.dataquality.analysis.Analysis)7 PatternMatchingIndicator (org.talend.dataquality.indicators.PatternMatchingIndicator)7 UDIndicatorDefinition (org.talend.dataquality.indicators.definition.userdefine.UDIndicatorDefinition)7 DbmsLanguage (org.talend.dq.dbms.DbmsLanguage)7 Connection (org.talend.core.model.metadata.builder.connection.Connection)6 DatabaseConnection (org.talend.core.model.metadata.builder.connection.DatabaseConnection)6 AnalysisContext (org.talend.dataquality.analysis.AnalysisContext)6 IndicatorDefinition (org.talend.dataquality.indicators.definition.IndicatorDefinition)6 ChartDataEntity (org.talend.dq.indicators.preview.table.ChartDataEntity)6