Search in sources:

Example 1 with DataminingType

use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.

Method recomputeIndicators of the class ColumnSetAnalysisDetailsPage.

/**
 * Rebuilds the indicator state of this page from the current analysis.
 * <p>
 * A new {@link ColumnSetAnalysisHandler} is bound to the current model element. The simple
 * statistics indicator and the all-match indicator are taken from that handler when it provides
 * them and are created otherwise. Finally one {@link ModelElementIndicator} is assembled for each
 * analyzed element that resolves to a {@link TdColumn} or a {@link MetadataColumn}, and the result
 * is stored in {@code currentModelElementIndicators}.
 */
public void recomputeIndicators() {
    columnSetAnalysisHandler = new ColumnSetAnalysisHandler();
    columnSetAnalysisHandler.setAnalysis(getCurrentModelElement());
    stringDataFilter = columnSetAnalysisHandler.getStringDataFilter();
    analyzedColumns = columnSetAnalysisHandler.getAnalyzedColumns();

    // Simple statistics: reuse the handler's indicator unless it is missing or an unresolved proxy.
    if (columnSetAnalysisHandler.getSimpleStatIndicator() != null && !columnSetAnalysisHandler.getSimpleStatIndicator().eIsProxy()) {
        simpleStatIndicator = (SimpleStatIndicator) columnSetAnalysisHandler.getSimpleStatIndicator();
    } else {
        simpleStatIndicator = ColumnsetFactory.eINSTANCE.createSimpleStatIndicator();
        final IndicatorsFactory indicatorsFactory = IndicatorsFactory.eINSTANCE;
        simpleStatIndicator.setRowCountIndicator(indicatorsFactory.createRowCountIndicator());
        simpleStatIndicator.setDistinctCountIndicator(indicatorsFactory.createDistinctCountIndicator());
        simpleStatIndicator.setDuplicateCountIndicator(indicatorsFactory.createDuplicateCountIndicator());
        simpleStatIndicator.setUniqueCountIndicator(indicatorsFactory.createUniqueCountIndicator());
    }

    // All-match: reuse the handler's indicator, or create one and give it its default definition.
    if (columnSetAnalysisHandler.getAllmatchIndicator() != null) {
        allMatchIndicator = (AllMatchIndicator) columnSetAnalysisHandler.getAllmatchIndicator();
    } else {
        allMatchIndicator = ColumnsetFactory.eINSTANCE.createAllMatchIndicator();
        DefinitionHandler.getInstance().setDefaultIndicatorDefinition(allMatchIndicator);
    }
    initializeIndicator(simpleStatIndicator);

    final List<ModelElementIndicator> collected = new ArrayList<ModelElementIndicator>();
    for (ModelElement analyzed : analyzedColumns) {
        // MOD yyi 2011-02-16 17871:delimitefile
        final MetadataColumn metadataColumn = SwitchHelpers.METADATA_COLUMN_SWITCH.doSwitch(analyzed);
        final TdColumn relationalColumn = SwitchHelpers.COLUMN_SWITCH.doSwitch(analyzed);

        final ModelElementIndicator columnIndicator;
        if (relationalColumn != null) {
            RepositoryNode columnNode = RepositoryNodeHelper.recursiveFind(relationalColumn);
            if (columnNode == null) {
                // No node found for the column: build one on the fly.
                columnNode = RepositoryNodeHelper.createRepositoryNode(relationalColumn);
            }
            columnIndicator = ModelElementIndicatorHelper.createModelElementIndicator(columnNode);
        } else if (metadataColumn != null) {
            columnIndicator = ModelElementIndicatorHelper.createDFColumnIndicator(RepositoryNodeHelper.recursiveFind(metadataColumn));
        } else {
            // Neither kind of column: nothing to build for this element.
            continue;
        }

        // Make sure the element carries a datamining type, defaulting to NOMINAL.
        final DataminingType storedType = MetadataHelper.getDataminingType(analyzed);
        final DataminingType effectiveType = storedType != null ? storedType : DataminingType.NOMINAL;
        MetadataHelper.setDataminingType(effectiveType, analyzed);

        final Collection<Indicator> regexIndicators = columnSetAnalysisHandler.getRegexMathingIndicators(analyzed);
        if (columnIndicator == null) {
            continue;
        }
        columnIndicator.setIndicators(regexIndicators.toArray(new Indicator[regexIndicators.size()]));
        collected.add(columnIndicator);
    }
    currentModelElementIndicators = collected.toArray(new ModelElementIndicator[collected.size()]);
}
Also used : DataminingType(org.talend.dataquality.indicators.DataminingType) ModelElement(orgomg.cwm.objectmodel.core.ModelElement) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) ColumnSetAnalysisHandler(org.talend.dq.analysis.ColumnSetAnalysisHandler) TdColumn(org.talend.cwm.relational.TdColumn) ArrayList(java.util.ArrayList) ColumnsetFactory(org.talend.dataquality.indicators.columnset.ColumnsetFactory) RepositoryNode(org.talend.repository.model.RepositoryNode) IRepositoryNode(org.talend.repository.model.IRepositoryNode) RegexpMatchingIndicator(org.talend.dataquality.indicators.RegexpMatchingIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) ModelElementIndicator(org.talend.dataprofiler.core.model.ModelElementIndicator) Indicator(org.talend.dataquality.indicators.Indicator) CompositeIndicator(org.talend.dataquality.indicators.CompositeIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) ModelElementIndicator(org.talend.dataprofiler.core.model.ModelElementIndicator)

Example 2 with DataminingType

use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.

Method setDataminingType of the class IndicatorImpl.

/**
 * <!-- begin-user-doc -->
 * Stores the given datamining type, substituting {@code DATAMINING_TYPE_EDEFAULT} when
 * {@code null} is passed, and sends a {@code Notification.SET} notification carrying the old and
 * new values when notification is required.
 * <!-- end-user-doc -->
 * @generated
 */
@Override
public void setDataminingType(DataminingType newDataminingType) {
    final DataminingType previousType = dataminingType;
    if (newDataminingType == null) {
        dataminingType = DATAMINING_TYPE_EDEFAULT;
    } else {
        dataminingType = newDataminingType;
    }
    if (!eNotificationRequired()) {
        return;
    }
    eNotify(new ENotificationImpl(this, Notification.SET, IndicatorsPackage.INDICATOR__DATAMINING_TYPE, previousType, dataminingType));
}
Also used : DataminingType(org.talend.dataquality.indicators.DataminingType) ENotificationImpl(org.eclipse.emf.ecore.impl.ENotificationImpl)

Example 3 with DataminingType

use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.

Method patternRule of the class ModelElementIndicatorRule.

/**
 * Decides whether an indicator of the given type may be used on the given model element with the
 * given execution engine.
 * <p>
 * The decision combines the element's JDBC type, its datamining type, the kind of connection the
 * element belongs to and, for pattern and user-defined indicators, whether the DBMS language can
 * produce an expression for the indicator instance carried by {@code node}.
 * <p>
 * Several {@code case} groups in the switch intentionally fall through to the next group; each
 * such place is marked with a comment.
 *
 * @param indicatorType the kind of indicator being tested
 * @param me the analyzed element; only {@link TdColumn} and {@link MetadataColumn} instances
 * contribute a JDBC type and a connection, anything else is evaluated with JDBC type {@code 0}
 * and no connection
 * @param language the execution engine, compared against {@code ExecutionLanguage.SQL} and
 * {@code ExecutionLanguage.JAVA}
 * @param node the node holding the indicator instance, may be {@code null}; pattern-matching and
 * user-defined indicators are rejected when it is {@code null}
 * @return {@code true} when the indicator is allowed, {@code false} otherwise
 */
public static boolean patternRule(IndicatorEnum indicatorType, ModelElement me, ExecutionLanguage language, IIndicatorNode node) {
    // JDBC type of the element; stays 0 when the element is neither a TdColumn nor a MetadataColumn.
    int javaType = 0;
    // A MetadataColumn that is not a TdColumn (the variable name suggests a delimited-file column).
    boolean isDeliFileColumn = !(me instanceof TdColumn) && me instanceof MetadataColumn;
    // Result of Java2SqlType.isTeradataIntervalType; stays -1 when the element is not a TdColumn.
    int isTeradataInterval = -1;
    if (me instanceof TdColumn) {
        javaType = ((TdColumn) me).getSqlDataType().getJavaDataType();
        // Added yyin 20121211 TDQ-6099:
        isTeradataInterval = Java2SqlType.isTeradataIntervalType(((TdColumn) me).getSqlDataType().getName());
    // ~
    } else if (isDeliFileColumn) {
        javaType = TalendTypeConvert.convertToJDBCType(((MetadataColumn) me).getTalendType());
    }
    DataminingType dataminingType = MetadataHelper.getDataminingType(me);
    // For MetadataColumn-only elements the stored type is always replaced by the default derived
    // from the JDBC type; for other elements only a missing type is replaced.
    if (dataminingType == null || isDeliFileColumn) {
        dataminingType = MetadataHelper.getDefaultDataminingType(javaType);
    }
    // MOD qiongli 2012-4-25 TDQ-2699
    Connection connection = null;
    if (me instanceof TdColumn) {
        connection = ConnectionHelper.getTdDataProvider((TdColumn) me);
    } else if (me instanceof MetadataColumn) {
        connection = ConnectionHelper.getTdDataProvider((MetadataColumn) me);
    }
    // Indicator instance carried by the node; stays null when no node is given.
    Indicator indicator = null;
    if (node != null) {
        indicator = node.getIndicatorInstance();
    }
    boolean isSQLEngine = ExecutionLanguage.SQL.equals(language);
    boolean isJavaEngine = ExecutionLanguage.JAVA.equals(language);
    // NOTE(review): connection may still be null here; createDbmsLanguage is expected to cope with that - confirm.
    DbmsLanguage dbmsLanguage = DbmsLanguageFactory.createDbmsLanguage(connection, language);
    // LONGVARCHAR columns on DB2 with the SQL engine are decided by a dedicated rule.
    if (javaType == Types.LONGVARCHAR && isSQLEngine) {
        if (connection != null && ConnectionHelper.isDb2(connection)) {
            return enableLongVarchar(indicatorType, dataminingType, me);
        }
    }
    // Each flag below is true only when a connection exists, is of the named kind, and the SQL engine is used.
    // MOD qiongli 2012-8-10 TDQ-5907 need to disabled indicators for hive with sql engine.
    boolean isHiveSQL = connection == null ? false : ConnectionHelper.isHive(connection) && isSQLEngine;
    // MOD msjian 2013-5-15 TDQ-7275 need to disabled indicators for teradata with sql engine.
    boolean isTeradataSQL = connection == null ? false : ConnectionHelper.isTeradata(connection) && isSQLEngine;
    // MOD msjian 2016-8-25 TDQ-12349 need to disabled indicators for ingres with sql engine.
    boolean isIngres = connection == null ? false : ConnectionHelper.isIngress(connection) && isSQLEngine;
    // MOD msjian 2016-8-25 TDQ-12464 need to disabled indicators for sybase with sql engine.
    boolean isSybase = connection == null ? false : ConnectionHelper.isSybase(connection) && isSQLEngine;
    // MOD qiongli 2013-8-27 TDQ-2104 disabled soundex indicators for vertica (as for hive) with sql engine.
    boolean isVerticaSQL = connection == null ? false : ConnectionHelper.isVertica(connection) && isSQLEngine;
    switch(indicatorType) {
        // Counting indicators are always allowed.
        case CountsIndicatorEnum:
        case RowCountIndicatorEnum:
        case NullCountIndicatorEnum:
        case DistinctCountIndicatorEnum:
        case UniqueIndicatorEnum:
        case DuplicateCountIndicatorEnum:
            // if (dataminingType == DataminingType.NOMINAL) {
            return true;
        // Default-value count: needs a TdColumn whose initial value has a non-null body.
        case DefValueCountIndicatorEnum:
            Expression initialValue = null;
            if (me instanceof TdColumn) {
                initialValue = ((TdColumn) me).getInitialValue();
            }
            if (initialValue != null && initialValue.getBody() != null) {
                // non nullable numeric column give a non null default value as ''
                return initialValue.getBody().length() != 0 || Java2SqlType.isTextInSQL(javaType);
            }
            break;
        case BlankCountIndicatorEnum:
            // MOD xwang 2011-07-29 bug TDQ-1731 disable blank count checkable for other data type but Text
            if (!Java2SqlType.isTextInSQL(javaType)) {
                return false;
            } else if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
                // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
                return false;
            } else {
                return true;
            }
        // Text-length indicators: text columns only, with a NOMINAL or UNSTRUCTURED_TEXT datamining type.
        case TextIndicatorEnum:
        case MinLengthIndicatorEnum:
        case MinLengthWithNullIndicatorEnum:
        case MinLengthWithBlankIndicatorEnum:
        case MinLengthWithBlankNullIndicatorEnum:
        case MaxLengthIndicatorEnum:
        case MaxLengthWithNullIndicatorEnum:
        case MaxLengthWithBlankIndicatorEnum:
        case MaxLengthWithBlankNullIndicatorEnum:
        case AverageLengthIndicatorEnum:
        case AverageLengthWithNullIndicatorEnum:
        case AverageLengthWithBlankIndicatorEnum:
        case AverageLengthWithNullBlankIndicatorEnum:
            if (Java2SqlType.isTextInSQL(javaType)) {
                // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
                if (isTeradataInterval > 0) {
                    return false;
                }
                if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.UNSTRUCTURED_TEXT) {
                    return true;
                }
            }
            break;
        case EastAsiaPatternFreqIndicatorEnum:
        case EastAsiaPatternLowFreqIndicatorEnum:
            if (isSQLEngine && isEmpryExpression(indicator, dbmsLanguage)) {
                return false;
            } else if (isJavaEngine) {
                return true;
            }
            // falls through: when neither branch above returned, the Benford and following checks apply too
        case BenfordLawFrequencyIndicatorEnum:
            // disable the benford for interval type: both sql and java
            if (isTeradataInterval > 0) {
                return false;
            }
            // falls through to the pattern-frequency checks
        case PatternFreqIndicatorEnum:
        case PatternLowFreqIndicatorEnum:
            if (isTeradataSQL || isIngres || isSybase) {
                return false;
            }
            // falls through to the mode check
        case ModeIndicatorEnum:
            // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
            if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
                return false;
            }
            // falls through to the frequency check, which makes the final decision for this whole chain
        case FrequencyIndicatorEnum:
        case LowFrequencyIndicatorEnum:
            if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL) {
                return true;
            }
            break;
        // MOD zshen 2010-01-27 Date Pattern frequency indicator
        case DatePatternFreqIndicatorEnum:
            // Java engine only, on date or text columns.
            if (isJavaEngine && (Java2SqlType.isDateInSQL(javaType) || Java2SqlType.isTextInSQL(javaType))) {
                return true;
            }
            break;
        // MOD mzhao 2009-03-05 Soundex frequency indicator
        case SoundexIndicatorEnum:
        case SoundexLowIndicatorEnum:
            if (!Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                if (isHiveSQL || isVerticaSQL) {
                    return false;
                }
                // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
                if (isTeradataInterval > 0 && isSQLEngine) {
                    return false;
                }
                return true;
            }
            break;
        case MeanIndicatorEnum:
        case MedianIndicatorEnum:
        case IQRIndicatorEnum:
        case LowerQuartileIndicatorEnum:
        case UpperQuartileIndicatorEnum:
            // graphics and database yet.
            if (Java2SqlType.isNumbericInSQL(javaType)) /* || Java2SqlType.isDateInSQL(javaType) */
            {
                if (dataminingType == DataminingType.INTERVAL) {
                    // With Hive and the SQL engine only the mean is allowed from this group.
                    if (isHiveSQL && !(indicatorType == IndicatorEnum.MeanIndicatorEnum)) {
                        return false;
                    }
                    return true;
                }
            }
            break;
        case BoxIIndicatorEnum:
        case RangeIndicatorEnum:
        case MinValueIndicatorEnum:
        case MaxValueIndicatorEnum:
            // the graphics and database yet.
            if (Java2SqlType.isNumbericInSQL(javaType) || Java2SqlType.isDateInSQL(javaType)) {
                if (dataminingType == DataminingType.INTERVAL) {
                    return true;
                }
            }
            break;
        case DateFrequencyIndicatorEnum:
        case WeekFrequencyIndicatorEnum:
        case MonthFrequencyIndicatorEnum:
        case QuarterFrequencyIndicatorEnum:
        case YearFrequencyIndicatorEnum:
        case DateLowFrequencyIndicatorEnum:
        case WeekLowFrequencyIndicatorEnum:
        case MonthLowFrequencyIndicatorEnum:
        case QuarterLowFrequencyIndicatorEnum:
        case YearLowFrequencyIndicatorEnum:
            // Mod yyin 20120511 TDQ-5241
            if (Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isTimeSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                return true;
            }
            break;
        case BinFrequencyIndicatorEnum:
        case BinLowFrequencyIndicatorEnum:
            if (Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                return true;
            }
            break;
        // Phone-number and format-frequency indicators: Java engine only.
        case ValidPhoneCountIndicatorEnum:
        case PossiblePhoneCountIndicatorEnum:
        case ValidRegCodeCountIndicatorEnum:
        case InvalidRegCodeCountIndicatorEnum:
        case WellFormE164PhoneCountIndicatorEnum:
        case WellFormIntePhoneCountIndicatorEnum:
        case WellFormNationalPhoneCountIndicatorEnum:
        case PhoneNumbStatisticsIndicatorEnum:
        case FormatFreqPieIndictorEnum:
            if (isJavaEngine && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
                return true;
            }
            break;
        case SqlPatternMatchingIndicatorEnum:
            if (node == null) {
                return false;
            }
            if (!isSQLEngine) {
                return false;
            }
            // pattern and returnExpression are declared here and reused by the later cases of this switch.
            Pattern pattern = IndicatorHelper.getPattern(indicator);
            Expression returnExpression = dbmsLanguage.getRegexp(pattern);
            if (returnExpression != null) {
                return true;
            }
            break;
        case RegexpMatchingIndicatorEnum:
            if (node == null) {
                return false;
            }
            // Allowed only when the DBMS language yields a regular expression for the indicator's pattern.
            pattern = IndicatorHelper.getPattern(indicator);
            returnExpression = dbmsLanguage.getRegexp(pattern);
            if (returnExpression != null) {
                return true;
            }
            break;
        case UserDefinedIndicatorEnum:
            // judge language
            if (node == null) {
                return false;
            }
            Indicator judi = null;
            try {
                judi = UDIHelper.adaptToJavaUDI(indicator);
            } catch (Throwable e) {
                // Any failure while adapting the indicator disables it.
                return false;
            }
            if (judi != null) {
                indicator = judi;
            }
            returnExpression = dbmsLanguage.getExpression(indicator);
            // Java engine requires an adapted Java UDI; SQL engine requires an expression from the DBMS language.
            if (isJavaEngine && judi == null || isSQLEngine && returnExpression == null) {
                return false;
            }
            return true;
        default:
            return false;
    }
    // Reached by every case that ended with break without accepting the indicator.
    return false;
}
Also used : DataminingType(org.talend.dataquality.indicators.DataminingType) DbmsLanguage(org.talend.dq.dbms.DbmsLanguage) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) Pattern(org.talend.dataquality.domain.pattern.Pattern) TdColumn(org.talend.cwm.relational.TdColumn) Expression(orgomg.cwm.objectmodel.core.Expression) Connection(org.talend.core.model.metadata.builder.connection.Connection) ModelElementIndicator(org.talend.dataprofiler.core.model.ModelElementIndicator) Indicator(org.talend.dataquality.indicators.Indicator)

Example 4 with DataminingType

use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.

Method setDatamingType of the class ColumnCorrelationAnalysisHandler.

/**
 * Assigns a datamining type to a column and flags the column's resource as modified.
 * <p>
 * The literal is resolved through {@code DataminingType.get(String)} and the result is handed to
 * {@code MetadataHelper.setDataminingType} unchanged.
 *
 * @param dataminingTypeLiteral the literal expression of the datamining type used for the analysis
 * @param column the column that receives the type
 */
public void setDatamingType(String dataminingTypeLiteral, TdColumn column) {
    MetadataHelper.setDataminingType(DataminingType.get(dataminingTypeLiteral), column);
    final Resource owningResource = column.eResource();
    if (owningResource == null) {
        // Column is not contained in a resource: nothing to flag.
        return;
    }
    // Tell that the resource has been modified.
    // It would be better to handle modifications with EMF Commands.
    owningResource.setModified(true);
}
Also used : DataminingType(org.talend.dataquality.indicators.DataminingType) Resource(org.eclipse.emf.ecore.resource.Resource)

Example 5 with DataminingType

use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.

Method setDatamingType of the class ModelElementAnalysisHandler.

/**
 * Assigns a datamining type to a model element and flags the element's resource as modified.
 * <p>
 * Only {@link MetadataColumn} instances are handled; for any other element the method returns
 * without changing anything.
 *
 * @param dataminingTypeLiteral the literal of the datamining type, resolved through
 * {@code DataminingType.get(String)}
 * @param modelElement the element that receives the type
 */
public void setDatamingType(String dataminingTypeLiteral, ModelElement modelElement) {
    final DataminingType resolvedType = DataminingType.get(dataminingTypeLiteral);
    if (!(modelElement instanceof MetadataColumn)) {
        return;
    }
    MetadataHelper.setDataminingType(resolvedType, modelElement);
    final Resource owningResource = modelElement.eResource();
    if (owningResource == null) {
        // Element is not contained in a resource: nothing to flag.
        return;
    }
    // Tell that the resource has been modified.
    // It would be better to handle modifications with EMF Commands.
    owningResource.setModified(true);
}
Also used : DataminingType(org.talend.dataquality.indicators.DataminingType) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) Resource(org.eclipse.emf.ecore.resource.Resource)

Aggregations

DataminingType (org.talend.dataquality.indicators.DataminingType)19 TdColumn (org.talend.cwm.relational.TdColumn)8 ModelElement (orgomg.cwm.objectmodel.core.ModelElement)8 ModelElementIndicator (org.talend.dataprofiler.core.model.ModelElementIndicator)6 MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn)5 Indicator (org.talend.dataquality.indicators.Indicator)4 Resource (org.eclipse.emf.ecore.resource.Resource)3 CCombo (org.eclipse.swt.custom.CCombo)3 TreeEditor (org.eclipse.swt.custom.TreeEditor)3 MouseAdapter (org.eclipse.swt.events.MouseAdapter)3 MouseEvent (org.eclipse.swt.events.MouseEvent)3 SelectionAdapter (org.eclipse.swt.events.SelectionAdapter)3 SelectionEvent (org.eclipse.swt.events.SelectionEvent)3 Label (org.eclipse.swt.widgets.Label)3 TreeItem (org.eclipse.swt.widgets.TreeItem)3 RepositoryNode (org.talend.repository.model.RepositoryNode)3 ArrayList (java.util.ArrayList)2 Connection (org.talend.core.model.metadata.builder.connection.Connection)2 ColumnsetFactory (org.talend.dataquality.indicators.columnset.ColumnsetFactory)2 IFolder (org.eclipse.core.resources.IFolder)1