Search in sources :

Example 1 with ColumnSetMultiValueIndicator

use of org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator in project tdq-studio-se by Talend.

the class IndicatorsComp method setInput.

/**
 * Sets the input of this composite from the given indicator objects.
 * <p>
 * For each SimpleStatIndicator, CountAvgNullIndicator, MinMaxDateIndicator or WeakCorrelationIndicator, the
 * object is assigned to {@code columnSetIndicator} and one unit is created per leaf indicator returned by
 * {@code IndicatorHelper.getIndicatorLeavesBySingleNode}. For each AllMatchIndicator a single unit is created
 * for the indicator itself. Any other object is ignored. The collected units are finally handed to
 * {@code setElements}.
 *
 * @param obj the candidate indicator objects
 */
public void setInput(Object... obj) {
    final List<ColumnSetIndicatorUnit> units = new ArrayList<ColumnSetIndicatorUnit>();
    for (Object candidate : obj) {
        final boolean isCompositeIndicator = candidate instanceof SimpleStatIndicator
                || candidate instanceof CountAvgNullIndicator
                || candidate instanceof MinMaxDateIndicator
                || candidate instanceof WeakCorrelationIndicator;
        if (isCompositeIndicator) {
            // composite indicator: expose each of its leaves as a separate unit
            columnSetIndicator = (ColumnSetMultiValueIndicator) candidate;
            for (Indicator leaf : IndicatorHelper.getIndicatorLeavesBySingleNode(columnSetIndicator)) {
                units.add(new ColumnSetIndicatorUnit(IndicatorEnum.findIndicatorEnum(leaf.eClass()), leaf));
            }
            continue;
        }
        // MOD msjian TDQ-8860: we always show the allMatchIndicator in the Indicators section
        if (candidate instanceof AllMatchIndicator) {
            units.add(new ColumnSetIndicatorUnit(IndicatorEnum.AllMatchIndicatorEnum, (AllMatchIndicator) candidate));
        }
        // TDQ-8860~
    }
    final ColumnSetIndicatorUnit[] elements = new ColumnSetIndicatorUnit[units.size()];
    setElements(units.toArray(elements));
}
Also used : WeakCorrelationIndicator(org.talend.dataquality.indicators.columnset.WeakCorrelationIndicator) IndicatorEnum(org.talend.dq.nodes.indicator.type.IndicatorEnum) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) ArrayList(java.util.ArrayList) MinMaxDateIndicator(org.talend.dataquality.indicators.columnset.MinMaxDateIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) CountAvgNullIndicator(org.talend.dataquality.indicators.columnset.CountAvgNullIndicator) ColumnSetMultiValueIndicator(org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator) MinMaxDateIndicator(org.talend.dataquality.indicators.columnset.MinMaxDateIndicator) CountAvgNullIndicator(org.talend.dataquality.indicators.columnset.CountAvgNullIndicator) Indicator(org.talend.dataquality.indicators.Indicator) WeakCorrelationIndicator(org.talend.dataquality.indicators.columnset.WeakCorrelationIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) ColumnSetIndicatorUnit(org.talend.dataprofiler.core.ui.editor.preview.ColumnSetIndicatorUnit)

Example 2 with ColumnSetMultiValueIndicator

use of org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator in project tdq-studio-se by Talend.

the class ColumnSetAnalysisExecutor method runAnalysis.

/*
 * (non-Javadoc)
 *
 * Registers every column-set multi-value indicator of the analysis with a ColumnSetIndicatorEvaluator,
 * opens a connection unless the source is a delimited file, evaluates the indicators and releases the
 * connection. Returns false when the connection cannot be initialized, otherwise the status of the
 * evaluation.
 *
 * @see org.talend.dq.analysis.AnalysisExecutor#runAnalysis(org.talend.dataquality.analysis.Analysis,
 * java.lang.String)
 */
@Override
protected boolean runAnalysis(Analysis analysis, String sqlStatement) {
    ColumnSetIndicatorEvaluator eval = createIndicatorEvaluator(analysis);
    eval.setMonitor(getMonitor());
    // --- add indicators
    EList<Indicator> indicators = analysis.getResults().getIndicators();
    for (Indicator indicator : indicators) {
        if (ColumnsetPackage.eINSTANCE.getColumnSetMultiValueIndicator().isSuperTypeOf(indicator.eClass())) {
            ColumnSetMultiValueIndicator colSetMultValIndicator = (ColumnSetMultiValueIndicator) indicator;
            colSetMultValIndicator.prepare();
            eval.storeIndicator(indicator.getName(), colSetMultValIndicator);
        }
    }
    TypedReturnCode<java.sql.Connection> connection = null;
    // MOD yyi 2011-02-22 17871:delimitefile
    if (!isDelimitedFile) {
        connection = initConnection(analysis, eval);
        if (!connection.isOk()) {
            return false;
        }
    }
    // when to close connection
    boolean closeAtTheEnd = true;
    ReturnCode rc;
    try {
        rc = eval.evaluateIndicators(sqlStatement, closeAtTheEnd);
    } finally {
        // release the connection even when the evaluation throws, so it is never leaked
        if (connection != null) {
            if (POOLED_CONNECTION) {
                // release the pooled connection
                resetConnectionPool(analysis);
            } else {
                ConnectionUtils.closeConnection(connection.getObject());
            }
        }
    }
    if (!rc.isOk()) {
        traceError(rc.getMessage());
    }
    if (getMonitor() != null) {
        getMonitor().worked(compIndicatorsWorked);
    }
    return rc.isOk();
}
Also used : ColumnSetIndicatorEvaluator(org.talend.dq.indicators.ColumnSetIndicatorEvaluator) TypedReturnCode(org.talend.utils.sugars.TypedReturnCode) ReturnCode(org.talend.utils.sugars.ReturnCode) Connection(java.sql.Connection) ColumnSetMultiValueIndicator(org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator) ColumnSetMultiValueIndicator(org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator) RegexpMatchingIndicator(org.talend.dataquality.indicators.RegexpMatchingIndicator) Indicator(org.talend.dataquality.indicators.Indicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator)

Example 3 with ColumnSetMultiValueIndicator

use of org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator in project tdq-studio-se by Talend.

the class MultiColumnAnalysisExecutor method instantiateQuery.

/**
 * Builds the SQL query of a column-set multi-value indicator and stores it on the indicator as its
 * instantiated expression. Indicators whose class is not a ColumnSetMultiValueIndicator are left untouched.
 * <p>
 * The select list is made of the grouping columns followed by the numeric/date functions applied to the
 * numeric/date columns and the indicator's count-all expression. For a SimpleStatIndicator, each leaf
 * indicator additionally gets its own instantiated expression built from the grouping columns only.
 *
 * @param indicator the indicator whose query must be instantiated
 */
private void instantiateQuery(Indicator indicator) {
    // only column-set multi-value indicators are handled here
    if (!ColumnsetPackage.eINSTANCE.getColumnSetMultiValueIndicator().isSuperTypeOf(indicator.eClass())) {
        return;
    }
    ColumnSetMultiValueIndicator colSetMultValIndicator = (ColumnSetMultiValueIndicator) indicator;
    final EList<ModelElement> analyzedColumns = colSetMultValIndicator.getAnalyzedColumns();
    final EList<String> numericFunctions = initializeNumericFunctions(colSetMultValIndicator);
    final EList<String> dateFunctions = initializeDateFunctions(colSetMultValIndicator);
    // ADD msjian 2011-5-30 17479: Excel Odbc connection can not run well on the correlation analysis
    // note: this feature is not supported now, if support, delete this
    final String caseStr = "SUM(CASE WHEN {0} IS NULL THEN 1 ELSE 0 END)"; //$NON-NLS-1$
    if ("EXCEL".equals(dbms().getDbmsName()) //$NON-NLS-1$
            && (dateFunctions.contains(caseStr) || numericFunctions.contains(caseStr))) {
        setError(Messages.getString("MultiColumnAnalysisExecutor.errMessage")); //$NON-NLS-1$
        Display.getDefault().syncExec(new Runnable() {

            public void run() {
                MessageDialog.openWarning(PlatformUI.getWorkbench().getActiveWorkbenchWindow().getShell(),
                        Messages.getString("MultiColumnAnalysisExecutor.errTitle"), //$NON-NLS-1$
                        getErrorMessage());
            }
        });
        // NOTE(review): the query is still generated after the warning has been shown; confirm whether
        // instantiation should be skipped altogether for EXCEL.
    }
    // ~
    // get indicator definition
    final Expression sqlGenericExpression = dbms().getSqlExpression(indicator.getIndicatorDefinition());
    // separate nominal from numeric columns
    List<String> nominalColumns = new ArrayList<String>();
    for (ModelElement column : colSetMultValIndicator.getNominalColumns()) {
        nominalColumns.add(getQuotedColumnName(column));
    }
    List<String> computedColumns = new ArrayList<String>();
    for (ModelElement column : colSetMultValIndicator.getNumericColumns()) {
        // call functions for each column
        for (String f : numericFunctions) {
            computedColumns.add(replaceVariablesLow(f, getQuotedColumnName(column)));
        }
    }
    for (ModelElement column : colSetMultValIndicator.getDateColumns()) {
        // call functions for each column
        for (String f : dateFunctions) {
            computedColumns.add(replaceVariablesLow(f, getQuotedColumnName(column)));
        }
    }
    // add count(*)
    computedColumns.add(colSetMultValIndicator.getCountAll());
    // MOD msjian TDQ-7254: fix the columnset analysis run get error. the columnset analysis don't need to
    // consider the datamining type.
    final List<String> columns;
    if (AnalysisType.COLUMN_SET == cachedAnalysis.getParameters().getAnalysisType()) {
        // if the analysis type is columnset, use analyzed columns
        columns = new ArrayList<String>();
        for (ModelElement column : analyzedColumns) {
            columns.add(getQuotedColumnName(column));
        }
    } else {
        columns = nominalColumns;
    }
    // TDQ-7254~
    String selectItems = createSelect(columns, computedColumns);
    String grpByClause = createGroupBy(columns);
    // all columns must belong to the same table
    TdColumn firstColumn = SwitchHelpers.COLUMN_SWITCH.doSwitch(analyzedColumns.get(0));
    String tableName = dbms().getQueryColumnSetWithPrefix(firstColumn);
    this.catalogOrSchema = dbms().getCatalogOrSchemaName(firstColumn);
    // definition is SELECT <%=__COLUMN_NAMES__%> FROM <%=__TABLE_NAME__%> GROUP BY
    // <%=__GROUP_BY_ALIAS__%>
    String sqlExpr = dbms().fillGenericQueryWithColumnTableAndAlias(sqlGenericExpression.getBody(), selectItems,
            tableName, grpByClause);
    // handle data filter
    String stringDataFilter = ContextHelper.getDataFilterWithoutContext(cachedAnalysis);
    if (stringDataFilter == null) {
        stringDataFilter = PluginConstant.EMPTY_STRING;
    }
    sqlExpr = dbms().addWhereToStatement(sqlExpr, stringDataFilter);
    indicator.setInstantiatedExpression(
            BooleanExpressionHelper.createTdExpression(sqlGenericExpression.getLanguage(), sqlExpr));
    // MOD qiongli 2011-3-30 feature 19192.allow drill down for sql engine.
    if (ColumnsetPackage.eINSTANCE.getSimpleStatIndicator().isSuperTypeOf(indicator.eClass())) {
        SimpleStatIndicator simpleIndicator = (SimpleStatIndicator) indicator;
        // MOD TDQ-7287 lost some columns(type!=norminal) when view values in column set ana. yyin 20130514
        String columnsName = createSelect(columns, new ArrayList<String>());
        for (Indicator leafIndicator : simpleIndicator.getLeafIndicators()) {
            final Expression leafSqlGenericExpression = dbms().getSqlExpression(leafIndicator.getIndicatorDefinition());
            String leafSqlExpr = dbms().fillGenericQueryWithColumnTableAndAlias(leafSqlGenericExpression.getBody(),
                    columnsName, tableName, grpByClause);
            leafSqlExpr = dbms().addWhereToStatement(leafSqlExpr, stringDataFilter);
            leafIndicator.setInstantiatedExpression(
                    BooleanExpressionHelper.createTdExpression(leafSqlGenericExpression.getLanguage(), leafSqlExpr));
        }
    }
}
Also used : ArrayList(java.util.ArrayList) ColumnSetMultiValueIndicator(org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator) ColumnSetMultiValueIndicator(org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator) Indicator(org.talend.dataquality.indicators.Indicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) ModelElement(orgomg.cwm.objectmodel.core.ModelElement) TdColumn(org.talend.cwm.relational.TdColumn) Expression(orgomg.cwm.objectmodel.core.Expression) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator)

Example 4 with ColumnSetMultiValueIndicator

use of org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator in project tdq-studio-se by Talend.

the class CorrelationAnalysisDetailsPage method saveAnalysis.

/**
 * Saves the correlation analysis edited in this page.
 * <p>
 * The analysis is cleared and rebuilt from the current page state (execution language, data filter and
 * selected columns). When no column is selected, the connection is reset and a fresh, empty indicator of the
 * same kind as the current one is added to the analysis results. The analysis item is then written, the
 * result is logged and the dirty flags of the tree viewer and the data filter are reset.
 *
 * @throws DataprofilerCoreException declared by the signature; may be propagated from the helpers invoked
 * here (none is thrown directly in this body)
 */
@Override
public void saveAnalysis() throws DataprofilerCoreException {
    for (Domain filterDomain : getCurrentModelElement().getParameters().getDataFilter()) {
        filterDomain.setName(getCurrentModelElement().getName());
    }
    correlationAnalysisHandler.clearAnalysis();
    columnSetMultiIndicator.getAnalyzedColumns().clear();

    // set execute engine
    Analysis analysis = correlationAnalysisHandler.getAnalysis();
    analysis.getParameters().setExecutionLanguage(ExecutionLanguage.get(execLang));

    // set data filter
    correlationAnalysisHandler.setStringDataFilter(dataFilterComp.getDataFilterString());

    // save analysis
    List<RepositoryNode> selectedNodes = treeViewer.getColumnSetMultiValueList();
    if (selectedNodes == null || selectedNodes.isEmpty()) {
        analysis.getContext().setConnection(null);
        // MOD by zshen for bug 12042.
        ColumnsetFactory columnsetFactory = ColumnsetFactory.eINSTANCE;
        ColumnSetMultiValueIndicator emptyIndicator = null;
        if (ColumnsetPackage.eINSTANCE.getCountAvgNullIndicator() == columnSetMultiIndicator.eClass()) {
            emptyIndicator = columnsetFactory.createCountAvgNullIndicator();
        } else if (ColumnsetPackage.eINSTANCE.getMinMaxDateIndicator() == columnSetMultiIndicator.eClass()) {
            emptyIndicator = columnsetFactory.createMinMaxDateIndicator();
        } else if (ColumnsetPackage.eINSTANCE.getWeakCorrelationIndicator() == columnSetMultiIndicator.eClass()) {
            emptyIndicator = columnsetFactory.createWeakCorrelationIndicator();
        }
        fillSimpleIndicators(emptyIndicator);
        analysis.getResults().getIndicators().add(emptyIndicator);
        // ~12042
    } else {
        // the connection is taken from the first selected node
        IRepositoryViewObject firstObject = selectedNodes.get(0).getObject();
        Connection nodeConnection = ((ConnectionItem) firstObject.getProperty().getItem()).getConnection();
        analysis.getContext().setConnection(nodeConnection);

        List<TdColumn> selectedColumns = new ArrayList<TdColumn>();
        for (RepositoryNode node : selectedNodes) {
            MetadataColumnRepositoryObject columnObject = (MetadataColumnRepositoryObject) node.getObject();
            selectedColumns.add((TdColumn) columnObject.getTdColumn());
        }
        columnSetMultiIndicator.getAnalyzedColumns().addAll(selectedColumns);
        correlationAnalysisHandler.addIndicator(selectedColumns, columnSetMultiIndicator);
    }

    // save the number of connections per analysis
    this.saveNumberOfConnectionsPerAnalysis();

    // 2011.1.12 MOD by zhsne to unify anlysis and connection id when saving.
    this.nameText.setText(analysis.getName());
    // ~

    // MOD yyi 2012-02-08 TDQ-4621:Explicitly set true for updating dependencies.
    ReturnCode saved = ElementWriterFactory.getInstance().createAnalysisWrite()
            .save(getCurrentRepNode().getObject().getProperty().getItem(), true);

    // MOD yyi 2012-02-03 TDQ-3602:Avoid to rewriting all analyzes after saving, no reason to update all analyzes
    // which is depended in the referred connection.
    // Extract saving log function.
    // @see org.talend.dataprofiler.core.ui.editor.analysis.AbstractAnalysisMetadataPage#logSaved(ReturnCode)
    logSaved(saved);

    treeViewer.setDirty(false);
    dataFilterComp.setDirty(false);
}
Also used : ReturnCode(org.talend.utils.sugars.ReturnCode) Connection(org.talend.core.model.metadata.builder.connection.Connection) ArrayList(java.util.ArrayList) ColumnsetFactory(org.talend.dataquality.indicators.columnset.ColumnsetFactory) RepositoryNode(org.talend.repository.model.RepositoryNode) IRepositoryNode(org.talend.repository.model.IRepositoryNode) ColumnSetMultiValueIndicator(org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator) TdColumn(org.talend.cwm.relational.TdColumn) Analysis(org.talend.dataquality.analysis.Analysis) IRepositoryViewObject(org.talend.core.model.repository.IRepositoryViewObject) MetadataColumnRepositoryObject(org.talend.core.repository.model.repositoryObject.MetadataColumnRepositoryObject) Domain(org.talend.dataquality.domain.Domain)

Example 5 with ColumnSetMultiValueIndicator

use of org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator in project tdq-studio-se by Talend.

the class MultiColAnalysisCreationTest method getIndicator.

/**
 * Creates a ColumnSetMultiValueIndicator for the given columns.
 * <p>
 * The columns are added to the indicator's analyzed columns, the default indicator definition is requested
 * from the DefinitionHandler (the outcome is logged at debug level) and every entry of {@code NUMERICFUNC}
 * is added to the indicator's numeric functions.
 *
 * @param columns the columns to analyze
 * @return the newly created indicator
 */
private ColumnSetMultiValueIndicator getIndicator(List<TdColumn> columns) {
    final ColumnSetMultiValueIndicator indicator = ColumnsetFactory.eINSTANCE.createColumnSetMultiValueIndicator();
    indicator.getAnalyzedColumns().addAll(columns);

    final boolean hasDefinition = DefinitionHandler.getInstance().setDefaultIndicatorDefinition(indicator);
    if (log.isDebugEnabled()) {
        final String message = "Definition set for " + indicator.getName() + ": " + hasDefinition; //$NON-NLS-1$ //$NON-NLS-2$
        log.debug(message);
    }

    for (String numericFunction : NUMERICFUNC) {
        indicator.getNumericFunctions().add(numericFunction);
    }
    return indicator;
}
Also used : ColumnSetMultiValueIndicator(org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator)

Aggregations

ColumnSetMultiValueIndicator (org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator): 9; Indicator (org.talend.dataquality.indicators.Indicator): 6; AllMatchIndicator (org.talend.dataquality.indicators.columnset.AllMatchIndicator): 5; TdColumn (org.talend.cwm.relational.TdColumn): 4; ModelElement (orgomg.cwm.objectmodel.core.ModelElement): 4; ArrayList (java.util.ArrayList): 3; EObject (org.eclipse.emf.ecore.EObject): 2; RegexpMatchingIndicator (org.talend.dataquality.indicators.RegexpMatchingIndicator): 2; SimpleStatIndicator (org.talend.dataquality.indicators.columnset.SimpleStatIndicator): 2; ReturnCode (org.talend.utils.sugars.ReturnCode): 2; ColumnSet (orgomg.cwm.resource.relational.ColumnSet): 2; Connection (java.sql.Connection): 1; Connection (org.talend.core.model.metadata.builder.connection.Connection): 1; IRepositoryViewObject (org.talend.core.model.repository.IRepositoryViewObject): 1; MetadataColumnRepositoryObject (org.talend.core.repository.model.repositoryObject.MetadataColumnRepositoryObject): 1; ColumnSetIndicatorUnit (org.talend.dataprofiler.core.ui.editor.preview.ColumnSetIndicatorUnit): 1; Analysis (org.talend.dataquality.analysis.Analysis): 1; AnalysisType (org.talend.dataquality.analysis.AnalysisType): 1; Domain (org.talend.dataquality.domain.Domain): 1; DataminingType (org.talend.dataquality.indicators.DataminingType): 1