Search in sources :

Example 6 with UniqueCountIndicator

use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.

the class CountsIndicatorImpl method basicSetUniqueCountIndicator.

/**
 * <!-- begin-user-doc -->
 * Stores the given indicator in the {@code uniqueCountIndicator} field and, when notification is
 * required, records a {@code SET} notification carrying the previous and the new value. The
 * notification becomes the returned chain when {@code msgs} is {@code null}; otherwise it is added
 * to {@code msgs}, which is returned.
 * <!-- end-user-doc -->
 * @generated
 */
public NotificationChain basicSetUniqueCountIndicator(UniqueCountIndicator newUniqueCountIndicator, NotificationChain msgs) {
    final UniqueCountIndicator previousIndicator = uniqueCountIndicator;
    uniqueCountIndicator = newUniqueCountIndicator;
    // nothing to record when no notification is required: hand the chain back untouched
    if (!eNotificationRequired()) {
        return msgs;
    }
    ENotificationImpl setEvent = new ENotificationImpl(this, Notification.SET,
            IndicatorsPackage.COUNTS_INDICATOR__UNIQUE_COUNT_INDICATOR, previousIndicator, newUniqueCountIndicator);
    if (msgs == null) {
        // no chain yet: the notification itself is returned as the chain
        return setEvent;
    }
    msgs.add(setEvent);
    return msgs;
}
Also used : ENotificationImpl(org.eclipse.emf.ecore.impl.ENotificationImpl) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator)

Example 7 with UniqueCountIndicator

use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.

the class IndicatorEvaluationMain method main.

/**
 * Manual entry point that evaluates a fixed set of indicators on the columns of table
 * {@code test.my_test}, prints the results and saves them to {@code out/columnTest_0.1.ana}.
 * <p>
 * Connection settings (driver class and URL) are read from {@code db.properties}. SQL, instantiation and
 * class-loading failures are only logged.
 * <p>
 * NOTE(review): {@code rContents} and {@code evaluator} are not declared in this method; presumably they
 * are static members of the enclosing class - confirm. The same applies to {@code addIndicator},
 * {@code sqlSelectColumns}, {@code printIndicators} and {@code log}.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
    TypedProperties connectionParams = PropertiesLoader.getProperties(IndicatorEvaluator.class, "db.properties");
    String driverClassName = connectionParams.getProperty("driver");
    String dbUrl = connectionParams.getProperty("url");
    try {
        TimeTracer tt = new TimeTracer("Indicator evaluation", null);
        tt.start();
        // create connection
        // NOTE(review): the connection is not closed in this method; verify that the evaluator releases it
        Connection connection = ConnectionUtils.createConnection(dbUrl, driverClassName, connectionParams);
        String database = "test";
        String tableName = "my_test";
        // --- columns to analyze (the trailing comment on each entry is its index in the array)
        String[] columnsArray = new String[] {
        "my_int", // 0
        "my_double", // 1
        "my_text", // 2
        "my_date", // 3
        "my_string", // 4
        "my_int_null" }; // 5
        List<String> columns = Arrays.asList(columnsArray);
        // store in file
        File file = new File("out/columnTest_0.1.ana");
        EMFUtil util = new EMFUtil();
        Resource resource = util.getResourceSet().createResource(URI.createFileURI(file.getAbsolutePath()));
        rContents = resource.getContents();
        evaluator.setConnection(connection);
        // --- create indicators
        // Several of the indicators below (distinct, unique, duplicate, mode, frequency) are created but only
        // referenced from commented-out code, so they are never registered with addIndicator in this method.
        RowCountIndicator rowCountIndicator = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
        NullCountIndicator nullCountIndicator = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
        DistinctCountIndicator distinctCountIndicator = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        DistinctCountIndicator distinctCountIndicator2 = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        UniqueCountIndicator uniqueCountIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
        DuplicateCountIndicator duplicateCountIndicator = IndicatorsFactory.eINSTANCE.createDuplicateCountIndicator();
        BlankCountIndicator blankCountIndicator = IndicatorsFactory.eINSTANCE.createBlankCountIndicator();
        MinLengthIndicator minLengthIndicator = IndicatorsFactory.eINSTANCE.createMinLengthIndicator();
        MaxLengthIndicator maxLengthIndicator = IndicatorsFactory.eINSTANCE.createMaxLengthIndicator();
        AverageLengthIndicator averageLengthIndicator = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        AverageLengthIndicator averageLengthIndicator2 = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        ModeIndicator modeIndicator = IndicatorsFactory.eINSTANCE.createModeIndicator();
        FrequencyIndicator textFrequencyIndicator = IndicatorsFactory.eINSTANCE.createFrequencyIndicator();
        // store in freq indic
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator);
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator2);
        // textFrequencyIndicator.setUniqueCountIndicator(uniqueCountIndicator);
        // textFrequencyIndicator.setDuplicateCountIndicator(duplicateCountIndicator);
        // textFrequencyIndicator.setModeIndicator(modeIndicator);
        MeanIndicator doubleMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MeanIndicator integerMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MedianIndicator medianIndicator = IndicatorsFactory.eINSTANCE.createMedianIndicator();
        SumIndicator integerSumIndicator = IndicatorsFactory.eINSTANCE.createSumIndicator();
        // --- register each indicator on the column it analyzes (column chosen by array index)
        addIndicator(columnsArray[0], medianIndicator);
        addIndicator(columnsArray[1], doubleMeanIndicator);
        addIndicator(columnsArray[2], blankCountIndicator);
        addIndicator(columnsArray[5], nullCountIndicator);
        // addIndicator(columnsArray[2], textFrequencyIndicator);
        // addIndicator(columnsArray[2], distinctCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], uniqueCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], duplicateCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], modeIndicator); // probably not useful?
        addIndicator(columnsArray[3], rowCountIndicator);
        addIndicator(columnsArray[5], integerSumIndicator);
        addIndicator(columnsArray[5], integerMeanIndicator);
        addIndicator(columnsArray[2], averageLengthIndicator);
        addIndicator(columnsArray[3], averageLengthIndicator2);
        addIndicator(columnsArray[3], minLengthIndicator);
        addIndicator(columnsArray[3], maxLengthIndicator);
        // build query on columns
        // TODO scorreia add filter somewhere here...
        String selectCols = sqlSelectColumns(database, tableName, columns);
        // --- create a description of the column set
        // NOTE(review): queryExpression is populated here but not read again in this method
        QueryExpression queryExpression = DatatypesFactory.eINSTANCE.createQueryExpression();
        queryExpression.setBody(selectCols);
        // TODO scorreia externalize this as a constant
        queryExpression.setLanguage("SQL");
        // --- run the evaluation, timed under the "compute" label
        tt.start("compute");
        evaluator.setFetchSize(10000);
        evaluator.evaluateIndicators(selectCols, true);
        tt.end("compute");
        // Print the median, then the unique and distinct value counts
        System.out.println("Median=" + medianIndicator.getMedian());
        // NOTE(review): textFrequencyIndicator was never registered above (its addIndicator call is commented
        // out), so the two counts printed below come from an indicator this method did not register
        System.out.println("# Unique values= " + textFrequencyIndicator.getUniqueValueCount());
        System.out.println("# Distinct values= " + textFrequencyIndicator.getDistinctValueCount());
        // Print the indicators registered for every analyzed column
        for (String col : columns) {
            printIndicators(evaluator.getIndicators(col));
        }
        // --- save through the EMF utility, timed under the "save" label
        tt.start("save");
        util.save();
        tt.end("saved in " + file.getAbsolutePath());
        tt.end();
        // Print both identifiers of the median indicator: the one given by EcoreUtil and the one given by the
        // resource. The cast assumes the created resource is a CwmResource.
        CwmResource cwmR = (CwmResource) resource;
        String id = cwmR.getID(medianIndicator);
        System.out.println("ecore util.getId= " + EcoreUtil.getID(medianIndicator));
        System.out.println("uuId= " + id);
    // test reload this file
    // LoadSerialData.main(args);
    } catch (SQLException e) {
        // TODO handle properly; the failure is currently only logged
        log.error(e, e);
    } catch (InstantiationException e) {
        // TODO handle properly; the failure is currently only logged
        log.error(e, e);
    } catch (IllegalAccessException e) {
        // TODO handle properly; the failure is currently only logged
        log.error(e, e);
    } catch (ClassNotFoundException e) {
        // TODO handle properly; the failure is currently only logged
        log.error(e, e);
    }
}
Also used : SumIndicator(org.talend.dataquality.indicators.SumIndicator) SQLException(java.sql.SQLException) BlankCountIndicator(org.talend.dataquality.indicators.BlankCountIndicator) MedianIndicator(org.talend.dataquality.indicators.MedianIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) ModeIndicator(org.talend.dataquality.indicators.ModeIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) QueryExpression(orgomg.cwm.foundation.datatypes.QueryExpression) MeanIndicator(org.talend.dataquality.indicators.MeanIndicator) CwmResource(org.talend.model.emf.CwmResource) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) Connection(java.sql.Connection) CwmResource(org.talend.model.emf.CwmResource) Resource(org.eclipse.emf.ecore.resource.Resource) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) TypedProperties(org.talend.utils.properties.TypedProperties) TimeTracer(org.talend.utils.time.TimeTracer) AverageLengthIndicator(org.talend.dataquality.indicators.AverageLengthIndicator) EMFUtil(org.talend.commons.emf.EMFUtil) File(java.io.File)

Example 8 with UniqueCountIndicator

use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.

the class ColumnSetDBMapTest method testSubListLongLongMapOfLongListOfObjectDataValidationCase1.

/**
 * Test method for
 * {@link org.talend.dataquality.indicators.mapdb.ColumnSetDBMap#subList(long, long, java.util.Map, org.talend.cwm.indicator.DataValidation)}
 * Case 1: unique case. Two different keys are stored with the values 1 and 2, and the sub list requested
 * over the range 0..2 with a unique-count validation is expected to contain exactly one element.
 */
@Test
public void testSubListLongLongMapOfLongListOfObjectDataValidationCase1() {
    ColumnSetDBMap map = new ColumnSetDBMap();
    Assert.assertTrue(map.isEmpty());

    // first entry: key (id1, name1) -> 1
    List<Object> firstKey = new ArrayList<Object>();
    firstKey.add("id1"); //$NON-NLS-1$
    firstKey.add("name1"); //$NON-NLS-1$
    map.put(firstKey, 1L);

    // second entry: key (id2, name2) -> 2
    List<Object> secondKey = new ArrayList<Object>();
    secondKey.add("id2"); //$NON-NLS-1$
    secondKey.add("name2"); //$NON-NLS-1$
    map.put(secondKey, 2L);

    Assert.assertEquals(2, map.size());

    UniqueCountIndicator indicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
    HashMap<Long, List<Object>> emptyMap = new HashMap<Long, List<Object>>();
    List<Object[]> result = map.subList(0, 2, emptyMap, IDataValidationFactory.INSTANCE.createValidation(indicator));
    Assert.assertEquals(1, result.size());
}
Also used : ColumnSetDBMap(org.talend.dataquality.indicators.mapdb.ColumnSetDBMap) ArrayList(java.util.ArrayList) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 9 with UniqueCountIndicator

use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.

the class IndicatorEvaluator method executeSqlQuery.

/**
 * Executes the given SQL statement and hands every returned row to the indicators attached to the
 * analyzed columns, optionally keeping row data for drill down.
 * <p>
 * The result set and the statement are released in a {@code finally} block, so they are closed even when
 * reading a row or handling an indicator throws. The connection is closed only after the whole result set
 * has been processed (or the run has been interrupted through {@code continueRun()}), exactly as before.
 *
 * @param sqlStatement the query to run; also used to order the analyzed column names
 * @return a failed code when no analyzed column is defined or when the statement yields no result set;
 * otherwise the code returned by {@code getMessageForInvalidJUDIs()}
 * @throws SQLException if executing the query, reading the result set or closing a JDBC resource fails
 */
@Override
protected ReturnCode executeSqlQuery(String sqlStatement) throws SQLException {
    ReturnCode ok = new ReturnCode(true);
    // check analyzed columns
    Set<String> columns = getAnalyzedElements();
    // feature 0010630 zshen: keep the column order consistent with the column names in the sqlStatement
    // (needed by mssqlOdbc)
    List<String> columnlist = sortColumnName(columns, sqlStatement);
    if (columnlist.isEmpty()) {
        ok.setReturnCode(Messages.getString("IndicatorEvaluator.DefineAnalyzedColumns"), false); //$NON-NLS-1$
        return ok;
    }
    // ADD xqliu 2010-07-27 bug 13826
    Map<String, String> columnlistMap = buildColumnListMap(columnlist);
    // ~ 13826
    // create query statement
    // feature 0010630 zshen: Tables are not found when using Excel with ODBC connection
    Statement statement = createStatement();
    ResultSet resultSet = null;
    try {
        // MOD xqliu 2009-02-09 bug 6237
        if (continueRun()) {
            if (log.isInfoEnabled()) {
                log.info("Executing query: " + sqlStatement); //$NON-NLS-1$
            }
            statement.execute(sqlStatement);
        }
        // get the results
        resultSet = statement.getResultSet();
        if (resultSet == null) {
            String mess = Messages.getString("Evaluator.NoResultSet", sqlStatement); //$NON-NLS-1$
            log.warn(mess);
            ok.setReturnCode(mess, false);
            // the statement is closed by the finally block below
            return ok;
        }
        // MOD qiongli TDQ-7282: check invalid judi. If there are invalid judis, return a false code and show
        // the message later.
        ok = getMessageForInvalidJUDIs();
        int columnCount = resultSet.getMetaData().getColumnCount();
        int maxNumberRows = analysis.getParameters().getMaxNumberRows();
        // MOD mzhao feature: 12919, add capability to drill down data on Java engine.
        AnalysisResult anaResult = analysis.getResults();
        EMap<Indicator, AnalyzedDataSet> indicToRowMap = anaResult.getIndicToRowMap();
        indicToRowMap.clear();
        int recordIncrement = 0;
        // --- for each row
        int columnListSize = columnlist.size();
        label: while (resultSet.next()) {
            // feature 0010630 zshen: dislodge the qualifiers from the name of the column
            for (int i = 0; i < columnListSize; i++) {
                // MOD xqliu 2010-07-27 bug 13826
                String col = columnlist.get(i);
                List<Indicator> indicators = getIndicators(col);
                col = columnlistMap.get(col);
                // --- get content of column
                Object object = ResultSetUtils.getBigObject(resultSet, col);
                // FIXME this will slow down a lot the computation
                if (object != null && !(object instanceof String) && object.toString().indexOf("TIMESTAMP") > -1) { //$NON-NLS-1$
                    object = resultSet.getTimestamp(col);
                }
                // TDQ-11299: fix the ClassCastException: java.sql.Date cannot be cast to java.lang.String
                if (object instanceof Date) {
                    if (object instanceof Time) {
                        object = new TalendFormatTime((Time) object);
                    } else {
                        object = new TalendFormatDate((Date) object);
                    }
                }
                // --- give row to handle to indicators
                for (Indicator indicator : indicators) {
                    // MOD xqliu 2009-02-09 bug 6237: stop processing rows as soon as the run is interrupted
                    if (!continueRun()) {
                        break label;
                    }
                    // Added yyin 20120608 TDQ-3589
                    if (indicator instanceof DuplicateCountIndicator) {
                        ((DuplicateCountIndicator) indicator).handle(object, resultSet, columnCount);
                    } else {
                        indicator.handle(object);
                    }
                    // ~MOD mzhao feature: 12919
                    AnalyzedDataSet analyzedDataSet = indicToRowMap.get(indicator);
                    if (analyzedDataSet == null) {
                        analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                        indicToRowMap.put(indicator, analyzedDataSet);
                        analyzedDataSet.setDataCount(maxNumberRows);
                        analyzedDataSet.setRecordSize(0);
                    }
                    // should store data for drill down
                    if (analysis.getParameters().isStoreData()) {
                        // current indicator needs to store the data
                        if (indicator.mustStoreRow()) {
                            List<Object[]> valueObjectList = initDataSet(indicator, indicToRowMap, object);
                            // MOD zshen add another loop to insert all of the column values of the row into
                            // the indicator. The row being built goes at the end of the list.
                            recordIncrement = valueObjectList.size();
                            // MOD klliu 2011-06-30 bug 22523: whichever is Table or View, columns should be
                            // found through the column set
                            ColumnSet doSwitch = SwitchHelpers.COLUMN_SET_SWITCH.doSwitch(indicator.getAnalyzedElement().eContainer());
                            List<TdColumn> columnList = ColumnSetHelper.getColumns(doSwitch);
                            List<Object> inputRowList = new ArrayList<Object>();
                            for (int j = 0; j < columnCount; j++) {
                                String newcol = columnList.get(j).getName();
                                Object newobject = ResultSetUtils.getBigObject(resultSet, newcol);
                                // same format as result page.
                                if (newobject instanceof Date) {
                                    if (newobject instanceof Time) {
                                        newobject = new TalendFormatTime((Time) newobject);
                                    } else {
                                        newobject = new TalendFormatDate((Date) newobject);
                                    }
                                }
                                if (indicator.isUsedMapDBMode()) {
                                    inputRowList.add(newobject == null ? PluginConstant.NULL_STRING : newobject);
                                    continue;
                                } else {
                                    if (recordIncrement < maxNumberRows) {
                                        if (recordIncrement < valueObjectList.size()) {
                                            // the array for this row already exists: fill the next cell
                                            valueObjectList.get(recordIncrement)[j] = newobject;
                                        } else {
                                            // first cell of the row: allocate the array and append it
                                            Object[] valueObject = new Object[columnCount];
                                            valueObject[j] = newobject;
                                            valueObjectList.add(valueObject);
                                        }
                                    } else {
                                        // the maximum number of stored rows is reached
                                        break;
                                    }
                                }
                            }
                            if (indicator.isUsedMapDBMode()) {
                                MapDBUtils.handleDrillDownData(object, inputRowList, indicator);
                            }
                            // ~
                        } else if (indicator instanceof UniqueCountIndicator) {
                            List<Object[]> removeValueObjectList = analysis.getResults().getIndicToRowMap().get(indicator).getData();
                            if (removeValueObjectList != null) {
                                // MOD klliu 2011-06-30 bug 22523: whichever is Table or View, columns should
                                // be found through the column set
                                ColumnSet doSwitch = SwitchHelpers.COLUMN_SET_SWITCH.doSwitch(indicator.getAnalyzedElement().eContainer());
                                List<TdColumn> columnElementList = ColumnSetHelper.getColumns(doSwitch);
                                int offsetting = columnElementList.indexOf(indicator.getAnalyzedElement());
                                // indexOf yields -1 when the analyzed element is not part of the column set;
                                // in that case there is no cell to compare with
                                if (offsetting >= 0) {
                                    for (Object[] dataObject : removeValueObjectList) {
                                        Object storedValue = dataObject[offsetting];
                                        // stored cells may be null, so the comparison must be null-safe
                                        boolean sameValue = storedValue == null ? object == null : storedValue.equals(object);
                                        if (sameValue) {
                                            // removing inside the for-each is safe only because the loop is
                                            // left immediately afterwards
                                            removeValueObjectList.remove(dataObject);
                                            break;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    } finally {
        // --- release resultset, then statement, even when row processing failed
        try {
            if (resultSet != null) {
                resultSet.close();
            }
        } finally {
            statement.close();
        }
    }
    // --- close connection
    getConnection().close();
    return ok;
}
Also used : UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Time(java.sql.Time) TalendFormatTime(org.talend.dataquality.indicators.mapdb.TalendFormatTime) ColumnSet(orgomg.cwm.resource.relational.ColumnSet) TalendFormatTime(org.talend.dataquality.indicators.mapdb.TalendFormatTime) ResultSet(java.sql.ResultSet) ArrayList(java.util.ArrayList) List(java.util.List) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) ReturnCode(org.talend.utils.sugars.ReturnCode) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) TalendFormatDate(org.talend.dataquality.indicators.mapdb.TalendFormatDate) Statement(java.sql.Statement) AnalysisResult(org.talend.dataquality.analysis.AnalysisResult) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) FormatFreqPieIndicator(org.talend.dataquality.indicators.FormatFreqPieIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) Indicator(org.talend.dataquality.indicators.Indicator) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) PatternLowFreqIndicator(org.talend.dataquality.indicators.PatternLowFreqIndicator) PatternFreqIndicator(org.talend.dataquality.indicators.PatternFreqIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) UserDefIndicator(org.talend.dataquality.indicators.sql.UserDefIndicator) Date(java.util.Date) TalendFormatDate(org.talend.dataquality.indicators.mapdb.TalendFormatDate) TdColumn(org.talend.cwm.relational.TdColumn)

Aggregations

UniqueCountIndicator (org.talend.dataquality.indicators.UniqueCountIndicator)9 DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator)5 ArrayList (java.util.ArrayList)4 RowCountIndicator (org.talend.dataquality.indicators.RowCountIndicator)4 List (java.util.List)3 AnalyzedDataSet (org.talend.dataquality.analysis.AnalyzedDataSet)3 DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator)3 Indicator (org.talend.dataquality.indicators.Indicator)3 MaxLengthIndicator (org.talend.dataquality.indicators.MaxLengthIndicator)3 MinLengthIndicator (org.talend.dataquality.indicators.MinLengthIndicator)3 ENotificationImpl (org.eclipse.emf.ecore.impl.ENotificationImpl)2 Test (org.junit.Test)2 AverageLengthIndicator (org.talend.dataquality.indicators.AverageLengthIndicator)2 BlankCountIndicator (org.talend.dataquality.indicators.BlankCountIndicator)2 FrequencyIndicator (org.talend.dataquality.indicators.FrequencyIndicator)2 MeanIndicator (org.talend.dataquality.indicators.MeanIndicator)2 MedianIndicator (org.talend.dataquality.indicators.MedianIndicator)2 ModeIndicator (org.talend.dataquality.indicators.ModeIndicator)2 NullCountIndicator (org.talend.dataquality.indicators.NullCountIndicator)2 UserDefIndicator (org.talend.dataquality.indicators.sql.UserDefIndicator)2