Search in sources :

Example 6 with DistinctCountIndicator

use of org.talend.dataquality.indicators.DistinctCountIndicator in project tdq-studio-se by Talend.

the class MultiColumnAnalysisExecutorTest method testCreateSqlStatement_3.

/**
 * Checks the drill-down SQL that is instantiated on a leaf indicator of the column set analysis.
 * <p>
 * Regression test for TDQ-7287 (yyin 20130514): some columns (those whose type is not nominal) were lost when
 * viewing values in a column set analysis. The original note mentions the "interval type" case; the columns
 * themselves come from the test fixture ({@code analysis} / {@code simpleStatIndicator}), which is set up outside
 * this method.
 * <p>
 * The test attaches a distinct-count indicator carrying a generic SQL template to {@code simpleStatIndicator},
 * runs {@code MultiColumnAnalysisExecutor#createSqlStatement} and then verifies both the returned statement (an
 * empty string) and the SQL body instantiated on the first leaf indicator.
 */
@Test
public void testCreateSqlStatement_3() {
    // Build the distinct-count indicator together with its definition and generic SQL template.
    DistinctCountIndicator distinctCount = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
    IndicatorDefinition indicatorDefinition = DefinitionFactory.eINSTANCE.createIndicatorDefinition();
    String sqlTemplate = "SELECT COUNT(*) FROM (SELECT DISTINCT <%=__COLUMN_NAMES__%> FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>) A";
    TdExpression genericSql = RelationalFactory.eINSTANCE.createTdExpression();
    genericSql.setBody(sqlTemplate);
    genericSql.setLanguage("SQL"); //$NON-NLS-1$
    indicatorDefinition.getSqlGenericExpression().add(genericSql);
    distinctCount.setIndicatorDefinition(indicatorDefinition);

    // Plug the indicator into the shared fixture indicator.
    simpleStatIndicator.setDistinctCountIndicator(distinctCount);
    simpleStatIndicator.setDataminingType(DataminingType.NOMINAL);

    // The executor itself is expected to return an empty statement for this analysis ...
    MultiColumnAnalysisExecutor executor = new MultiColumnAnalysisExecutor();
    String generatedStatement = executor.createSqlStatement(analysis);
    assertEquals("", generatedStatement); //$NON-NLS-1$

    // ... while the first leaf indicator must carry the fully instantiated "view values" query.
    String expectedViewValuesSql = "SELECT COUNT(*) FROM (SELECT DISTINCT date_accnt_opened,product_id FROM tbi.customer ) A";
    String actualViewValuesSql = simpleStatIndicator.getLeafIndicators().get(0).getInstantiatedExpressions("SQL").getBody();
    assertEquals(expectedViewValuesSql, actualViewValuesSql);
}
Also used : DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) TdExpression(org.talend.cwm.relational.TdExpression) IndicatorDefinition(org.talend.dataquality.indicators.definition.IndicatorDefinition) Test(org.junit.Test)

Example 7 with DistinctCountIndicator

use of org.talend.dataquality.indicators.DistinctCountIndicator in project tdq-studio-se by Talend.

the class IndicatorEvaluationMain method main.

/**
 * Stand-alone driver that evaluates a set of indicators on the columns of one database table.
 * <p>
 * Steps performed, in order:
 * <ol>
 * <li>load the connection settings from {@code db.properties} and open a JDBC connection;</li>
 * <li>create an EMF resource backed by {@code out/columnTest_0.1.ana};</li>
 * <li>create the indicators and register them per column through {@code addIndicator};</li>
 * <li>build the select statement with {@code sqlSelectColumns} and let the evaluator compute the indicators;</li>
 * <li>print the results, save through {@code EMFUtil#save()} and print the identifiers of the median
 * indicator.</li>
 * </ol>
 * The JDBC connection is always closed before the method returns. Failures of the caught exception types are
 * logged and not rethrown.
 *
 * @param args command line arguments; not read by this method
 */
public static void main(String[] args) {
    TypedProperties connectionParams = PropertiesLoader.getProperties(IndicatorEvaluator.class, "db.properties");
    String driverClassName = connectionParams.getProperty("driver");
    String dbUrl = connectionParams.getProperty("url");
    // Declared outside the try block so that the finally clause can release it on every exit path.
    Connection connection = null;
    try {
        TimeTracer tt = new TimeTracer("Indicator evaluation", null);
        tt.start();
        // create connection
        connection = ConnectionUtils.createConnection(dbUrl, driverClassName, connectionParams);
        String database = "test";
        String tableName = "my_test";
        // --- columns to analyze (the trailing comment is the index used with columnsArray[...] below)
        String[] columnsArray = new String[] {
                "my_int", // 0
                "my_double", // 1
                "my_text", // 2
                "my_date", // 3
                "my_string", // 4
                "my_int_null" // 5
        };
        List<String> columns = Arrays.asList(columnsArray);
        // store in file
        File file = new File("out/columnTest_0.1.ana");
        EMFUtil util = new EMFUtil();
        Resource resource = util.getResourceSet().createResource(URI.createFileURI(file.getAbsolutePath()));
        // rContents and evaluator are declared outside this method
        rContents = resource.getContents();
        evaluator.setConnection(connection);
        // --- create indicators
        RowCountIndicator rowCountIndicator = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
        NullCountIndicator nullCountIndicator = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
        // NOTE(review): the distinct/unique/duplicate/mode indicators below are only referenced by the
        // commented-out statements further down; they are kept so that code can be re-enabled as is.
        DistinctCountIndicator distinctCountIndicator = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        DistinctCountIndicator distinctCountIndicator2 = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        UniqueCountIndicator uniqueCountIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
        DuplicateCountIndicator duplicateCountIndicator = IndicatorsFactory.eINSTANCE.createDuplicateCountIndicator();
        BlankCountIndicator blankCountIndicator = IndicatorsFactory.eINSTANCE.createBlankCountIndicator();
        MinLengthIndicator minLengthIndicator = IndicatorsFactory.eINSTANCE.createMinLengthIndicator();
        MaxLengthIndicator maxLengthIndicator = IndicatorsFactory.eINSTANCE.createMaxLengthIndicator();
        AverageLengthIndicator averageLengthIndicator = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        AverageLengthIndicator averageLengthIndicator2 = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        ModeIndicator modeIndicator = IndicatorsFactory.eINSTANCE.createModeIndicator();
        FrequencyIndicator textFrequencyIndicator = IndicatorsFactory.eINSTANCE.createFrequencyIndicator();
        // store in freq indic
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator);
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator2);
        // textFrequencyIndicator.setUniqueCountIndicator(uniqueCountIndicator);
        // textFrequencyIndicator.setDuplicateCountIndicator(duplicateCountIndicator);
        // textFrequencyIndicator.setModeIndicator(modeIndicator);
        MeanIndicator doubleMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MeanIndicator integerMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MedianIndicator medianIndicator = IndicatorsFactory.eINSTANCE.createMedianIndicator();
        SumIndicator integerSumIndicator = IndicatorsFactory.eINSTANCE.createSumIndicator();
        // --- attach the indicators to their columns (registration order kept as in the original)
        addIndicator(columnsArray[0], medianIndicator);
        addIndicator(columnsArray[1], doubleMeanIndicator);
        addIndicator(columnsArray[2], blankCountIndicator);
        addIndicator(columnsArray[5], nullCountIndicator);
        // addIndicator(columnsArray[2], textFrequencyIndicator);
        // addIndicator(columnsArray[2], distinctCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], uniqueCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], duplicateCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], modeIndicator); // probably not useful?
        addIndicator(columnsArray[3], rowCountIndicator);
        addIndicator(columnsArray[5], integerSumIndicator);
        addIndicator(columnsArray[5], integerMeanIndicator);
        addIndicator(columnsArray[2], averageLengthIndicator);
        // NOTE(review): index 3 is "my_date"; confirm that the length indicators are really meant for that
        // column and not for "my_string" (index 4).
        addIndicator(columnsArray[3], averageLengthIndicator2);
        addIndicator(columnsArray[3], minLengthIndicator);
        addIndicator(columnsArray[3], maxLengthIndicator);
        // build query on columns
        // TODO scorreia add filter somewhere here...
        String selectCols = sqlSelectColumns(database, tableName, columns);
        // --- create a description of the column set
        // NOTE(review): queryExpression is filled in but not used afterwards in this method.
        QueryExpression queryExpression = DatatypesFactory.eINSTANCE.createQueryExpression();
        queryExpression.setBody(selectCols);
        // TODO scorreia externalize this as a constant
        queryExpression.setLanguage("SQL");
        tt.start("compute");
        evaluator.setFetchSize(10000);
        evaluator.evaluateIndicators(selectCols, true);
        tt.end("compute");
        // Print indicators the median
        System.out.println("Median=" + medianIndicator.getMedian());
        // NOTE(review): textFrequencyIndicator is never passed to addIndicator in this method (that call is
        // commented out above), so the two values below presumably come from an unevaluated indicator.
        System.out.println("# Unique values= " + textFrequencyIndicator.getUniqueValueCount());
        System.out.println("# Distinct values= " + textFrequencyIndicator.getDistinctValueCount());
        for (String col : columns) {
            printIndicators(evaluator.getIndicators(col));
        }
        tt.start("save");
        util.save();
        tt.end("saved in " + file.getAbsolutePath());
        tt.end();
        CwmResource cwmR = (CwmResource) resource;
        String id = cwmR.getID(medianIndicator);
        System.out.println("ecore util.getId= " + EcoreUtil.getID(medianIndicator));
        System.out.println("uuId= " + id);
    // test reload this file
    // LoadSerialData.main(args);
    } catch (SQLException e) {
        log.error(e, e);
    } catch (InstantiationException e) {
        log.error(e, e);
    } catch (IllegalAccessException e) {
        log.error(e, e);
    } catch (ClassNotFoundException e) {
        log.error(e, e);
    } finally {
        // Release the JDBC connection on success and on failure alike. Closing a connection that has
        // already been closed is a no-op per the java.sql.Connection contract, so this is safe even if
        // the evaluator closed it itself.
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                log.error(e, e);
            }
        }
    }
}
Also used : SumIndicator(org.talend.dataquality.indicators.SumIndicator) SQLException(java.sql.SQLException) BlankCountIndicator(org.talend.dataquality.indicators.BlankCountIndicator) MedianIndicator(org.talend.dataquality.indicators.MedianIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) ModeIndicator(org.talend.dataquality.indicators.ModeIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) QueryExpression(orgomg.cwm.foundation.datatypes.QueryExpression) MeanIndicator(org.talend.dataquality.indicators.MeanIndicator) CwmResource(org.talend.model.emf.CwmResource) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) Connection(java.sql.Connection) CwmResource(org.talend.model.emf.CwmResource) Resource(org.eclipse.emf.ecore.resource.Resource) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) TypedProperties(org.talend.utils.properties.TypedProperties) TimeTracer(org.talend.utils.time.TimeTracer) AverageLengthIndicator(org.talend.dataquality.indicators.AverageLengthIndicator) EMFUtil(org.talend.commons.emf.EMFUtil) File(java.io.File)

Example 8 with DistinctCountIndicator

use of org.talend.dataquality.indicators.DistinctCountIndicator in project tdq-studio-se by Talend.

the class ColumnSetDBMapTest method testSubListLongLongMapOfLongListOfObjectDataValidationCase3.

/**
 * Test method for
 * {@link org.talend.dataquality.indicators.mapdb.ColumnSetDBMap#subList(long, long, java.util.Map, org.talend.cwm.indicator.DataValidation)}
 * <p>
 * Case 3: the data validation is created from a {@link DistinctCountIndicator}. Two distinct key lists are
 * stored in the map and the sub list requested for the range 0..2 is expected to contain both of them.
 */
@Test
public void testSubListLongLongMapOfLongListOfObjectDataValidationCase3() {
    ColumnSetDBMap columnSetMap = new ColumnSetDBMap();
    Assert.assertTrue(columnSetMap.isEmpty());

    // first entry: (id1, name1) -> 1
    List<Object> firstKey = new ArrayList<Object>();
    firstKey.add("id1"); //$NON-NLS-1$
    firstKey.add("name1"); //$NON-NLS-1$
    columnSetMap.put(firstKey, 1L);

    // second entry: (id2, name2) -> 2
    List<Object> secondKey = new ArrayList<Object>();
    secondKey.add("id2"); //$NON-NLS-1$
    secondKey.add("name2"); //$NON-NLS-1$
    columnSetMap.put(secondKey, 2L);

    Assert.assertEquals(2, columnSetMap.size());

    // request the range [0, 2) with a validation built from a distinct-count indicator
    DistinctCountIndicator distinctCount = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
    List<Object[]> rows = columnSetMap.subList(
            0,
            2,
            new HashMap<Long, List<Object>>(),
            IDataValidationFactory.INSTANCE.createValidation(distinctCount));
    Assert.assertEquals(2, rows.size());
}
Also used : DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) ColumnSetDBMap(org.talend.dataquality.indicators.mapdb.ColumnSetDBMap) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Aggregations

DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator)8 DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator)4 RowCountIndicator (org.talend.dataquality.indicators.RowCountIndicator)4 AverageLengthIndicator (org.talend.dataquality.indicators.AverageLengthIndicator)3 BlankCountIndicator (org.talend.dataquality.indicators.BlankCountIndicator)3 MaxLengthIndicator (org.talend.dataquality.indicators.MaxLengthIndicator)3 MeanIndicator (org.talend.dataquality.indicators.MeanIndicator)3 MedianIndicator (org.talend.dataquality.indicators.MedianIndicator)3 MinLengthIndicator (org.talend.dataquality.indicators.MinLengthIndicator)3 ModeIndicator (org.talend.dataquality.indicators.ModeIndicator)3 NullCountIndicator (org.talend.dataquality.indicators.NullCountIndicator)3 UniqueCountIndicator (org.talend.dataquality.indicators.UniqueCountIndicator)3 ArrayList (java.util.ArrayList)2 ENotificationImpl (org.eclipse.emf.ecore.impl.ENotificationImpl)2 Test (org.junit.Test)2 DefValueCountIndicator (org.talend.dataquality.indicators.DefValueCountIndicator)2 InvalidRegCodeCountIndicator (org.talend.dataquality.indicators.InvalidRegCodeCountIndicator)2 MaxValueIndicator (org.talend.dataquality.indicators.MaxValueIndicator)2 MinValueIndicator (org.talend.dataquality.indicators.MinValueIndicator)2 PossiblePhoneCountIndicator (org.talend.dataquality.indicators.PossiblePhoneCountIndicator)2