Search in sources :

Example 6 with NullCountIndicator

use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.

the class IndicatorHelper method getNullCountIndicator.

/**
 * Looks up the {@link NullCountIndicator} attached to the given model element.
 * <p>
 * The indicators registered for the element are scanned in list order and an indicator is considered a match
 * when its {@code eClass()} equals the NullCountIndicator EClass of {@link IndicatorsPackage}. The scan does not
 * stop at the first match, so when several indicators match, the last one in the list is returned.
 *
 * @param modelElement the analyzed element used as key in the map
 * @param elementToIndicator the map from analyzed elements to their indicators
 * @return the matching indicator, or {@code null} when the element has no indicator list or no indicator matches
 */
public static NullCountIndicator getNullCountIndicator(ModelElement modelElement, Map<ModelElement, List<Indicator>> elementToIndicator) {
    List<Indicator> indicators = elementToIndicator.get(modelElement);
    if (indicators == null) {
        // nothing registered for this element
        return null;
    }
    NullCountIndicator match = null;
    for (Indicator candidate : indicators) {
        boolean isNullCount = IndicatorsPackage.eINSTANCE.getNullCountIndicator().equals(candidate.eClass());
        if (!isNullCount) {
            continue;
        }
        // keep scanning: a later match overrides an earlier one
        match = (NullCountIndicator) candidate;
    }
    return match;
}
Also used : UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) MaxValueIndicator(org.talend.dataquality.indicators.MaxValueIndicator) ValidRegCodeCountIndicator(org.talend.dataquality.indicators.ValidRegCodeCountIndicator) PhoneNumbStatisticsIndicator(org.talend.dataquality.indicators.PhoneNumbStatisticsIndicator) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) MinValueIndicator(org.talend.dataquality.indicators.MinValueIndicator) AverageLengthIndicator(org.talend.dataquality.indicators.AverageLengthIndicator) MeanIndicator(org.talend.dataquality.indicators.MeanIndicator) DefValueCountIndicator(org.talend.dataquality.indicators.DefValueCountIndicator) WellFormE164PhoneCountIndicator(org.talend.dataquality.indicators.WellFormE164PhoneCountIndicator) RangeIndicator(org.talend.dataquality.indicators.RangeIndicator) JavaUserDefIndicator(org.talend.dataquality.indicators.sql.JavaUserDefIndicator) ValueIndicator(org.talend.dataquality.indicators.ValueIndicator) InvalidRegCodeCountIndicator(org.talend.dataquality.indicators.InvalidRegCodeCountIndicator) UserDefIndicator(org.talend.dataquality.indicators.sql.UserDefIndicator) ValidPhoneCountIndicator(org.talend.dataquality.indicators.ValidPhoneCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) WellFormIntePhoneCountIndicator(org.talend.dataquality.indicators.WellFormIntePhoneCountIndicator) FormatFreqPieIndicator(org.talend.dataquality.indicators.FormatFreqPieIndicator) PossiblePhoneCountIndicator(org.talend.dataquality.indicators.PossiblePhoneCountIndicator) ModeIndicator(org.talend.dataquality.indicators.ModeIndicator) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) 
NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) MedianIndicator(org.talend.dataquality.indicators.MedianIndicator) Indicator(org.talend.dataquality.indicators.Indicator) CompositeIndicator(org.talend.dataquality.indicators.CompositeIndicator) BlankCountIndicator(org.talend.dataquality.indicators.BlankCountIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) IQRIndicator(org.talend.dataquality.indicators.IQRIndicator) BoxIndicator(org.talend.dataquality.indicators.BoxIndicator) WellFormNationalPhoneCountIndicator(org.talend.dataquality.indicators.WellFormNationalPhoneCountIndicator) WhereRuleIndicator(org.talend.dataquality.indicators.sql.WhereRuleIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator)

Example 7 with NullCountIndicator

use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.

the class IndicatorEvaluationMain method main.

/**
 * Manual entry point that evaluates a fixed set of indicators on the columns of table "my_test" in database
 * "test" and saves the result in the file "out/columnTest_0.1.ana".
 * <p>
 * The connection settings (keys "driver" and "url", plus whatever else the connection needs) are loaded from
 * "db.properties". The method relies on members declared outside of this method: {@code evaluator},
 * {@code rContents}, {@code log} and the helpers {@code addIndicator}, {@code sqlSelectColumns} and
 * {@code printIndicators}.
 * <p>
 * SQL, instantiation, access and class loading errors are logged and not rethrown.
 *
 * @param args not used
 */
public static void main(String[] args) {
    TypedProperties connectionParams = PropertiesLoader.getProperties(IndicatorEvaluator.class, "db.properties");
    String driverClassName = connectionParams.getProperty("driver");
    String dbUrl = connectionParams.getProperty("url");
    try {
        // the unnamed start()/end() pair brackets the whole run; named pairs time individual phases
        TimeTracer tt = new TimeTracer("Indicator evaluation", null);
        tt.start();
        // create connection
        Connection connection = ConnectionUtils.createConnection(dbUrl, driverClassName, connectionParams);
        String database = "test";
        String tableName = "my_test";
        // --- columns to analyze
        String[] columnsArray = new String[] { // array index of each column is given after the entry
        "my_int", // 0
        "my_double", // 1
        "my_text", // 2
        "my_date", // 3
        "my_string", // 4
        "my_int_null" }; // 5
        List<String> columns = Arrays.asList(columnsArray);
        // store in file
        File file = new File("out/columnTest_0.1.ana");
        EMFUtil util = new EMFUtil();
        Resource resource = util.getResourceSet().createResource(URI.createFileURI(file.getAbsolutePath()));
        // rContents and evaluator are declared outside of this method
        // NOTE(review): presumably addIndicator() stores the indicators in rContents so that util.save() persists them - confirm
        rContents = resource.getContents();
        evaluator.setConnection(connection);
        // --- create indicators
        // NOTE(review): rowCount, nullCount, blankCount, min/max/average length, mean, median and sum indicators are
        // registered below; both distinct count indicators, the unique count, duplicate count, mode and frequency
        // indicators are created but never passed to addIndicator in this method
        RowCountIndicator rowCountIndicator = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
        NullCountIndicator nullCountIndicator = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
        DistinctCountIndicator distinctCountIndicator = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        DistinctCountIndicator distinctCountIndicator2 = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        UniqueCountIndicator uniqueCountIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
        DuplicateCountIndicator duplicateCountIndicator = IndicatorsFactory.eINSTANCE.createDuplicateCountIndicator();
        BlankCountIndicator blankCountIndicator = IndicatorsFactory.eINSTANCE.createBlankCountIndicator();
        MinLengthIndicator minLengthIndicator = IndicatorsFactory.eINSTANCE.createMinLengthIndicator();
        MaxLengthIndicator maxLengthIndicator = IndicatorsFactory.eINSTANCE.createMaxLengthIndicator();
        AverageLengthIndicator averageLengthIndicator = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        AverageLengthIndicator averageLengthIndicator2 = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        ModeIndicator modeIndicator = IndicatorsFactory.eINSTANCE.createModeIndicator();
        FrequencyIndicator textFrequencyIndicator = IndicatorsFactory.eINSTANCE.createFrequencyIndicator();
        // store in freq indic
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator);
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator2);
        // textFrequencyIndicator.setUniqueCountIndicator(uniqueCountIndicator);
        // textFrequencyIndicator.setDuplicateCountIndicator(duplicateCountIndicator);
        // textFrequencyIndicator.setModeIndicator(modeIndicator);
        MeanIndicator doubleMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MeanIndicator integerMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MedianIndicator medianIndicator = IndicatorsFactory.eINSTANCE.createMedianIndicator();
        SumIndicator integerSumIndicator = IndicatorsFactory.eINSTANCE.createSumIndicator();
        // --- attach indicators to columns: [0]=my_int, [1]=my_double, [2]=my_text, [3]=my_date, [5]=my_int_null
        // ("my_string" at index 4 gets no indicator)
        addIndicator(columnsArray[0], medianIndicator);
        addIndicator(columnsArray[1], doubleMeanIndicator);
        addIndicator(columnsArray[2], blankCountIndicator);
        addIndicator(columnsArray[5], nullCountIndicator);
        // addIndicator(columnsArray[2], textFrequencyIndicator);
        // addIndicator(columnsArray[2], distinctCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], uniqueCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], duplicateCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], modeIndicator); // probably not useful?
        addIndicator(columnsArray[3], rowCountIndicator);
        addIndicator(columnsArray[5], integerSumIndicator);
        addIndicator(columnsArray[5], integerMeanIndicator);
        addIndicator(columnsArray[2], averageLengthIndicator);
        addIndicator(columnsArray[3], averageLengthIndicator2);
        addIndicator(columnsArray[3], minLengthIndicator);
        addIndicator(columnsArray[3], maxLengthIndicator);
        // build query on columns
        // TODO scorreia add filter somewhere here...
        String selectCols = sqlSelectColumns(database, tableName, columns);
        // --- create a description of the column set
        // NOTE(review): queryExpression is populated but not handed to anything else in this method
        QueryExpression queryExpression = DatatypesFactory.eINSTANCE.createQueryExpression();
        queryExpression.setBody(selectCols);
        // TODO scorreia externalize this as a constant
        queryExpression.setLanguage("SQL");
        tt.start("compute");
        evaluator.setFetchSize(10000);
        // NOTE(review): the boolean presumably asks the evaluator to close the connection when done - confirm,
        // because this method never closes the connection itself
        evaluator.evaluateIndicators(selectCols, true);
        tt.end("compute");
        // Print indicators the median
        System.out.println("Median=" + medianIndicator.getMedian());
        // NOTE(review): textFrequencyIndicator was never registered (the addIndicator call above is commented out),
        // so the two values below come from an indicator that took no part in the evaluation
        System.out.println("# Unique values= " + textFrequencyIndicator.getUniqueValueCount());
        System.out.println("# Distinct values= " + textFrequencyIndicator.getDistinctValueCount());
        for (String col : columns) {
            printIndicators(evaluator.getIndicators(col));
        }
        tt.start("save");
        util.save();
        tt.end("saved in " + file.getAbsolutePath());
        tt.end();
        // print the identifier of the median indicator as seen by the resource and by EcoreUtil
        CwmResource cwmR = (CwmResource) resource;
        String id = cwmR.getID(medianIndicator);
        System.out.println("ecore util.getId= " + EcoreUtil.getID(medianIndicator));
        System.out.println("uuId= " + id);
    // test reload this file
    // LoadSerialData.main(args);
    } catch (SQLException e) {
        // failure is only logged
        log.error(e, e);
    } catch (InstantiationException e) {
        // failure is only logged
        log.error(e, e);
    } catch (IllegalAccessException e) {
        // failure is only logged
        log.error(e, e);
    } catch (ClassNotFoundException e) {
        // failure is only logged
        log.error(e, e);
    }
}
Also used : SumIndicator(org.talend.dataquality.indicators.SumIndicator) SQLException(java.sql.SQLException) BlankCountIndicator(org.talend.dataquality.indicators.BlankCountIndicator) MedianIndicator(org.talend.dataquality.indicators.MedianIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) ModeIndicator(org.talend.dataquality.indicators.ModeIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) QueryExpression(orgomg.cwm.foundation.datatypes.QueryExpression) MeanIndicator(org.talend.dataquality.indicators.MeanIndicator) CwmResource(org.talend.model.emf.CwmResource) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) Connection(java.sql.Connection) CwmResource(org.talend.model.emf.CwmResource) Resource(org.eclipse.emf.ecore.resource.Resource) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) TypedProperties(org.talend.utils.properties.TypedProperties) TimeTracer(org.talend.utils.time.TimeTracer) AverageLengthIndicator(org.talend.dataquality.indicators.AverageLengthIndicator) EMFUtil(org.talend.commons.emf.EMFUtil) File(java.io.File)

Example 8 with NullCountIndicator

use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.

the class ItemRecordTest method testLoadProperty.

/**
 * Test method for {@link org.talend.dataprofiler.core.ui.imex.model.ItemRecord#loadProperty()}.
 * <p>
 * Scenario: an analysis is saved with one indicator, an ItemRecord on its file reports one dependency. A second
 * indicator is added and the analysis saved again; a new ItemRecord still reports one dependency until
 * {@code ItemRecord.clear()} is called, after which two dependencies are reported.
 *
 * @throws PersistenceException
 */
@Test
public void testLoadProperty() throws PersistenceException {
    chooseRightProject();
    Property analysisProperty = createAnalysis("ItemRecordTestanalysis1"); //$NON-NLS-1$
    TDQAnalysisItem analysisItem = (TDQAnalysisItem) analysisProperty.getItem();
    Analysis firstAnalysis = analysisItem.getAnalysis();
    AnalysisResult analysisResult = firstAnalysis.getResults();
    // a freshly created analysis has no indicator
    Assert.assertEquals(0, analysisResult.getIndicators().size());

    // --- first indicator: row count, bound to a newly saved indicator definition
    RowCountIndicator rowCountIndicator = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
    String rowCountPropertyID = EcoreUtil.generateUUID();
    saveIndicatorDefintion(rowCountPropertyID, "ItemRecordWithRefreshedTestIndicatorDefinition1"); //$NON-NLS-1$
    TDQIndicatorDefinitionItem rowCountDefinitionItem = (TDQIndicatorDefinitionItem) ProxyRepositoryFactory.getInstance()
            .getLastVersion(rowCountPropertyID).getProperty().getItem();
    rowCountIndicator.setIndicatorDefinition(rowCountDefinitionItem.getIndicatorDefinition());
    Assert.assertNotNull("Row count indicator definition should not be null", //$NON-NLS-1$
            rowCountIndicator.getIndicatorDefinition());
    Assert.assertEquals("ItemRecordWithRefreshedTestIndicatorDefinition1", //$NON-NLS-1$
            rowCountIndicator.getIndicatorDefinition().getLabel());
    analysisResult.getIndicators().add(rowCountIndicator);
    Assert.assertEquals(1, analysisResult.getIndicators().size());

    // --- first save
    ReturnCode firstSave = saveAnalysis(firstAnalysis);
    TDQAnalysisItem savedItem = (TDQAnalysisItem) analysisProperty.getItem();
    int savedIndicatorCount = savedItem.getAnalysis().getResults().getIndicators().size();
    Assert.assertEquals(1, savedIndicatorCount);
    Assert.assertTrue("The analysis first time saving is not work", firstSave.isOk()); //$NON-NLS-1$
    File analysisFile = WorkspaceUtils.ifileToFile(PropertyHelper.getItemFile(analysisProperty));
    ItemRecord initialRecord = new ItemRecord(analysisFile);
    Assert.assertEquals(1, initialRecord.getDependencySet().size());

    // --- second indicator: null count, bound to another newly saved indicator definition
    NullCountIndicator nullCountIndicator = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
    String nullCountPropertyID = EcoreUtil.generateUUID();
    saveIndicatorDefintion(nullCountPropertyID, "ItemRecordWithRefreshedTestIndicatorDefinition2"); //$NON-NLS-1$
    TDQIndicatorDefinitionItem nullCountDefinitionItem = (TDQIndicatorDefinitionItem) ProxyRepositoryFactory.getInstance()
            .getLastVersion(nullCountPropertyID).getProperty().getItem();
    nullCountIndicator.setIndicatorDefinition(nullCountDefinitionItem.getIndicatorDefinition());
    Analysis secondAnalysis = analysisItem.getAnalysis();
    secondAnalysis.getResults().getIndicators().add(nullCountIndicator);

    // --- second save
    ReturnCode secondSave = saveAnalysis(secondAnalysis);
    Assert.assertTrue("The analysis second time saving is not work", secondSave.isOk()); //$NON-NLS-1$

    // before clear(): the previously loaded resource is reused, so the dependency set is unchanged
    ItemRecord staleRecord = new ItemRecord(analysisFile);
    Assert.assertEquals(1, staleRecord.getDependencySet().size());

    // after clear(): the latest resource is loaded, so the new dependency shows up
    ItemRecord.clear();
    ItemRecord refreshedRecord = new ItemRecord(analysisFile);
    Assert.assertEquals(2, refreshedRecord.getDependencySet().size());
}
Also used : ReturnCode(org.talend.utils.sugars.ReturnCode) Analysis(org.talend.dataquality.analysis.Analysis) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) Property(org.talend.core.model.properties.Property) File(java.io.File) AnalysisResult(org.talend.dataquality.analysis.AnalysisResult) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) TDQAnalysisItem(org.talend.dataquality.properties.TDQAnalysisItem) Test(org.junit.Test)

Example 9 with NullCountIndicator

use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.

the class IndicatorHelperTest method testGetNullCountIndicator2.

/**
 * Test method for
 * {@link org.talend.dataquality.helpers.IndicatorHelper#getNullCountIndicator(orgomg.cwm.objectmodel.core.ModelElement, java.util.Map)}
 * . Checks that {@code null} is returned when the element is mapped to a {@code null} indicator list.
 */
@Test
public void testGetNullCountIndicator2() {
    // TdColumn
    TdColumn column1 = RelationalFactory.eINSTANCE.createTdColumn();
    // ~
    // Map: the column is deliberately mapped to a null indicator list
    Map<ModelElement, List<Indicator>> elementToIndicator = new HashMap<ModelElement, List<Indicator>>();
    elementToIndicator.put(column1, null);
    // ~
    NullCountIndicator nullCountIndicator2 = IndicatorHelper.getNullCountIndicator(column1, elementToIndicator);
    // The Java "assert" statement is skipped unless the JVM is started with -ea, so the check is done
    // explicitly to make sure the test can actually fail.
    if (nullCountIndicator2 != null) {
        throw new AssertionError("Expected no NullCountIndicator for a null indicator list but got: " + nullCountIndicator2);
    }
}
Also used : ModelElement(orgomg.cwm.objectmodel.core.ModelElement) TdColumn(org.talend.cwm.relational.TdColumn) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) Test(org.junit.Test)

Example 10 with NullCountIndicator

use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.

the class ColumnAnalysisSqlExecutor method setRowCountAndNullCount.

/**
 * Propagates the row count and the null count of each analyzed element to all indicators of that element.
 * <p>
 * For every element of the map, the row count indicator and the null count indicator are looked up through
 * {@link IndicatorHelper}. When a row count indicator exists, its count is copied to each indicator for which
 * {@code needPercentage} returns true; when a null count indicator exists, its null count is copied to every
 * indicator. Elements mapped to a {@code null} indicator list are skipped.
 *
 * @param elementToIndicator the map from analyzed elements to their indicators
 */
protected void setRowCountAndNullCount(Map<ModelElement, List<Indicator>> elementToIndicator) {
    for (Map.Entry<ModelElement, List<Indicator>> entry : elementToIndicator.entrySet()) {
        List<Indicator> indicators = entry.getValue();
        if (indicators == null) {
            // an element may be registered without indicators (IndicatorHelper.getNullCountIndicator tolerates
            // this case too); there is nothing to update then
            continue;
        }
        ModelElement modelElement = entry.getKey();
        // get row count indicator
        RowCountIndicator rowCount = IndicatorHelper.getRowCountIndicator(modelElement, elementToIndicator);
        // get null count indicator
        NullCountIndicator nullCount = IndicatorHelper.getNullCountIndicator(modelElement, elementToIndicator);
        for (Indicator ind : indicators) {
            // set row count value to each indicator
            if (rowCount != null && needPercentage(ind)) {
                ind.setCount(rowCount.getCount());
            }
            // set null count value to each indicator
            if (nullCount != null) {
                ind.setNullCount(nullCount.getNullCount());
            }
        }
    }
}
Also used : ModelElement(orgomg.cwm.objectmodel.core.ModelElement) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) CompositeIndicator(org.talend.dataquality.indicators.CompositeIndicator)

Aggregations

NullCountIndicator (org.talend.dataquality.indicators.NullCountIndicator)10 RowCountIndicator (org.talend.dataquality.indicators.RowCountIndicator)9 Indicator (org.talend.dataquality.indicators.Indicator)5 Test (org.junit.Test)4 AverageLengthIndicator (org.talend.dataquality.indicators.AverageLengthIndicator)4 BlankCountIndicator (org.talend.dataquality.indicators.BlankCountIndicator)4 DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator)4 DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator)4 MaxLengthIndicator (org.talend.dataquality.indicators.MaxLengthIndicator)4 MeanIndicator (org.talend.dataquality.indicators.MeanIndicator)4 MedianIndicator (org.talend.dataquality.indicators.MedianIndicator)4 MinLengthIndicator (org.talend.dataquality.indicators.MinLengthIndicator)4 ModeIndicator (org.talend.dataquality.indicators.ModeIndicator)4 ModelElement (orgomg.cwm.objectmodel.core.ModelElement)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 List (java.util.List)3 TdColumn (org.talend.cwm.relational.TdColumn)3 DefValueCountIndicator (org.talend.dataquality.indicators.DefValueCountIndicator)3 InvalidRegCodeCountIndicator (org.talend.dataquality.indicators.InvalidRegCodeCountIndicator)3