Search in sources :

Example 1 with UniqueCountIndicator

use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.

the class DBMapTest method testSubListLongLongMapDataValidationCase.

/**
 * Test method for
 * {@link org.talend.dataquality.indicators.mapdb.DBMap#subList(long, long, Map, org.talend.cwm.indicator.DataValidation)}.
 * Puts two entries into a fresh map, calls {@code subList(0, 2, ...)} with a validation built from a
 * UniqueCountIndicator and expects exactly one row in the result.
 */
@Test
public void testSubListLongLongMapDataValidationCase() {
    DBMap<String, Long> countsByName = new DBMap<String, Long>();
    // a freshly created map prints as an empty map
    Assert.assertEquals("{}", countsByName.toString()); //$NON-NLS-1$

    countsByName.put("name1", 1L); //$NON-NLS-1$
    countsByName.put("name2", 2L); //$NON-NLS-1$
    Assert.assertEquals("{name1=1, name2=2}", countsByName.toString()); //$NON-NLS-1$

    UniqueCountIndicator uniqueIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
    HashMap<Long, String> indexMap = new HashMap<Long, String>();
    List<Object[]> rows = countsByName.subList(0, 2, indexMap,
            IDataValidationFactory.INSTANCE.createValidation(uniqueIndicator));

    Assert.assertEquals(1, rows.size());
}
Also used : DBMap(org.talend.dataquality.indicators.mapdb.DBMap) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Test(org.junit.Test)

Example 2 with UniqueCountIndicator

use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.

the class ColumnSetIndicatorEvaluator method storeDataSet.

/**
 * Copies the rows held by each {@link SimpleStatIndicator} ('simpleIndicator.getListRows()') into the
 * {@link AnalyzedDataSet} of its leaf indicators, skipping {@link RowCountIndicator}.
 * <p>
 * The last element of every row is read as the occurrence count of that row:
 * <ul>
 * <li>{@link DistinctCountIndicator} receives every row;</li>
 * <li>{@link UniqueCountIndicator} receives the rows whose count equals 1;</li>
 * <li>{@link DuplicateCountIndicator} receives the rows whose count is greater than 1;</li>
 * <li>any other leaf indicator receives an empty list.</li>
 * </ul>
 * Afterwards the row list of a {@link SimpleStatIndicator} or {@link AllMatchIndicator} is replaced by an
 * empty list when that indicator does not store data.
 */
private void storeDataSet() {
    EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
    for (Indicator indicator : analysis.getResults().getIndicators()) {
        if (indicator instanceof SimpleStatIndicator) {
            SimpleStatIndicator simpleIndicator = (SimpleStatIndicator) indicator;
            if (!analysis.getParameters().isStoreData()) {
                // NOTE(review): this leaves the whole indicator loop, so the AllMatchIndicator clean-up
                // below is not reached for this or any later indicator - confirm this is intended.
                break;
            }
            if (simpleIndicator.isUsedMapDBMode() && AnalysisHelper.isJavaExecutionEngine(analysis)) {
                // nothing need to do
            } else {
                List<Object[]> listRows = simpleIndicator.getListRows();
                if (listRows == null || listRows.isEmpty()) {
                    // NOTE(review): also stops processing of all remaining indicators, not only this
                    // one - confirm that 'continue' was not meant.
                    break;
                }
                for (Indicator leafIndicator : simpleIndicator.getLeafIndicators()) {
                    if (leafIndicator instanceof RowCountIndicator) {
                        continue;
                    }
                    List<Object[]> dataList = new ArrayList<Object[]>();
                    AnalyzedDataSet analyzedDataSet = indicToRowMap.get(leafIndicator);
                    if (analyzedDataSet == null) {
                        analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                        indicToRowMap.put(leafIndicator, analyzedDataSet);
                        analyzedDataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
                        analyzedDataSet.setRecordSize(0);
                    }
                    for (int i = 0; i < listRows.size(); i++) {
                        Object[] object = listRows.get(i);
                        // the last element store the count value; an empty row has no count at all
                        // (indexing element 0 of an empty array would throw).
                        Object count = object.length > 0 ? object[object.length - 1] : null;
                        if (leafIndicator instanceof DistinctCountIndicator) {
                            dataList.add(object);
                        } else if (leafIndicator instanceof UniqueCountIndicator) {
                            if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) {
                                if (Long.valueOf(count + PluginConstant.EMPTY_STRING).longValue() == 1) {
                                    dataList.add(object);
                                }
                            }
                        } else if (leafIndicator instanceof DuplicateCountIndicator) {
                            if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) {
                                if (Long.valueOf(count + PluginConstant.EMPTY_STRING).longValue() > 1) {
                                    dataList.add(object);
                                }
                            }
                        }
                    }
                    analyzedDataSet.setData(dataList);
                }
                // MOD sizhaoliu TDQ-7144 clear the listRows after usage for drill down
                if (!simpleIndicator.isStoreData()) {
                    simpleIndicator.setListRows(new ArrayList<Object[]>());
                }
            }
        }
        if (indicator instanceof AllMatchIndicator) {
            AllMatchIndicator allMatchIndicator = (AllMatchIndicator) indicator;
            if (!allMatchIndicator.isStoreData()) {
                allMatchIndicator.setListRows(new ArrayList<Object[]>());
            }
        }
    }
}
Also used : DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) ArrayList(java.util.ArrayList) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator)

Example 3 with UniqueCountIndicator

use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.

the class IndicatorHelper method getIndicatorValue.

/**
 * Returns the value of the given indicator as a string.
 * <p>
 * The indicator is first dispatched through an {@link IndicatorsSwitch} covering the built-in indicator
 * types; when that yields {@code null}, it is dispatched through an {@link IndicatorSqlSwitch} covering
 * the where-rule and user-defined indicator types.
 *
 * @param indicator the indicator whose value is wanted
 * @return the string form of the indicator value, or whatever the second switch returns when the first
 * one produced {@code null}
 */
public static String getIndicatorValue(Indicator indicator) {
    IndicatorsSwitch<String> coreSwitch = new IndicatorsSwitch<String>() {

        @Override
        public String caseAverageLengthIndicator(AverageLengthIndicator ind) {
            return createStandardNumber(ind.getAverageLength());
        }

        @Override
        public String caseBlankCountIndicator(BlankCountIndicator ind) {
            return String.valueOf(ind.getBlankCount());
        }

        @Override
        public String caseDefValueCountIndicator(DefValueCountIndicator ind) {
            return String.valueOf(ind.getDefaultValCount());
        }

        @Override
        public String caseDistinctCountIndicator(DistinctCountIndicator ind) {
            return String.valueOf(ind.getDistinctValueCount());
        }

        @Override
        public String caseDuplicateCountIndicator(DuplicateCountIndicator ind) {
            return String.valueOf(ind.getDuplicateValueCount());
        }

        @Override
        public String caseMaxLengthIndicator(MaxLengthIndicator ind) {
            return String.valueOf(ind.getLength());
        }

        @Override
        public String caseMeanIndicator(MeanIndicator ind) {
            return createStandardNumber(ind.getMean());
        }

        @Override
        public String caseMedianIndicator(MedianIndicator ind) {
            return createStandardNumber(ind.getMedian());
        }

        @Override
        public String caseMinLengthIndicator(MinLengthIndicator ind) {
            return String.valueOf(ind.getLength());
        }

        @Override
        public String caseModeIndicator(ModeIndicator ind) {
            return String.valueOf(ind.getMode());
        }

        @Override
        public String caseNullCountIndicator(NullCountIndicator ind) {
            return String.valueOf(ind.getNullCount());
        }

        @Override
        public String casePatternMatchingIndicator(PatternMatchingIndicator ind) {
            return String.valueOf(ind.getMatchingValueCount());
        }

        @Override
        public String caseRowCountIndicator(RowCountIndicator ind) {
            return String.valueOf(ind.getCount());
        }

        @Override
        public String caseUniqueCountIndicator(UniqueCountIndicator ind) {
            return String.valueOf(ind.getUniqueValueCount());
        }

        @Override
        public String caseValueIndicator(ValueIndicator ind) {
            return ind.getValue();
        }

        @Override
        public String caseValidPhoneCountIndicator(ValidPhoneCountIndicator ind) {
            return String.valueOf(ind.getValidPhoneNumCount());
        }

        @Override
        public String casePossiblePhoneCountIndicator(PossiblePhoneCountIndicator ind) {
            return String.valueOf(ind.getPossiblePhoneCount());
        }

        @Override
        public String caseValidRegCodeCountIndicator(ValidRegCodeCountIndicator ind) {
            return String.valueOf(ind.getValidRegCount());
        }

        @Override
        public String caseInvalidRegCodeCountIndicator(InvalidRegCodeCountIndicator ind) {
            return String.valueOf(ind.getInvalidRegCount());
        }

        @Override
        public String caseWellFormE164PhoneCountIndicator(WellFormE164PhoneCountIndicator ind) {
            return String.valueOf(ind.getWellFormE164PhoneCount());
        }

        @Override
        public String caseWellFormIntePhoneCountIndicator(WellFormIntePhoneCountIndicator ind) {
            return String.valueOf(ind.getWellFormIntePhoneCount());
        }

        @Override
        public String caseWellFormNationalPhoneCountIndicator(WellFormNationalPhoneCountIndicator ind) {
            return String.valueOf(ind.getWellFormNatiPhoneCount());
        }
    };
    // TDQ-11114: consider the UDI type
    IndicatorSqlSwitch<String> sqlTypeSwitch = new IndicatorSqlSwitch<String>() {

        @Override
        public String caseWhereRuleIndicator(WhereRuleIndicator rule) {
            return String.valueOf(rule.getUserCount());
        }

        @Override
        public String caseUserDefIndicator(UserDefIndicator udi) {
            // TDQ-11114: get the correct value for user define realvalue indicator
            if (udi instanceof UserDefIndicator) {
                IndicatorCategory udiCategory = IndicatorCategoryHelper.getCategory(udi.getIndicatorDefinition());
                if (IndicatorCategoryHelper.isUserDefRealValue(udiCategory)) {
                    return String.valueOf(udi.getRealValue());
                }
            }
            // TDQ-11485: fix the match udi get result value error
            return String.valueOf(udi.getIntegerValue());
        }

        @Override
        public String caseJavaUserDefIndicator(JavaUserDefIndicator javaUdi) {
            return String.valueOf(javaUdi.getUserCount());
        }
    };
    String value = coreSwitch.doSwitch(indicator);
    if (value != null) {
        return value;
    }
    // the built-in switch produced nothing: fall back to the SQL / user-defined switch
    return sqlTypeSwitch.doSwitch(indicator);
}
Also used : BlankCountIndicator(org.talend.dataquality.indicators.BlankCountIndicator) MedianIndicator(org.talend.dataquality.indicators.MedianIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) ModeIndicator(org.talend.dataquality.indicators.ModeIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) WellFormIntePhoneCountIndicator(org.talend.dataquality.indicators.WellFormIntePhoneCountIndicator) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) WhereRuleIndicator(org.talend.dataquality.indicators.sql.WhereRuleIndicator) IndicatorCategory(org.talend.dataquality.indicators.definition.IndicatorCategory) IndicatorsSwitch(org.talend.dataquality.indicators.util.IndicatorsSwitch) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) MeanIndicator(org.talend.dataquality.indicators.MeanIndicator) JavaUserDefIndicator(org.talend.dataquality.indicators.sql.JavaUserDefIndicator) UserDefIndicator(org.talend.dataquality.indicators.sql.UserDefIndicator) IndicatorSqlSwitch(org.talend.dataquality.indicators.sql.util.IndicatorSqlSwitch) PossiblePhoneCountIndicator(org.talend.dataquality.indicators.PossiblePhoneCountIndicator) WellFormE164PhoneCountIndicator(org.talend.dataquality.indicators.WellFormE164PhoneCountIndicator) ValidPhoneCountIndicator(org.talend.dataquality.indicators.ValidPhoneCountIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) MaxValueIndicator(org.talend.dataquality.indicators.MaxValueIndicator) MinValueIndicator(org.talend.dataquality.indicators.MinValueIndicator) ValueIndicator(org.talend.dataquality.indicators.ValueIndicator) ValidRegCodeCountIndicator(org.talend.dataquality.indicators.ValidRegCodeCountIndicator) 
MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) InvalidRegCodeCountIndicator(org.talend.dataquality.indicators.InvalidRegCodeCountIndicator) JavaUserDefIndicator(org.talend.dataquality.indicators.sql.JavaUserDefIndicator) DefValueCountIndicator(org.talend.dataquality.indicators.DefValueCountIndicator) AverageLengthIndicator(org.talend.dataquality.indicators.AverageLengthIndicator) WellFormNationalPhoneCountIndicator(org.talend.dataquality.indicators.WellFormNationalPhoneCountIndicator)

Example 4 with UniqueCountIndicator

use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.

the class ColumnSetMultiValueIndicatorImpl method basicSetUniqueCountIndicator.

/**
 * <!-- begin-user-doc -->
 * Replaces the unique count indicator held by this object with {@code newUniqueCountIndicator}.
 * When notification is required, a {@code Notification.SET} notification carrying the old and the new
 * indicator is created; it becomes the returned chain if {@code msgs} is {@code null}, otherwise it is
 * added to {@code msgs}. The (possibly unchanged) chain is returned.
 * <!-- end-user-doc -->
 * @generated
 */
public NotificationChain basicSetUniqueCountIndicator(UniqueCountIndicator newUniqueCountIndicator, NotificationChain msgs) {
    // remember the previous value so it can be reported in the notification
    UniqueCountIndicator oldUniqueCountIndicator = uniqueCountIndicator;
    uniqueCountIndicator = newUniqueCountIndicator;
    if (eNotificationRequired()) {
        ENotificationImpl notification = new ENotificationImpl(this, Notification.SET, ColumnsetPackage.COLUMN_SET_MULTI_VALUE_INDICATOR__UNIQUE_COUNT_INDICATOR, oldUniqueCountIndicator, newUniqueCountIndicator);
        // start a new chain with this notification, or append it to the caller's chain
        if (msgs == null)
            msgs = notification;
        else
            msgs.add(notification);
    }
    return msgs;
}
Also used : ENotificationImpl(org.eclipse.emf.ecore.impl.ENotificationImpl) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator)

Example 5 with UniqueCountIndicator

use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.

the class DelimitedFileIndicatorEvaluator method handleByARow.

/**
 * Feeds one row of the delimited file to the indicators of every analysed column and, when the analysis
 * stores data, records the row for the indicators that ask for it.
 * <p>
 * For a {@link UniqueCountIndicator} that no longer wants the row stored, the first previously stored row
 * whose value in the analysed column matches the current value is removed from the stored data.
 *
 * @param rowValues the raw values of the current row
 * @param currentRow the number of the current row, used in the warning message only
 * @param analysisElementList the analysed elements; each one is cast to {@link MetadataColumn}
 * @param columnElementList the columns used to locate the analysed column inside a stored row
 * @param indicToRowMap the map holding the {@link AnalyzedDataSet} of each indicator
 * @return a return code that is ok unless a column position was missing or beyond the end of the row
 */
private ReturnCode handleByARow(String[] rowValues, long currentRow, List<ModelElement> analysisElementList, List<MetadataColumn> columnElementList, EMap<Indicator, AnalyzedDataSet> indicToRowMap) {
    ReturnCode returnCode = new ReturnCode(true);
    Object object = null;
    int maxNumberRows = analysis.getParameters().getMaxNumberRows();
    int recordIncrement = 0;
    element: for (int i = 0; i < analysisElementList.size(); i++) {
        MetadataColumn mColumn = (MetadataColumn) analysisElementList.get(i);
        Integer position = ColumnHelper.getColumnIndex(mColumn);
        // warning with a file of badly form
        if (position == null || position >= rowValues.length) {
            log.warn(Messages.getString("DelimitedFileIndicatorEvaluator.incorrectData", //$NON-NLS-1$
                    mColumn.getLabel(), currentRow, delimitedFileconnection.getFilePath()));
            returnCode.setOk(false);
            continue;
        }
        object = TalendTypeConvert.convertToObject(mColumn.getTalendType(), rowValues[position], mColumn.getPattern());
        List<Indicator> indicators = getIndicators(mColumn.getLabel());
        for (Indicator indicator : indicators) {
            if (!continueRun()) {
                break element;
            }
            // bug 19036,to irregularly data,still compute for RowCountIndicator
            // NOTE(review): 'continue element' skips every remaining indicator of this column, including
            // a RowCountIndicator that comes later in the list - confirm the ordering guarantees this.
            if (object == null && !(indicator instanceof RowCountIndicator)) {
                continue element;
            }
            // Added yyin 20120608 TDQ-3589
            if (indicator instanceof DuplicateCountIndicator) {
                ((DuplicateCountIndicator) indicator).handle(object, rowValues);
            } else {
                // ~
                indicator.handle(object);
            }
            AnalyzedDataSet analyzedDataSet = indicToRowMap.get(indicator);
            if (analyzedDataSet == null) {
                analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                indicToRowMap.put(indicator, analyzedDataSet);
                analyzedDataSet.setDataCount(maxNumberRows);
                analyzedDataSet.setRecordSize(0);
            }
            // see IndicatorEvaluator line 166, the logic is almost the same
            if (analysis.getParameters().isStoreData()) {
                if (indicator.mustStoreRow()) {
                    List<Object[]> valueObjectList = initDataSet(indicator, indicToRowMap, object);
                    recordIncrement = valueObjectList.size();
                    List<Object> inputRowList = new ArrayList<Object>();
                    for (int j = 0; j < rowValues.length; j++) {
                        Object newobject = rowValues[j];
                        if (indicator.isUsedMapDBMode()) {
                            inputRowList.add(newobject == null ? PluginConstant.NULL_STRING : newobject);
                            continue;
                        } else {
                            if (recordIncrement < maxNumberRows) {
                                if (recordIncrement < valueObjectList.size()) {
                                    valueObjectList.get(recordIncrement)[j] = newobject;
                                } else {
                                    // first cell of a new stored row: allocate the row, later cells fill it
                                    Object[] valueObject = new Object[rowValues.length];
                                    valueObject[j] = newobject;
                                    valueObjectList.add(valueObject);
                                }
                            } else {
                                break;
                            }
                        }
                    }
                    if (indicator.isUsedMapDBMode()) {
                        MapDBUtils.handleDrillDownData(object, inputRowList, indicator);
                    }
                } else if (indicator instanceof UniqueCountIndicator && analysis.getResults().getIndicToRowMap().get(indicator).getData() != null) {
                    List<Object[]> removeValueObjectList = analysis.getResults().getIndicToRowMap().get(indicator).getData();
                    if (columnElementList.size() == 0) {
                        continue;
                    }
                    int offsetting = columnElementList.indexOf(indicator.getAnalyzedElement());
                    if (offsetting < 0) {
                        // the analysed element is not one of the columns: nothing can be located
                        continue;
                    }
                    for (Object[] dataObject : removeValueObjectList) {
                        if (dataObject == null || offsetting >= dataObject.length) {
                            continue;
                        }
                        // Added yyin 20120611 TDQ5279 (integer values are compared with the parsed cell)
                        if (matchesStoredValue(object, dataObject[offsetting])) {
                            // safe although we are iterating: the loop is left right after the removal
                            removeValueObjectList.remove(dataObject);
                            break;
                        }
                    }
                }
            }
        }
    }
    return returnCode;
}

/**
 * Tells whether a stored cell holds the given value. An {@link Integer} value also matches a string cell
 * that parses to the same integer; null, non-string or non-numeric cells never raise an exception.
 */
private static boolean matchesStoredValue(Object value, Object storedCell) {
    if (storedCell == null) {
        return false;
    }
    if (value instanceof Integer && storedCell instanceof String) {
        try {
            if (value.equals(Integer.parseInt((String) storedCell))) {
                return true;
            }
        } catch (NumberFormatException ignored) {
            // the cell is not an integer literal: fall back to the plain equality check below
        }
    }
    return storedCell.equals(value);
}
Also used : DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) ReturnCode(org.talend.utils.sugars.ReturnCode) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) ArrayList(java.util.ArrayList) List(java.util.List) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator)

Aggregations

UniqueCountIndicator (org.talend.dataquality.indicators.UniqueCountIndicator) 9, DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator) 5, ArrayList (java.util.ArrayList) 4, RowCountIndicator (org.talend.dataquality.indicators.RowCountIndicator) 4, List (java.util.List) 3, AnalyzedDataSet (org.talend.dataquality.analysis.AnalyzedDataSet) 3, DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator) 3, Indicator (org.talend.dataquality.indicators.Indicator) 3, MaxLengthIndicator (org.talend.dataquality.indicators.MaxLengthIndicator) 3, MinLengthIndicator (org.talend.dataquality.indicators.MinLengthIndicator) 3, ENotificationImpl (org.eclipse.emf.ecore.impl.ENotificationImpl) 2, Test (org.junit.Test) 2, AverageLengthIndicator (org.talend.dataquality.indicators.AverageLengthIndicator) 2, BlankCountIndicator (org.talend.dataquality.indicators.BlankCountIndicator) 2, FrequencyIndicator (org.talend.dataquality.indicators.FrequencyIndicator) 2, MeanIndicator (org.talend.dataquality.indicators.MeanIndicator) 2, MedianIndicator (org.talend.dataquality.indicators.MedianIndicator) 2, ModeIndicator (org.talend.dataquality.indicators.ModeIndicator) 2, NullCountIndicator (org.talend.dataquality.indicators.NullCountIndicator) 2, UserDefIndicator (org.talend.dataquality.indicators.sql.UserDefIndicator) 2