Use of org.talend.dataquality.indicators.DuplicateCountIndicator in project tdq-studio-se by Talend.
Class ColumnSetDBMapTest, method testSubListLongLongMapOfLongListOfObjectDataValidationCase2.
/**
 * Test method for
 * {@link org.talend.dataquality.indicators.mapdb.ColumnSetDBMap#subList(long, long, java.util.Map, org.talend.cwm.indicator.DataValidation)}
 * <p>
 * Case 2 (duplicate case): two entries are stored in the map, then the range [0, 2) is requested through the
 * validation created for a {@link DuplicateCountIndicator}; exactly one row is expected back.
 */
@Test
public void testSubListLongLongMapOfLongListOfObjectDataValidationCase2() {
    ColumnSetDBMap dbMap1 = new ColumnSetDBMap();
    Assert.assertTrue(dbMap1.isEmpty());

    // first entry, stored with the value 1
    List<Object> firstKey = new ArrayList<Object>();
    firstKey.add("id1"); // $NON-NLS-1$
    firstKey.add("name1"); // $NON-NLS-1$
    dbMap1.put(firstKey, 1L);

    // second entry, stored with the value 2
    List<Object> secondKey = new ArrayList<Object>();
    secondKey.add("id2"); // $NON-NLS-1$
    secondKey.add("name2"); // $NON-NLS-1$
    dbMap1.put(secondKey, 2L);

    Assert.assertEquals(2, dbMap1.size());

    DuplicateCountIndicator duplicateCountIndicator = IndicatorsFactory.eINSTANCE.createDuplicateCountIndicator();
    HashMap<Long, List<Object>> indexMap = new HashMap<Long, List<Object>>();
    List<Object[]> rows = dbMap1.subList(0, 2, indexMap,
            IDataValidationFactory.INSTANCE.createValidation(duplicateCountIndicator));
    Assert.assertEquals(1, rows.size());
}
Use of org.talend.dataquality.indicators.DuplicateCountIndicator in project tdq-studio-se by Talend.
Class DrillDownEditorInput, method getDesignatedData.
/**
 * Maps the given drill-down rows onto the data that should be shown for the current indicator.
 * <ul>
 * <li>{@code null} input yields a new empty list.</li>
 * <li>When the menu type is not the "value" type, the input list is returned unchanged.</li>
 * <li>For the "value" menu type with no analyzed element and a {@link SimpleStatIndicator} container (column set
 * drill down with the Java engine), the input list is returned unchanged.</li>
 * <li>For a {@link DuplicateCountIndicator}, one single-cell row is produced per entry of
 * {@code getDuplicateValues()}; the input rows are not used.</li>
 * <li>Otherwise each input row is reduced to the single cell at the analyzed column's index in its owning table
 * (index 0 when the analyzed element is not a {@link MetadataColumn}).</li>
 * </ul>
 *
 * @param dataList the raw rows to map, may be {@code null}
 * @return the rows to display, never {@code null} when {@code dataList} is {@code null}
 */
private List<Object[]> getDesignatedData(List<Object[]> dataList) {
    ModelElement analysisElement = currIndicator.getAnalyzedElement();
    List<Object[]> returnDataList = new ArrayList<Object[]>();
    // The former "dataList.size() < 0" test could never be true (a list size is never negative), so only the
    // null guard is kept. An empty list is intentionally NOT short-circuited here: the DuplicateCountIndicator
    // branch below builds its rows from the indicator itself and does not depend on dataList.
    if (dataList == null) {
        return returnDataList;
    }
    if (!DrillDownUtils.judgeMenuType(this.getMenuType(), DrillDownUtils.MENU_VALUE_TYPE)) {
        return dataList;
    }
    // MOD qiongli 2011-3-3 feature 19192 drill down for columnSet with Java engine.
    if (analysisElement == null && currIndicator.eContainer() instanceof SimpleStatIndicator) {
        return dataList;
    }

    int offset = 0;
    if (analysisElement instanceof MetadataColumn) {
        MetadataTable mTable = ColumnHelper.getColumnOwnerAsMetadataTable((MetadataColumn) analysisElement);
        List<MetadataColumn> columnElementList = mTable.getColumns();
        // NOTE(review): indexOf returns -1 when the column is not found in its table, which would make the
        // row access below fail - confirm that this cannot happen for analyzed columns.
        offset = columnElementList.indexOf(analysisElement);
    }

    // Added yyin 20120608 TDQ-3589
    if (currIndicator instanceof DuplicateCountIndicator) {
        for (Object obj : ((DuplicateCountIndicator) currIndicator).getDuplicateValues()) {
            Object[] newObj = new Object[1];
            newObj[0] = obj;
            returnDataList.add(newObj);
        }
        return returnDataList;
    }
    // ~

    // keep only the cell belonging to the analyzed column
    for (Object[] obj : dataList) {
        Object[] newObj = new Object[1];
        newObj[0] = obj[offset];
        returnDataList.add(newObj);
    }
    return returnDataList;
}
Use of org.talend.dataquality.indicators.DuplicateCountIndicator in project tdq-studio-se by Talend.
Class ColumnSetIndicatorEvaluator, method storeDataSet.
/**
 * Stores, for every leaf indicator of each {@link SimpleStatIndicator} (except {@link RowCountIndicator}), the rows
 * taken from {@code simpleIndicator.getListRows()} into the analysis result map
 * ({@code analysis.getResults().getIndicToRowMap()}).
 * <p>
 * The last element of each row is read as the occurrence count of that row:
 * <ul>
 * <li>{@link DistinctCountIndicator}: every row is kept;</li>
 * <li>{@link UniqueCountIndicator}: rows whose count equals 1 are kept;</li>
 * <li>{@link DuplicateCountIndicator}: rows whose count is greater than 1 are kept.</li>
 * </ul>
 * Afterwards the row lists of {@link SimpleStatIndicator}s and {@link AllMatchIndicator}s which do not store data
 * are replaced by empty lists.
 */
private void storeDataSet() {
    EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
    for (Indicator indicator : analysis.getResults().getIndicators()) {
        if (indicator instanceof SimpleStatIndicator) {
            SimpleStatIndicator simpleIndicator = (SimpleStatIndicator) indicator;
            // NOTE(review): this break (and the one on empty rows below) leaves the whole indicator loop, so
            // any indicators that follow are not visited - confirm that "continue" is not what is wanted.
            if (!analysis.getParameters().isStoreData()) {
                break;
            }
            // in MapDB mode with the java engine nothing needs to be done here
            if (!(simpleIndicator.isUsedMapDBMode() && AnalysisHelper.isJavaExecutionEngine(analysis))) {
                List<Object[]> listRows = simpleIndicator.getListRows();
                if (listRows == null || listRows.isEmpty()) {
                    break;
                }
                for (Indicator leafIndicator : simpleIndicator.getLeafIndicators()) {
                    if (leafIndicator instanceof RowCountIndicator) {
                        continue;
                    }
                    List<Object[]> dataList = new ArrayList<Object[]>();
                    AnalyzedDataSet analyzedDataSet = indicToRowMap.get(leafIndicator);
                    if (analyzedDataSet == null) {
                        // first time this leaf indicator is seen: register a fresh data set for it
                        analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                        indicToRowMap.put(leafIndicator, analyzedDataSet);
                        analyzedDataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
                        analyzedDataSet.setRecordSize(0);
                    }
                    for (int i = 0; i < listRows.size(); i++) {
                        Object[] object = listRows.get(i);
                        // the last element store the count value. An empty row has no count: the former
                        // expression fell back to index 0, which still threw ArrayIndexOutOfBoundsException.
                        Object count = object.length > 0 ? object[object.length - 1] : null;
                        if (leafIndicator instanceof DistinctCountIndicator) {
                            dataList.add(object);
                        } else if (leafIndicator instanceof UniqueCountIndicator) {
                            if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) {
                                if (Long.valueOf(count + PluginConstant.EMPTY_STRING).longValue() == 1) {
                                    dataList.add(object);
                                }
                            }
                        } else if (leafIndicator instanceof DuplicateCountIndicator) {
                            if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) {
                                if (Long.valueOf(count + PluginConstant.EMPTY_STRING).longValue() > 1) {
                                    dataList.add(object);
                                }
                            }
                        }
                    }
                    analyzedDataSet.setData(dataList);
                }
                // MOD sizhaoliu TDQ-7144 clear the listRows after usage for drill down
                if (!simpleIndicator.isStoreData()) {
                    simpleIndicator.setListRows(new ArrayList<Object[]>());
                }
            }
        }
        if (indicator instanceof AllMatchIndicator) {
            AllMatchIndicator allMatchIndicator = (AllMatchIndicator) indicator;
            if (!allMatchIndicator.isStoreData()) {
                allMatchIndicator.setListRows(new ArrayList<Object[]>());
            }
        }
    }
}
Use of org.talend.dataquality.indicators.DuplicateCountIndicator in project tdq-studio-se by Talend.
Class IndicatorCommonUtil, method getIndicatorValue.
/**
 * Reads the result value of the given indicator according to its {@link IndicatorEnum} type.
 * <p>
 * Range and IQR indicators always return their range and are flagged as computed. Any other indicator is only
 * read when it is already computed; if the value read is {@code null} (or its string form is "null", ignoring
 * case) the indicator is flagged as not computed. Exceptions raised while reading are logged, not rethrown.
 *
 * @param indicator the indicator to read
 * @return the value read, or {@code null} when the type is unknown, the indicator is not computed, or no value
 * could be read
 */
public static Object getIndicatorValue(Indicator indicator) {
    IndicatorEnum type = IndicatorEnum.findIndicatorEnum(indicator.eClass());
    if (type == null) {
        return null;
    }

    Object value = null;
    try {
        boolean rangeLike = type == IndicatorEnum.RangeIndicatorEnum || type == IndicatorEnum.IQRIndicatorEnum;
        if (rangeLike) {
            RangeIndicator rangeIndicator = (RangeIndicator) indicator;
            value = rangeIndicator.getRange();
            rangeIndicator.setComputed(true);
        } else if (indicator.isComputed()) {
            switch (type) {
            // --- simple counts ---
            case RowCountIndicatorEnum:
                value = ((RowCountIndicator) indicator).getCount();
                break;
            case NullCountIndicatorEnum:
                value = ((NullCountIndicator) indicator).getNullCount();
                break;
            case DistinctCountIndicatorEnum:
                value = ((DistinctCountIndicator) indicator).getDistinctValueCount();
                break;
            case UniqueIndicatorEnum:
                value = ((UniqueCountIndicator) indicator).getUniqueValueCount();
                break;
            case DuplicateCountIndicatorEnum:
                value = ((DuplicateCountIndicator) indicator).getDuplicateValueCount();
                break;
            case BlankCountIndicatorEnum:
                value = ((BlankCountIndicator) indicator).getBlankCount();
                break;
            case DefValueCountIndicatorEnum:
                value = ((DefValueCountIndicator) indicator).getDefaultValCount();
                break;

            // --- text lengths ---
            case MinLengthIndicatorEnum:
                value = ((MinLengthIndicator) indicator).getLength();
                break;
            case MinLengthWithNullIndicatorEnum:
                value = ((MinLengthWithNullIndicator) indicator).getLength();
                break;
            case MinLengthWithBlankIndicatorEnum:
                value = ((MinLengthWithBlankIndicator) indicator).getLength();
                break;
            case MinLengthWithBlankNullIndicatorEnum:
                value = ((MinLengthWithBlankNullIndicator) indicator).getLength();
                break;
            case MaxLengthIndicatorEnum:
                value = ((MaxLengthIndicator) indicator).getLength();
                break;
            case MaxLengthWithNullIndicatorEnum:
                value = ((MaxLengthWithNullIndicator) indicator).getLength();
                break;
            case MaxLengthWithBlankIndicatorEnum:
                value = ((MaxLengthWithBlankIndicator) indicator).getLength();
                break;
            case MaxLengthWithBlankNullIndicatorEnum:
                value = ((MaxLengthWithBlankNullIndicator) indicator).getLength();
                break;
            case AverageLengthIndicatorEnum:
                value = ((AverageLengthIndicator) indicator).getAverageLength();
                break;
            case AverageLengthWithNullIndicatorEnum:
                value = ((AvgLengthWithNullIndicator) indicator).getAverageLength();
                break;
            case AverageLengthWithBlankIndicatorEnum:
                value = ((AvgLengthWithBlankIndicator) indicator).getAverageLength();
                break;
            case AverageLengthWithNullBlankIndicatorEnum:
                value = ((AvgLengthWithBlankNullIndicator) indicator).getAverageLength();
                break;

            // --- frequency tables: all delegated to handleFrequency ---
            case FrequencyIndicatorEnum:
            case DateFrequencyIndicatorEnum:
            case WeekFrequencyIndicatorEnum:
            case MonthFrequencyIndicatorEnum:
            case QuarterFrequencyIndicatorEnum:
            case YearFrequencyIndicatorEnum:
            case BinFrequencyIndicatorEnum:
            case LowFrequencyIndicatorEnum:
            case DateLowFrequencyIndicatorEnum:
            case WeekLowFrequencyIndicatorEnum:
            case MonthLowFrequencyIndicatorEnum:
            case QuarterLowFrequencyIndicatorEnum:
            case YearLowFrequencyIndicatorEnum:
            case BinLowFrequencyIndicatorEnum:
            case PatternFreqIndicatorEnum:
            case PatternLowFreqIndicatorEnum:
            case EastAsiaPatternFreqIndicatorEnum:
            case EastAsiaPatternLowFreqIndicatorEnum:
            case DatePatternFreqIndicatorEnum:
            case SoundexIndicatorEnum:
            case SoundexLowIndicatorEnum:
            case BenfordLawFrequencyIndicatorEnum:
                value = handleFrequency(indicator);
                break;

            // --- summary statistics ---
            case MeanIndicatorEnum:
                value = ((MeanIndicator) indicator).getMean();
                break;
            case MedianIndicatorEnum:
                value = ((MedianIndicator) indicator).getMedian();
                break;
            case MinValueIndicatorEnum:
                value = ((MinValueIndicator) indicator).getValue();
                break;
            case MaxValueIndicatorEnum:
                value = ((MaxValueIndicator) indicator).getValue();
                break;
            case LowerQuartileIndicatorEnum:
                value = ((LowerQuartileIndicator) indicator).getValue();
                break;
            case UpperQuartileIndicatorEnum:
                value = ((UpperQuartileIndicator) indicator).getValue();
                break;

            // --- pattern matching: all delegated to handleMatchingValue ---
            case RegexpMatchingIndicatorEnum:
            case SqlPatternMatchingIndicatorEnum:
            case AllMatchIndicatorEnum:
                value = handleMatchingValue(indicator);
                break;

            case ModeIndicatorEnum:
                value = ((ModeIndicator) indicator).getMode();
                break;
            case UserDefinedIndicatorEnum:
                value = handleUDIValue(indicator);
                break;
            case WhereRuleIndicatorEnum:
                Long userCount = ((WhereRuleIndicator) indicator).getUserCount();
                // keep the conditional expression: it promotes the literal so the result is always a Long
                value = userCount == null ? 0 : userCount;
                break;

            // MOD qiongli 2011-7-21 feature 22362
            case ValidPhoneCountIndicatorEnum:
                value = ((ValidPhoneCountIndicator) indicator).getValidPhoneNumCount();
                break;
            case ValidRegCodeCountIndicatorEnum:
                value = ((ValidRegCodeCountIndicator) indicator).getValidRegCount();
                break;
            case InvalidRegCodeCountIndicatorEnum:
                value = ((InvalidRegCodeCountIndicator) indicator).getInvalidRegCount();
                break;
            case WellFormE164PhoneCountIndicatorEnum:
                value = ((WellFormE164PhoneCountIndicator) indicator).getWellFormE164PhoneCount();
                break;
            case WellFormIntePhoneCountIndicatorEnum:
                value = ((WellFormIntePhoneCountIndicator) indicator).getWellFormIntePhoneCount();
                break;
            case WellFormNationalPhoneCountIndicatorEnum:
                value = ((WellFormNationalPhoneCountIndicator) indicator).getWellFormNatiPhoneCount();
                break;
            case PossiblePhoneCountIndicatorEnum:
                value = ((PossiblePhoneCountIndicator) indicator).getPossiblePhoneCount();
                break;
            case FormatFreqPieIndictorEnum:
                value = handleFreqPie(indicator);
                break;
            default:
                // any other type: value stays null
                break;
            }

            // nothing usable was read: the indicator can not be considered computed
            boolean noValue = value == null || "null".equalsIgnoreCase(value.toString()); // $NON-NLS-1$
            if (noValue) {
                indicator.setComputed(false);
            }
        }
    } catch (Exception e) {
        log.error(Messages.getString("IndicatorCommonUtil.FailValue", e.getMessage()), e); // $NON-NLS-1$
    }
    return value;
}
Use of org.talend.dataquality.indicators.DuplicateCountIndicator in project tdq-studio-se by Talend.
Class IndicatorHelper, method getIndicatorValue.
/**
 * Returns the result value of the given indicator as a string.
 * <p>
 * The indicator is first dispatched through an {@link IndicatorsSwitch} covering the standard indicators. When
 * that switch yields {@code null}, the indicator is dispatched through an {@link IndicatorSqlSwitch} covering
 * where-rule and user defined indicators, and that result is returned as is.
 *
 * @param indicator the indicator to read
 * @return the value as a string, as produced by the first switch yielding a non-null result, otherwise the result
 * of the SQL indicator switch
 */
public static String getIndicatorValue(Indicator indicator) {
    // standard indicators
    IndicatorsSwitch<String> modelSwitch = new IndicatorsSwitch<String>() {

        @Override
        public String caseAverageLengthIndicator(AverageLengthIndicator ind) {
            return createStandardNumber(ind.getAverageLength());
        }

        @Override
        public String caseBlankCountIndicator(BlankCountIndicator ind) {
            return String.valueOf(ind.getBlankCount());
        }

        @Override
        public String caseDefValueCountIndicator(DefValueCountIndicator ind) {
            return String.valueOf(ind.getDefaultValCount());
        }

        @Override
        public String caseDistinctCountIndicator(DistinctCountIndicator ind) {
            return String.valueOf(ind.getDistinctValueCount());
        }

        @Override
        public String caseDuplicateCountIndicator(DuplicateCountIndicator ind) {
            return String.valueOf(ind.getDuplicateValueCount());
        }

        @Override
        public String caseMaxLengthIndicator(MaxLengthIndicator ind) {
            return String.valueOf(ind.getLength());
        }

        @Override
        public String caseMeanIndicator(MeanIndicator ind) {
            return createStandardNumber(ind.getMean());
        }

        @Override
        public String caseMedianIndicator(MedianIndicator ind) {
            return createStandardNumber(ind.getMedian());
        }

        @Override
        public String caseMinLengthIndicator(MinLengthIndicator ind) {
            return String.valueOf(ind.getLength());
        }

        @Override
        public String caseModeIndicator(ModeIndicator ind) {
            return String.valueOf(ind.getMode());
        }

        @Override
        public String caseNullCountIndicator(NullCountIndicator ind) {
            return String.valueOf(ind.getNullCount());
        }

        @Override
        public String casePatternMatchingIndicator(PatternMatchingIndicator ind) {
            return String.valueOf(ind.getMatchingValueCount());
        }

        @Override
        public String caseRowCountIndicator(RowCountIndicator ind) {
            return String.valueOf(ind.getCount());
        }

        @Override
        public String caseUniqueCountIndicator(UniqueCountIndicator ind) {
            return String.valueOf(ind.getUniqueValueCount());
        }

        @Override
        public String caseValueIndicator(ValueIndicator ind) {
            return ind.getValue();
        }

        @Override
        public String caseValidPhoneCountIndicator(ValidPhoneCountIndicator ind) {
            return String.valueOf(ind.getValidPhoneNumCount());
        }

        @Override
        public String casePossiblePhoneCountIndicator(PossiblePhoneCountIndicator ind) {
            return String.valueOf(ind.getPossiblePhoneCount());
        }

        @Override
        public String caseValidRegCodeCountIndicator(ValidRegCodeCountIndicator ind) {
            return String.valueOf(ind.getValidRegCount());
        }

        @Override
        public String caseInvalidRegCodeCountIndicator(InvalidRegCodeCountIndicator ind) {
            return String.valueOf(ind.getInvalidRegCount());
        }

        @Override
        public String caseWellFormE164PhoneCountIndicator(WellFormE164PhoneCountIndicator ind) {
            return String.valueOf(ind.getWellFormE164PhoneCount());
        }

        @Override
        public String caseWellFormIntePhoneCountIndicator(WellFormIntePhoneCountIndicator ind) {
            return String.valueOf(ind.getWellFormIntePhoneCount());
        }

        @Override
        public String caseWellFormNationalPhoneCountIndicator(WellFormNationalPhoneCountIndicator ind) {
            return String.valueOf(ind.getWellFormNatiPhoneCount());
        }
    };

    // TDQ-11114: consider the UDI type
    IndicatorSqlSwitch<String> sqlSwitch = new IndicatorSqlSwitch<String>() {

        @Override
        public String caseWhereRuleIndicator(WhereRuleIndicator ind) {
            return String.valueOf(ind.getUserCount());
        }

        @Override
        public String caseUserDefIndicator(UserDefIndicator ind) {
            // TDQ-11114: get the correct value for user define realvalue indicator
            if (ind != null) {
                IndicatorCategory category = IndicatorCategoryHelper.getCategory(ind.getIndicatorDefinition());
                if (IndicatorCategoryHelper.isUserDefRealValue(category)) {
                    return String.valueOf(ind.getRealValue());
                }
            }
            // TDQ-11485: fix the match udi get result value error
            return String.valueOf(ind.getIntegerValue());
        }

        @Override
        public String caseJavaUserDefIndicator(JavaUserDefIndicator ind) {
            return String.valueOf(ind.getUserCount());
        }
    };

    String standardValue = modelSwitch.doSwitch(indicator);
    if (standardValue != null) {
        return standardValue;
    }
    // not handled by the standard switch: fall back to the SQL/UDI switch
    return sqlSwitch.doSwitch(indicator);
}
Aggregations