Use of org.talend.dataquality.indicators.MaxLengthIndicator in the tdq-studio-se project by Talend.
From class IndicatorEvaluator, method initDataSet.
/**
 * Looks up (creating on demand) the row list in which the drill-down rows for the given indicator and
 * value should be stored.
 * <p>
 * Three storage layouts are used, chosen from the indicator:
 * <ul>
 * <li>frequency / min-length / max-length indicators: one row list per key in the frequency-data map;</li>
 * <li>indicators flagged as valid-row or invalid-row: one of the two lists held in the pattern data;</li>
 * <li>everything else: the single plain data list of the data set.</li>
 * </ul>
 *
 * @param indicator the indicator whose rows are being collected
 * @param indicToRowMap per-indicator data sets; a new data set is registered here when none exists yet
 * @param object the analyzed value used to derive the frequency key (may be {@code null})
 * @return the row list to append to, or {@code null} when the selected pattern-data slot does not hold an
 * {@code ArrayList}
 */
@SuppressWarnings("unchecked")
protected List<Object[]> initDataSet(Indicator indicator, EMap<Indicator, AnalyzedDataSet> indicToRowMap, Object object) {
    AnalyzedDataSet dataSet = indicToRowMap.get(indicator);
    if (dataSet == null) {
        // first row seen for this indicator: register a fresh data set sized from the analysis parameters
        dataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
        indicToRowMap.put(indicator, dataSet);
        dataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
        dataSet.setRecordSize(0);
    }

    final boolean lengthIndicator = indicator instanceof MinLengthIndicator || indicator instanceof MaxLengthIndicator;

    // --- rows grouped by key (frequency-like indicators)
    if (indicator instanceof FrequencyIndicator || lengthIndicator) {
        Map<Object, List<Object[]>> rowsByKey = dataSet.getFrequencyData();
        if (rowsByKey == null) {
            rowsByKey = new HashMap<Object, List<Object[]>>();
            dataSet.setFrequencyData(rowsByKey);
        }

        final String groupKey;
        if (object == null) {
            groupKey = SpecialValueDisplay.NULL_FIELD;
        } else if (lengthIndicator) {
            // length indicators group rows by the length of the value's text
            groupKey = String.valueOf(object.toString().length());
        } else if (object.equals(PluginConstant.EMPTY_STRING)) {
            groupKey = SpecialValueDisplay.EMPTY_FIELD;
        } else if (indicator instanceof PatternLowFreqIndicator) {
            groupKey = ((PatternLowFreqIndicator) indicator).convertCharacters(object.toString());
        } else if (indicator instanceof PatternFreqIndicator) {
            groupKey = ((PatternFreqIndicator) indicator).convertCharacters(object.toString());
        } else if (indicator instanceof FormatFreqPieIndicator) {
            // MOD qiongli 2011-8-26,feature TDQ-3253.
            groupKey = ((FormatFreqPieIndicator) indicator).getCurrentKey();
        } else {
            groupKey = object.toString();
        }

        List<Object[]> keyedRows = rowsByKey.get(groupKey);
        if (keyedRows == null) {
            keyedRows = new ArrayList<Object[]>();
            rowsByKey.put(groupKey, keyedRows);
        }
        return keyedRows;
    }

    // --- rows split into valid / invalid lists
    final boolean invalidRow = indicator.isInValidRow();
    if (invalidRow || indicator.isValidRow()) {
        List<Object> validityLists = dataSet.getPatternData();
        if (validityLists == null) {
            validityLists = new ArrayList<Object>();
            // mapping with AnalyzedDataSetImpl.VALID_VALUE
            validityLists.add(new ArrayList<Object[]>());
            // mapping with AnalyzedDataSetImpl.INVALID_VALUE
            validityLists.add(new ArrayList<Object[]>());
            dataSet.setPatternData(validityLists);
        }

        Object candidate;
        if (invalidRow) {
            candidate = validityLists.get(AnalyzedDataSetImpl.INVALID_VALUE);
        } else {
            candidate = validityLists.get(AnalyzedDataSetImpl.VALID_VALUE);
        }
        return candidate instanceof ArrayList<?> ? (ArrayList<Object[]>) candidate : null;
    }

    // --- plain list of rows
    List<Object[]> plainRows = dataSet.getData();
    if (plainRows == null) {
        plainRows = new ArrayList<Object[]>();
        dataSet.setData(plainRows);
    }
    return plainRows;
}
Use of org.talend.dataquality.indicators.MaxLengthIndicator in the tdq-studio-se project by Talend.
From class IndicatorCommonUtil, method getIndicatorValue.
/**
 * Returns the result value held by the given indicator.
 * <p>
 * Range and IQR indicators always yield their range and are then marked as computed. Any other indicator
 * is only read when it is already flagged as computed; if the value read is {@code null} or prints as
 * "null" (ignoring case), the indicator is flagged as not computed. Exceptions raised while reading are
 * logged and not rethrown.
 *
 * @param indicator the indicator to read
 * @return the indicator value, or {@code null} when the indicator type is unknown, the indicator is not
 * computed, or no value could be read
 */
public static Object getIndicatorValue(Indicator indicator) {
    IndicatorEnum type = IndicatorEnum.findIndicatorEnum(indicator.eClass());
    if (type == null) {
        return null;
    }

    Object value = null;
    try {
        if (type == IndicatorEnum.RangeIndicatorEnum || type == IndicatorEnum.IQRIndicatorEnum) {
            RangeIndicator rangeIndicator = (RangeIndicator) indicator;
            value = rangeIndicator.getRange();
            rangeIndicator.setComputed(true);
        } else if (indicator.isComputed()) {
            value = readComputedValue(type, indicator);
            boolean noUsableValue = value == null || "null".equalsIgnoreCase(value.toString()); //$NON-NLS-1$
            if (noUsableValue) {
                indicator.setComputed(false);
            }
        }
    } catch (Exception e) {
        log.error(Messages.getString("IndicatorCommonUtil.FailValue", e.getMessage()), e); //$NON-NLS-1$
    }
    return value;
}

/**
 * Reads the value of an already computed indicator through the accessor matching its type.
 *
 * @param type the enum constant resolved for the indicator
 * @param indicator the indicator to read; it is cast to the interface implied by {@code type}
 * @return the value read, or {@code null} for types that are not handled here
 */
private static Object readComputedValue(IndicatorEnum type, Indicator indicator) {
    switch (type) {
    case RowCountIndicatorEnum:
        return ((RowCountIndicator) indicator).getCount();
    case NullCountIndicatorEnum:
        return ((NullCountIndicator) indicator).getNullCount();
    case DistinctCountIndicatorEnum:
        return ((DistinctCountIndicator) indicator).getDistinctValueCount();
    case UniqueIndicatorEnum:
        return ((UniqueCountIndicator) indicator).getUniqueValueCount();
    case DuplicateCountIndicatorEnum:
        return ((DuplicateCountIndicator) indicator).getDuplicateValueCount();
    case BlankCountIndicatorEnum:
        return ((BlankCountIndicator) indicator).getBlankCount();
    case DefValueCountIndicatorEnum:
        return ((DefValueCountIndicator) indicator).getDefaultValCount();

    // --- text length indicators
    case MinLengthIndicatorEnum:
        return ((MinLengthIndicator) indicator).getLength();
    case MinLengthWithNullIndicatorEnum:
        return ((MinLengthWithNullIndicator) indicator).getLength();
    case MinLengthWithBlankIndicatorEnum:
        return ((MinLengthWithBlankIndicator) indicator).getLength();
    case MinLengthWithBlankNullIndicatorEnum:
        return ((MinLengthWithBlankNullIndicator) indicator).getLength();
    case MaxLengthIndicatorEnum:
        return ((MaxLengthIndicator) indicator).getLength();
    case MaxLengthWithNullIndicatorEnum:
        return ((MaxLengthWithNullIndicator) indicator).getLength();
    case MaxLengthWithBlankIndicatorEnum:
        return ((MaxLengthWithBlankIndicator) indicator).getLength();
    case MaxLengthWithBlankNullIndicatorEnum:
        return ((MaxLengthWithBlankNullIndicator) indicator).getLength();
    case AverageLengthIndicatorEnum:
        return ((AverageLengthIndicator) indicator).getAverageLength();
    case AverageLengthWithNullIndicatorEnum:
        return ((AvgLengthWithNullIndicator) indicator).getAverageLength();
    case AverageLengthWithBlankIndicatorEnum:
        return ((AvgLengthWithBlankIndicator) indicator).getAverageLength();
    case AverageLengthWithNullBlankIndicatorEnum:
        return ((AvgLengthWithBlankNullIndicator) indicator).getAverageLength();

    // --- every frequency-table style indicator shares one handler
    case FrequencyIndicatorEnum:
    case DateFrequencyIndicatorEnum:
    case WeekFrequencyIndicatorEnum:
    case MonthFrequencyIndicatorEnum:
    case QuarterFrequencyIndicatorEnum:
    case YearFrequencyIndicatorEnum:
    case BinFrequencyIndicatorEnum:
    case LowFrequencyIndicatorEnum:
    case DateLowFrequencyIndicatorEnum:
    case WeekLowFrequencyIndicatorEnum:
    case MonthLowFrequencyIndicatorEnum:
    case QuarterLowFrequencyIndicatorEnum:
    case YearLowFrequencyIndicatorEnum:
    case BinLowFrequencyIndicatorEnum:
    case PatternFreqIndicatorEnum:
    case PatternLowFreqIndicatorEnum:
    case EastAsiaPatternFreqIndicatorEnum:
    case EastAsiaPatternLowFreqIndicatorEnum:
    case DatePatternFreqIndicatorEnum:
    case SoundexIndicatorEnum:
    case SoundexLowIndicatorEnum:
    case BenfordLawFrequencyIndicatorEnum:
        return handleFrequency(indicator);

    // --- summary statistics
    case MeanIndicatorEnum:
        return ((MeanIndicator) indicator).getMean();
    case MedianIndicatorEnum:
        return ((MedianIndicator) indicator).getMedian();
    case MinValueIndicatorEnum:
        return ((MinValueIndicator) indicator).getValue();
    case MaxValueIndicatorEnum:
        return ((MaxValueIndicator) indicator).getValue();
    case LowerQuartileIndicatorEnum:
        return ((LowerQuartileIndicator) indicator).getValue();
    case UpperQuartileIndicatorEnum:
        return ((UpperQuartileIndicator) indicator).getValue();

    // --- pattern matching
    case RegexpMatchingIndicatorEnum:
    case SqlPatternMatchingIndicatorEnum:
    case AllMatchIndicatorEnum:
        return handleMatchingValue(indicator);

    case ModeIndicatorEnum:
        return ((ModeIndicator) indicator).getMode();
    case UserDefinedIndicatorEnum:
        return handleUDIValue(indicator);
    case WhereRuleIndicatorEnum:
        Long ruleCount = ((WhereRuleIndicator) indicator).getUserCount();
        // mixed int/Long operands: the conditional is evaluated as a long, so a missing count yields 0L
        return ruleCount == null ? 0 : ruleCount;

    // MOD qiongli 2011-7-21 feature 22362
    case ValidPhoneCountIndicatorEnum:
        return ((ValidPhoneCountIndicator) indicator).getValidPhoneNumCount();
    case ValidRegCodeCountIndicatorEnum:
        return ((ValidRegCodeCountIndicator) indicator).getValidRegCount();
    case InvalidRegCodeCountIndicatorEnum:
        return ((InvalidRegCodeCountIndicator) indicator).getInvalidRegCount();
    case WellFormE164PhoneCountIndicatorEnum:
        return ((WellFormE164PhoneCountIndicator) indicator).getWellFormE164PhoneCount();
    case WellFormIntePhoneCountIndicatorEnum:
        return ((WellFormIntePhoneCountIndicator) indicator).getWellFormIntePhoneCount();
    case WellFormNationalPhoneCountIndicatorEnum:
        return ((WellFormNationalPhoneCountIndicator) indicator).getWellFormNatiPhoneCount();
    case PossiblePhoneCountIndicatorEnum:
        return ((PossiblePhoneCountIndicator) indicator).getPossiblePhoneCount();
    case FormatFreqPieIndictorEnum:
        return handleFreqPie(indicator);
    default:
        return null;
    }
}
Use of org.talend.dataquality.indicators.MaxLengthIndicator in the tdq-studio-se project by Talend.
From class IndicatorHelper, method getIndicatorValue.
/**
 * Renders the result value of an indicator as text.
 * <p>
 * The indicator is first dispatched through an {@code IndicatorsSwitch}; when that switch produces
 * {@code null}, it is dispatched through an {@code IndicatorSqlSwitch} covering where-rule and user-defined
 * indicators.
 *
 * @param indicator the indicator to read
 * @return the textual value, or whatever the SQL switch yields when the first switch returned {@code null}
 */
public static String getIndicatorValue(Indicator indicator) {
    IndicatorsSwitch<String> modelSwitch = new IndicatorsSwitch<String>() {

        // --- averages rendered through createStandardNumber
        @Override
        public String caseAverageLengthIndicator(AverageLengthIndicator avgLength) {
            return createStandardNumber(avgLength.getAverageLength());
        }

        @Override
        public String caseMeanIndicator(MeanIndicator mean) {
            return createStandardNumber(mean.getMean());
        }

        @Override
        public String caseMedianIndicator(MedianIndicator median) {
            return createStandardNumber(median.getMedian());
        }

        // --- simple counts and lengths
        @Override
        public String caseBlankCountIndicator(BlankCountIndicator blankCount) {
            return String.valueOf(blankCount.getBlankCount());
        }

        @Override
        public String caseDefValueCountIndicator(DefValueCountIndicator defValueCount) {
            return String.valueOf(defValueCount.getDefaultValCount());
        }

        @Override
        public String caseDistinctCountIndicator(DistinctCountIndicator distinctCount) {
            return String.valueOf(distinctCount.getDistinctValueCount());
        }

        @Override
        public String caseDuplicateCountIndicator(DuplicateCountIndicator duplicateCount) {
            return String.valueOf(duplicateCount.getDuplicateValueCount());
        }

        @Override
        public String caseMaxLengthIndicator(MaxLengthIndicator maxLength) {
            return String.valueOf(maxLength.getLength());
        }

        @Override
        public String caseMinLengthIndicator(MinLengthIndicator minLength) {
            return String.valueOf(minLength.getLength());
        }

        @Override
        public String caseModeIndicator(ModeIndicator mode) {
            return String.valueOf(mode.getMode());
        }

        @Override
        public String caseNullCountIndicator(NullCountIndicator nullCount) {
            return String.valueOf(nullCount.getNullCount());
        }

        @Override
        public String casePatternMatchingIndicator(PatternMatchingIndicator patternMatching) {
            return String.valueOf(patternMatching.getMatchingValueCount());
        }

        @Override
        public String caseRowCountIndicator(RowCountIndicator rowCount) {
            return String.valueOf(rowCount.getCount());
        }

        @Override
        public String caseUniqueCountIndicator(UniqueCountIndicator uniqueCount) {
            return String.valueOf(uniqueCount.getUniqueValueCount());
        }

        @Override
        public String caseValueIndicator(ValueIndicator valueIndicator) {
            return valueIndicator.getValue();
        }

        // --- phone number and region code counts
        @Override
        public String caseValidPhoneCountIndicator(ValidPhoneCountIndicator validPhone) {
            return String.valueOf(validPhone.getValidPhoneNumCount());
        }

        @Override
        public String casePossiblePhoneCountIndicator(PossiblePhoneCountIndicator possiblePhone) {
            return String.valueOf(possiblePhone.getPossiblePhoneCount());
        }

        @Override
        public String caseValidRegCodeCountIndicator(ValidRegCodeCountIndicator validRegCode) {
            return String.valueOf(validRegCode.getValidRegCount());
        }

        @Override
        public String caseInvalidRegCodeCountIndicator(InvalidRegCodeCountIndicator invalidRegCode) {
            return String.valueOf(invalidRegCode.getInvalidRegCount());
        }

        @Override
        public String caseWellFormE164PhoneCountIndicator(WellFormE164PhoneCountIndicator e164Phone) {
            return String.valueOf(e164Phone.getWellFormE164PhoneCount());
        }

        @Override
        public String caseWellFormIntePhoneCountIndicator(WellFormIntePhoneCountIndicator intePhone) {
            return String.valueOf(intePhone.getWellFormIntePhoneCount());
        }

        @Override
        public String caseWellFormNationalPhoneCountIndicator(WellFormNationalPhoneCountIndicator nationalPhone) {
            return String.valueOf(nationalPhone.getWellFormNatiPhoneCount());
        }
    };

    // TDQ-11114: consider the UDI type
    IndicatorSqlSwitch<String> sqlSwitch = new IndicatorSqlSwitch<String>() {

        @Override
        public String caseWhereRuleIndicator(WhereRuleIndicator whereRule) {
            return String.valueOf(whereRule.getUserCount());
        }

        @Override
        public String caseUserDefIndicator(UserDefIndicator udi) {
            // TDQ-11114: get the correct value for user define realvalue indicator
            if (udi != null) {
                IndicatorCategory udiCategory = IndicatorCategoryHelper.getCategory(udi.getIndicatorDefinition());
                if (IndicatorCategoryHelper.isUserDefRealValue(udiCategory)) {
                    return String.valueOf(udi.getRealValue());
                }
            }
            // TDQ-11485: fix the match udi get result value error
            return String.valueOf(udi.getIntegerValue());
        }

        @Override
        public String caseJavaUserDefIndicator(JavaUserDefIndicator javaUdi) {
            return String.valueOf(javaUdi.getUserCount());
        }
    };

    String modelValue = modelSwitch.doSwitch(indicator);
    if (modelValue != null) {
        return modelValue;
    }
    // nothing produced by the first switch: fall back to the SQL indicator switch
    return sqlSwitch.doSwitch(indicator);
}
Use of org.talend.dataquality.indicators.MaxLengthIndicator in the tdq-studio-se project by Talend.
From class IndicatorEvaluationMain, method main.
/**
 * Manual test driver: opens a database connection from "db.properties", registers a set of indicators on
 * the columns of table "my_test" in database "test", evaluates them with a single select statement, prints
 * the results and saves the EMF resource to "out/columnTest_0.1.ana".
 * <p>
 * NOTE(review): {@code evaluator} and {@code rContents} are not declared in this method; presumably they
 * are fields of the enclosing class - confirm.
 *
 * @param args not read by the active code of this method
 */
public static void main(String[] args) {
// connection settings are looked up relative to IndicatorEvaluator.class
TypedProperties connectionParams = PropertiesLoader.getProperties(IndicatorEvaluator.class, "db.properties");
String driverClassName = connectionParams.getProperty("driver");
String dbUrl = connectionParams.getProperty("url");
try {
TimeTracer tt = new TimeTracer("Indicator evaluation", null);
tt.start();
// create connection
Connection connection = ConnectionUtils.createConnection(dbUrl, driverClassName, connectionParams);
String database = "test";
String tableName = "my_test";
// --- columns to analyze
String[] columnsArray = new String[] { // trailing comments give the array index of each column
"my_int", // 0
"my_double", // 1
"my_text", // 2
"my_date", // 3
"my_string", // 4
"my_int_null" }; // 5
List<String> columns = Arrays.asList(columnsArray);
// store in file
File file = new File("out/columnTest_0.1.ana");
EMFUtil util = new EMFUtil();
Resource resource = util.getResourceSet().createResource(URI.createFileURI(file.getAbsolutePath()));
rContents = resource.getContents();
evaluator.setConnection(connection);
// --- create indicators
// NOTE(review): distinctCountIndicator, distinctCountIndicator2, uniqueCountIndicator,
// duplicateCountIndicator, modeIndicator and textFrequencyIndicator are created here but never passed
// to addIndicator in this method (the corresponding calls below are commented out).
RowCountIndicator rowCountIndicator = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
NullCountIndicator nullCountIndicator = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
DistinctCountIndicator distinctCountIndicator = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
DistinctCountIndicator distinctCountIndicator2 = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
UniqueCountIndicator uniqueCountIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
DuplicateCountIndicator duplicateCountIndicator = IndicatorsFactory.eINSTANCE.createDuplicateCountIndicator();
BlankCountIndicator blankCountIndicator = IndicatorsFactory.eINSTANCE.createBlankCountIndicator();
MinLengthIndicator minLengthIndicator = IndicatorsFactory.eINSTANCE.createMinLengthIndicator();
MaxLengthIndicator maxLengthIndicator = IndicatorsFactory.eINSTANCE.createMaxLengthIndicator();
AverageLengthIndicator averageLengthIndicator = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
AverageLengthIndicator averageLengthIndicator2 = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
ModeIndicator modeIndicator = IndicatorsFactory.eINSTANCE.createModeIndicator();
FrequencyIndicator textFrequencyIndicator = IndicatorsFactory.eINSTANCE.createFrequencyIndicator();
// store in freq indic
// textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator);
// textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator2);
// textFrequencyIndicator.setUniqueCountIndicator(uniqueCountIndicator);
// textFrequencyIndicator.setDuplicateCountIndicator(duplicateCountIndicator);
// textFrequencyIndicator.setModeIndicator(modeIndicator);
MeanIndicator doubleMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
MeanIndicator integerMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
MedianIndicator medianIndicator = IndicatorsFactory.eINSTANCE.createMedianIndicator();
SumIndicator integerSumIndicator = IndicatorsFactory.eINSTANCE.createSumIndicator();
// --- attach indicators to columns (trailing comments name the column selected by the index)
addIndicator(columnsArray[0], medianIndicator); // my_int
addIndicator(columnsArray[1], doubleMeanIndicator); // my_double
addIndicator(columnsArray[2], blankCountIndicator); // my_text
addIndicator(columnsArray[5], nullCountIndicator); // my_int_null
// addIndicator(columnsArray[2], textFrequencyIndicator);
// addIndicator(columnsArray[2], distinctCountIndicator); // probably not useful?
// addIndicator(columnsArray[2], uniqueCountIndicator); // probably not useful?
// addIndicator(columnsArray[2], duplicateCountIndicator); // probably not useful?
// addIndicator(columnsArray[2], modeIndicator); // probably not useful?
addIndicator(columnsArray[3], rowCountIndicator); // my_date
addIndicator(columnsArray[5], integerSumIndicator); // my_int_null
addIndicator(columnsArray[5], integerMeanIndicator); // my_int_null
addIndicator(columnsArray[2], averageLengthIndicator); // my_text
addIndicator(columnsArray[3], averageLengthIndicator2); // my_date
addIndicator(columnsArray[3], minLengthIndicator); // my_date
addIndicator(columnsArray[3], maxLengthIndicator); // my_date
// build query on columns
// TODO scorreia add filter somewhere here...
String selectCols = sqlSelectColumns(database, tableName, columns);
// --- create a description of the column set
// NOTE(review): queryExpression is populated but not used again within this method.
QueryExpression queryExpression = DatatypesFactory.eINSTANCE.createQueryExpression();
queryExpression.setBody(selectCols);
// TODO scorreia externalize this as a constant
queryExpression.setLanguage("SQL");
tt.start("compute");
evaluator.setFetchSize(10000);
// NOTE(review): the meaning of the boolean argument is not visible here - possibly "close the
// connection when done"; confirm against the evaluator.
evaluator.evaluateIndicators(selectCols, true);
tt.end("compute");
// Print the median, then the unique/distinct counts of the text frequency indicator
System.out.println("Median=" + medianIndicator.getMedian());
// NOTE(review): textFrequencyIndicator was not registered through addIndicator above, so the two values
// printed below are whatever a freshly created indicator reports - confirm this is intended.
System.out.println("# Unique values= " + textFrequencyIndicator.getUniqueValueCount());
System.out.println("# Distinct values= " + textFrequencyIndicator.getDistinctValueCount());
// print every indicator attached to each analyzed column
for (String col : columns) {
printIndicators(evaluator.getIndicators(col));
}
tt.start("save");
util.save();
tt.end("saved in " + file.getAbsolutePath());
tt.end();
// print the identifier of the median indicator as seen by EcoreUtil and by the CWM resource
CwmResource cwmR = (CwmResource) resource;
String id = cwmR.getID(medianIndicator);
System.out.println("ecore util.getId= " + EcoreUtil.getID(medianIndicator));
System.out.println("uuId= " + id);
// test reload this file
// LoadSerialData.main(args);
} catch (SQLException e) {
// failures are only logged; the method then returns normally
log.error(e, e);
} catch (InstantiationException e) {
// failures are only logged; the method then returns normally
log.error(e, e);
} catch (IllegalAccessException e) {
// failures are only logged; the method then returns normally
log.error(e, e);
} catch (ClassNotFoundException e) {
// failures are only logged; the method then returns normally
log.error(e, e);
}
}
Use of org.talend.dataquality.indicators.MaxLengthIndicator in the tdq-studio-se project by Talend.
From class TextIndicatorImpl, method basicSetMaxLengthIndicator.
/**
 * <!-- begin-user-doc -->
 * Stores the new max-length indicator and, when notification is required, records a {@code SET}
 * notification carrying the previous and the new value.
 *
 * @param newMaxLengthIndicator the indicator to store (replaces the current one)
 * @param msgs the notification chain accumulated so far, may be {@code null}
 * @return {@code msgs} unchanged when no notification is required; otherwise the chain containing the new
 * notification (the notification itself when {@code msgs} was {@code null})
 * <!-- end-user-doc -->
 * @generated
 */
public NotificationChain basicSetMaxLengthIndicator(MaxLengthIndicator newMaxLengthIndicator, NotificationChain msgs) {
    final MaxLengthIndicator previousIndicator = maxLengthIndicator;
    maxLengthIndicator = newMaxLengthIndicator;

    if (!eNotificationRequired()) {
        return msgs;
    }

    ENotificationImpl setEvent = new ENotificationImpl(this, Notification.SET,
            IndicatorsPackage.TEXT_INDICATOR__MAX_LENGTH_INDICATOR, previousIndicator, newMaxLengthIndicator);
    if (msgs == null) {
        // no chain yet: the notification itself becomes the head of the chain
        return setEvent;
    }
    msgs.add(setEvent);
    return msgs;
}
Aggregations