use of org.talend.dataquality.indicators.FrequencyIndicator in project tdq-studio-se by Talend.
the class IndicatorEvaluationMain method main.
/**
 * Sample driver that evaluates a fixed set of indicators on the columns of table {@code my_test} in database
 * {@code test}.
 * <p>
 * Connection settings ("driver", "url" and the remaining properties) are read from {@code db.properties}. The
 * indicators are registered through {@code addIndicator}, evaluated by the shared {@code evaluator}, printed, and
 * then saved through {@link EMFUtil#save()}. Checked exceptions are logged, not rethrown. The connection opened
 * here is always closed before the method returns.
 *
 * @param args command line arguments (not used)
 */
public static void main(String[] args) {
    TypedProperties connectionParams = PropertiesLoader.getProperties(IndicatorEvaluator.class, "db.properties");
    String driverClassName = connectionParams.getProperty("driver");
    String dbUrl = connectionParams.getProperty("url");
    // Declared outside the try block so that the finally clause can release it on every path.
    Connection connection = null;
    try {
        TimeTracer tt = new TimeTracer("Indicator evaluation", null);
        tt.start();
        // create connection
        connection = ConnectionUtils.createConnection(dbUrl, driverClassName, connectionParams);
        String database = "test";
        String tableName = "my_test";
        // --- columns to analyze (trailing comment = index in columnsArray)
        String[] columnsArray = new String[] {
                "my_int", // 0
                "my_double", // 1
                "my_text", // 2
                "my_date", // 3
                "my_string", // 4
                "my_int_null" }; // 5
        List<String> columns = Arrays.asList(columnsArray);
        // store in file
        File file = new File("out/columnTest_0.1.ana");
        EMFUtil util = new EMFUtil();
        Resource resource = util.getResourceSet().createResource(URI.createFileURI(file.getAbsolutePath()));
        rContents = resource.getContents();
        evaluator.setConnection(connection);
        // --- create indicators
        RowCountIndicator rowCountIndicator = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
        NullCountIndicator nullCountIndicator = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
        DistinctCountIndicator distinctCountIndicator = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        DistinctCountIndicator distinctCountIndicator2 = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        UniqueCountIndicator uniqueCountIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
        DuplicateCountIndicator duplicateCountIndicator = IndicatorsFactory.eINSTANCE.createDuplicateCountIndicator();
        BlankCountIndicator blankCountIndicator = IndicatorsFactory.eINSTANCE.createBlankCountIndicator();
        MinLengthIndicator minLengthIndicator = IndicatorsFactory.eINSTANCE.createMinLengthIndicator();
        MaxLengthIndicator maxLengthIndicator = IndicatorsFactory.eINSTANCE.createMaxLengthIndicator();
        AverageLengthIndicator averageLengthIndicator = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        AverageLengthIndicator averageLengthIndicator2 = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        ModeIndicator modeIndicator = IndicatorsFactory.eINSTANCE.createModeIndicator();
        FrequencyIndicator textFrequencyIndicator = IndicatorsFactory.eINSTANCE.createFrequencyIndicator();
        // store in freq indic
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator);
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator2);
        // textFrequencyIndicator.setUniqueCountIndicator(uniqueCountIndicator);
        // textFrequencyIndicator.setDuplicateCountIndicator(duplicateCountIndicator);
        // textFrequencyIndicator.setModeIndicator(modeIndicator);
        MeanIndicator doubleMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MeanIndicator integerMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MedianIndicator medianIndicator = IndicatorsFactory.eINSTANCE.createMedianIndicator();
        SumIndicator integerSumIndicator = IndicatorsFactory.eINSTANCE.createSumIndicator();
        // --- register each indicator on the column it must be computed on
        addIndicator(columnsArray[0], medianIndicator);
        addIndicator(columnsArray[1], doubleMeanIndicator);
        addIndicator(columnsArray[2], blankCountIndicator);
        addIndicator(columnsArray[5], nullCountIndicator);
        // addIndicator(columnsArray[2], textFrequencyIndicator);
        // addIndicator(columnsArray[2], distinctCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], uniqueCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], duplicateCountIndicator); // probably not useful?
        // addIndicator(columnsArray[2], modeIndicator); // probably not useful?
        addIndicator(columnsArray[3], rowCountIndicator);
        addIndicator(columnsArray[5], integerSumIndicator);
        addIndicator(columnsArray[5], integerMeanIndicator);
        addIndicator(columnsArray[2], averageLengthIndicator);
        // NOTE(review): index 3 is "my_date"; confirm that the length indicators below are meant for that column.
        addIndicator(columnsArray[3], averageLengthIndicator2);
        addIndicator(columnsArray[3], minLengthIndicator);
        addIndicator(columnsArray[3], maxLengthIndicator);
        // build query on columns
        // TODO scorreia add filter somewhere here...
        String selectCols = sqlSelectColumns(database, tableName, columns);
        // --- create a description of the column set
        QueryExpression queryExpression = DatatypesFactory.eINSTANCE.createQueryExpression();
        queryExpression.setBody(selectCols);
        // TODO scorreia externalize this as a constant
        queryExpression.setLanguage("SQL");
        tt.start("compute");
        evaluator.setFetchSize(10000);
        evaluator.evaluateIndicators(selectCols, true);
        tt.end("compute");
        // Print the median, then the frequency counters
        System.out.println("Median=" + medianIndicator.getMedian());
        // NOTE(review): textFrequencyIndicator is not registered through addIndicator in this method (the call is
        // commented out above), so the two values below may not reflect the evaluated data - confirm.
        System.out.println("# Unique values= " + textFrequencyIndicator.getUniqueValueCount());
        System.out.println("# Distinct values= " + textFrequencyIndicator.getDistinctValueCount());
        for (String col : columns) {
            printIndicators(evaluator.getIndicators(col));
        }
        tt.start("save");
        util.save();
        tt.end("saved in " + file.getAbsolutePath());
        tt.end();
        // Print both identifiers of the median indicator: the EcoreUtil one and the one given by the resource.
        CwmResource cwmR = (CwmResource) resource;
        String id = cwmR.getID(medianIndicator);
        System.out.println("ecore util.getId= " + EcoreUtil.getID(medianIndicator));
        System.out.println("uuId= " + id);
        // test reload this file
        // LoadSerialData.main(args);
    } catch (SQLException e) {
        log.error(e, e);
    } catch (InstantiationException e) {
        log.error(e, e);
    } catch (IllegalAccessException e) {
        log.error(e, e);
    } catch (ClassNotFoundException e) {
        log.error(e, e);
    } finally {
        // Release the JDBC connection on every path; closing an already closed connection is a no-op.
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                log.error(e, e);
            }
        }
    }
}
use of org.talend.dataquality.indicators.FrequencyIndicator in project tdq-studio-se by Talend.
the class IndicatorParametersImplTest method testEBasicSetContainerInternalEObjectInt.
/**
 * Test method for
 * {@link org.talend.dataquality.indicators.impl.IndicatorParametersImpl#eBasicSetContainer(org.eclipse.emf.ecore.InternalEObject, int)}
 * .
 * <p>
 * Sets the frequency and low frequency result limits on the indicator default value service, then, for every
 * frequency-like indicator, attaches freshly created parameters and asserts that their "top N" equals the limit
 * expected for that indicator.
 */
@Test
public void testEBasicSetContainerInternalEObjectInt() {
    IndicatorDefaultValueServiceUtil.getIstance().getIndicatorDVService().setFrequencyLimitResult(FREQUENCYRESULTLIMIT);
    IndicatorDefaultValueServiceUtil.getIstance().getIndicatorDVService().setLowFrequencyLimitResult(LOWFREQUENCYRESULTLIMIT);
    final IndicatorsFactory factory = IndicatorsFactory.eINSTANCE;

    // frequency
    FrequencyIndicator frequency = factory.createFrequencyIndicator();
    frequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, frequency.getParameters().getTopN());

    // low frequency
    LowFrequencyIndicator lowFrequency = factory.createLowFrequencyIndicator();
    lowFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, lowFrequency.getParameters().getTopN());

    // date frequency
    DateFrequencyIndicator dateFrequency = factory.createDateFrequencyIndicator();
    dateFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, dateFrequency.getParameters().getTopN());

    // date low frequency
    DateLowFrequencyIndicator dateLowFrequency = factory.createDateLowFrequencyIndicator();
    dateLowFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, dateLowFrequency.getParameters().getTopN());

    // week frequency
    WeekFrequencyIndicator weekFrequency = factory.createWeekFrequencyIndicator();
    weekFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, weekFrequency.getParameters().getTopN());

    // week low frequency
    WeekLowFrequencyIndicator weekLowFrequency = factory.createWeekLowFrequencyIndicator();
    weekLowFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, weekLowFrequency.getParameters().getTopN());

    // month frequency
    MonthFrequencyIndicator monthFrequency = factory.createMonthFrequencyIndicator();
    monthFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, monthFrequency.getParameters().getTopN());

    // month low frequency
    MonthLowFrequencyIndicator monthLowFrequency = factory.createMonthLowFrequencyIndicator();
    monthLowFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, monthLowFrequency.getParameters().getTopN());

    // quarter frequency
    QuarterFrequencyIndicator quarterFrequency = factory.createQuarterFrequencyIndicator();
    quarterFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, quarterFrequency.getParameters().getTopN());

    // quarter low frequency
    QuarterLowFrequencyIndicator quarterLowFrequency = factory.createQuarterLowFrequencyIndicator();
    quarterLowFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, quarterLowFrequency.getParameters().getTopN());

    // year frequency
    YearFrequencyIndicator yearFrequency = factory.createYearFrequencyIndicator();
    yearFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, yearFrequency.getParameters().getTopN());

    // year low frequency
    YearLowFrequencyIndicator yearLowFrequency = factory.createYearLowFrequencyIndicator();
    yearLowFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, yearLowFrequency.getParameters().getTopN());

    // bin frequency
    BinFrequencyIndicator binFrequency = factory.createBinFrequencyIndicator();
    binFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, binFrequency.getParameters().getTopN());

    // bin low frequency
    BinLowFrequencyIndicator binLowFrequency = factory.createBinLowFrequencyIndicator();
    binLowFrequency.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, binLowFrequency.getParameters().getTopN());

    // pattern frequency
    PatternFreqIndicator patternFreq = factory.createPatternFreqIndicator();
    patternFreq.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, patternFreq.getParameters().getTopN());

    // pattern low frequency
    PatternLowFreqIndicator patternLowFreq = factory.createPatternLowFreqIndicator();
    patternLowFreq.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, patternLowFreq.getParameters().getTopN());

    // East Asia pattern frequency
    EastAsiaPatternFreqIndicator eastAsiaPatternFreq = factory.createEastAsiaPatternFreqIndicator();
    eastAsiaPatternFreq.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, eastAsiaPatternFreq.getParameters().getTopN());

    // East Asia pattern low frequency
    EastAsiaPatternLowFreqIndicator eastAsiaPatternLowFreq = factory.createEastAsiaPatternLowFreqIndicator();
    eastAsiaPatternLowFreq.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, eastAsiaPatternLowFreq.getParameters().getTopN());

    // soundex frequency
    SoundexFreqIndicator soundexFreq = factory.createSoundexFreqIndicator();
    soundexFreq.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, soundexFreq.getParameters().getTopN());

    // soundex low frequency
    SoundexLowFreqIndicator soundexLowFreq = factory.createSoundexLowFreqIndicator();
    soundexLowFreq.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(LOWFREQUENCYRESULTLIMIT, soundexLowFreq.getParameters().getTopN());

    // date pattern frequency
    DatePatternFreqIndicator datePatternFreq = factory.createDatePatternFreqIndicator();
    datePatternFreq.setParameters(factory.createIndicatorParameters());
    Assert.assertEquals(FREQUENCYRESULTLIMIT, datePatternFreq.getParameters().getTopN());
}
use of org.talend.dataquality.indicators.FrequencyIndicator in project tdq-studio-se by Talend.
the class AnalysisTableTreeViewer method hasIndicatorParameters.
/**
 * Tells whether the indicator of the given unit carries parameters.
 * <p>
 * The answer is {@code false} when the indicator has no parameters object at all. Otherwise it is {@code true}
 * when the indicator is a {@link FrequencyIndicator}, or when any of the text parameter, date parameters, data
 * valid domain, indicator valid domain or bins is set on the parameters object.
 *
 * @param indicatorUnit the unit whose indicator is inspected
 * @return {@code true} if the indicator has parameters as described above, {@code false} otherwise
 */
private boolean hasIndicatorParameters(TableIndicatorUnit indicatorUnit) {
    IndicatorParameters params = indicatorUnit.getIndicator().getParameters();
    if (params == null) {
        return false;
    }
    // Short-circuit evaluation keeps the checks in their original order and stops at the first match.
    return indicatorUnit.getIndicator() instanceof FrequencyIndicator
            || params.getTextParameter() != null
            || params.getDateParameters() != null
            || params.getDataValidDomain() != null
            || params.getIndicatorValidDomain() != null
            || params.getBins() != null;
}
use of org.talend.dataquality.indicators.FrequencyIndicator in project tdq-studio-se by Talend.
the class CreateDateAnalysisAction method getPredefinedColumnIndicator.
/*
 * (non-Javadoc)
 *
 * Builds the predefined column indicators: counts, min value and max value are always included; the low
 * frequency and frequency indicators are added only when isTimeType() returns false. When "parameters" holds
 * date parameters, their date aggregation type is copied to every frequency indicator that has date parameters
 * of its own.
 *
 * @see org.talend.dataprofiler.core.ui.action.AbstractPredefinedAnalysisAction#getPredefinedColumnIndicator()
 */
@Override
protected ModelElementIndicator[] getPredefinedColumnIndicator() {
    // Read once so that the array size and the optional entries always agree.
    boolean timeType = isTimeType();
    IndicatorEnum[] allowedEnums = new IndicatorEnum[timeType ? 3 : 5];
    allowedEnums[0] = IndicatorEnum.CountsIndicatorEnum;
    allowedEnums[1] = IndicatorEnum.MinValueIndicatorEnum;
    allowedEnums[2] = IndicatorEnum.MaxValueIndicatorEnum;
    if (!timeType) {
        allowedEnums[3] = IndicatorEnum.LowFrequencyIndicatorEnum;
        allowedEnums[4] = IndicatorEnum.FrequencyIndicatorEnum;
    }
    ModelElementIndicator[] returnColumnIndicator = composePredefinedColumnIndicator(allowedEnums);
    // Nothing to propagate unless the source date parameters exist; this also avoids a NullPointerException
    // when "parameters" has no date parameters.
    if (parameters != null && parameters.getDateParameters() != null) {
        for (ModelElementIndicator columnIndicator : returnColumnIndicator) {
            for (Indicator indicator : columnIndicator.getIndicators()) {
                // An indicator may have no parameters object at all, so check it before reading its date
                // parameters.
                if (indicator instanceof FrequencyIndicator && indicator.getParameters() != null
                        && indicator.getParameters().getDateParameters() != null) {
                    // TODO If we will never use the
                    // "parameters" parameter, why we
                    // add TimeSliceOptionPage and have
                    // this loop
                    indicator.getParameters().getDateParameters()
                            .setDateAggregationType(parameters.getDateParameters().getDateAggregationType());
                }
            }
        }
    }
    return returnColumnIndicator;
}
Aggregations