use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.
the class CountsIndicatorImpl method basicSetUniqueCountIndicator.
/**
 * <!-- begin-user-doc -->
 * Stores the given unique-count indicator in this object and, when notification is required, records a
 * {@code SET} notification carrying the previous and the new value.
 * <p>
 * If {@code msgs} is {@code null} the new notification itself is returned; otherwise it is added to
 * {@code msgs}, which is returned. When no notification is required, {@code msgs} is returned untouched.
 * <!-- end-user-doc -->
 * @generated
 */
public NotificationChain basicSetUniqueCountIndicator(UniqueCountIndicator newUniqueCountIndicator, NotificationChain msgs) {
    final UniqueCountIndicator previousIndicator = uniqueCountIndicator;
    uniqueCountIndicator = newUniqueCountIndicator;

    // Nobody is listening: hand the chain back exactly as received.
    if (!eNotificationRequired()) {
        return msgs;
    }

    ENotificationImpl setEvent = new ENotificationImpl(this, Notification.SET,
            IndicatorsPackage.COUNTS_INDICATOR__UNIQUE_COUNT_INDICATOR, previousIndicator, newUniqueCountIndicator);
    if (msgs != null) {
        msgs.add(setEvent);
        return msgs;
    }
    // No chain yet: the notification becomes the head of the chain.
    return setEvent;
}
use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.
the class IndicatorEvaluationMain method main.
/**
 * Stand-alone driver: loads connection settings from "db.properties", opens a connection, registers a set
 * of indicators on columns of table "my_test" (database "test"), evaluates them with one SELECT query,
 * prints the results and saves the EMF resource to "out/columnTest_0.1.ana".
 * <p>
 * NOTE(review): {@code rContents} and {@code evaluator} are assigned/used here but not declared in this
 * method - presumably class-level fields; confirm against the enclosing class.
 * <p>
 * SQLException, InstantiationException, IllegalAccessException and ClassNotFoundException are logged and
 * not rethrown.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
// connection settings are looked up relative to IndicatorEvaluator.class
TypedProperties connectionParams = PropertiesLoader.getProperties(IndicatorEvaluator.class, "db.properties");
String driverClassName = connectionParams.getProperty("driver");
String dbUrl = connectionParams.getProperty("url");
try {
// overall timer for the whole run; named sub-timers ("compute", "save") are started below
TimeTracer tt = new TimeTracer("Indicator evaluation", null);
tt.start();
// create connection
Connection connection = ConnectionUtils.createConnection(dbUrl, driverClassName, connectionParams);
String database = "test";
String tableName = "my_test";
// --- columns to analyze
String[] columnsArray = new String[] { // trailing numbers are the index of each name in columnsArray
"my_int", // 0
"my_double", // 1
"my_text", // 2
"my_date", // 3
"my_string", // 4
"my_int_null" }; // 5
List<String> columns = Arrays.asList(columnsArray);
// store in file
File file = new File("out/columnTest_0.1.ana");
EMFUtil util = new EMFUtil();
Resource resource = util.getResourceSet().createResource(URI.createFileURI(file.getAbsolutePath()));
// keep a handle on the resource contents (rContents is not declared in this method)
rContents = resource.getContents();
evaluator.setConnection(connection);
// --- create indicators
// NOTE(review): distinctCountIndicator, distinctCountIndicator2, uniqueCountIndicator,
// duplicateCountIndicator and modeIndicator are only referenced from commented-out code below.
RowCountIndicator rowCountIndicator = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
NullCountIndicator nullCountIndicator = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
DistinctCountIndicator distinctCountIndicator = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
DistinctCountIndicator distinctCountIndicator2 = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
UniqueCountIndicator uniqueCountIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
DuplicateCountIndicator duplicateCountIndicator = IndicatorsFactory.eINSTANCE.createDuplicateCountIndicator();
BlankCountIndicator blankCountIndicator = IndicatorsFactory.eINSTANCE.createBlankCountIndicator();
MinLengthIndicator minLengthIndicator = IndicatorsFactory.eINSTANCE.createMinLengthIndicator();
MaxLengthIndicator maxLengthIndicator = IndicatorsFactory.eINSTANCE.createMaxLengthIndicator();
AverageLengthIndicator averageLengthIndicator = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
AverageLengthIndicator averageLengthIndicator2 = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
ModeIndicator modeIndicator = IndicatorsFactory.eINSTANCE.createModeIndicator();
FrequencyIndicator textFrequencyIndicator = IndicatorsFactory.eINSTANCE.createFrequencyIndicator();
// store in freq indic
// textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator);
// textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator2);
// textFrequencyIndicator.setUniqueCountIndicator(uniqueCountIndicator);
// textFrequencyIndicator.setDuplicateCountIndicator(duplicateCountIndicator);
// textFrequencyIndicator.setModeIndicator(modeIndicator);
MeanIndicator doubleMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
MeanIndicator integerMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
MedianIndicator medianIndicator = IndicatorsFactory.eINSTANCE.createMedianIndicator();
SumIndicator integerSumIndicator = IndicatorsFactory.eINSTANCE.createSumIndicator();
// --- attach indicators to columns (column chosen by its index in columnsArray)
addIndicator(columnsArray[0], medianIndicator);
addIndicator(columnsArray[1], doubleMeanIndicator);
addIndicator(columnsArray[2], blankCountIndicator);
addIndicator(columnsArray[5], nullCountIndicator);
// addIndicator(columnsArray[2], textFrequencyIndicator);
// addIndicator(columnsArray[2], distinctCountIndicator); // probably not useful?
// addIndicator(columnsArray[2], uniqueCountIndicator); // probably not useful?
// addIndicator(columnsArray[2], duplicateCountIndicator); // probably not useful?
// addIndicator(columnsArray[2], modeIndicator); // probably not useful?
addIndicator(columnsArray[3], rowCountIndicator);
addIndicator(columnsArray[5], integerSumIndicator);
addIndicator(columnsArray[5], integerMeanIndicator);
addIndicator(columnsArray[2], averageLengthIndicator);
addIndicator(columnsArray[3], averageLengthIndicator2);
addIndicator(columnsArray[3], minLengthIndicator);
addIndicator(columnsArray[3], maxLengthIndicator);
// build query on columns
// TODO scorreia add filter somewhere here...
String selectCols = sqlSelectColumns(database, tableName, columns);
// --- create a description of the column set
// NOTE(review): queryExpression is populated but not referenced again in this method.
QueryExpression queryExpression = DatatypesFactory.eINSTANCE.createQueryExpression();
queryExpression.setBody(selectCols);
// TODO scorreia externalize this as a constant
queryExpression.setLanguage("SQL");
// --- run the query and feed the indicators, timed under the "compute" label
tt.start("compute");
evaluator.setFetchSize(10000);
evaluator.evaluateIndicators(selectCols, true);
tt.end("compute");
// Print the median, then the frequency indicator's counts
System.out.println("Median=" + medianIndicator.getMedian());
// NOTE(review): textFrequencyIndicator is not registered through addIndicator in this method (the call
// is commented out above), so the two counts below come from an indicator that was not attached here.
System.out.println("# Unique values= " + textFrequencyIndicator.getUniqueValueCount());
System.out.println("# Distinct values= " + textFrequencyIndicator.getDistinctValueCount());
// print every indicator attached to each analyzed column
for (String col : columns) {
printIndicators(evaluator.getIndicators(col));
}
// --- persist the resource set, timed under the "save" label
tt.start("save");
util.save();
tt.end("saved in " + file.getAbsolutePath());
tt.end();
// print the two identifiers of the median indicator: EcoreUtil's ID and the resource-assigned one
CwmResource cwmR = (CwmResource) resource;
String id = cwmR.getID(medianIndicator);
System.out.println("ecore util.getId= " + EcoreUtil.getID(medianIndicator));
System.out.println("uuId= " + id);
// test reload this file
// LoadSerialData.main(args);
} catch (SQLException e) {
// logged only; the method then returns normally
log.error(e, e);
} catch (InstantiationException e) {
// logged only; the method then returns normally
log.error(e, e);
} catch (IllegalAccessException e) {
// logged only; the method then returns normally
log.error(e, e);
} catch (ClassNotFoundException e) {
// logged only; the method then returns normally
log.error(e, e);
}
}
use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.
the class ColumnSetDBMapTest method testSubListLongLongMapOfLongListOfObjectDataValidationCase1.
/**
 * Test method for
 * {@link org.talend.dataquality.indicators.mapdb.ColumnSetDBMap#subList(long, long, java.util.Map, org.talend.cwm.indicator.DataValidation)}
 * Case 1 Unique case: two different keys are stored, then the range [0, 2) is requested with a validation
 * built from a unique-count indicator; exactly one row is expected back.
 */
@Test
public void testSubListLongLongMapOfLongListOfObjectDataValidationCase1() {
    ColumnSetDBMap columnSetMap = new ColumnSetDBMap();
    Assert.assertTrue(columnSetMap.isEmpty());

    // first entry: ("id1", "name1") -> 1
    List<Object> firstKey = new ArrayList<Object>();
    firstKey.add("id1"); //$NON-NLS-1$
    firstKey.add("name1"); //$NON-NLS-1$
    columnSetMap.put(firstKey, 1L);

    // second entry: ("id2", "name2") -> 2
    List<Object> secondKey = new ArrayList<Object>();
    secondKey.add("id2"); //$NON-NLS-1$
    secondKey.add("name2"); //$NON-NLS-1$
    columnSetMap.put(secondKey, 2L);

    Assert.assertEquals(2, columnSetMap.size());

    UniqueCountIndicator uniqueIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
    HashMap<Long, List<Object>> indexMap = new HashMap<Long, List<Object>>();
    List<Object[]> rows = columnSetMap.subList(0, 2, indexMap,
            IDataValidationFactory.INSTANCE.createValidation(uniqueIndicator));

    Assert.assertEquals(1, rows.size());
}
use of org.talend.dataquality.indicators.UniqueCountIndicator in project tdq-studio-se by Talend.
the class IndicatorEvaluator method executeSqlQuery.
/**
 * Runs the given SQL statement and hands every returned row to the indicators registered on the analyzed
 * columns, optionally recording the rows needed for drill-down.
 * <p>
 * The statement and its result set are always closed before this method returns or throws. The connection
 * obtained from {@code getConnection()} is closed only when all rows were processed without an exception
 * (this matches the previous behaviour; it is not closed on the early-return paths).
 *
 * @param sqlStatement the query to execute
 * @return a failed code with a message when no analyzed column is defined or when the statement produced
 * no result set; otherwise the code returned by {@code getMessageForInvalidJUDIs()}
 * @throws SQLException if executing the query, reading a row or closing a JDBC resource fails
 */
@Override
protected ReturnCode executeSqlQuery(String sqlStatement) throws SQLException {
    ReturnCode ok = new ReturnCode(true);
    // check analyzed columns
    Set<String> columns = getAnalyzedElements();
    // feature 0010630 zshen: make the column order agree with the column names in the sqlStatement
    // (needed for mssql ODBC)
    List<String> columnlist = sortColumnName(columns, sqlStatement);
    if (columnlist.isEmpty()) {
        ok.setReturnCode(Messages.getString("IndicatorEvaluator.DefineAnalyzedColumns"), false); //$NON-NLS-1$
        return ok;
    }
    // ADD xqliu 2010-07-27 bug 13826
    Map<String, String> columnlistMap = buildColumnListMap(columnlist);

    // create query statement
    // feature 0010630 zshen: Tables are not found when using Excel with ODBC connection
    Statement statement = createStatement();
    ResultSet resultSet = null;
    try {
        // MOD xqliu 2009-02-09 bug 6237
        if (continueRun()) {
            if (log.isInfoEnabled()) {
                log.info("Executing query: " + sqlStatement); //$NON-NLS-1$
            }
            statement.execute(sqlStatement);
        }
        // get the results
        resultSet = statement.getResultSet();
        if (resultSet == null) {
            String mess = Messages.getString("Evaluator.NoResultSet", sqlStatement); //$NON-NLS-1$
            log.warn(mess);
            ok.setReturnCode(mess, false);
            // the statement is closed by the finally block below
            return ok;
        }
        // MOD qiongli TDQ-7282: check invalid judi; if there are any, return a false code and show the
        // message later.
        ok = getMessageForInvalidJUDIs();
        int columnCount = resultSet.getMetaData().getColumnCount();
        int maxNumberRows = analysis.getParameters().getMaxNumberRows();
        // MOD mzhao feature 12919: add capability to drill down data on Java engine.
        AnalysisResult anaResult = analysis.getResults();
        EMap<Indicator, AnalyzedDataSet> indicToRowMap = anaResult.getIndicToRowMap();
        indicToRowMap.clear();
        int recordIncrement = 0;
        // --- for each row
        int columnListSize = columnlist.size();
        label: while (resultSet.next()) {
            // feature 0010630 zshen: dislodge the qualifiers from the name of the column
            for (int i = 0; i < columnListSize; i++) {
                // MOD xqliu 2010-07-27 bug 13826
                String col = columnlist.get(i);
                List<Indicator> indicators = getIndicators(col);
                col = columnlistMap.get(col);
                // --- get content of column
                Object object = ResultSetUtils.getBigObject(resultSet, col);
                // FIXME this will slow down a lot the computation
                if (object != null && !(object instanceof String) && object.toString().indexOf("TIMESTAMP") > -1) { //$NON-NLS-1$
                    object = resultSet.getTimestamp(col);
                }
                // TDQ-11299: fix the ClassCastException: java.sql.Date cannot be cast to java.lang.String
                if (object instanceof Date) {
                    if (object instanceof Time) {
                        object = new TalendFormatTime((Time) object);
                    } else {
                        object = new TalendFormatDate((Date) object);
                    }
                }
                // --- give row to handle to indicators
                for (Indicator indicator : indicators) {
                    // MOD xqliu 2009-02-09 bug 6237: stop the whole row loop when the run is cancelled
                    if (!continueRun()) {
                        break label;
                    }
                    // Added yyin 20120608 TDQ-3589
                    if (indicator instanceof DuplicateCountIndicator) {
                        ((DuplicateCountIndicator) indicator).handle(object, resultSet, columnCount);
                    } else {
                        indicator.handle(object);
                    }
                    // make sure every handled indicator has a data set entry in the drill-down map
                    AnalyzedDataSet analyzedDataSet = indicToRowMap.get(indicator);
                    if (analyzedDataSet == null) {
                        analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                        indicToRowMap.put(indicator, analyzedDataSet);
                        analyzedDataSet.setDataCount(maxNumberRows);
                        analyzedDataSet.setRecordSize(0);
                    }
                    // should store data for drill down
                    if (analysis.getParameters().isStoreData()) {
                        if (indicator.mustStoreRow()) {
                            // the current indicator needs the row to be stored
                            List<Object[]> valueObjectList = initDataSet(indicator, indicToRowMap, object);
                            // MOD zshen: add another loop to insert every column value of the row.
                            recordIncrement = valueObjectList.size();
                            // MOD klliu 2011-06-30 bug 22523: whichever is Table or View, the columns
                            // are found through the column set
                            ColumnSet doSwitch = SwitchHelpers.COLUMN_SET_SWITCH.doSwitch(indicator.getAnalyzedElement()
                                    .eContainer());
                            List<TdColumn> columnList = ColumnSetHelper.getColumns(doSwitch);
                            List<Object> inputRowList = new ArrayList<Object>();
                            for (int j = 0; j < columnCount; j++) {
                                String newcol = columnList.get(j).getName();
                                Object newobject = ResultSetUtils.getBigObject(resultSet, newcol);
                                // same format as result page.
                                if (newobject instanceof Date) {
                                    if (newobject instanceof Time) {
                                        newobject = new TalendFormatTime((Time) newobject);
                                    } else {
                                        newobject = new TalendFormatDate((Date) newobject);
                                    }
                                }
                                if (indicator.isUsedMapDBMode()) {
                                    inputRowList.add(newobject == null ? PluginConstant.NULL_STRING : newobject);
                                    continue;
                                } else {
                                    if (recordIncrement < maxNumberRows) {
                                        if (recordIncrement < valueObjectList.size()) {
                                            // the row array already exists: fill the next cell
                                            valueObjectList.get(recordIncrement)[j] = newobject;
                                        } else {
                                            // first cell of a new row: grow the list
                                            Object[] valueObject = new Object[columnCount];
                                            valueObject[j] = newobject;
                                            valueObjectList.add(valueObject);
                                        }
                                    } else {
                                        // row limit reached: stop copying this row
                                        break;
                                    }
                                }
                            }
                            if (indicator.isUsedMapDBMode()) {
                                MapDBUtils.handleDrillDownData(object, inputRowList, indicator);
                            }
                        } else if (indicator instanceof UniqueCountIndicator
                                && analysis.getResults().getIndicToRowMap().get(indicator).getData() != null) {
                            // the indicator no longer wants this value stored: drop the first stored row
                            // whose analyzed column holds the same value
                            List<Object[]> removeValueObjectList = analysis.getResults().getIndicToRowMap().get(indicator)
                                    .getData();
                            // MOD klliu 2011-06-30 bug 22523: whichever is Table or View, the columns
                            // are found through the column set
                            ColumnSet doSwitch = SwitchHelpers.COLUMN_SET_SWITCH.doSwitch(indicator.getAnalyzedElement()
                                    .eContainer());
                            List<TdColumn> columnElementList = ColumnSetHelper.getColumns(doSwitch);
                            int offsetting = columnElementList.indexOf(indicator.getAnalyzedElement());
                            for (Object[] dataObject : removeValueObjectList) {
                                Object storedValue = dataObject[offsetting];
                                // null-safe comparison: a stored null cell must not raise a
                                // NullPointerException
                                boolean sameValue = storedValue == null ? object == null : storedValue.equals(object);
                                if (sameValue) {
                                    // removing during for-each is safe only because we leave the loop
                                    // immediately
                                    removeValueObjectList.remove(dataObject);
                                    break;
                                }
                            }
                        }
                    }
                }
            }
        }
    } finally {
        // --- release resultset and statement, even when row processing failed
        try {
            if (resultSet != null) {
                resultSet.close();
            }
        } finally {
            statement.close();
        }
    }
    // --- close connection (normal completion only, as before)
    getConnection().close();
    return ok;
}
Aggregations