use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.
the class IndicatorHelper method getNullCountIndicator.
/**
 * Looks up the null-count indicator registered for a model element.
 * <p>
 * Every indicator attached to {@code modelElement} is inspected. When more than one of them has the
 * NullCountIndicator EClass, the one that appears last in the list is returned.
 *
 * @param modelElement the analyzed element whose indicators are searched
 * @param elementToIndicator mapping from analyzed elements to their indicators
 * @return the last indicator whose EClass equals the NullCountIndicator EClass, or {@code null} when the
 * element has no indicator list or the list holds no such indicator
 */
public static NullCountIndicator getNullCountIndicator(ModelElement modelElement, Map<ModelElement, List<Indicator>> elementToIndicator) {
    final List<Indicator> indicators = elementToIndicator.get(modelElement);
    if (indicators == null) {
        // the element is unknown to the map, or it is mapped to a null list
        return null;
    }
    NullCountIndicator found = null;
    for (Indicator candidate : indicators) {
        if (!IndicatorsPackage.eINSTANCE.getNullCountIndicator().equals(candidate.eClass())) {
            continue;
        }
        // no early exit: a later match replaces an earlier one
        found = (NullCountIndicator) candidate;
    }
    return found;
}
use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.
the class IndicatorEvaluationMain method main.
/**
 * Manual test driver: registers a fixed set of indicators on hard-coded columns of table "my_test" in
 * database "test", evaluates them with a single SQL query and prints the results.
 * <p>
 * The connection settings ("driver" and "url") are read from "db.properties". After the evaluation
 * {@code util.save()} is called and the identifiers of the median indicator are printed. Any SQL or
 * driver-loading exception is only logged.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
// load the connection parameters from the "db.properties" resource
TypedProperties connectionParams = PropertiesLoader.getProperties(IndicatorEvaluator.class, "db.properties");
String driverClassName = connectionParams.getProperty("driver");
String dbUrl = connectionParams.getProperty("url");
try {
// the whole run is timed; named sub-timers are started for "compute" and "save" below
TimeTracer tt = new TimeTracer("Indicator evaluation", null);
tt.start();
// create connection
Connection connection = ConnectionUtils.createConnection(dbUrl, driverClassName, connectionParams);
String database = "test";
String tableName = "my_test";
// --- columns to analyze
String[] columnsArray = new String[] { // index in columnsArray:
"my_int", // 0
"my_double", // 1
"my_text", // 2
"my_date", // 3
"my_string", // 4
"my_int_null" }; // 5
List<String> columns = Arrays.asList(columnsArray);
// store in file
File file = new File("out/columnTest_0.1.ana");
EMFUtil util = new EMFUtil();
Resource resource = util.getResourceSet().createResource(URI.createFileURI(file.getAbsolutePath()));
// NOTE(review): rContents and evaluator are not declared in this method; presumably they are
// static members of the enclosing class - confirm
rContents = resource.getContents();
evaluator.setConnection(connection);
// --- create indicators
// NOTE(review): distinctCountIndicator, distinctCountIndicator2, uniqueCountIndicator,
// duplicateCountIndicator and modeIndicator are created here but only referenced from
// commented-out code in this method
RowCountIndicator rowCountIndicator = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
NullCountIndicator nullCountIndicator = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
DistinctCountIndicator distinctCountIndicator = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
DistinctCountIndicator distinctCountIndicator2 = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
UniqueCountIndicator uniqueCountIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
DuplicateCountIndicator duplicateCountIndicator = IndicatorsFactory.eINSTANCE.createDuplicateCountIndicator();
BlankCountIndicator blankCountIndicator = IndicatorsFactory.eINSTANCE.createBlankCountIndicator();
MinLengthIndicator minLengthIndicator = IndicatorsFactory.eINSTANCE.createMinLengthIndicator();
MaxLengthIndicator maxLengthIndicator = IndicatorsFactory.eINSTANCE.createMaxLengthIndicator();
AverageLengthIndicator averageLengthIndicator = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
AverageLengthIndicator averageLengthIndicator2 = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
ModeIndicator modeIndicator = IndicatorsFactory.eINSTANCE.createModeIndicator();
FrequencyIndicator textFrequencyIndicator = IndicatorsFactory.eINSTANCE.createFrequencyIndicator();
// store in freq indic
// textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator);
// textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator2);
// textFrequencyIndicator.setUniqueCountIndicator(uniqueCountIndicator);
// textFrequencyIndicator.setDuplicateCountIndicator(duplicateCountIndicator);
// textFrequencyIndicator.setModeIndicator(modeIndicator);
MeanIndicator doubleMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
MeanIndicator integerMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
MedianIndicator medianIndicator = IndicatorsFactory.eINSTANCE.createMedianIndicator();
SumIndicator integerSumIndicator = IndicatorsFactory.eINSTANCE.createSumIndicator();
// --- register the indicators on their columns:
// my_int (0): median; my_double (1): mean; my_text (2): blank count, average length;
// my_date (3): row count, average length, min length, max length;
// my_int_null (5): null count, sum, mean; my_string (4) gets no indicator
addIndicator(columnsArray[0], medianIndicator);
addIndicator(columnsArray[1], doubleMeanIndicator);
addIndicator(columnsArray[2], blankCountIndicator);
addIndicator(columnsArray[5], nullCountIndicator);
// addIndicator(columnsArray[2], textFrequencyIndicator);
// addIndicator(columnsArray[2], distinctCountIndicator); // probably not useful?
// addIndicator(columnsArray[2], uniqueCountIndicator); // probably not useful?
// addIndicator(columnsArray[2], duplicateCountIndicator); // probably not useful?
// addIndicator(columnsArray[2], modeIndicator); // probably not useful?
addIndicator(columnsArray[3], rowCountIndicator);
addIndicator(columnsArray[5], integerSumIndicator);
addIndicator(columnsArray[5], integerMeanIndicator);
addIndicator(columnsArray[2], averageLengthIndicator);
addIndicator(columnsArray[3], averageLengthIndicator2);
addIndicator(columnsArray[3], minLengthIndicator);
addIndicator(columnsArray[3], maxLengthIndicator);
// build query on columns
// TODO scorreia add filter somewhere here...
String selectCols = sqlSelectColumns(database, tableName, columns);
// --- create a description of the column set
// NOTE(review): queryExpression is filled in but not used again in this method
QueryExpression queryExpression = DatatypesFactory.eINSTANCE.createQueryExpression();
queryExpression.setBody(selectCols);
// TODO scorreia externalize this as a constant
queryExpression.setLanguage("SQL");
tt.start("compute");
evaluator.setFetchSize(10000);
// NOTE(review): the meaning of the boolean argument is not visible here - confirm against the
// evaluator's API
evaluator.evaluateIndicators(selectCols, true);
tt.end("compute");
// print the median, then the counts held by the frequency indicator
System.out.println("Median=" + medianIndicator.getMedian());
// NOTE(review): textFrequencyIndicator is never passed to addIndicator in this method (the call is
// commented out above), so the two values below presumably come from an unevaluated indicator - confirm
System.out.println("# Unique values= " + textFrequencyIndicator.getUniqueValueCount());
System.out.println("# Distinct values= " + textFrequencyIndicator.getDistinctValueCount());
// print the indicators registered on each analyzed column
for (String col : columns) {
printIndicators(evaluator.getIndicators(col));
}
tt.start("save");
util.save();
tt.end("saved in " + file.getAbsolutePath());
tt.end();
// print the identifier of the median indicator as reported by EcoreUtil and by the CWM resource
CwmResource cwmR = (CwmResource) resource;
String id = cwmR.getID(medianIndicator);
System.out.println("ecore util.getId= " + EcoreUtil.getID(medianIndicator));
System.out.println("uuId= " + id);
// test reload this file
// LoadSerialData.main(args);
} catch (SQLException e) {
// TODO Auto-generated catch block (the failure is only logged)
log.error(e, e);
} catch (InstantiationException e) {
// TODO Auto-generated catch block (the failure is only logged)
log.error(e, e);
} catch (IllegalAccessException e) {
// TODO Auto-generated catch block (the failure is only logged)
log.error(e, e);
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block (the failure is only logged)
log.error(e, e);
}
}
use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.
the class ItemRecordTest method testLoadProperty.
/**
 * Test method for {@link org.talend.dataprofiler.core.ui.imex.model.ItemRecord#loadProperty()}.
 * <p>
 * Saves an analysis holding one indicator, then adds a second indicator and saves again. An
 * {@code ItemRecord} built on the analysis file is expected to keep reporting one dependency until
 * {@code ItemRecord.clear()} is called; afterwards a new record is expected to report two.
 *
 * @throws PersistenceException
 */
@Test
public void testLoadProperty() throws PersistenceException {
    chooseRightProject();
    final Property analysisProperty = createAnalysis("ItemRecordTestanalysis1"); // $NON-NLS-1$
    final TDQAnalysisItem analysisItem = (TDQAnalysisItem) analysisProperty.getItem();
    final Analysis freshAnalysis = analysisItem.getAnalysis();
    final AnalysisResult results = freshAnalysis.getResults();
    Assert.assertEquals(0, results.getIndicators().size());

    // first indicator: a row count bound to its own saved indicator definition
    final RowCountIndicator rowCount = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
    final String rowCountDefinitionId = EcoreUtil.generateUUID();
    saveIndicatorDefintion(rowCountDefinitionId, "ItemRecordWithRefreshedTestIndicatorDefinition1"); // $NON-NLS-1$
    final TDQIndicatorDefinitionItem rowCountDefinitionItem = (TDQIndicatorDefinitionItem) ProxyRepositoryFactory
            .getInstance().getLastVersion(rowCountDefinitionId).getProperty().getItem();
    rowCount.setIndicatorDefinition(rowCountDefinitionItem.getIndicatorDefinition());
    Assert.assertNotNull("Row count indicator definition should not be null", // $NON-NLS-1$
            rowCount.getIndicatorDefinition());
    Assert.assertEquals("ItemRecordWithRefreshedTestIndicatorDefinition1", // $NON-NLS-1$
            rowCount.getIndicatorDefinition().getLabel());
    results.getIndicators().add(rowCount);
    Assert.assertEquals(1, results.getIndicators().size());

    // first save: the indicator must still be reachable through the property afterwards
    final ReturnCode firstSave = saveAnalysis(freshAnalysis);
    final TDQAnalysisItem savedItem = (TDQAnalysisItem) analysisProperty.getItem();
    Assert.assertEquals(1, savedItem.getAnalysis().getResults().getIndicators().size());
    Assert.assertTrue("The analysis first time saving is not work", firstSave.isOk()); // $NON-NLS-1$

    final File analysisFile = WorkspaceUtils.ifileToFile(PropertyHelper.getItemFile(analysisProperty));
    final ItemRecord recordAfterFirstSave = new ItemRecord(analysisFile);
    Assert.assertEquals(1, recordAfterFirstSave.getDependencySet().size());

    // second indicator: a null count bound to a second saved indicator definition
    final NullCountIndicator nullCount = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
    final String nullCountDefinitionId = EcoreUtil.generateUUID();
    saveIndicatorDefintion(nullCountDefinitionId, "ItemRecordWithRefreshedTestIndicatorDefinition2"); // $NON-NLS-1$
    final TDQIndicatorDefinitionItem nullCountDefinitionItem = (TDQIndicatorDefinitionItem) ProxyRepositoryFactory
            .getInstance().getLastVersion(nullCountDefinitionId).getProperty().getItem();
    nullCount.setIndicatorDefinition(nullCountDefinitionItem.getIndicatorDefinition());
    final Analysis currentAnalysis = analysisItem.getAnalysis();
    currentAnalysis.getResults().getIndicators().add(nullCount);
    final ReturnCode secondSave = saveAnalysis(currentAnalysis);
    Assert.assertTrue("The analysis second time saving is not work", secondSave.isOk()); // $NON-NLS-1$

    // a new record built before clear() is expected to still report the old dependency count
    final ItemRecord recordBeforeClear = new ItemRecord(analysisFile);
    Assert.assertEquals(1, recordBeforeClear.getDependencySet().size());

    // after clear() a new record is expected to report the added dependency as well
    ItemRecord.clear();
    final ItemRecord recordAfterClear = new ItemRecord(analysisFile);
    Assert.assertEquals(2, recordAfterClear.getDependencySet().size());
}
use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.
the class IndicatorHelperTest method testGetNullCountIndicator2.
/**
 * Test method for
 * {@link org.talend.dataquality.helpers.IndicatorHelper#getNullCountIndicator(orgomg.cwm.objectmodel.core.ModelElement, java.util.Map)}
 * . The column is mapped to a {@code null} indicator list, so the lookup is expected to return {@code null}.
 */
@Test
public void testGetNullCountIndicator2() {
    // the analyzed column
    TdColumn column1 = RelationalFactory.eINSTANCE.createTdColumn();
    // the column is deliberately mapped to a null indicator list
    List<Indicator> list = null;
    Map<ModelElement, List<Indicator>> elementToIndicator = new HashMap<ModelElement, List<Indicator>>();
    elementToIndicator.put(column1, list);

    NullCountIndicator nullCountIndicator2 = IndicatorHelper.getNullCountIndicator(column1, elementToIndicator);

    // Explicit check instead of the `assert` keyword: `assert` statements are skipped when the JVM runs
    // without -ea, which would let this test pass without verifying anything.
    if (nullCountIndicator2 != null) {
        throw new AssertionError("expected no NullCountIndicator for a null indicator list but got: "
                + nullCountIndicator2);
    }
}
use of org.talend.dataquality.indicators.NullCountIndicator in project tdq-studio-se by Talend.
the class ColumnAnalysisSqlExecutor method setRowCountAndNullCount.
/**
 * Copies the row count and the null count of each analyzed element onto the indicators of that element.
 * <p>
 * For every element the row-count and null-count indicators are looked up through {@link IndicatorHelper}.
 * When a row-count indicator exists, its count is set on each indicator for which
 * {@code needPercentage} returns true. When a null-count indicator exists, its null count is set on every
 * indicator of the element. Elements mapped to a {@code null} indicator list are skipped.
 *
 * @param elementToIndicator mapping from analyzed elements to their indicators
 */
protected void setRowCountAndNullCount(Map<ModelElement, List<Indicator>> elementToIndicator) {
    for (Map.Entry<ModelElement, List<Indicator>> entry : elementToIndicator.entrySet()) {
        List<Indicator> indicators = entry.getValue();
        if (indicators == null) {
            // a key may be mapped to a null list; there is nothing to update for such an element
            continue;
        }
        ModelElement modelElement = entry.getKey();
        // get row count indicator
        RowCountIndicator rowCount = IndicatorHelper.getRowCountIndicator(modelElement, elementToIndicator);
        // get null count indicator
        NullCountIndicator nullCount = IndicatorHelper.getNullCountIndicator(modelElement, elementToIndicator);
        for (Indicator ind : indicators) {
            // set row count value to each indicator that needs it
            if (rowCount != null && needPercentage(ind)) {
                ind.setCount(rowCount.getCount());
            }
            // set null count value to each indicator
            if (nullCount != null) {
                ind.setNullCount(nullCount.getNullCount());
            }
        }
    }
}
Aggregations