use of org.talend.dataquality.rules.AlgorithmDefinition in project tdq-studio-se by Talend.
the class MatchAnalysisExecutorTest method assertScenario4.
/**
* DOC zhao Comment method "assertScenario3".
*
* @param matchAnalysisExecutor
* @param analysis
* @param name
* @param nameVar
*/
@SuppressWarnings("nls")
private void assertScenario4(MatchAnalysisExecutor matchAnalysisExecutor, Analysis analysis, MetadataColumn name, String nameVar, double groupQualityThreshold, double matchInterval) {
// Set indicators into analysis result.
RecordMatchingIndicator matchIndicator = ColumnsetPackage.eINSTANCE.getColumnsetFactory().createRecordMatchingIndicator();
// Match key: name, no block key, levenshtein attribute algorithm.
matchIndicator.setAnalyzedElement(name);
createMatchIndicatorWithOneMathRule(nameVar, matchIndicator, groupQualityThreshold, matchInterval);
// Add a blocking key: country
BlockKeyDefinition blockKeyDef = RulesPackage.eINSTANCE.getRulesFactory().createBlockKeyDefinition();
AlgorithmDefinition algoDef = RulesPackage.eINSTANCE.getRulesFactory().createAlgorithmDefinition();
algoDef.setAlgorithmType(AttributeMatcherType.EXACT.name());
blockKeyDef.setAlgorithm(algoDef);
blockKeyDef.setColumn("country");
blockKeyDef.setName("country");
AlgorithmDefinition dummyAlgoPre = RulesPackage.eINSTANCE.getRulesFactory().createAlgorithmDefinition();
dummyAlgoPre.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getComponentValueName());
blockKeyDef.setPreAlgorithm(dummyAlgoPre);
AlgorithmDefinition dummyAlgoPost = RulesPackage.eINSTANCE.getRulesFactory().createAlgorithmDefinition();
dummyAlgoPost.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getComponentValueName());
blockKeyDef.setPostAlgorithm(dummyAlgoPost);
matchIndicator.getBuiltInMatchRuleDefinition().getBlockKeys().add(blockKeyDef);
executeAnalysis(matchAnalysisExecutor, analysis, matchIndicator);
// Assert group size and frequency.
Map<Object, Long> size2Frequency = matchIndicator.getGroupSize2groupFrequency();
// For 1 -> FR(4)"babass","SebastiĆ£o","nicolas","nigula"
assertTrue(size2Frequency.get(String.valueOf(1)) == 6l);
// CN(2)"nigula","nico"
// For 2 -> FR(1)"sebas", CN(1)"nicolas"("nicola")
assertTrue(size2Frequency.get(String.valueOf(2)) == 2l);
// For 4 -> FR(4)"seb"
assertTrue(size2Frequency.get(String.valueOf(4)) == 1l);
// Assert row count, unique records, matched records and suspect records.
assertTrue(matchIndicator.getCount() == 14);
// For 6 -> FR 4*"seb", FR 2 *"sebas"
assertTrue(matchIndicator.getMatchedRecordCount() == 6);
// For 2 -> CN "nicolas"("nicola"), group score: 0.9 <
assertTrue(matchIndicator.getSuspectRecordCount() == 2);
// 0.95
}
use of org.talend.dataquality.rules.AlgorithmDefinition in project tdq-studio-se by Talend.
the class ExecuteMatchRuleHandlerTest method testExecute2.
/**
* Test method for
* {@link org.talend.dq.analysis.ExecuteMatchRuleHandler#execute(java.util.Map, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, java.util.List, org.talend.dataquality.indicators.columnset.BlockKeyIndicator)}
* .
*
* one block key one match key
*/
@Test
public void testExecute2() {
Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col0.setName(columnName0);
// $NON-NLS-1$
columnMap.put(col0, "0");
MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col1.setName(columnName1);
// $NON-NLS-1$
columnMap.put(col1, "1");
MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col2.setName(columnName2);
// $NON-NLS-1$
columnMap.put(col2, "2");
MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col3.setName(columnName3);
// $NON-NLS-1$
columnMap.put(col3, "3");
RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
// create match key
MatchRule createMatchRule1 = RulesFactory.eINSTANCE.createMatchRule();
MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
createMatchRule1.getMatchKeys().add(createMatchKeyDefinition1);
createMatchKeyDefinition1.setColumn(columnName2);
createMatchKeyDefinition1.setConfidenceWeight(1);
// $NON-NLS-1$
createMatchKeyDefinition1.setName("rule1.matchkey1");
createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
matchRuleDef.getMatchRules().add(createMatchRule1);
// create block key
BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
createBlockKeyDefinition.setColumn(columnName1);
// $NON-NLS-1$
createBlockKeyDefinition.setName("blockKey1");
// setPreAlgorithm
AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
// setAlgorithm
AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
// setPostAlgorithm
AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);
List<Object[]> matchRows = new ArrayList<Object[]>();
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id1", "name1", "number1", "date1" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id2", "name1", "number2", "date2" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id3", "name2", "number2", "date3" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id4", "name2", "number2", "date1" });
BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);
Assert.assertTrue(executeResult.isOk());
Assert.assertTrue(executeResult.getMessage() == null);
Assert.assertTrue(executeResult.getObject() != null);
MatchGroupResultConsumer ResultConsumer = executeResult.getObject();
List<Object[]> fullMatchResult = ResultConsumer.getFullMatchResult();
Assert.assertTrue(fullMatchResult.size() == 4);
for (int i = 0; i < fullMatchResult.size(); i++) {
Object[] objectArray = fullMatchResult.get(i);
Object masterValue = objectArray[7];
Object idValue = objectArray[0];
// judge id1 is master id2 is master id3 is master and id4 is not master
if ("id4".equals(idValue)) {
// $NON-NLS-1$
Assert.assertFalse(Boolean.parseBoolean(masterValue.toString()));
} else {
Assert.assertTrue(Boolean.parseBoolean(masterValue.toString()));
}
}
}
use of org.talend.dataquality.rules.AlgorithmDefinition in project tdq-studio-se by Talend.
the class ExecuteMatchRuleHandlerTest method testExecute3.
/**
* Test method for
* {@link org.talend.dq.analysis.ExecuteMatchRuleHandler#execute(java.util.Map, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, java.util.List, org.talend.dataquality.indicators.columnset.BlockKeyIndicator)}
* .
*
* one block key, two match rule
*/
@Test
public void testExecute3() {
Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col0.setName(columnName0);
// $NON-NLS-1$
columnMap.put(col0, "0");
MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col1.setName(columnName1);
// $NON-NLS-1$
columnMap.put(col1, "1");
MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col2.setName(columnName2);
// $NON-NLS-1$
columnMap.put(col2, "2");
MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col3.setName(columnName3);
// $NON-NLS-1$
columnMap.put(col3, "3");
RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
// create match rule
MatchRule matchRule1 = RulesFactory.eINSTANCE.createMatchRule();
MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
matchRule1.getMatchKeys().add(createMatchKeyDefinition1);
createMatchKeyDefinition1.setColumn(columnName2);
createMatchKeyDefinition1.setConfidenceWeight(1);
// $NON-NLS-1$
createMatchKeyDefinition1.setName("rule1.matchkey1");
createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
matchRuleDef.getMatchRules().add(matchRule1);
// create match rule
MatchRule matchRule2 = RulesFactory.eINSTANCE.createMatchRule();
MatchKeyDefinition createMatchKeyDefinition2 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
matchRule2.getMatchKeys().add(createMatchKeyDefinition2);
createMatchKeyDefinition2.setColumn(columnName3);
createMatchKeyDefinition2.setConfidenceWeight(1);
// $NON-NLS-1$
createMatchKeyDefinition2.setName("rule1.matchkey1");
createMatchKeyDefinition2.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
AlgorithmDefinition createAlgorithmDefinition2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
createAlgorithmDefinition2.setAlgorithmType(AttributeMatcherType.EXACT.name());
createMatchKeyDefinition2.setAlgorithm(createAlgorithmDefinition2);
matchRuleDef.getMatchRules().add(matchRule2);
// create block key
BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
createBlockKeyDefinition.setColumn(columnName1);
// $NON-NLS-1$
createBlockKeyDefinition.setName("blockKey1");
// setPreAlgorithm
AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
// setAlgorithm
AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
// setPostAlgorithm
AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);
List<Object[]> matchRows = new ArrayList<Object[]>();
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id1", "name1", "number1", "date1" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id2", "name1", "number2", "date1" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id3", "name2", "number2", "date3" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id4", "name2", "number2", "date1" });
BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);
Assert.assertTrue(executeResult.isOk());
Assert.assertTrue(executeResult.getMessage() == null);
Assert.assertTrue(executeResult.getObject() != null);
MatchGroupResultConsumer ResultConsumer = executeResult.getObject();
List<Object[]> fullMatchResult = ResultConsumer.getFullMatchResult();
Assert.assertTrue(fullMatchResult.size() == 4);
for (int i = 0; i < fullMatchResult.size(); i++) {
Object[] objectArray = fullMatchResult.get(i);
Object masterValue = objectArray[7];
Object idValue = objectArray[0];
// id2 is because of matchRule1 id4 is because of matchRule2
if ("id2".equals(idValue) || "id4".equals(idValue)) {
// $NON-NLS-1$ //$NON-NLS-2$
Assert.assertFalse(Boolean.parseBoolean(masterValue.toString()));
} else {
Assert.assertTrue(Boolean.parseBoolean(masterValue.toString()));
}
}
}
use of org.talend.dataquality.rules.AlgorithmDefinition in project tdq-studio-se by Talend.
the class ParticularDefSurshipDefinitionSection method checkFunctionValid.
/**
* Check validity of function
*
* @param functionType the type of function
* @param matchedColumnName the name of column
* @param dataType the data type of column
* @return true if function is valid else false
*/
private boolean checkFunctionValid(ParticularDefaultSurvivorshipDefinitions partucykarDefinition) {
AlgorithmDefinition algorithmDefinition = partucykarDefinition.getFunction();
String functionType = algorithmDefinition != null && algorithmDefinition.getAlgorithmType() != null ? algorithmDefinition.getAlgorithmType() : StringUtils.EMPTY;
String dataType = partucykarDefinition.getDataType();
SurvivorShipAlgorithmEnum functionEnum = SurvivorShipAlgorithmEnum.getTypeBySavedValue(functionType);
DefaultSurvivorShipDataTypeEnum[] valudDataType = functionEnum.getDataType();
return FunctionEditingSupport.isSupportDataType(valudDataType, dataType);
}
use of org.talend.dataquality.rules.AlgorithmDefinition in project tdq-studio-se by Talend.
the class BlockingKeyTableViewer method createNewKeyDefinition.
/**
* DOC zshen Comment method "createDefaultRow".
*
* @param columnName
* @return
*/
@Override
protected BlockKeyDefinition createNewKeyDefinition(String columnName) {
BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
createBlockKeyDefinition.setName(columnName);
createBlockKeyDefinition.setColumn(columnName);
AlgorithmDefinition createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
createAlgorithmDefinition.setAlgorithmType(BlockingKeyPreAlgorithmEnum.getTypeByIndex(0).getComponentValueName());
createAlgorithmDefinition.setAlgorithmParameters(StringUtils.EMPTY);
createBlockKeyDefinition.setPreAlgorithm(createAlgorithmDefinition);
createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
createAlgorithmDefinition.setAlgorithmType(BlockingKeyAlgorithmEnum.getTypeByIndex(0).getComponentValueName());
createAlgorithmDefinition.setAlgorithmParameters(StringUtils.EMPTY);
createBlockKeyDefinition.setAlgorithm(createAlgorithmDefinition);
createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
createAlgorithmDefinition.setAlgorithmType(BlockingKeyPostAlgorithmEnum.getTypeByIndex(0).getComponentValueName());
createAlgorithmDefinition.setAlgorithmParameters(StringUtils.EMPTY);
createBlockKeyDefinition.setPostAlgorithm(createAlgorithmDefinition);
return createBlockKeyDefinition;
}
Aggregations