use of org.talend.dataquality.rules.MatchRule in project tdq-studio-se by Talend.
the class MatchAnalysisExecutorTest method createMatchIndicatorWithOneMathRule.
/**
* DOC zhao Comment method "createMatchIndicatorWithOneMathRule".
*
* @param nameVar
* @param matchIndicator
* @param groupQualityThreshold
* @param matchInterval
*/
private void createMatchIndicatorWithOneMathRule(String nameVar, RecordMatchingIndicator matchIndicator, double groupQualityThreshold, double matchInterval) {
MatchRuleDefinition matchRuleDefinition = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
matchRuleDefinition.setMatchGroupQualityThreshold(groupQualityThreshold);
MatchRule matchRule = RulesPackage.eINSTANCE.getRulesFactory().createMatchRule();
matchRule.setMatchInterval(matchInterval);
matchRule.setName("match rule 1");
MatchKeyDefinition matchkeyDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchKeyDefinition();
matchkeyDef.setName(nameVar);
matchkeyDef.setColumn(nameVar);
AlgorithmDefinition algoDef = RulesPackage.eINSTANCE.getRulesFactory().createAlgorithmDefinition();
algoDef.setAlgorithmType(AttributeMatcherType.LEVENSHTEIN.name());
matchkeyDef.setAlgorithm(algoDef);
matchkeyDef.setConfidenceWeight(1);
matchRule.getMatchKeys().add(matchkeyDef);
matchRuleDefinition.getMatchRules().add(matchRule);
matchIndicator.setBuiltInMatchRuleDefinition(matchRuleDefinition);
}
use of org.talend.dataquality.rules.MatchRule in project tdq-studio-se by Talend.
the class ExecuteMatchRuleHandlerTest method testExecute2.
/**
* Test method for
* {@link org.talend.dq.analysis.ExecuteMatchRuleHandler#execute(java.util.Map, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, java.util.List, org.talend.dataquality.indicators.columnset.BlockKeyIndicator)}
* .
*
* one block key one match key
*/
@Test
public void testExecute2() {
Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col0.setName(columnName0);
// $NON-NLS-1$
columnMap.put(col0, "0");
MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col1.setName(columnName1);
// $NON-NLS-1$
columnMap.put(col1, "1");
MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col2.setName(columnName2);
// $NON-NLS-1$
columnMap.put(col2, "2");
MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col3.setName(columnName3);
// $NON-NLS-1$
columnMap.put(col3, "3");
RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
// create match key
MatchRule createMatchRule1 = RulesFactory.eINSTANCE.createMatchRule();
MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
createMatchRule1.getMatchKeys().add(createMatchKeyDefinition1);
createMatchKeyDefinition1.setColumn(columnName2);
createMatchKeyDefinition1.setConfidenceWeight(1);
// $NON-NLS-1$
createMatchKeyDefinition1.setName("rule1.matchkey1");
createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
matchRuleDef.getMatchRules().add(createMatchRule1);
// create block key
BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
createBlockKeyDefinition.setColumn(columnName1);
// $NON-NLS-1$
createBlockKeyDefinition.setName("blockKey1");
// setPreAlgorithm
AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
// setAlgorithm
AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
// setPostAlgorithm
AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);
List<Object[]> matchRows = new ArrayList<Object[]>();
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id1", "name1", "number1", "date1" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id2", "name1", "number2", "date2" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id3", "name2", "number2", "date3" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id4", "name2", "number2", "date1" });
BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);
Assert.assertTrue(executeResult.isOk());
Assert.assertTrue(executeResult.getMessage() == null);
Assert.assertTrue(executeResult.getObject() != null);
MatchGroupResultConsumer ResultConsumer = executeResult.getObject();
List<Object[]> fullMatchResult = ResultConsumer.getFullMatchResult();
Assert.assertTrue(fullMatchResult.size() == 4);
for (int i = 0; i < fullMatchResult.size(); i++) {
Object[] objectArray = fullMatchResult.get(i);
Object masterValue = objectArray[7];
Object idValue = objectArray[0];
// judge id1 is master id2 is master id3 is master and id4 is not master
if ("id4".equals(idValue)) {
// $NON-NLS-1$
Assert.assertFalse(Boolean.parseBoolean(masterValue.toString()));
} else {
Assert.assertTrue(Boolean.parseBoolean(masterValue.toString()));
}
}
}
use of org.talend.dataquality.rules.MatchRule in project tdq-studio-se by Talend.
the class ExecuteMatchRuleHandlerTest method testExecute3.
/**
* Test method for
* {@link org.talend.dq.analysis.ExecuteMatchRuleHandler#execute(java.util.Map, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, java.util.List, org.talend.dataquality.indicators.columnset.BlockKeyIndicator)}
* .
*
* one block key, two match rule
*/
@Test
public void testExecute3() {
Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col0.setName(columnName0);
// $NON-NLS-1$
columnMap.put(col0, "0");
MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col1.setName(columnName1);
// $NON-NLS-1$
columnMap.put(col1, "1");
MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col2.setName(columnName2);
// $NON-NLS-1$
columnMap.put(col2, "2");
MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
col3.setName(columnName3);
// $NON-NLS-1$
columnMap.put(col3, "3");
RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
// create match rule
MatchRule matchRule1 = RulesFactory.eINSTANCE.createMatchRule();
MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
matchRule1.getMatchKeys().add(createMatchKeyDefinition1);
createMatchKeyDefinition1.setColumn(columnName2);
createMatchKeyDefinition1.setConfidenceWeight(1);
// $NON-NLS-1$
createMatchKeyDefinition1.setName("rule1.matchkey1");
createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
matchRuleDef.getMatchRules().add(matchRule1);
// create match rule
MatchRule matchRule2 = RulesFactory.eINSTANCE.createMatchRule();
MatchKeyDefinition createMatchKeyDefinition2 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
matchRule2.getMatchKeys().add(createMatchKeyDefinition2);
createMatchKeyDefinition2.setColumn(columnName3);
createMatchKeyDefinition2.setConfidenceWeight(1);
// $NON-NLS-1$
createMatchKeyDefinition2.setName("rule1.matchkey1");
createMatchKeyDefinition2.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
AlgorithmDefinition createAlgorithmDefinition2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
createAlgorithmDefinition2.setAlgorithmType(AttributeMatcherType.EXACT.name());
createMatchKeyDefinition2.setAlgorithm(createAlgorithmDefinition2);
matchRuleDef.getMatchRules().add(matchRule2);
// create block key
BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
createBlockKeyDefinition.setColumn(columnName1);
// $NON-NLS-1$
createBlockKeyDefinition.setName("blockKey1");
// setPreAlgorithm
AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
// setAlgorithm
AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
// setPostAlgorithm
AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);
List<Object[]> matchRows = new ArrayList<Object[]>();
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id1", "name1", "number1", "date1" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id2", "name1", "number2", "date1" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id3", "name2", "number2", "date3" });
// $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
matchRows.add(new String[] { "id4", "name2", "number2", "date1" });
BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);
Assert.assertTrue(executeResult.isOk());
Assert.assertTrue(executeResult.getMessage() == null);
Assert.assertTrue(executeResult.getObject() != null);
MatchGroupResultConsumer ResultConsumer = executeResult.getObject();
List<Object[]> fullMatchResult = ResultConsumer.getFullMatchResult();
Assert.assertTrue(fullMatchResult.size() == 4);
for (int i = 0; i < fullMatchResult.size(); i++) {
Object[] objectArray = fullMatchResult.get(i);
Object masterValue = objectArray[7];
Object idValue = objectArray[0];
// id2 is because of matchRule1 id4 is because of matchRule2
if ("id2".equals(idValue) || "id4".equals(idValue)) {
// $NON-NLS-1$ //$NON-NLS-2$
Assert.assertFalse(Boolean.parseBoolean(masterValue.toString()));
} else {
Assert.assertTrue(Boolean.parseBoolean(masterValue.toString()));
}
}
}
use of org.talend.dataquality.rules.MatchRule in project tdq-studio-se by Talend.
the class AnalysisRecordGroupingUtils method setRuleMatcher.
/**
* DOC zhao Comment method "setRuleMatcher".
*
* @param columnMap
* @param recordMatchingIndicator
* @param analysisMatchRecordGrouping
* @throws BusinessException
*/
public static void setRuleMatcher(Map<MetadataColumn, String> columnMap, RecordMatchingIndicator recordMatchingIndicator, AnalysisMatchRecordGrouping analysisMatchRecordGrouping) throws BusinessException {
List<MatchRule> matchRules = recordMatchingIndicator.getBuiltInMatchRuleDefinition().getMatchRules();
// Column index list store all the indices.
List<String> allColumnIndice = new ArrayList<String>();
for (MatchRule matcher : matchRules) {
if (matcher == null) {
continue;
}
List<Map<String, String>> currentRuleMatcher = new ArrayList<Map<String, String>>();
List<String> currentColumnIndice = new ArrayList<String>();
for (MatchKeyDefinition matchDef : matcher.getMatchKeys()) {
// column,throw exception, do not continue
if (matchDef.getColumn() == null || StringUtils.EMPTY.equals(matchDef.getColumn())) {
BusinessException businessException = new BusinessException();
businessException.setAdditonalMessage(// $NON-NLS-1$
Messages.getString(// $NON-NLS-1$
"MatchAnalysisExecutor.NoColumnInMatchKey", matchDef.getName()));
throw businessException;
}
String algorithmType = matchDef.getAlgorithm().getAlgorithmType();
Map<String, String> matchKeyMap = getMatchKeyMap(columnMap, matcher, matchDef, algorithmType);
addMatchKeyOrderbyColumnIdx(currentRuleMatcher, matchKeyMap);
currentColumnIndice.add(matchKeyMap.get(IRecordGrouping.COLUMN_IDX));
}
if (allColumnIndice.isEmpty()) {
for (Map<String, String> matchKey : currentRuleMatcher) {
String colIdx = matchKey.get(IRecordGrouping.COLUMN_IDX);
allColumnIndice.add(colIdx);
}
} else {
refineMatcherWithDummy(analysisMatchRecordGrouping, allColumnIndice, currentColumnIndice, currentRuleMatcher);
}
analysisMatchRecordGrouping.addRuleMatcher(currentRuleMatcher);
}
}
use of org.talend.dataquality.rules.MatchRule in project tdq-studio-se by Talend.
the class ImportMatchRuleAction method run.
@Override
public void run() {
MatchRuleElementTreeSelectionDialog dialog = new MatchRuleElementTreeSelectionDialog(null, new DQRepositoryViewLabelProvider(), new ResourceViewContentProvider(), MatchRuleElementTreeSelectionDialog.MATCH_ANALYSIS_TYPE);
List<String> inputColumnNames = new ArrayList<String>();
Map<String, String> columnName2Type = new HashMap<String, String>();
Analysis analysis = masterPage.getCurrentModelElement();
EList<ModelElement> elements = analysis.getContext().getAnalysedElements();
for (ModelElement me : elements) {
inputColumnNames.add(me.getName());
if (me instanceof MetadataColumn) {
columnName2Type.put(me.getName(), ((MetadataColumn) me).getTalendType());
}
}
dialog.setInputColumnNames(inputColumnNames);
dialog.setColumnName2Type(columnName2Type);
AnalysisResult anaResults = analysis.getResults();
if (anaResults != null) {
for (Indicator ind : anaResults.getIndicators()) {
if (ind != null && ind instanceof RecordMatchingIndicator) {
RecordMatchingIndicator rmInd = (RecordMatchingIndicator) ind;
MatchRuleDefinition builtInMatchRuleDefinition = rmInd.getBuiltInMatchRuleDefinition();
if (builtInMatchRuleDefinition != null) {
if (builtInMatchRuleDefinition.getBlockKeys() != null && builtInMatchRuleDefinition.getBlockKeys().size() > 0) {
List<String> blockKeyName = new ArrayList<String>();
for (BlockKeyDefinition blockKey : builtInMatchRuleDefinition.getBlockKeys()) {
blockKeyName.add(blockKey.getName());
}
dialog.setCurrentAnaBlockKeys(blockKeyName);
}
List<String> matchKeysName = new ArrayList<String>();
for (MatchRule matchRule : builtInMatchRuleDefinition.getMatchRules()) {
EList<MatchKeyDefinition> matchKeys = matchRule.getMatchKeys();
for (MatchKeyDefinition mkd : matchKeys) {
// same
if (!matchKeysName.contains(mkd.getName())) {
matchKeysName.add(mkd.getName());
}
}
}
dialog.setAnalysisCurrentMatchKeys(matchKeysName);
List<String> pdsdKeysName = new ArrayList<String>();
for (ParticularDefaultSurvivorshipDefinitions pdsd : builtInMatchRuleDefinition.getParticularDefaultSurvivorshipDefinitions()) {
pdsdKeysName.add(pdsd.getColumn());
}
dialog.setAnalysisCurrentParticularColumns(pdsdKeysName);
}
}
}
}
dialog.create();
if (dialog.open() == Window.OK) {
Object[] results = dialog.getResult();
for (Object obj : results) {
if (obj instanceof RuleRepNode) {
RuleRepNode node = (RuleRepNode) obj;
MatchRuleDefinition matchRule = (MatchRuleDefinition) node.getRule();
if (matchRule != null) {
updateMatchRule(matchRule, dialog.isOverwrite());
}
}
}
}
}
Aggregations