use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.
the class ExecuteMatchRuleHandlerTest method testExecute4.
/**
 * Test method for {@code org.talend.dq.analysis.ExecuteMatchRuleHandler#execute}, invoked here with the
 * column map, the record matching indicator, the input rows, the block key indicator and a match group
 * result consumer.
 * <p>
 * Same as case 3 but the order of the match rules is exchanged: one block key on {@code columnName1} and
 * two match rules with a single EXACT match key each, the first registered rule on {@code columnName3}
 * and the second on {@code columnName2}.
 */
@Test
public void testExecute4() {
    // map each metadata column to the index of its value inside an input row
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    columnMap.put(col0, "0"); //$NON-NLS-1$
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    columnMap.put(col1, "1"); //$NON-NLS-1$
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    columnMap.put(col2, "2"); //$NON-NLS-1$
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    columnMap.put(col3, "3"); //$NON-NLS-1$

    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);

    // create the match rule registered first (exact match on columnName3)
    // NOTE(review): both match keys of this test reuse the name "rule1.matchkey1" - confirm this is intended.
    MatchRule matchRule2 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition2 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule2.getMatchKeys().add(createMatchKeyDefinition2);
    createMatchKeyDefinition2.setColumn(columnName3);
    createMatchKeyDefinition2.setConfidenceWeight(1);
    createMatchKeyDefinition2.setName("rule1.matchkey1"); //$NON-NLS-1$
    createMatchKeyDefinition2.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition2.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition2.setAlgorithm(createAlgorithmDefinition2);
    matchRuleDef.getMatchRules().add(matchRule2);

    // create the match rule registered second (exact match on columnName2)
    MatchRule matchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName2);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    createMatchKeyDefinition1.setName("rule1.matchkey1"); //$NON-NLS-1$
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(matchRule1);

    // create block key on columnName1 with no pre/post algorithm and an exact blocking algorithm
    BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
    createBlockKeyDefinition.setColumn(columnName1);
    createBlockKeyDefinition.setName("blockKey1"); //$NON-NLS-1$
    // setPreAlgorithm
    AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
    // setAlgorithm
    AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
    createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
    // setPostAlgorithm
    AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
    matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);

    // input data
    List<Object[]> matchRows = new ArrayList<Object[]>();
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name1", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name2", "number2", "date3" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name2", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator,
            matchRows, blockKeyIndicator, matchResultConsumer);

    // dedicated assertions report the offending value when they fail
    Assert.assertTrue(executeResult.isOk());
    Assert.assertNull(executeResult.getMessage());
    Assert.assertNotNull(executeResult.getObject());
    MatchGroupResultConsumer resultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = resultConsumer.getFullMatchResult();
    Assert.assertEquals(4, fullMatchResult.size());

    for (Object[] resultRow : fullMatchResult) {
        Object idValue = resultRow[0];
        // the value at index 7 is read as the boolean master flag of the result row
        boolean isMaster = Boolean.parseBoolean(resultRow[7].toString());
        // id2 and id4 are expected to be the only non-master rows.
        // NOTE(review): presumably id2 is grouped under id1 (same name block, same date) and id4 under id3
        // (same name block, same number) - confirm against the matching engine.
        boolean expectedNonMaster = "id2".equals(idValue) || "id4".equals(idValue); //$NON-NLS-1$ //$NON-NLS-2$
        Assert.assertEquals("Unexpected master flag for row " + idValue, Boolean.valueOf(!expectedNonMaster), //$NON-NLS-1$
                Boolean.valueOf(isMaster));
    }
}
use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.
the class ExecuteMatchRuleHandlerTest method testExecute1.
/**
 * Test method for {@code org.talend.dq.analysis.ExecuteMatchRuleHandler#execute}, invoked here with the
 * column map, the record matching indicator, the input rows, the block key indicator and a match group
 * result consumer.
 * <p>
 * No block key and one match rule holding a single EXACT match key on {@code columnName0}. The four input
 * rows all carry a different first value, and every result row is expected to be flagged as master.
 */
@Test
public void testExecute1() {
    // map each metadata column to the index of its value inside an input row
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    columnMap.put(col0, "0"); //$NON-NLS-1$
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    columnMap.put(col1, "1"); //$NON-NLS-1$
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    columnMap.put(col2, "2"); //$NON-NLS-1$
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    columnMap.put(col3, "3"); //$NON-NLS-1$

    // create match key
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
    MatchRule createMatchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    createMatchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName0);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    createMatchKeyDefinition1.setName("rule1.matchkey1"); //$NON-NLS-1$
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(createMatchRule1);

    // input data
    List<Object[]> matchRows = new ArrayList<Object[]>();
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name2", "number2", "date2" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name1", "number3", "date3" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name4", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator,
            matchRows, blockKeyIndicator, matchResultConsumer);

    // dedicated assertions report the offending value when they fail
    Assert.assertTrue(executeResult.isOk());
    Assert.assertNull(executeResult.getMessage());
    Assert.assertNotNull(executeResult.getObject());
    MatchGroupResultConsumer resultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = resultConsumer.getFullMatchResult();
    Assert.assertEquals(4, fullMatchResult.size());

    // every input data is master data
    for (Object[] resultRow : fullMatchResult) {
        // the value at index 7 is read as the boolean master flag of the result row
        Object masterValue = resultRow[7];
        Assert.assertTrue("Row " + resultRow[0] + " should be a master record", //$NON-NLS-1$ //$NON-NLS-2$
                Boolean.parseBoolean(masterValue.toString()));
    }
}
use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.
the class AnaMatchSurvivorSection method generateSurvivorKeyByMatchKey.
/**
 * Synchronizes the cached match-key/survivor-key pairs of a match rule with the rule's current match keys
 * and returns the cached list.
 * <p>
 * The list is looked up in {@code matchRuleWithSurvMap} and created (and registered there) when absent.
 * The rule's match keys are then walked in order; for the key at each position:
 * <ul>
 * <li>if the list has no entry at that position, a new pair is created through
 * {@code createMatchAndSurvivorKey};</li>
 * <li>if the entry at that position carries a match key with the same name, the entry is updated in
 * place;</li>
 * <li>otherwise the entry is searched for with {@code findPositionOfCurrentMatchkey}: when found it is
 * removed, updated and re-inserted at the current position, when not found a new pair is created.</li>
 * </ul>
 *
 * @param matchRule the match rule whose match keys drive the synchronization
 * @param isMustCreateSurvivor flag forwarded unchanged to {@code updateSurvivorKey} and
 * {@code createMatchAndSurvivorKey}
 * @return the list of pairs registered for {@code matchRule} in {@code matchRuleWithSurvMap}
 */
protected List<MatchKeyAndSurvivorDefinition> generateSurvivorKeyByMatchKey(MatchRule matchRule, boolean isMustCreateSurvivor) {
    List<MatchKeyAndSurvivorDefinition> pairs = matchRuleWithSurvMap.get(matchRule);
    if (pairs == null) {
        pairs = new ArrayList<MatchKeyAndSurvivorDefinition>();
        matchRuleWithSurvMap.put(matchRule, pairs);
    }

    int position = -1;
    for (MatchKeyDefinition currentKey : matchRule.getMatchKeys()) {
        position++;

        // nothing cached at this position yet: a pair has to be created for the key
        if (position >= pairs.size()) {
            createMatchAndSurvivorKey(currentKey, isMustCreateSurvivor, pairs);
            continue;
        }

        // same name at the same position: the key did not move, refresh the cached pair in place
        MatchKeyAndSurvivorDefinition pairAtPosition = pairs.get(position);
        if (StringUtils.equals(currentKey.getName(), pairAtPosition.getMatchKey().getName())) {
            pairAtPosition.setMatchKey(currentKey);
            updateSurvivorKey(isMustCreateSurvivor, currentKey.getName(), pairAtPosition);
            continue;
        }

        // the key sits at a different position than before: look for its former pair in the list
        MatchKeyAndSurvivorDefinition formerPair = findPositionOfCurrentMatchkey(currentKey, pairs);
        if (formerPair == null) {
            // not found, so the key is a new one
            createMatchAndSurvivorKey(currentKey, isMustCreateSurvivor, pairs);
            continue;
        }

        // take the pair out of its old slot, refresh it and put it back at the key's current position
        pairs.remove(formerPair);
        formerPair.setMatchKey(currentKey);
        updateSurvivorKey(isMustCreateSurvivor, currentKey.getName(), formerPair);
        pairs.add(position, formerPair);
    }
    return pairs;
}
use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.
the class MatchingKeySection method createMatchRuleByCopy.
/**
 * Creates a new match rule from an existing one: every match key of the old rule is copied with
 * {@code EcoreUtil.copy} and added to the new rule, and the match interval is taken over as is.
 *
 * @param oldRule the rule to copy from
 * @return the newly created rule
 */
private MatchRule createMatchRuleByCopy(MatchRule oldRule) {
    MatchRule copiedRule = RulesFactory.eINSTANCE.createMatchRule();
    boolean hasMatchKeys = oldRule.getMatchKeys() != null && !oldRule.getMatchKeys().isEmpty();
    if (hasMatchKeys) {
        for (MatchKeyDefinition sourceKey : oldRule.getMatchKeys()) {
            MatchKeyDefinition keyCopy = EcoreUtil.copy(sourceKey);
            copiedRule.getMatchKeys().add(keyCopy);
        }
    }
    copiedRule.setMatchInterval(oldRule.getMatchInterval());
    return copiedRule;
}
use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.
the class MatchKeyAndSurvivorshipTableViewer method createNewKeyDefinition.
/*
 * (non-Javadoc)
 *
 * Creates a default match key and a new survivorship key for the given column, wraps both in one
 * MatchKeyAndSurvivorDefinition, then adds the match key to the current match rule and the survivorship key
 * to the MatchRuleDefinition that contains that rule.
 *
 * @see
 * org.talend.dataquality.record.linkage.ui.composite.tableviewer.definition.MatchKeyAndSurvivorshipTableViewer#
 * createNewKeyDefinition(java.lang.String)
 */
@Override
protected MatchKeyAndSurvivorDefinition createNewKeyDefinition(String columnName) {
    MatchKeyDefinition newMatchKey = MatchRuleAnlaysisUtils.createDefaultMatchRow(columnName);
    SurvivorshipKeyDefinition newSurvivorshipKey = createNewSurvivorshipKeyDefinition(columnName);

    MatchKeyAndSurvivorDefinition keyPair = new MatchKeyAndSurvivorDefinition();
    keyPair.setMatchKey(newMatchKey);
    keyPair.setSurvivorShipKey(newSurvivorshipKey);

    // register the match key on the rule first, then the survivorship key on the rule's container
    matchRule.getMatchKeys().add(keyPair.getMatchKey());
    MatchRuleDefinition owningDefinition = (MatchRuleDefinition) matchRule.eContainer();
    owningDefinition.getSurvivorshipKeys().add(keyPair.getSurvivorShipKey());
    return keyPair;
}
Aggregations