use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class MatchAnalysisDetailsPage method createSelectRecordLinkageSection.
/**
 * Creates the algorithm selection section, binds it to the built-in match rule definition of the
 * current analysis' record matching indicator, and registers it on this page.
 */
private void createSelectRecordLinkageSection() {
    selectAlgorithmSection = new AnalysisSelectionAlgorithmSection(form, topComp, toolkit);

    // The match rule definition comes from the record matching indicator of the current analysis.
    final RecordMatchingIndicator matchingIndicator = MatchRuleAnlaysisUtils
            .getRecordMatchIndicatorFromAna(getCurrentModelElement());
    selectAlgorithmSection.setMatchRuleDef(matchingIndicator.getBuiltInMatchRuleDefinition());

    selectAlgorithmSection.createChooseAlgorithmCom();
    // This page listens to property changes fired by the section.
    selectAlgorithmSection.addPropertyChangeListener(this);

    // Apply the expanded status that getExpandedStatus() reports for the section's text.
    final String sectionTitle = selectAlgorithmSection.getSection().getText();
    final boolean expanded = getExpandedStatus(sectionTitle);
    selectAlgorithmSection.getSection().setExpanded(expanded);

    registerSection(selectAlgorithmSection.getSection());
}
use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class MatchAnalysisDetailsPage method createMatchAndSurvivorKeySection.
/**
 * Creates the match and survivor key section, binds it to the built-in match rule definition of the
 * current analysis, registers it on this page and links it to the algorithm selection section.
 * <p>
 * Relies on {@code selectAlgorithmSection} having been created beforehand, because its VSR mode is
 * queried several times here.
 */
private void createMatchAndSurvivorKeySection() {
    final int sectionStyle = Section.TWISTIE | Section.TITLE_BAR | Section.EXPANDED;
    matchAndSurvivorKeySection = new AnaMatchSurvivorSection(form, topComp, sectionStyle, toolkit,
            getCurrentModelElement());

    final RecordMatchingIndicator matchingIndicator = MatchRuleAnlaysisUtils
            .getRecordMatchIndicatorFromAna(getCurrentModelElement());
    matchAndSurvivorKeySection.setMatchRuleDef(matchingIndicator.getBuiltInMatchRuleDefinition());

    // The "add column" flag is the negation of the VSR mode.
    final boolean addColumn = !selectAlgorithmSection.isVSRMode();
    matchAndSurvivorKeySection.setAddColumn(addColumn);
    matchAndSurvivorKeySection.setColumnNameInput(getAllColumnsToKeyMap());
    matchAndSurvivorKeySection.createContent();

    registerSection(matchAndSurvivorKeySection.getSection());
    matchAndSurvivorKeySection.addPropertyChangeListener(this);

    final boolean displayStatus = !selectAlgorithmSection.isVSRMode();
    matchAndSurvivorKeySection.changeSectionDisStatus(displayStatus);

    // Apply the expanded status that getExpandedStatus() reports for the section's text.
    final String sectionTitle = matchAndSurvivorKeySection.getSection().getText();
    final boolean expanded = getExpandedStatus(sectionTitle);
    matchAndSurvivorKeySection.getSection().setExpanded(expanded);

    matchAndSurvivorKeySection.setIsNeedSubChart(true);
    selectAlgorithmSection.setAnaMatchSurvivorSection(matchAndSurvivorKeySection);

    if (!selectAlgorithmSection.isVSRMode()) {
        matchAndSurvivorKeySection.redrawnContent();
    } else {
        // Hide the section in case of VSR.
        matchAndSurvivorKeySection.changeSectionDisStatus(false);
    }
}
use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class MatchAnalysisDetailsPage method createParticularDefaultSurvivorshipSection.
/**
 * Creates the section holding the particular default survivorship table, binds it to the built-in
 * match rule definition of the current analysis, registers it on this page and hands it over to the
 * algorithm selection section.
 */
private void createParticularDefaultSurvivorshipSection() {
    particularDefaultSurvivorshipSection = new ParticularDefSurshipDefinitionSection(form, topComp, toolkit);

    final RecordMatchingIndicator matchingIndicator = MatchRuleAnlaysisUtils
            .getRecordMatchIndicatorFromAna(getCurrentModelElement());
    particularDefaultSurvivorshipSection.setMatchRuleDef(matchingIndicator.getBuiltInMatchRuleDefinition());
    particularDefaultSurvivorshipSection.setColumnNameInput(getAllColumnsToKeyMap());
    particularDefaultSurvivorshipSection.createContent();

    registerSection(particularDefaultSurvivorshipSection.getSection());
    particularDefaultSurvivorshipSection.addPropertyChangeListener(this);

    // The display status is the negation of the VSR mode of the algorithm selection section.
    final boolean displayStatus = !selectAlgorithmSection.isVSRMode();
    particularDefaultSurvivorshipSection.changeSectionDisStatus(displayStatus);

    // Apply the expanded status that getExpandedStatus() reports for the section's text.
    final String sectionTitle = particularDefaultSurvivorshipSection.getSection().getText();
    final boolean expanded = getExpandedStatus(sectionTitle);
    particularDefaultSurvivorshipSection.getSection().setExpanded(expanded);

    selectAlgorithmSection.setParticularDefaultSurvivorshipSection(particularDefaultSurvivorshipSection);
}
use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class AnalysisRecordGroupingUtilsTest method testCreateSurvivorShipAlgorithmParams.
/**
 * Test method for
 * {@link org.talend.dq.analysis.AnalysisRecordGroupingUtils#createSurvivorShipAlgorithmParams(org.talend.dataquality.record.linkage.grouping.AnalysisMatchRecordGrouping, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, Map)}
 * .
 * <p>
 * Builds a match rule definition with two survivorship keys ("a1" and "a2", both "Longest"), one default
 * survivorship definition (created with "String", "MostCommon") and one particular default survivorship
 * definition (created with "a2", "Concatenate"), maps three columns (a1, a2, a3) and checks the sizes and
 * the algorithms of the generated parameters.
 */
@Test
public void testCreateSurvivorShipAlgorithmParams() {
    // Survivorshipkey
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition createMatchRuleDefinition = RulesFactory.eINSTANCE.createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(createMatchRuleDefinition);
    EList<SurvivorshipKeyDefinition> survivorshipKeys = createMatchRuleDefinition.getSurvivorshipKeys();
    AlgorithmDefinition createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition.setAlgorithmParameters(""); //$NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("Longest"); //$NON-NLS-1$
    survivorshipKeys.add(createKeyDefinition("a1", createAlgorithmDefinition)); //$NON-NLS-1$
    createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition.setAlgorithmParameters(""); //$NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("Longest"); //$NON-NLS-1$
    survivorshipKeys.add(createKeyDefinition("a2", createAlgorithmDefinition)); //$NON-NLS-1$

    // DefaultSurvivorship
    EList<DefaultSurvivorshipDefinition> defaultSurvivorshipDefinitions = createMatchRuleDefinition
            .getDefaultSurvivorshipDefinitions();
    createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition.setAlgorithmParameters(""); //$NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("MostCommon"); //$NON-NLS-1$
    defaultSurvivorshipDefinitions.add(createDefaultsurvivShip("String", createAlgorithmDefinition)); //$NON-NLS-1$

    // ParticularDefaultSurvivorship
    EList<ParticularDefaultSurvivorshipDefinitions> particularDefaultSurvivorshipDefinitions = createMatchRuleDefinition
            .getParticularDefaultSurvivorshipDefinitions();
    createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition.setAlgorithmParameters(""); //$NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("Concatenate"); //$NON-NLS-1$
    particularDefaultSurvivorshipDefinitions.add(createParticularDefaultSurvivorshipDefinitions("a2", //$NON-NLS-1$
            createAlgorithmDefinition));

    // init columnMap: three columns a1, a2, a3 mapped to the indexes "0", "1", "2"
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName("a1"); //$NON-NLS-1$
    col0.setTalendType("id_String"); //$NON-NLS-1$
    columnMap.put(col0, "0"); //$NON-NLS-1$
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setTalendType("id_String"); //$NON-NLS-1$
    col1.setName("a2"); //$NON-NLS-1$
    columnMap.put(col1, "1"); //$NON-NLS-1$
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setTalendType("id_String"); //$NON-NLS-1$
    col2.setName("a3"); //$NON-NLS-1$
    columnMap.put(col2, "2"); //$NON-NLS-1$

    MatchGroupResultConsumer matchGroupResultConsumer = new MatchGroupResultConsumer(true) {

        @Override
        public void handle(Object row) {
            // no need to implement
        }
    };
    AnalysisMatchRecordGrouping analysisMatchRecordGrouping = new AnalysisMatchRecordGrouping(
            matchGroupResultConsumer);
    CombinedRecordMatcher combinedRecordMatcher = analysisMatchRecordGrouping.getCombinedRecordMatcher();
    DQMFBRecordMatcher dqmfbRecordMatcher = new DQMFBRecordMatcher(0.9);
    combinedRecordMatcher.getMatchers().add(dqmfbRecordMatcher);

    // One match rule made of three match keys, one per column.
    List<List<Map<String, String>>> multiMatchRules = analysisMatchRecordGrouping.getMultiMatchRules();
    List<Map<String, String>> matchRuleList = new ArrayList<Map<String, String>>();
    Map<String, String> matchKeyMap1 = new HashMap<String, String>();
    Map<String, String> matchKeyMap2 = new HashMap<String, String>();
    Map<String, String> matchKeyMap3 = new HashMap<String, String>();
    matchKeyMap1.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.EXACT.name());
    // a2: the assertions below expect CONCATENATE, the algorithm of the particular default definition
    matchKeyMap2.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name());
    // a3: the assertions below expect MOST_COMMON, the algorithm of the default definition
    matchKeyMap3.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name());
    matchRuleList.add(matchKeyMap1);
    matchRuleList.add(matchKeyMap2);
    matchRuleList.add(matchKeyMap3);
    matchKeyMap1.put(IRecordGrouping.MATCH_KEY_NAME, "a1"); //$NON-NLS-1$
    matchKeyMap2.put(IRecordGrouping.MATCH_KEY_NAME, "a2"); //$NON-NLS-1$
    matchKeyMap3.put(IRecordGrouping.MATCH_KEY_NAME, "a3"); //$NON-NLS-1$
    matchKeyMap1.put(IRecordGrouping.COLUMN_IDX, "0"); //$NON-NLS-1$
    matchKeyMap2.put(IRecordGrouping.COLUMN_IDX, "1"); //$NON-NLS-1$
    matchKeyMap3.put(IRecordGrouping.COLUMN_IDX, "2"); //$NON-NLS-1$
    multiMatchRules.add(matchRuleList);

    SurvivorShipAlgorithmParams createSurvivorShipAlgorithmParams = AnalysisRecordGroupingUtils
            .createSurvivorShipAlgorithmParams(analysisMatchRecordGrouping, recordMatchingIndicator, columnMap);

    Assert.assertEquals("The size of SurvivorShipAlgos should be 2", //$NON-NLS-1$
            2, createSurvivorShipAlgorithmParams.getSurviorShipAlgos().length);
    // the number of default survivorship rules is (column count * default definition count)
    Assert.assertEquals("The size of DefaultSurviorshipRules should be 3", 3, //$NON-NLS-1$
            createSurvivorShipAlgorithmParams.getDefaultSurviorshipRules().size());
    Map<IRecordMatcher, SurvivorshipFunction[]> survivorshipAlgosMap = createSurvivorShipAlgorithmParams
            .getSurvivorshipAlgosMap();
    Assert.assertEquals("The size of survivorshipAlgosMap should be 1", 1, survivorshipAlgosMap.size()); //$NON-NLS-1$
    SurvivorshipFunction[] survivorshipFunctions = survivorshipAlgosMap.get(dqmfbRecordMatcher);
    // Fail with a readable message instead of a NullPointerException when the matcher has no entry.
    Assert.assertNotNull("The survivorshipFunctions of the matcher should not be null", survivorshipFunctions); //$NON-NLS-1$
    Assert.assertEquals("The size of survivorshipFunctions should be 3", 3, survivorshipFunctions.length); //$NON-NLS-1$
    Assert.assertEquals("The Algorithm of a1 function should be LONGEST", //$NON-NLS-1$
            SurvivorShipAlgorithmEnum.LONGEST, survivorshipFunctions[0].getSurvivorShipAlgoEnum());
    Assert.assertEquals("The Algorithm of a2 function should be Concatenate", //$NON-NLS-1$
            SurvivorShipAlgorithmEnum.CONCATENATE, survivorshipFunctions[1].getSurvivorShipAlgoEnum());
    Assert.assertEquals("The Algorithm of a3 function should be MostCommon", //$NON-NLS-1$
            SurvivorShipAlgorithmEnum.MOST_COMMON, survivorshipFunctions[2].getSurvivorShipAlgoEnum());
}
use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class ExecuteMatchRuleHandlerTest method testExecute4.
/**
 * Test method for
 * {@link org.talend.dq.analysis.ExecuteMatchRuleHandler#execute(java.util.Map, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, java.util.List, org.talend.dataquality.indicators.columnset.BlockKeyIndicator)}
 * .
 *
 * Same as case 3, but the order of the match rules is exchanged: the rule on {@code columnName3} is added
 * before the rule on {@code columnName2}. One block key is defined on {@code columnName1}. Four rows are
 * executed and the master flag of every result row is checked.
 */
@Test
public void testExecute4() {
    // Map the four columns to their indexes "0".."3".
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    columnMap.put(col0, "0"); //$NON-NLS-1$
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    columnMap.put(col1, "1"); //$NON-NLS-1$
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    columnMap.put(col2, "2"); //$NON-NLS-1$
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    columnMap.put(col3, "3"); //$NON-NLS-1$

    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);

    // create match rule (added first): exact match on columnName3
    MatchRule matchRule2 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition2 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule2.getMatchKeys().add(createMatchKeyDefinition2);
    createMatchKeyDefinition2.setColumn(columnName3);
    createMatchKeyDefinition2.setConfidenceWeight(1);
    createMatchKeyDefinition2.setName("rule1.matchkey1"); //$NON-NLS-1$
    createMatchKeyDefinition2.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition2.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition2.setAlgorithm(createAlgorithmDefinition2);
    matchRuleDef.getMatchRules().add(matchRule2);

    // create match rule (added second): exact match on columnName2
    // NOTE(review): this key reuses the name "rule1.matchkey1" of the key above - confirm it is intended.
    MatchRule matchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName2);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    createMatchKeyDefinition1.setName("rule1.matchkey1"); //$NON-NLS-1$
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(matchRule1);

    // create block key on columnName1
    BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
    createBlockKeyDefinition.setColumn(columnName1);
    createBlockKeyDefinition.setName("blockKey1"); //$NON-NLS-1$
    // setPreAlgorithm
    AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
    // setAlgorithm
    AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
    createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
    // setPostAlgorithm
    AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
    matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);

    // input rows
    List<Object[]> matchRows = new ArrayList<Object[]>();
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name1", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name2", "number2", "date3" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name2", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap,
            recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap,
            recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);

    // Dedicated assertions report the actual value on failure, unlike assertTrue(a == b).
    Assert.assertTrue(executeResult.isOk());
    Assert.assertNull(executeResult.getMessage());
    Assert.assertNotNull(executeResult.getObject());
    MatchGroupResultConsumer resultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = resultConsumer.getFullMatchResult();
    Assert.assertEquals(4, fullMatchResult.size());

    for (Object[] objectArray : fullMatchResult) {
        // NOTE(review): index 7 is read as the master flag and index 0 as the id - confirm against the
        // layout of the result rows.
        Object masterValue = objectArray[7];
        Object idValue = objectArray[0];
        // id2 is because of matchRule1 id4 is because of matchRule2
        if ("id2".equals(idValue) || "id4".equals(idValue)) { //$NON-NLS-1$ //$NON-NLS-2$
            Assert.assertFalse(Boolean.parseBoolean(masterValue.toString()));
        } else {
            Assert.assertTrue(Boolean.parseBoolean(masterValue.toString()));
        }
    }
}
Aggregations