use of org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions in project tdq-studio-se by Talend.
the class AnalysisRecordGroupingUtilsTest method testCreateSurvivorShipAlgorithmParams.
/**
 * Test method for
 * {@link org.talend.dq.analysis.AnalysisRecordGroupingUtils#createSurvivorShipAlgorithmParams(org.talend.dataquality.record.linkage.grouping.AnalysisMatchRecordGrouping, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, Map)}.
 * <p>
 * The fixture declares two survivorship keys (a1 and a2, both "Longest"), one default rule for the "String"
 * data type ("MostCommon") and one column-specific default rule (a2, "Concatenate"). Three columns a1, a2
 * and a3 are mapped to the indexes 0, 1 and 2, and a single match rule with three match keys is registered.
 */
@Test
public void testCreateSurvivorShipAlgorithmParams() {
    RecordMatchingIndicator indicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition ruleDefinition = RulesFactory.eINSTANCE.createMatchRuleDefinition();
    indicator.setBuiltInMatchRuleDefinition(ruleDefinition);

    // Survivorship keys: a1 and a2 each get their own "Longest" algorithm definition.
    EList<SurvivorshipKeyDefinition> keyDefinitions = ruleDefinition.getSurvivorshipKeys();
    AlgorithmDefinition longestForA1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    longestForA1.setAlgorithmParameters(""); //$NON-NLS-1$
    longestForA1.setAlgorithmType("Longest"); //$NON-NLS-1$
    keyDefinitions.add(createKeyDefinition("a1", longestForA1)); //$NON-NLS-1$
    AlgorithmDefinition longestForA2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    longestForA2.setAlgorithmParameters(""); //$NON-NLS-1$
    longestForA2.setAlgorithmType("Longest"); //$NON-NLS-1$
    keyDefinitions.add(createKeyDefinition("a2", longestForA2)); //$NON-NLS-1$

    // Default survivorship by data type: "String" -> "MostCommon".
    EList<DefaultSurvivorshipDefinition> defaultDefinitions = ruleDefinition.getDefaultSurvivorshipDefinitions();
    AlgorithmDefinition mostCommonForString = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    mostCommonForString.setAlgorithmParameters(""); //$NON-NLS-1$
    mostCommonForString.setAlgorithmType("MostCommon"); //$NON-NLS-1$
    defaultDefinitions.add(createDefaultsurvivShip("String", mostCommonForString)); //$NON-NLS-1$

    // Column-specific default survivorship: a2 -> "Concatenate".
    EList<ParticularDefaultSurvivorshipDefinitions> particularDefinitions = ruleDefinition
            .getParticularDefaultSurvivorshipDefinitions();
    AlgorithmDefinition concatenateForA2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    concatenateForA2.setAlgorithmParameters(""); //$NON-NLS-1$
    concatenateForA2.setAlgorithmType("Concatenate"); //$NON-NLS-1$
    particularDefinitions.add(createParticularDefaultSurvivorshipDefinitions("a2", concatenateForA2)); //$NON-NLS-1$

    // Column map: three String columns mapped to their index (kept as a string).
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn columnA1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    columnA1.setName("a1"); //$NON-NLS-1$
    columnA1.setTalendType("id_String"); //$NON-NLS-1$
    columnMap.put(columnA1, "0"); //$NON-NLS-1$
    MetadataColumn columnA2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    columnA2.setName("a2"); //$NON-NLS-1$
    columnA2.setTalendType("id_String"); //$NON-NLS-1$
    columnMap.put(columnA2, "1"); //$NON-NLS-1$
    MetadataColumn columnA3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    columnA3.setName("a3"); //$NON-NLS-1$
    columnA3.setTalendType("id_String"); //$NON-NLS-1$
    columnMap.put(columnA3, "2"); //$NON-NLS-1$

    // Record grouping with a consumer that ignores every row and a single record matcher.
    MatchGroupResultConsumer ignoringConsumer = new MatchGroupResultConsumer(true) {

        @Override
        public void handle(Object row) {
            // no need to implement
        }
    };
    AnalysisMatchRecordGrouping recordGrouping = new AnalysisMatchRecordGrouping(ignoringConsumer);
    CombinedRecordMatcher combinedMatcher = recordGrouping.getCombinedRecordMatcher();
    DQMFBRecordMatcher recordMatcher = new DQMFBRecordMatcher(0.9);
    combinedMatcher.getMatchers().add(recordMatcher);

    // One match rule made of three match keys: a1 is EXACT, a2 and a3 are DUMMY.
    Map<String, String> matchKeyA1 = new HashMap<String, String>();
    matchKeyA1.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.EXACT.name());
    matchKeyA1.put(IRecordGrouping.MATCH_KEY_NAME, "a1"); //$NON-NLS-1$
    matchKeyA1.put(IRecordGrouping.COLUMN_IDX, "0"); //$NON-NLS-1$

    Map<String, String> matchKeyA2 = new HashMap<String, String>();
    matchKeyA2.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name());
    matchKeyA2.put(IRecordGrouping.MATCH_KEY_NAME, "a2"); //$NON-NLS-1$
    matchKeyA2.put(IRecordGrouping.COLUMN_IDX, "1"); //$NON-NLS-1$

    Map<String, String> matchKeyA3 = new HashMap<String, String>();
    matchKeyA3.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name());
    matchKeyA3.put(IRecordGrouping.MATCH_KEY_NAME, "a3"); //$NON-NLS-1$
    matchKeyA3.put(IRecordGrouping.COLUMN_IDX, "2"); //$NON-NLS-1$

    List<Map<String, String>> matchRule = new ArrayList<Map<String, String>>();
    matchRule.add(matchKeyA1);
    matchRule.add(matchKeyA2);
    matchRule.add(matchKeyA3);
    List<List<Map<String, String>>> allMatchRules = recordGrouping.getMultiMatchRules();
    allMatchRules.add(matchRule);

    SurvivorShipAlgorithmParams algorithmParams = AnalysisRecordGroupingUtils
            .createSurvivorShipAlgorithmParams(recordGrouping, indicator, columnMap);

    // Two survivorship keys were declared above.
    Assert.assertEquals("The size of SurvivorShipAlgos should be 2", 2, //$NON-NLS-1$
            algorithmParams.getSurviorShipAlgos().length);
    // Three columns are mapped and three default rules are expected.
    Assert.assertEquals("The size of DefaultSurviorshipRules should be 3", 3, //$NON-NLS-1$
            algorithmParams.getDefaultSurviorshipRules().size());

    // Only one record matcher was registered, so a single entry is expected.
    Map<IRecordMatcher, SurvivorshipFunction[]> functionsByMatcher = algorithmParams.getSurvivorshipAlgosMap();
    Assert.assertEquals("The size of survivorshipAlgosMap should be 1", 1, functionsByMatcher.size()); //$NON-NLS-1$

    SurvivorshipFunction[] matcherFunctions = functionsByMatcher.get(recordMatcher);
    Assert.assertEquals("The size of survivorshipFunctions should be 3", 3, matcherFunctions.length); //$NON-NLS-1$
    Assert.assertEquals("The Algorithm of a1 function should be LONGEST", //$NON-NLS-1$
            SurvivorShipAlgorithmEnum.LONGEST, matcherFunctions[0].getSurvivorShipAlgoEnum());
    Assert.assertEquals("The Algorithm of a2 function should be Concatenate", //$NON-NLS-1$
            SurvivorShipAlgorithmEnum.CONCATENATE, matcherFunctions[1].getSurvivorShipAlgoEnum());
    Assert.assertEquals("The Algorithm of a3 function should be MostCommon", //$NON-NLS-1$
            SurvivorShipAlgorithmEnum.MOST_COMMON, matcherFunctions[2].getSurvivorShipAlgoEnum());
}
use of org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions in project tdq-studio-se by Talend.
the class AnalysisRecordGroupingUtilsTest method createParticularDefaultSurvivorshipDefinitions.
/**
 * Creates a column-specific default survivorship definition.
 *
 * @param colName the column name stored on the new definition
 * @param algorDef the algorithm definition stored as the function of the new definition
 * @return the newly created definition with its function and column set
 */
private ParticularDefaultSurvivorshipDefinitions createParticularDefaultSurvivorshipDefinitions(String colName,
        AlgorithmDefinition algorDef) {
    ParticularDefaultSurvivorshipDefinitions definition = RulesFactory.eINSTANCE
            .createParticularDefaultSurvivorshipDefinitions();
    definition.setFunction(algorDef);
    definition.setColumn(colName);
    return definition;
}
use of org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions in project tdq-studio-se by Talend.
the class AnalysisMatchParameterAdapter method getDefaultSurviorShipRules.
/**
 * Builds the default survivorship functions for the columns of {@code columnMap}.
 * <p>
 * For each column, the first column-specific definition whose column equals the column name is registered.
 * If the result map then holds no function for that column's index, the first data-type definition that
 * matches the column's Talend type is registered instead. A column matching neither gets no entry.
 *
 * @return the map filled through {@code putNewSurvFunc}, looked up here by the integer value of the
 * column's entry in {@code columnMap}
 * @see org.talend.dataquality.record.linkage.grouping.adapter.MatchParameterAdapter#getDefaultSurviorShipRules()
 */
@Override
public Map<Integer, SurvivorshipFunction> getDefaultSurviorShipRules() {
    Map<Integer, SurvivorshipFunction> defaultSurvRules = new HashMap<Integer, SurvivorshipFunction>();
    // Data-type based default survivorship definitions.
    List<DefaultSurvivorshipDefinition> defSurvDefs = recordMatchingIndicator.getBuiltInMatchRuleDefinition()
            .getDefaultSurvivorshipDefinitions();
    // Column-specific definitions, which are tried before the data-type based ones.
    EList<ParticularDefaultSurvivorshipDefinitions> particularDefaultSurvivorshipDefinitions = recordMatchingIndicator
            .getBuiltInMatchRuleDefinition().getParticularDefaultSurvivorshipDefinitions();
    for (MetadataColumn metaColumn : columnMap.keySet()) {
        String dataTypeName = metaColumn.getTalendType();
        String columnName = metaColumn.getName();
        for (ParticularDefaultSurvivorshipDefinitions pdefaultSurvivdef : particularDefaultSurvivorshipDefinitions) {
            // A definition without a column cannot target any column: skip it instead of failing with a
            // NullPointerException.
            String particularColumn = pdefaultSurvivdef.getColumn();
            if (particularColumn != null && particularColumn.equals(columnName)) {
                putNewSurvFunc(columnMap, defaultSurvRules, metaColumn, pdefaultSurvivdef);
                break;
            }
        }
        // The column has already been handled by a column-specific definition.
        if (defaultSurvRules.get(Integer.valueOf(columnMap.get(metaColumn))) != null) {
            continue;
        }
        for (DefaultSurvivorshipDefinition defSurvDef : defSurvDefs) {
            // The Talend type carries an "id_" prefix; a "Number" definition applies to every type that
            // JavaTypesManager reports as a number.
            if (StringUtils.equals(dataTypeName, "id_" + defSurvDef.getDataType()) //$NON-NLS-1$
                    || (StringUtils.equals(defSurvDef.getDataType(), "Number") //$NON-NLS-1$
                            && JavaTypesManager.isNumber(dataTypeName))) {
                putNewSurvFunc(columnMap, defaultSurvRules, metaColumn, defSurvDef);
                break;
            }
        }
        // End for: if no func defined, then the value will be taken from one of the records in a group (1st
        // one ).
    }
    return defaultSurvRules;
}
use of org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions in project tdq-studio-se by Talend.
the class ParticularDefSurshipDefinitionSection method removeTableItem.
/**
 * Removes every selected item of the table from the particular default survivorship definitions of the
 * match rule, and fires the dirty property change when at least one item was processed.
 */
@Override
public void removeTableItem() {
    ISelection selection = tableComposite.getSelectItems();
    if (!(selection instanceof StructuredSelection)) {
        // Nothing to remove for any other kind of selection.
        return;
    }
    boolean removedAny = false;
    for (Iterator<ParticularDefaultSurvivorshipDefinitions> selected = ((StructuredSelection) selection)
            .iterator(); selected.hasNext();) {
        ParticularDefaultSurvivorshipDefinitions definition = selected.next();
        tableComposite.removeKeyDefinition(definition, matchRuleDef.getParticularDefaultSurvivorshipDefinitions());
        removedAny = true;
    }
    if (removedAny) {
        listeners.firePropertyChange(MatchAnalysisConstant.ISDIRTY_PROPERTY, true, false);
    }
}
use of org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions in project tdq-studio-se by Talend.
the class ParticularDefSurshipDefinitionSection method importParticularSurvivorshipFunctions.
/**
 * Imports the particular (column-specific) default survivorship functions of the given match rule into the
 * table. When {@code overwrite} is set, the current table content is cleared before the import.
 * <p>
 * Every imported definition is a copy of the original. A copy that {@code checkFunctionValid} rejects has
 * its function reset to the MOST_COMMON algorithm with empty parameters.
 *
 * @param matchRuleDef the match rule definition whose particular default survivorship definitions are imported
 * @param overwrite {@code true} to clear the current functions before importing
 */
@SuppressWarnings("unchecked")
public void importParticularSurvivorshipFunctions(MatchRuleDefinition matchRuleDef, boolean overwrite) {
    // Reuse the list currently shown by the table, or start with an empty one when the table has no input.
    Object currentInput = tableComposite.getInput();
    EList<ParticularDefaultSurvivorshipDefinitions> targetFunctions = currentInput == null
            ? new BasicEList<ParticularDefaultSurvivorshipDefinitions>()
            : (EList<ParticularDefaultSurvivorshipDefinitions>) currentInput;
    if (overwrite) {
        targetFunctions.clear();
    }
    for (ParticularDefaultSurvivorshipDefinitions source : matchRuleDef.getParticularDefaultSurvivorshipDefinitions()) {
        // Work on a copy so that the modifications below never touch the imported rule's own element.
        ParticularDefaultSurvivorshipDefinitions imported = EcoreUtil.copy(source);
        setColumnValueIfMatch(imported);
        if (!checkFunctionValid(imported)) {
            imported.getFunction().setAlgorithmType(SurvivorShipAlgorithmEnum.MOST_COMMON.getComponentValueName());
            imported.getFunction().setAlgorithmParameters(StringUtils.EMPTY);
        }
        targetFunctions.add(imported);
    }
    tableComposite.setInput(targetFunctions);
}
Aggregations