Search in sources :

Example 6 with SurvivorshipFunction

Use of org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction in the project tdq-studio-se by Talend.

The method getSurvivorshipAlgosMap of the class AnalysisMatchParameterAdapter.

/**
 * Builds, for each match rule, the array of survivorship functions to apply to its match keys and associates
 * that array with the record matcher found at the same index in the combined record matcher.
 *
 * <p>
 * For every match key definition of a rule:
 * <ul>
 * <li>if its matching type is {@code DUMMY} (compared case-insensitively), the function is looked up in
 * {@code colIdx2DefaultSurvFunc} by the key's column index; when none is found, a new function using
 * {@code MOST_COMMON} with {@code SurvivorshipUtils.DEFAULT_CONCATENATE_PARAMETER} is created;</li>
 * <li>otherwise the first entry of {@code survFunctions} whose survivorship key equals the match key name is
 * used; the slot stays {@code null} when nothing matches or when the definition has no match key name.</li>
 * </ul>
 * {@code null} rules are skipped, but they still consume an index so that rule and matcher positions stay
 * aligned.
 *
 * @param colIdx2DefaultSurvFunc default survivorship functions keyed by column index
 * @param survFunctions survivorship functions searched by survivorship key
 * @return a map from record matcher to the survivorship functions of the corresponding match rule, one array
 * slot per match key definition
 * @throws NumberFormatException if a {@code DUMMY} match key definition has a missing or non-numeric column
 * index
 * @see org.talend.dataquality.record.linkage.grouping.adapter.MatchParameterAdapter#getSurvivorshipAlgosMap(java.util.Map,
 * java.util.List)
 */
@Override
public Map<IRecordMatcher, SurvivorshipFunction[]> getSurvivorshipAlgosMap(Map<Integer, SurvivorshipFunction> colIdx2DefaultSurvFunc, List<SurvivorshipFunction> survFunctions) {
    Map<IRecordMatcher, SurvivorshipFunction[]> survAlgos = new HashMap<IRecordMatcher, SurvivorshipFunction[]>();
    int matchRuleIdx = -1;
    List<List<Map<String, String>>> multiRules = analysisMatchRecordGrouping.getMultiMatchRules();
    for (List<Map<String, String>> matchrule : multiRules) {
        // Advance the index before the null check so that skipped rules keep later rules aligned with
        // their matchers.
        matchRuleIdx++;
        if (matchrule == null) {
            continue;
        }
        SurvivorshipFunction[] surFuncsInMatcher = new SurvivorshipFunction[matchrule.size()];
        int idx = 0;
        for (Map<String, String> mkDef : matchrule) {
            String matcherType = mkDef.get(IRecordGrouping.MATCHING_TYPE);
            if (AttributeMatcherType.DUMMY.name().equalsIgnoreCase(matcherType)) {
                // Find the func from the default survivorship rule of the column.
                SurvivorshipFunction defaultFunc = colIdx2DefaultSurvFunc.get(Integer.valueOf(mkDef.get(IRecordGrouping.COLUMN_IDX)));
                if (defaultFunc == null) {
                    // Use MOST_COMMON by default if not specified.
                    // NOTE(review): the parameter constant is named for CONCATENATE while the algorithm is
                    // MOST_COMMON - confirm this pairing is intended.
                    defaultFunc = new SurvivorShipAlgorithmParams().new SurvivorshipFunction();
                    defaultFunc.setSurvivorShipAlgoEnum(SurvivorShipAlgorithmEnum.MOST_COMMON);
                    // MOD TDQ-11774 set a default parameter
                    defaultFunc.setParameter(SurvivorshipUtils.DEFAULT_CONCATENATE_PARAMETER);
                }
                surFuncsInMatcher[idx] = defaultFunc;
            } else {
                // Find the func from the existing survivorship rule list. The key name does not change while
                // scanning the list, so it is read once; a missing key name simply matches nothing.
                String keyName = mkDef.get(IRecordGrouping.MATCH_KEY_NAME);
                if (keyName != null) {
                    for (SurvivorshipFunction survFunc : survFunctions) {
                        if (keyName.equals(survFunc.getSurvivorShipKey())) {
                            surFuncsInMatcher[idx] = survFunc;
                            break;
                        }
                    }
                }
            }
            idx++;
        }
        // Add the funcs to a specific record matcher. NOTE that the index of the matcher must coincide with
        // the index of the match rule.
        survAlgos.put(this.getCombinedRecordMatcher().getMatchers().get(matchRuleIdx), surFuncsInMatcher);
    }
    return survAlgos;
}
Also used : IRecordMatcher(org.talend.dataquality.record.linkage.record.IRecordMatcher) HashMap(java.util.HashMap) SurvivorShipAlgorithmParams(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams) SurvivorshipFunction(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction) EList(org.eclipse.emf.common.util.EList) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

SurvivorshipFunction (org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction)6 SurvivorShipAlgorithmParams (org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 List (java.util.List)2 Map (java.util.Map)2 EList (org.eclipse.emf.common.util.EList)2 MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn)2 IRecordMatcher (org.talend.dataquality.record.linkage.record.IRecordMatcher)2 DefaultSurvivorshipDefinition (org.talend.dataquality.rules.DefaultSurvivorshipDefinition)2 ParticularDefaultSurvivorshipDefinitions (org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions)2 SurvivorshipKeyDefinition (org.talend.dataquality.rules.SurvivorshipKeyDefinition)2 Test (org.junit.Test)1 RecordMatchingIndicator (org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)1 AnalysisMatchRecordGrouping (org.talend.dataquality.record.linkage.grouping.AnalysisMatchRecordGrouping)1 MatchGroupResultConsumer (org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer)1 DQMFBRecordMatcher (org.talend.dataquality.record.linkage.grouping.swoosh.DQMFBRecordMatcher)1 CombinedRecordMatcher (org.talend.dataquality.record.linkage.record.CombinedRecordMatcher)1 AlgorithmDefinition (org.talend.dataquality.rules.AlgorithmDefinition)1 MatchRuleDefinition (org.talend.dataquality.rules.MatchRuleDefinition)1