Search in sources :

Example 1 with SurvivorshipFunction

use of org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction in project tdq-studio-se by Talend.

the class AnalysisRecordGroupingUtilsTest method testCreateSurvivorShipAlgorithmParams.

/**
 * Test method for
 * {@link org.talend.dq.analysis.AnalysisRecordGroupingUtils#createSurvivorShipAlgorithmParams(org.talend.dataquality.record.linkage.grouping.AnalysisMatchRecordGrouping, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, Map)}.
 * <p>
 * Fixture built by this test:
 * <ul>
 * <li>two survivorship keys, "a1" and "a2", both using the "Longest" algorithm;</li>
 * <li>one default survivorship definition for data type "String" using "MostCommon";</li>
 * <li>one particular default survivorship definition for column "a2" using "Concatenate";</li>
 * <li>three columns "a1", "a2", "a3" of Talend type "id_String" mapped to indices "0", "1", "2";</li>
 * <li>one match rule with three match keys: "a1" (EXACT), "a2" (DUMMY) and "a3" (DUMMY).</li>
 * </ul>
 * The test then checks the sizes of the generated structures and the algorithm resolved for each
 * of the three columns.
 */
@Test
public void testCreateSurvivorShipAlgorithmParams() {
    // Survivorship keys
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition createMatchRuleDefinition = RulesFactory.eINSTANCE.createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(createMatchRuleDefinition);
    EList<SurvivorshipKeyDefinition> survivorshipKeys = createMatchRuleDefinition.getSurvivorshipKeys();
    AlgorithmDefinition createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition.setAlgorithmParameters(""); // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("Longest"); // $NON-NLS-1$
    survivorshipKeys.add(createKeyDefinition("a1", createAlgorithmDefinition)); // $NON-NLS-1$
    createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition.setAlgorithmParameters(""); // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("Longest"); // $NON-NLS-1$
    survivorshipKeys.add(createKeyDefinition("a2", createAlgorithmDefinition)); // $NON-NLS-1$

    // Default survivorship definition (per data type)
    EList<DefaultSurvivorshipDefinition> defaultSurvivorshipDefinitions = createMatchRuleDefinition.getDefaultSurvivorshipDefinitions();
    createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition.setAlgorithmParameters(""); // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("MostCommon"); // $NON-NLS-1$
    defaultSurvivorshipDefinitions.add(createDefaultsurvivShip("String", createAlgorithmDefinition)); // $NON-NLS-1$

    // Particular default survivorship definition (per column)
    EList<ParticularDefaultSurvivorshipDefinitions> particularDefaultSurvivorshipDefinitions = createMatchRuleDefinition.getParticularDefaultSurvivorshipDefinitions();
    createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition.setAlgorithmParameters(""); // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("Concatenate"); // $NON-NLS-1$
    particularDefaultSurvivorshipDefinitions.add(createParticularDefaultSurvivorshipDefinitions("a2", createAlgorithmDefinition)); // $NON-NLS-1$

    // init columnMap: column -> column index (as a string)
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName("a1"); // $NON-NLS-1$
    col0.setTalendType("id_String"); // $NON-NLS-1$
    columnMap.put(col0, "0"); // $NON-NLS-1$
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setTalendType("id_String"); // $NON-NLS-1$
    col1.setName("a2"); // $NON-NLS-1$
    columnMap.put(col1, "1"); // $NON-NLS-1$
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setTalendType("id_String"); // $NON-NLS-1$
    col2.setName("a3"); // $NON-NLS-1$
    columnMap.put(col2, "2"); // $NON-NLS-1$

    MatchGroupResultConsumer matchGroupResultConsumer = new MatchGroupResultConsumer(true) {

        @Override
        public void handle(Object row) {
            // no need to implement
        }
    };
    AnalysisMatchRecordGrouping analysisMatchRecordGrouping = new AnalysisMatchRecordGrouping(matchGroupResultConsumer);
    CombinedRecordMatcher combinedRecordMatcher = analysisMatchRecordGrouping.getCombinedRecordMatcher();
    DQMFBRecordMatcher dqmfbRecordMatcher = new DQMFBRecordMatcher(0.9);
    combinedRecordMatcher.getMatchers().add(dqmfbRecordMatcher);

    // One match rule made of three match keys, one per column.
    List<List<Map<String, String>>> multiMatchRules = analysisMatchRecordGrouping.getMultiMatchRules();
    List<Map<String, String>> matchRuleList = new ArrayList<Map<String, String>>();
    Map<String, String> matchKeyMap1 = new HashMap<String, String>();
    Map<String, String> matchKeyMap2 = new HashMap<String, String>();
    Map<String, String> matchKeyMap3 = new HashMap<String, String>();
    matchKeyMap1.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.EXACT.name());
    // a2 and a3 are DUMMY keys; the assertions at the end expect their algorithms to come from the
    // particular default (a2 -> Concatenate) and the default (a3 -> MostCommon) definitions.
    matchKeyMap2.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name());
    matchKeyMap3.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name());
    matchRuleList.add(matchKeyMap1);
    matchRuleList.add(matchKeyMap2);
    matchRuleList.add(matchKeyMap3);
    matchKeyMap1.put(IRecordGrouping.MATCH_KEY_NAME, "a1"); // $NON-NLS-1$
    matchKeyMap2.put(IRecordGrouping.MATCH_KEY_NAME, "a2"); // $NON-NLS-1$
    matchKeyMap3.put(IRecordGrouping.MATCH_KEY_NAME, "a3"); // $NON-NLS-1$
    matchKeyMap1.put(IRecordGrouping.COLUMN_IDX, "0"); // $NON-NLS-1$
    matchKeyMap2.put(IRecordGrouping.COLUMN_IDX, "1"); // $NON-NLS-1$
    matchKeyMap3.put(IRecordGrouping.COLUMN_IDX, "2"); // $NON-NLS-1$
    multiMatchRules.add(matchRuleList);

    SurvivorShipAlgorithmParams createSurvivorShipAlgorithmParams = AnalysisRecordGroupingUtils.createSurvivorShipAlgorithmParams(analysisMatchRecordGrouping, recordMatchingIndicator, columnMap);

    Assert.assertEquals("The size of SurvivorShipAlgos should be 2", // $NON-NLS-1$
            2, createSurvivorShipAlgorithmParams.getSurviorShipAlgos().length);
    // expected size = number of columns (3) * number of default definitions (1)
    Assert.assertEquals("The size of DefaultSurviorshipRules should be 3", 3, // $NON-NLS-1$
            createSurvivorShipAlgorithmParams.getDefaultSurviorshipRules().size());
    Map<IRecordMatcher, SurvivorshipFunction[]> survivorshipAlgosMap = createSurvivorShipAlgorithmParams.getSurvivorshipAlgosMap();
    Assert.assertEquals("The size of survivorshipAlgosMap should be 1", 1, survivorshipAlgosMap.size()); // $NON-NLS-1$
    SurvivorshipFunction[] survivorshipFunctions = survivorshipAlgosMap.get(dqmfbRecordMatcher);
    // Fail with a clear message instead of a NullPointerException when the matcher has no entry.
    Assert.assertNotNull("The survivorshipFunctions of the DQMFB matcher should not be null", survivorshipFunctions); // $NON-NLS-1$
    Assert.assertEquals("The size of survivorshipFunctions should be 3", 3, survivorshipFunctions.length); // $NON-NLS-1$
    Assert.assertEquals("The Algorithm of a1 function should be LONGEST", // $NON-NLS-1$
            SurvivorShipAlgorithmEnum.LONGEST, survivorshipFunctions[0].getSurvivorShipAlgoEnum());
    Assert.assertEquals("The Algorithm of a2 function should be Concatenate", // $NON-NLS-1$
            SurvivorShipAlgorithmEnum.CONCATENATE, survivorshipFunctions[1].getSurvivorShipAlgoEnum());
    Assert.assertEquals("The Algorithm of a3 function should be MostCommon", // $NON-NLS-1$
            SurvivorShipAlgorithmEnum.MOST_COMMON, survivorshipFunctions[2].getSurvivorShipAlgoEnum());
}
Also used : AnalysisMatchRecordGrouping(org.talend.dataquality.record.linkage.grouping.AnalysisMatchRecordGrouping) IRecordMatcher(org.talend.dataquality.record.linkage.record.IRecordMatcher) HashMap(java.util.HashMap) DefaultSurvivorshipDefinition(org.talend.dataquality.rules.DefaultSurvivorshipDefinition) SurvivorShipAlgorithmParams(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams) ArrayList(java.util.ArrayList) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) DQMFBRecordMatcher(org.talend.dataquality.record.linkage.grouping.swoosh.DQMFBRecordMatcher) SurvivorshipFunction(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) AlgorithmDefinition(org.talend.dataquality.rules.AlgorithmDefinition) ArrayList(java.util.ArrayList) EList(org.eclipse.emf.common.util.EList) List(java.util.List) ParticularDefaultSurvivorshipDefinitions(org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions) SurvivorshipKeyDefinition(org.talend.dataquality.rules.SurvivorshipKeyDefinition) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) HashMap(java.util.HashMap) Map(java.util.Map) CombinedRecordMatcher(org.talend.dataquality.record.linkage.record.CombinedRecordMatcher) Test(org.junit.Test)

Example 2 with SurvivorshipFunction

use of org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction in project tdq-studio-se by Talend.

the class AnalysisRecordGroupingUtils method putNewSurvFunc.

/**
 * Builds a survivorship function from the given definition and registers it in
 * {@code defaultSurvRules} under the index of {@code metaColumn}.
 *
 * @param columnMap maps each column to its index stored as a string; the value found for
 * {@code metaColumn} is parsed with {@link Integer#valueOf(String)}
 * @param survivorShipAlgorithmParams enclosing instance used to instantiate the new function
 * @param defaultSurvRules target map, keyed by column index, that receives the new function
 * @param metaColumn column whose index becomes the map key
 * @param defSurvDef definition supplying the algorithm parameters and the saved algorithm type
 */
private static void putNewSurvFunc(Map<MetadataColumn, String> columnMap, SurvivorShipAlgorithmParams survivorShipAlgorithmParams, Map<Integer, SurvivorshipFunction> defaultSurvRules, MetadataColumn metaColumn, DefaultSurvivorshipDefinition defSurvDef) {
    SurvivorshipFunction function = survivorShipAlgorithmParams.new SurvivorshipFunction();

    String algorithmParameters = defSurvDef.getFunction().getAlgorithmParameters();
    function.setParameter(algorithmParameters);

    String savedAlgorithmType = defSurvDef.getFunction().getAlgorithmType();
    SurvivorShipAlgorithmEnum algorithm = SurvivorShipAlgorithmEnum.getTypeBySavedValue(savedAlgorithmType);
    function.setSurvivorShipAlgoEnum(algorithm);

    // The column index is kept as text in columnMap; convert it to the Integer key of the rule map.
    Integer columnIndex = Integer.valueOf(columnMap.get(metaColumn));
    defaultSurvRules.put(columnIndex, function);
}
Also used : SurvivorshipFunction(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction)

Example 3 with SurvivorshipFunction

use of org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction in project tdq-studio-se by Talend.

the class AnalysisMatchParameterAdapter method getAllSurvivorshipFunctions.

/*
 * (non-Javadoc)
 *
 * Converts every survivorship key definition of the built-in match rule definition into a
 * SurvivorshipFunction carrying the key name, the algorithm parameters and the algorithm enum
 * resolved from the saved algorithm type. The returned list follows the order of the definitions.
 *
 * @see org.talend.dataquality.record.linkage.grouping.adapter.MatchParameterAdapter#getAllSurvivorshipFunctions()
 */
@Override
public List<SurvivorshipFunction> getAllSurvivorshipFunctions() {
    List<SurvivorshipKeyDefinition> keyDefinitions = recordMatchingIndicator.getBuiltInMatchRuleDefinition().getSurvivorshipKeys();
    List<SurvivorshipFunction> functions = new ArrayList<SurvivorshipFunction>();
    for (SurvivorshipKeyDefinition keyDefinition : keyDefinitions) {
        // SurvivorshipFunction is an inner class, so it needs an enclosing params instance.
        SurvivorshipFunction function = new SurvivorShipAlgorithmParams().new SurvivorshipFunction();

        String keyName = keyDefinition.getName();
        function.setSurvivorShipKey(keyName);

        String algorithmParameters = keyDefinition.getFunction().getAlgorithmParameters();
        function.setParameter(algorithmParameters);

        String savedAlgorithmType = keyDefinition.getFunction().getAlgorithmType();
        function.setSurvivorShipAlgoEnum(SurvivorShipAlgorithmEnum.getTypeBySavedValue(savedAlgorithmType));

        functions.add(function);
    }
    return functions;
}
Also used : SurvivorShipAlgorithmParams(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams) ArrayList(java.util.ArrayList) SurvivorshipKeyDefinition(org.talend.dataquality.rules.SurvivorshipKeyDefinition) SurvivorshipFunction(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction)

Example 4 with SurvivorshipFunction

use of org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction in project tdq-studio-se by Talend.

the class AnalysisMatchParameterAdapter method getDefaultSurviorShipRules.

/*
 * (non-Javadoc)
 *
 * Builds the default survivorship rules keyed by column index. For each column of columnMap, the
 * first particular (per-column) definition whose column name matches is used; otherwise the first
 * default (per-data-type) definition matching the column's Talend type is used. Columns matching
 * neither get no entry.
 *
 * @see org.talend.dataquality.record.linkage.grouping.adapter.MatchParameterAdapter#getDefaultSurviorShipRules()
 */
@Override
public Map<Integer, SurvivorshipFunction> getDefaultSurviorShipRules() {
    // Per-data-type defaults and per-column ("particular") defaults of the built-in rule definition.
    List<DefaultSurvivorshipDefinition> typeDefaults = recordMatchingIndicator.getBuiltInMatchRuleDefinition().getDefaultSurvivorshipDefinitions();
    EList<ParticularDefaultSurvivorshipDefinitions> columnDefaults = recordMatchingIndicator.getBuiltInMatchRuleDefinition().getParticularDefaultSurvivorshipDefinitions();

    Map<Integer, SurvivorshipFunction> rulesByColumnIndex = new HashMap<Integer, SurvivorshipFunction>();
    for (MetadataColumn column : columnMap.keySet()) {
        // A particular definition naming this column takes precedence over the type-based defaults.
        for (ParticularDefaultSurvivorshipDefinitions columnDefault : columnDefaults) {
            if (columnDefault.getColumn().equals(column.getName())) {
                putNewSurvFunc(columnMap, rulesByColumnIndex, column, columnDefault);
                break;
            }
        }

        // Only fall back to the type-based defaults when no rule is registered for this column index.
        if (!rulesByColumnIndex.containsKey(Integer.valueOf(columnMap.get(column)))) {
            String talendType = column.getTalendType();
            for (DefaultSurvivorshipDefinition typeDefault : typeDefaults) {
                // Either the Talend type is "id_" + the definition's data type, or the definition
                // targets "Number" and the Talend type is a numeric one.
                if (StringUtils.equals(talendType, "id_" + typeDefault.getDataType())
                        || (StringUtils.equals(typeDefault.getDataType(), "Number") && JavaTypesManager.isNumber(talendType))) { // $NON-NLS-1$
                    putNewSurvFunc(columnMap, rulesByColumnIndex, column, typeDefault);
                    break;
                }
            }
        }
        // If no function is defined, the original author notes that the value is taken from one of
        // the records in a group (the first one).
    }
    return rulesByColumnIndex;
}
Also used : MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) ParticularDefaultSurvivorshipDefinitions(org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions) HashMap(java.util.HashMap) DefaultSurvivorshipDefinition(org.talend.dataquality.rules.DefaultSurvivorshipDefinition) SurvivorshipFunction(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction)

Example 5 with SurvivorshipFunction

use of org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction in project tdq-studio-se by Talend.

the class AnalysisMatchParameterAdapter method putNewSurvFunc.

/**
 * Builds a survivorship function from the given definition and registers it in
 * {@code defaultSurvRules} under the index of {@code metaColumn}.
 *
 * @param columnMap maps each column to its index stored as a string; the value found for
 * {@code metaColumn} is parsed with {@link Integer#valueOf(String)}
 * @param defaultSurvRules target map, keyed by column index, that receives the new function
 * @param metaColumn column whose index becomes the map key
 * @param defSurvDef definition supplying the algorithm parameters and the saved algorithm type
 */
private static void putNewSurvFunc(Map<MetadataColumn, String> columnMap, Map<Integer, SurvivorshipFunction> defaultSurvRules, MetadataColumn metaColumn, DefaultSurvivorshipDefinition defSurvDef) {
    // SurvivorshipFunction is an inner class, so a fresh enclosing params instance is created for it.
    SurvivorshipFunction function = new SurvivorShipAlgorithmParams().new SurvivorshipFunction();

    String algorithmParameters = defSurvDef.getFunction().getAlgorithmParameters();
    function.setParameter(algorithmParameters);

    String savedAlgorithmType = defSurvDef.getFunction().getAlgorithmType();
    SurvivorShipAlgorithmEnum algorithm = SurvivorShipAlgorithmEnum.getTypeBySavedValue(savedAlgorithmType);
    function.setSurvivorShipAlgoEnum(algorithm);

    // The column index is kept as text in columnMap; convert it to the Integer key of the rule map.
    Integer columnIndex = Integer.valueOf(columnMap.get(metaColumn));
    defaultSurvRules.put(columnIndex, function);
}
Also used : SurvivorShipAlgorithmParams(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams) SurvivorshipFunction(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction)

Aggregations

SurvivorshipFunction (org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction)6 SurvivorShipAlgorithmParams (org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 List (java.util.List)2 Map (java.util.Map)2 EList (org.eclipse.emf.common.util.EList)2 MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn)2 IRecordMatcher (org.talend.dataquality.record.linkage.record.IRecordMatcher)2 DefaultSurvivorshipDefinition (org.talend.dataquality.rules.DefaultSurvivorshipDefinition)2 ParticularDefaultSurvivorshipDefinitions (org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions)2 SurvivorshipKeyDefinition (org.talend.dataquality.rules.SurvivorshipKeyDefinition)2 Test (org.junit.Test)1 RecordMatchingIndicator (org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)1 AnalysisMatchRecordGrouping (org.talend.dataquality.record.linkage.grouping.AnalysisMatchRecordGrouping)1 MatchGroupResultConsumer (org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer)1 DQMFBRecordMatcher (org.talend.dataquality.record.linkage.grouping.swoosh.DQMFBRecordMatcher)1 CombinedRecordMatcher (org.talend.dataquality.record.linkage.record.CombinedRecordMatcher)1 AlgorithmDefinition (org.talend.dataquality.rules.AlgorithmDefinition)1 MatchRuleDefinition (org.talend.dataquality.rules.MatchRuleDefinition)1