Search in sources :

Example 1 with MatchKeyDefinition

use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.

the class ExecuteMatchRuleHandlerTest method testExecute4.

/**
 * Test method for the {@code execute(...)} method of
 * {@link org.talend.dq.analysis.match.ExecuteMatchRuleHandler}, called here with the column map, the record
 * matching indicator, the input rows, the block key indicator and a
 * {@link org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer}.
 *
 * Same as case 3, but the order of the two match rules is exchanged: the rule on {@code columnName3} is registered
 * before the rule on {@code columnName2}. One block key on {@code columnName1} is used.
 */
@Test
public void testExecute4() {
    // map every metadata column to its position in the input rows
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    columnMap.put(col0, "0"); //$NON-NLS-1$
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    columnMap.put(col1, "1"); //$NON-NLS-1$
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    columnMap.put(col2, "2"); //$NON-NLS-1$
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    columnMap.put(col3, "3"); //$NON-NLS-1$

    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);

    // first registered match rule: exact match on columnName3
    MatchRule matchRule2 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition2 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule2.getMatchKeys().add(createMatchKeyDefinition2);
    createMatchKeyDefinition2.setColumn(columnName3);
    createMatchKeyDefinition2.setConfidenceWeight(1);
    createMatchKeyDefinition2.setName("rule1.matchkey1"); //$NON-NLS-1$
    createMatchKeyDefinition2.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition2.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition2.setAlgorithm(createAlgorithmDefinition2);
    matchRuleDef.getMatchRules().add(matchRule2);

    // second registered match rule: exact match on columnName2
    MatchRule matchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName2);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    createMatchKeyDefinition1.setName("rule1.matchkey1"); //$NON-NLS-1$
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(matchRule1);

    // block key on columnName1 with no pre/post processing and an exact blocking algorithm
    BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
    createBlockKeyDefinition.setColumn(columnName1);
    createBlockKeyDefinition.setName("blockKey1"); //$NON-NLS-1$
    AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
    AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
    createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
    AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
    matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);

    // input data
    List<Object[]> matchRows = new ArrayList<Object[]>();
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name1", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name2", "number2", "date3" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name2", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator,
            matchRows, blockKeyIndicator, matchResultConsumer);

    // dedicated assertions report the actual value when they fail
    Assert.assertTrue(executeResult.isOk());
    Assert.assertNull(executeResult.getMessage());
    Assert.assertNotNull(executeResult.getObject());
    MatchGroupResultConsumer resultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = resultConsumer.getFullMatchResult();
    Assert.assertEquals(4, fullMatchResult.size());

    for (Object[] resultRow : fullMatchResult) {
        Object idValue = resultRow[0];
        // index 7 is read as the master flag of the result row
        boolean isMaster = Boolean.parseBoolean(resultRow[7].toString());
        // id2 and id4 are the records expected to be matched by one of the two rules, so they must not be masters
        if ("id2".equals(idValue) || "id4".equals(idValue)) { //$NON-NLS-1$ //$NON-NLS-2$
            Assert.assertFalse("record " + idValue + " should not be a master", isMaster); //$NON-NLS-1$ //$NON-NLS-2$
        } else {
            Assert.assertTrue("record " + idValue + " should be a master", isMaster); //$NON-NLS-1$ //$NON-NLS-2$
        }
    }
}
Also used : ExecuteMatchRuleHandler(org.talend.dq.analysis.match.ExecuteMatchRuleHandler) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) MatchRule(org.talend.dataquality.rules.MatchRule) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) AlgorithmDefinition(org.talend.dataquality.rules.AlgorithmDefinition) BlockKeyDefinition(org.talend.dataquality.rules.BlockKeyDefinition) Test(org.junit.Test)

Example 2 with MatchKeyDefinition

use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.

the class ExecuteMatchRuleHandlerTest method testExecute1.

/**
 * Test method for the {@code execute(...)} method of
 * {@link org.talend.dq.analysis.match.ExecuteMatchRuleHandler}, called here with the column map, the record
 * matching indicator, the input rows, the block key indicator and a
 * {@link org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer}.
 *
 * No block key, one match key (exact match on {@code columnName0}).
 */
@Test
public void testExecute1() {
    // map every metadata column to its position in the input rows
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    columnMap.put(col0, "0"); //$NON-NLS-1$
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    columnMap.put(col1, "1"); //$NON-NLS-1$
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    columnMap.put(col2, "2"); //$NON-NLS-1$
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    columnMap.put(col3, "3"); //$NON-NLS-1$

    // create match key
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
    MatchRule createMatchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    createMatchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName0);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    createMatchKeyDefinition1.setName("rule1.matchkey1"); //$NON-NLS-1$
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(createMatchRule1);

    // input data
    List<Object[]> matchRows = new ArrayList<Object[]>();
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name2", "number2", "date2" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name1", "number3", "date3" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name4", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator,
            matchRows, blockKeyIndicator, matchResultConsumer);

    // dedicated assertions report the actual value when they fail
    Assert.assertTrue(executeResult.isOk());
    Assert.assertNull(executeResult.getMessage());
    Assert.assertNotNull(executeResult.getObject());
    MatchGroupResultConsumer resultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = resultConsumer.getFullMatchResult();
    Assert.assertEquals(4, fullMatchResult.size());

    // every input row is expected to be a master record (index 7 is read as the master flag);
    // presumably because the values at position 0, the matched column, are all distinct
    for (Object[] resultRow : fullMatchResult) {
        Object masterValue = resultRow[7];
        Assert.assertTrue("record " + resultRow[0] + " should be a master", //$NON-NLS-1$ //$NON-NLS-2$
                Boolean.parseBoolean(masterValue.toString()));
    }
}
Also used : ExecuteMatchRuleHandler(org.talend.dq.analysis.match.ExecuteMatchRuleHandler) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) MatchRule(org.talend.dataquality.rules.MatchRule) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) AlgorithmDefinition(org.talend.dataquality.rules.AlgorithmDefinition) Test(org.junit.Test)

Example 3 with MatchKeyDefinition

use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.

the class AnaMatchSurvivorSection method generateSurvivorKeyByMatchKey.

/**
 * Brings the cached list of match-key/survivor-key pairs of a match rule in line with the rule's current match
 * keys and returns that list.
 *
 * The list is looked up in {@code matchRuleWithSurvMap}; when the rule has no entry yet, an empty list is created
 * and registered. The match keys are then walked in order, and each key is compared by name with the pair stored
 * at the same position.
 *
 * @param matchRule the rule whose match keys drive the synchronisation
 * @param isMustCreateSurvivor forwarded unchanged to {@code updateSurvivorKey} and
 * {@code createMatchAndSurvivorKey}
 * @return the (possibly newly created) list stored in {@code matchRuleWithSurvMap} for the rule
 */
protected List<MatchKeyAndSurvivorDefinition> generateSurvivorKeyByMatchKey(MatchRule matchRule, boolean isMustCreateSurvivor) {
    List<MatchKeyAndSurvivorDefinition> pairs = matchRuleWithSurvMap.get(matchRule);
    if (pairs == null) {
        pairs = new ArrayList<MatchKeyAndSurvivorDefinition>();
        matchRuleWithSurvMap.put(matchRule, pairs);
    }

    int position = -1;
    for (MatchKeyDefinition key : matchRule.getMatchKeys()) {
        position++;

        // no pair stored at this position yet: a new one has to be created
        if (position >= pairs.size()) {
            createMatchAndSurvivorKey(key, isMustCreateSurvivor, pairs);
            continue;
        }

        // the pair at the same position carries the same name: the key did not move, just refresh it
        MatchKeyAndSurvivorDefinition atPosition = pairs.get(position);
        if (StringUtils.equals(key.getName(), atPosition.getMatchKey().getName())) {
            atPosition.setMatchKey(key);
            updateSurvivorKey(isMustCreateSurvivor, key.getName(), atPosition);
            continue;
        }

        // names differ: look for the pair that belongs to this key elsewhere in the list
        MatchKeyAndSurvivorDefinition relocated = findPositionOfCurrentMatchkey(key, pairs);
        if (relocated == null) {
            // nothing found, so the key is treated as a new one
            createMatchAndSurvivorKey(key, isMustCreateSurvivor, pairs);
            continue;
        }

        // take the pair out of its old slot, refresh it and re-insert it at the key's current position
        pairs.remove(relocated);
        relocated.setMatchKey(key);
        updateSurvivorKey(isMustCreateSurvivor, key.getName(), relocated);
        pairs.add(position, relocated);
    }
    return pairs;
}
Also used : MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) MatchKeyAndSurvivorDefinition(org.talend.dataquality.record.linkage.ui.composite.tableviewer.definition.MatchKeyAndSurvivorDefinition)

Example 4 with MatchKeyDefinition

use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.

the class MatchingKeySection method createMatchRuleByCopy.

/**
 * Creates a new match rule from an existing one.
 *
 * Each match key of {@code oldRule} is duplicated with {@code EcoreUtil.copy} and added to the new rule, and the
 * match interval is carried over. No other property of {@code oldRule} is transferred by this method.
 *
 * @param oldRule the rule to copy from
 * @return the newly created rule
 */
private MatchRule createMatchRuleByCopy(MatchRule oldRule) {
    MatchRule duplicate = RulesFactory.eINSTANCE.createMatchRule();
    // iterating an empty key list is a no-op, so only the null case needs a guard
    if (oldRule.getMatchKeys() != null) {
        for (MatchKeyDefinition sourceKey : oldRule.getMatchKeys()) {
            duplicate.getMatchKeys().add(EcoreUtil.copy(sourceKey));
        }
    }
    duplicate.setMatchInterval(oldRule.getMatchInterval());
    return duplicate;
}
Also used : MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) MatchRule(org.talend.dataquality.rules.MatchRule)

Example 5 with MatchKeyDefinition

use of org.talend.dataquality.rules.MatchKeyDefinition in project tdq-studio-se by Talend.

the class MatchKeyAndSurvivorshipTableViewer method createNewKeyDefinition.

/*
 * Creates a match key (via MatchRuleAnlaysisUtils.createDefaultMatchRow) and a survivorship key (via
 * createNewSurvivorshipKeyDefinition) for the given column, wraps both in one MatchKeyAndSurvivorDefinition and
 * registers them in the model: the match key on the current match rule, the survivorship key on the rule's
 * container.
 *
 * NOTE(review): the container of matchRule is cast to MatchRuleDefinition without a check, so this assumes the
 * rule is already contained in a MatchRuleDefinition - confirm against callers.
 */
@Override
protected MatchKeyAndSurvivorDefinition createNewKeyDefinition(String columnName) {
    MatchKeyDefinition newMatchKey = MatchRuleAnlaysisUtils.createDefaultMatchRow(columnName);
    SurvivorshipKeyDefinition newSurvivorshipKey = createNewSurvivorshipKeyDefinition(columnName);

    MatchKeyAndSurvivorDefinition keyPair = new MatchKeyAndSurvivorDefinition();
    keyPair.setMatchKey(newMatchKey);
    keyPair.setSurvivorShipKey(newSurvivorshipKey);

    // register the match key on the rule itself
    matchRule.getMatchKeys().add(keyPair.getMatchKey());

    // survivorship keys are stored on the object that contains the rule
    MatchRuleDefinition owningDefinition = (MatchRuleDefinition) matchRule.eContainer();
    owningDefinition.getSurvivorshipKeys().add(keyPair.getSurvivorShipKey());
    return keyPair;
}
Also used : MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) SurvivorshipKeyDefinition(org.talend.dataquality.rules.SurvivorshipKeyDefinition)

Aggregations

MatchKeyDefinition (org.talend.dataquality.rules.MatchKeyDefinition)19 MatchRule (org.talend.dataquality.rules.MatchRule)12 ArrayList (java.util.ArrayList)9 HashMap (java.util.HashMap)7 AlgorithmDefinition (org.talend.dataquality.rules.AlgorithmDefinition)7 MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn)6 MatchRuleDefinition (org.talend.dataquality.rules.MatchRuleDefinition)6 RecordMatchingIndicator (org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)5 Test (org.junit.Test)4 BlockKeyIndicator (org.talend.dataquality.indicators.columnset.BlockKeyIndicator)4 MatchGroupResultConsumer (org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer)4 BlockKeyDefinition (org.talend.dataquality.rules.BlockKeyDefinition)4 ExecuteMatchRuleHandler (org.talend.dq.analysis.match.ExecuteMatchRuleHandler)4 Map (java.util.Map)2 TableItem (org.eclipse.swt.widgets.TableItem)2 AttributeMatcherType (org.talend.dataquality.record.linkage.constant.AttributeMatcherType)2 TokenizedResolutionMethod (org.talend.dataquality.record.linkage.constant.TokenizedResolutionMethod)2 MatchKeyAndSurvivorDefinition (org.talend.dataquality.record.linkage.ui.composite.tableviewer.definition.MatchKeyAndSurvivorDefinition)2 HandleNullEnum (org.talend.dataquality.record.linkage.utils.HandleNullEnum)2 SurvivorshipKeyDefinition (org.talend.dataquality.rules.SurvivorshipKeyDefinition)2