Search in sources :

Example 1 with MatchGroupResultConsumer

use of org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer in project tdq-studio-se by Talend.

the class AnalysisRecordGroupingUtilsTest method testCreateSurvivorShipAlgorithmParams.

/**
 * Test method for
 * {@link org.talend.dq.analysis.AnalysisRecordGroupingUtils#createSurvivorShipAlgorithmParams(org.talend.dataquality.record.linkage.grouping.AnalysisMatchRecordGrouping, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, Map)
 * .
 */
@Test
public void testCreateSurvivorShipAlgorithmParams() {
    // Survivorshipkey
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition createMatchRuleDefinition = RulesFactory.eINSTANCE.createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(createMatchRuleDefinition);
    EList<SurvivorshipKeyDefinition> survivorshipKeys = createMatchRuleDefinition.getSurvivorshipKeys();
    AlgorithmDefinition createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmParameters("");
    // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("Longest");
    // $NON-NLS-1$
    survivorshipKeys.add(createKeyDefinition("a1", createAlgorithmDefinition));
    createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmParameters("");
    // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("Longest");
    // $NON-NLS-1$
    survivorshipKeys.add(createKeyDefinition("a2", createAlgorithmDefinition));
    // DefaultSurvivorship
    EList<DefaultSurvivorshipDefinition> defaultSurvivorshipDefinitions = createMatchRuleDefinition.getDefaultSurvivorshipDefinitions();
    createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmParameters("");
    // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("MostCommon");
    // $NON-NLS-1$
    defaultSurvivorshipDefinitions.add(createDefaultsurvivShip("String", createAlgorithmDefinition));
    // DefaultSurvivorship
    EList<ParticularDefaultSurvivorshipDefinitions> particularDefaultSurvivorshipDefinitions = createMatchRuleDefinition.getParticularDefaultSurvivorshipDefinitions();
    createAlgorithmDefinition = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmParameters("");
    // $NON-NLS-1$
    createAlgorithmDefinition.setAlgorithmType("Concatenate");
    particularDefaultSurvivorshipDefinitions.add(createParticularDefaultSurvivorshipDefinitions("a2", // $NON-NLS-1$
    createAlgorithmDefinition));
    // init columnMap
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    // $NON-NLS-1$
    col0.setName("a1");
    // $NON-NLS-1$
    col0.setTalendType("id_String");
    // $NON-NLS-1$
    columnMap.put(col0, "0");
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    // $NON-NLS-1$
    col1.setTalendType("id_String");
    // $NON-NLS-1$
    col1.setName("a2");
    // $NON-NLS-1$
    columnMap.put(col1, "1");
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    // $NON-NLS-1$
    col2.setTalendType("id_String");
    // $NON-NLS-1$
    col2.setName("a3");
    // $NON-NLS-1$
    columnMap.put(col2, "2");
    MatchGroupResultConsumer matchGroupResultConsumer = new MatchGroupResultConsumer(true) {

        @Override
        public void handle(Object row) {
        // no need to implement
        }
    };
    AnalysisMatchRecordGrouping analysisMatchRecordGrouping = new AnalysisMatchRecordGrouping(matchGroupResultConsumer);
    CombinedRecordMatcher combinedRecordMatcher = analysisMatchRecordGrouping.getCombinedRecordMatcher();
    DQMFBRecordMatcher dqmfbRecordMatcher = new DQMFBRecordMatcher(0.9);
    combinedRecordMatcher.getMatchers().add(dqmfbRecordMatcher);
    List<List<Map<String, String>>> multiMatchRules = analysisMatchRecordGrouping.getMultiMatchRules();
    List<Map<String, String>> matchRuleList = new ArrayList<Map<String, String>>();
    Map<String, String> matchKeyMap1 = new HashMap<String, String>();
    Map<String, String> matchKeyMap2 = new HashMap<String, String>();
    Map<String, String> matchKeyMap3 = new HashMap<String, String>();
    matchKeyMap1.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.EXACT.name());
    // change by
    matchKeyMap2.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name());
    // DefaultSurvivorshipDefinitions
    // change by
    matchKeyMap3.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name());
    // ParticularDefaultSurvivorshipDefinitions
    matchRuleList.add(matchKeyMap1);
    matchRuleList.add(matchKeyMap2);
    matchRuleList.add(matchKeyMap3);
    // $NON-NLS-1$
    matchKeyMap1.put(IRecordGrouping.MATCH_KEY_NAME, "a1");
    // $NON-NLS-1$
    matchKeyMap2.put(IRecordGrouping.MATCH_KEY_NAME, "a2");
    // $NON-NLS-1$
    matchKeyMap3.put(IRecordGrouping.MATCH_KEY_NAME, "a3");
    // $NON-NLS-1$
    matchKeyMap1.put(IRecordGrouping.COLUMN_IDX, "0");
    // $NON-NLS-1$
    matchKeyMap2.put(IRecordGrouping.COLUMN_IDX, "1");
    // $NON-NLS-1$
    matchKeyMap3.put(IRecordGrouping.COLUMN_IDX, "2");
    multiMatchRules.add(matchRuleList);
    SurvivorShipAlgorithmParams createSurvivorShipAlgorithmParams = AnalysisRecordGroupingUtils.createSurvivorShipAlgorithmParams(analysisMatchRecordGrouping, recordMatchingIndicator, columnMap);
    // $NON-NLS-1$
    Assert.assertEquals(// $NON-NLS-1$
    "The size of SurvivorShipAlgos should be 2", // $NON-NLS-1$
    2, createSurvivorShipAlgorithmParams.getSurviorShipAlgos().length);
    // the size of default survivorshipRules is come from by (column size * default item size)
    Assert.assertEquals("The size of DefaultSurviorshipRules should be 3", 3, // $NON-NLS-1$
    createSurvivorShipAlgorithmParams.getDefaultSurviorshipRules().size());
    Map<IRecordMatcher, SurvivorshipFunction[]> survivorshipAlgosMap = createSurvivorShipAlgorithmParams.getSurvivorshipAlgosMap();
    // $NON-NLS-1$
    Assert.assertEquals("The size of survivorshipAlgosMap should be 1", 1, survivorshipAlgosMap.size());
    SurvivorshipFunction[] survivorshipFunctions = survivorshipAlgosMap.get(dqmfbRecordMatcher);
    // $NON-NLS-1$
    Assert.assertEquals("The size of survivorshipFunctions should be 3", 3, survivorshipFunctions.length);
    // $NON-NLS-1$
    Assert.assertEquals(// $NON-NLS-1$
    "The Algorithm of a1 function should be LONGEST", // $NON-NLS-1$
    SurvivorShipAlgorithmEnum.LONGEST, survivorshipFunctions[0].getSurvivorShipAlgoEnum());
    // $NON-NLS-1$
    Assert.assertEquals(// $NON-NLS-1$
    "The Algorithm of a2 function should be Concatenate", // $NON-NLS-1$
    SurvivorShipAlgorithmEnum.CONCATENATE, survivorshipFunctions[1].getSurvivorShipAlgoEnum());
    // $NON-NLS-1$
    Assert.assertEquals(// $NON-NLS-1$
    "The Algorithm of a3 function should be MostCommon", // $NON-NLS-1$
    SurvivorShipAlgorithmEnum.MOST_COMMON, survivorshipFunctions[2].getSurvivorShipAlgoEnum());
}
Also used : AnalysisMatchRecordGrouping(org.talend.dataquality.record.linkage.grouping.AnalysisMatchRecordGrouping) IRecordMatcher(org.talend.dataquality.record.linkage.record.IRecordMatcher) HashMap(java.util.HashMap) DefaultSurvivorshipDefinition(org.talend.dataquality.rules.DefaultSurvivorshipDefinition) SurvivorShipAlgorithmParams(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams) ArrayList(java.util.ArrayList) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) DQMFBRecordMatcher(org.talend.dataquality.record.linkage.grouping.swoosh.DQMFBRecordMatcher) SurvivorshipFunction(org.talend.dataquality.record.linkage.grouping.swoosh.SurvivorShipAlgorithmParams.SurvivorshipFunction) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) AlgorithmDefinition(org.talend.dataquality.rules.AlgorithmDefinition) ArrayList(java.util.ArrayList) EList(org.eclipse.emf.common.util.EList) List(java.util.List) ParticularDefaultSurvivorshipDefinitions(org.talend.dataquality.rules.ParticularDefaultSurvivorshipDefinitions) SurvivorshipKeyDefinition(org.talend.dataquality.rules.SurvivorshipKeyDefinition) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) HashMap(java.util.HashMap) Map(java.util.Map) CombinedRecordMatcher(org.talend.dataquality.record.linkage.record.CombinedRecordMatcher) Test(org.junit.Test)

Example 2 with MatchGroupResultConsumer

use of org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer in project tdq-studio-se by Talend.

the class ExecuteMatchRuleHandlerTest method testExecute4.

/**
 * Test method for
 * {@link org.talend.dq.analysis.ExecuteMatchRuleHandler#execute(java.util.Map, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, java.util.List, org.talend.dataquality.indicators.columnset.BlockKeyIndicator)}
 * .
 *
 * same to case 3 but the match rule order is exchange
 */
@Test
public void testExecute4() {
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    // $NON-NLS-1$
    columnMap.put(col0, "0");
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    // $NON-NLS-1$
    columnMap.put(col1, "1");
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    // $NON-NLS-1$
    columnMap.put(col2, "2");
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    // $NON-NLS-1$
    columnMap.put(col3, "3");
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
    // create match rule
    MatchRule matchRule2 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition2 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule2.getMatchKeys().add(createMatchKeyDefinition2);
    createMatchKeyDefinition2.setColumn(columnName3);
    createMatchKeyDefinition2.setConfidenceWeight(1);
    // $NON-NLS-1$
    createMatchKeyDefinition2.setName("rule1.matchkey1");
    createMatchKeyDefinition2.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition2.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition2.setAlgorithm(createAlgorithmDefinition2);
    matchRuleDef.getMatchRules().add(matchRule2);
    // create match rule
    MatchRule matchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName2);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    // $NON-NLS-1$
    createMatchKeyDefinition1.setName("rule1.matchkey1");
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(matchRule1);
    // create block key
    BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
    createBlockKeyDefinition.setColumn(columnName1);
    // $NON-NLS-1$
    createBlockKeyDefinition.setName("blockKey1");
    // setPreAlgorithm
    AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
    // setAlgorithm
    AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
    createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
    // setPostAlgorithm
    AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
    matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);
    List<Object[]> matchRows = new ArrayList<Object[]>();
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name1", "number2", "date1" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name2", "number2", "date3" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name2", "number2", "date1" });
    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);
    Assert.assertTrue(executeResult.isOk());
    Assert.assertTrue(executeResult.getMessage() == null);
    Assert.assertTrue(executeResult.getObject() != null);
    MatchGroupResultConsumer ResultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = ResultConsumer.getFullMatchResult();
    Assert.assertTrue(fullMatchResult.size() == 4);
    for (int i = 0; i < fullMatchResult.size(); i++) {
        Object[] objectArray = fullMatchResult.get(i);
        Object masterValue = objectArray[7];
        Object idValue = objectArray[0];
        // id2 is because of matchRule1 id4 is because of matchRule2
        if ("id2".equals(idValue) || "id4".equals(idValue)) {
            // $NON-NLS-1$ //$NON-NLS-2$
            Assert.assertFalse(Boolean.parseBoolean(masterValue.toString()));
        } else {
            Assert.assertTrue(Boolean.parseBoolean(masterValue.toString()));
        }
    }
}
Also used : ExecuteMatchRuleHandler(org.talend.dq.analysis.match.ExecuteMatchRuleHandler) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) MatchRule(org.talend.dataquality.rules.MatchRule) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) AlgorithmDefinition(org.talend.dataquality.rules.AlgorithmDefinition) BlockKeyDefinition(org.talend.dataquality.rules.BlockKeyDefinition) Test(org.junit.Test)

Example 3 with MatchGroupResultConsumer

use of org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer in project tdq-studio-se by Talend.

the class ExecuteMatchRuleHandlerTest method testExecute1.

/**
 * Test method for
 * {@link org.talend.dq.analysis.ExecuteMatchRuleHandler#execute(java.util.Map, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, java.util.List, org.talend.dataquality.indicators.columnset.BlockKeyIndicator)}
 * .
 *
 * no block key one match key
 */
@Test
public void testExecute1() {
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    // $NON-NLS-1$
    columnMap.put(col0, "0");
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    // $NON-NLS-1$
    columnMap.put(col1, "1");
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    // $NON-NLS-1$
    columnMap.put(col2, "2");
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    // $NON-NLS-1$
    columnMap.put(col3, "3");
    // create match key
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
    MatchRule createMatchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    createMatchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName0);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    // $NON-NLS-1$
    createMatchKeyDefinition1.setName("rule1.matchkey1");
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(createMatchRule1);
    // input data
    List<Object[]> matchRows = new ArrayList<Object[]>();
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name2", "number2", "date2" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name1", "number3", "date3" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name4", "number2", "date1" });
    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);
    Assert.assertTrue(executeResult.isOk());
    Assert.assertTrue(executeResult.getMessage() == null);
    Assert.assertTrue(executeResult.getObject() != null);
    MatchGroupResultConsumer ResultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = ResultConsumer.getFullMatchResult();
    Assert.assertTrue(fullMatchResult.size() == 4);
    // every input data is master data
    for (Object[] objectArray : fullMatchResult) {
        Object object = objectArray[7];
        Assert.assertTrue(Boolean.parseBoolean(object.toString()));
    }
}
Also used : ExecuteMatchRuleHandler(org.talend.dq.analysis.match.ExecuteMatchRuleHandler) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) MatchRule(org.talend.dataquality.rules.MatchRule) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) AlgorithmDefinition(org.talend.dataquality.rules.AlgorithmDefinition) Test(org.junit.Test)

Example 4 with MatchGroupResultConsumer

use of org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer in project tdq-studio-se by Talend.

the class ExecuteMatchRuleHandler method execute.

public TypedReturnCode<MatchGroupResultConsumer> execute(Map<MetadataColumn, String> columnMap, RecordMatchingIndicator recordMatchingIndicator, List<Object[]> matchRows, BlockKeyIndicator blockKeyIndicator, MatchGroupResultConsumer matchResultConsumer) {
    TypedReturnCode<MatchGroupResultConsumer> returnCode = new TypedReturnCode<MatchGroupResultConsumer>(false);
    returnCode.setObject(matchResultConsumer);
    // By default for analysis, the applied blocking key will be the key
    // from key generation definition. This
    // will be refined when there is a need to define the applied blocking
    // key manually by user later.
    AnalysisRecordGroupingUtils.createAppliedBlockKeyByGenKey(recordMatchingIndicator);
    ReturnCode computeMatchGroupReturnCode = null;
    // Blocking key specified.
    computeMatchGroupReturnCode = computeMatchGroupWithBlockKey(recordMatchingIndicator, blockKeyIndicator, columnMap, matchResultConsumer, matchRows);
    returnCode.setOk(computeMatchGroupReturnCode.isOk());
    returnCode.setMessage(computeMatchGroupReturnCode.getMessage());
    return returnCode;
}
Also used : ReturnCode(org.talend.utils.sugars.ReturnCode) TypedReturnCode(org.talend.utils.sugars.TypedReturnCode) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) TypedReturnCode(org.talend.utils.sugars.TypedReturnCode)

Example 5 with MatchGroupResultConsumer

use of org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer in project tdq-studio-se by Talend.

the class MatchAnalysisExecutor method execute.

/*
     * (non-Javadoc)
     * 
     * @see org.talend.dq.analysis.IAnalysisExecutor#execute(org.talend.dataquality.analysis.Analysis)
     */
public ReturnCode execute(Analysis analysis) {
    assert analysis != null;
    // --- preconditions
    ReturnCode rc = AnalysisExecutorHelper.check(analysis);
    if (!rc.isOk()) {
        AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
        return rc;
    }
    // --- creation time
    final long startime = AnalysisExecutorHelper.setExecutionDateInAnalysisResult(analysis);
    EList<Indicator> indicators = analysis.getResults().getIndicators();
    RecordMatchingIndicator recordMatchingIndicator = null;
    BlockKeyIndicator blockKeyIndicator = null;
    for (Indicator ind : indicators) {
        if (ind instanceof RecordMatchingIndicator) {
            recordMatchingIndicator = (RecordMatchingIndicator) ind;
        } else if (ind instanceof BlockKeyIndicator) {
            blockKeyIndicator = (BlockKeyIndicator) ind;
        }
    }
    if (recordMatchingIndicator == null || blockKeyIndicator == null) {
        rc.setOk(Boolean.FALSE);
        // $NON-NLS-1$
        rc.setMessage(Messages.getString("MatchAnalysisExecutor.noIndicators"));
        AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
        return rc;
    }
    List<ModelElement> anlayzedElements = analysis.getContext().getAnalysedElements();
    if (anlayzedElements == null || anlayzedElements.size() == 0) {
        rc.setOk(Boolean.FALSE);
        // $NON-NLS-1$
        rc.setMessage(Messages.getString("MatchAnalysisExecutor.EmptyAnalyzedElement"));
        AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
        return rc;
    }
    // TDQ-9664 msjian: check the store on disk path.
    Boolean isStoreOnDisk = TaggedValueHelper.getValueBoolean(SQLExecutor.STORE_ON_DISK_KEY, analysis);
    if (isStoreOnDisk) {
        String tempDataPath = TaggedValueHelper.getValueString(SQLExecutor.TEMP_DATA_DIR, analysis);
        File file = new File(tempDataPath);
        if (!file.exists() || !file.isDirectory()) {
            rc.setOk(Boolean.FALSE);
            // $NON-NLS-1$
            rc.setMessage(Messages.getString("MatchAnalysisExecutor.InvalidPath", file.getPath()));
            AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
            return rc;
        }
    }
    // TDQ-9664~
    Map<MetadataColumn, String> columnMap = getColumn2IndexMap(anlayzedElements);
    ISQLExecutor sqlExecutor = getSQLExectutor(analysis, recordMatchingIndicator, columnMap);
    if (sqlExecutor == null) {
        rc.setOk(Boolean.FALSE);
        // $NON-NLS-1$
        rc.setMessage(Messages.getString("MatchAnalysisExecutor.noSqlExecutor"));
        AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
        return rc;
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    // Set schema for match key.
    TypedReturnCode<MatchGroupResultConsumer> returnCode = new TypedReturnCode<MatchGroupResultConsumer>();
    MetadataColumn[] completeColumnSchema = AnalysisRecordGroupingUtils.getCompleteColumnSchema(columnMap);
    String[] colSchemaString = new String[completeColumnSchema.length];
    int idx = 0;
    for (MetadataColumn metadataCol : completeColumnSchema) {
        colSchemaString[idx++] = metadataCol.getName();
    }
    recordMatchingIndicator.setMatchRowSchema(colSchemaString);
    recordMatchingIndicator.reset();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(recordMatchingIndicator);
    if (sqlExecutor.isStoreOnDisk()) {
        // need to execute the query
        try {
            sqlExecutor.executeQuery(analysis.getContext().getConnection(), analysis.getContext().getAnalysedElements());
        } catch (SQLException e) {
            log.error(e, e);
            rc.setOk(Boolean.FALSE);
            rc.setMessage(e.getMessage());
            AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
            return rc;
        }
        try {
            TypedReturnCode<Object> result = StoreOnDiskUtils.getDefault().executeWithStoreOnDisk(columnMap, recordMatchingIndicator, blockKeyIndicator, sqlExecutor.getStoreOnDiskHandler(), matchResultConsumer);
            if (result != null) {
                returnCode.setObject((MatchGroupResultConsumer) result.getObject());
                returnCode.setOk(result.isOk());
                returnCode.setMessage(result.getMessage());
            }
        } catch (Exception e) {
            log.error(e, e);
            returnCode.setMessage(e.getMessage());
            returnCode.setOk(false);
        }
    } else {
        // Added TDQ-9320 , use the result set iterator to replace the list of result in the memory.
        try {
            Iterator<Record> resultSetIterator = sqlExecutor.getResultSetIterator(analysis.getContext().getConnection(), anlayzedElements);
            BlockAndMatchManager bAndmManager = new BlockAndMatchManager(resultSetIterator, matchResultConsumer, columnMap, recordMatchingIndicator, blockKeyIndicator);
            bAndmManager.run();
        } catch (SQLException e) {
            log.error(e, e);
            rc.setOk(Boolean.FALSE);
            rc.setMessage(e.getMessage());
            AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
            return rc;
        } catch (BusinessException e) {
            log.error(e, e);
            rc.setOk(Boolean.FALSE);
            rc.setMessage(e.getMessage());
            AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
            return rc;
        }
    }
    if (!returnCode.isOk()) {
        rc.setOk(returnCode.isOk());
        rc.setMessage(returnCode.getMessage());
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    if (isLowMemory) {
        // $NON-NLS-1$
        rc.setMessage(Messages.getString("Evaluator.OutOfMomory", usedMemory));
    }
    // nodify the master page
    refreshTableWithMatchFullResult(analysis);
    // --- set metadata information of analysis
    AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
    // --- compute execution duration
    if (this.continueRun()) {
        long endtime = System.currentTimeMillis();
        final ExecutionInformations resultMetadata = analysis.getResults().getResultMetadata();
        resultMetadata.setExecutionDuration((int) (endtime - startime));
        resultMetadata.setOutThreshold(false);
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    return rc;
}
Also used : BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) ISQLExecutor(org.talend.cwm.db.connection.ISQLExecutor) SQLException(java.sql.SQLException) ModelElement(orgomg.cwm.objectmodel.core.ModelElement) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) BusinessException(org.talend.commons.exception.BusinessException) Record(org.talend.dataquality.matchmerge.Record) TypedReturnCode(org.talend.utils.sugars.TypedReturnCode) ReturnCode(org.talend.utils.sugars.ReturnCode) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) Indicator(org.talend.dataquality.indicators.Indicator) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) SQLException(java.sql.SQLException) BusinessException(org.talend.commons.exception.BusinessException) ExecutionInformations(org.talend.dataquality.analysis.ExecutionInformations) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) TypedReturnCode(org.talend.utils.sugars.TypedReturnCode) BlockAndMatchManager(org.talend.dq.analysis.match.BlockAndMatchManager) File(java.io.File)

Aggregations

MatchGroupResultConsumer (org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer)10 MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn)8 RecordMatchingIndicator (org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)7 BlockKeyIndicator (org.talend.dataquality.indicators.columnset.BlockKeyIndicator)6 ArrayList (java.util.ArrayList)5 HashMap (java.util.HashMap)5 Test (org.junit.Test)5 AlgorithmDefinition (org.talend.dataquality.rules.AlgorithmDefinition)5 MatchRuleDefinition (org.talend.dataquality.rules.MatchRuleDefinition)5 ExecuteMatchRuleHandler (org.talend.dq.analysis.match.ExecuteMatchRuleHandler)5 MatchKeyDefinition (org.talend.dataquality.rules.MatchKeyDefinition)4 MatchRule (org.talend.dataquality.rules.MatchRule)4 BlockKeyDefinition (org.talend.dataquality.rules.BlockKeyDefinition)3 TypedReturnCode (org.talend.utils.sugars.TypedReturnCode)3 ReturnCode (org.talend.utils.sugars.ReturnCode)2 File (java.io.File)1 SQLException (java.sql.SQLException)1 List (java.util.List)1 Map (java.util.Map)1 EList (org.eclipse.emf.common.util.EList)1