Search in sources :

Example 6 with BlockKeyIndicator

use of org.talend.dataquality.indicators.columnset.BlockKeyIndicator in project tdq-studio-se by Talend.

the class ExecuteMatchRuleHandlerTest method testExecute2.

/**
 * Test method for
 * {@link org.talend.dq.analysis.ExecuteMatchRuleHandler#execute(java.util.Map, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, java.util.List, org.talend.dataquality.indicators.columnset.BlockKeyIndicator)}
 * .
 *
 * one block key one match key
 */
@Test
public void testExecute2() {
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    // $NON-NLS-1$
    columnMap.put(col0, "0");
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    // $NON-NLS-1$
    columnMap.put(col1, "1");
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    // $NON-NLS-1$
    columnMap.put(col2, "2");
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    // $NON-NLS-1$
    columnMap.put(col3, "3");
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
    // create match key
    MatchRule createMatchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    createMatchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName2);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    // $NON-NLS-1$
    createMatchKeyDefinition1.setName("rule1.matchkey1");
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(createMatchRule1);
    // create block key
    BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
    createBlockKeyDefinition.setColumn(columnName1);
    // $NON-NLS-1$
    createBlockKeyDefinition.setName("blockKey1");
    // setPreAlgorithm
    AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
    // setAlgorithm
    AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
    createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
    // setPostAlgorithm
    AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
    matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);
    List<Object[]> matchRows = new ArrayList<Object[]>();
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name1", "number2", "date2" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name2", "number2", "date3" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name2", "number2", "date1" });
    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);
    Assert.assertTrue(executeResult.isOk());
    Assert.assertTrue(executeResult.getMessage() == null);
    Assert.assertTrue(executeResult.getObject() != null);
    MatchGroupResultConsumer ResultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = ResultConsumer.getFullMatchResult();
    Assert.assertTrue(fullMatchResult.size() == 4);
    for (int i = 0; i < fullMatchResult.size(); i++) {
        Object[] objectArray = fullMatchResult.get(i);
        Object masterValue = objectArray[7];
        Object idValue = objectArray[0];
        // judge id1 is master id2 is master id3 is master and id4 is not master
        if ("id4".equals(idValue)) {
            // $NON-NLS-1$
            Assert.assertFalse(Boolean.parseBoolean(masterValue.toString()));
        } else {
            Assert.assertTrue(Boolean.parseBoolean(masterValue.toString()));
        }
    }
}
Also used : ExecuteMatchRuleHandler(org.talend.dq.analysis.match.ExecuteMatchRuleHandler) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) MatchRule(org.talend.dataquality.rules.MatchRule) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) AlgorithmDefinition(org.talend.dataquality.rules.AlgorithmDefinition) BlockKeyDefinition(org.talend.dataquality.rules.BlockKeyDefinition) Test(org.junit.Test)

Example 7 with BlockKeyIndicator

use of org.talend.dataquality.indicators.columnset.BlockKeyIndicator in project tdq-studio-se by Talend.

the class ExecuteMatchRuleHandlerTest method testExecute3.

/**
 * Test method for
 * {@link org.talend.dq.analysis.ExecuteMatchRuleHandler#execute(java.util.Map, org.talend.dataquality.indicators.columnset.RecordMatchingIndicator, java.util.List, org.talend.dataquality.indicators.columnset.BlockKeyIndicator)}
 * .
 *
 * one block key, two match rule
 */
@Test
public void testExecute3() {
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    // $NON-NLS-1$
    columnMap.put(col0, "0");
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    // $NON-NLS-1$
    columnMap.put(col1, "1");
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    // $NON-NLS-1$
    columnMap.put(col2, "2");
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    // $NON-NLS-1$
    columnMap.put(col3, "3");
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
    // create match rule
    MatchRule matchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName2);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    // $NON-NLS-1$
    createMatchKeyDefinition1.setName("rule1.matchkey1");
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(matchRule1);
    // create match rule
    MatchRule matchRule2 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition2 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    matchRule2.getMatchKeys().add(createMatchKeyDefinition2);
    createMatchKeyDefinition2.setColumn(columnName3);
    createMatchKeyDefinition2.setConfidenceWeight(1);
    // $NON-NLS-1$
    createMatchKeyDefinition2.setName("rule1.matchkey1");
    createMatchKeyDefinition2.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition2.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition2.setAlgorithm(createAlgorithmDefinition2);
    matchRuleDef.getMatchRules().add(matchRule2);
    // create block key
    BlockKeyDefinition createBlockKeyDefinition = RulesFactory.eINSTANCE.createBlockKeyDefinition();
    createBlockKeyDefinition.setColumn(columnName1);
    // $NON-NLS-1$
    createBlockKeyDefinition.setName("blockKey1");
    // setPreAlgorithm
    AlgorithmDefinition blockPreAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPreAlgorithm.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPreAlgorithm(blockPreAlgorithm);
    // setAlgorithm
    AlgorithmDefinition blockAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockAlgorithm.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
    createBlockKeyDefinition.setAlgorithm(blockAlgorithm);
    // setPostAlgorithm
    AlgorithmDefinition blockPostAlgorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    blockPostAlgorithm.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
    createBlockKeyDefinition.setPostAlgorithm(blockPostAlgorithm);
    matchRuleDef.getBlockKeys().add(createBlockKeyDefinition);
    List<Object[]> matchRows = new ArrayList<Object[]>();
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name1", "number2", "date1" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name2", "number2", "date3" });
    // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name2", "number2", "date1" });
    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);
    Assert.assertTrue(executeResult.isOk());
    Assert.assertTrue(executeResult.getMessage() == null);
    Assert.assertTrue(executeResult.getObject() != null);
    MatchGroupResultConsumer ResultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = ResultConsumer.getFullMatchResult();
    Assert.assertTrue(fullMatchResult.size() == 4);
    for (int i = 0; i < fullMatchResult.size(); i++) {
        Object[] objectArray = fullMatchResult.get(i);
        Object masterValue = objectArray[7];
        Object idValue = objectArray[0];
        // id2 is because of matchRule1 id4 is because of matchRule2
        if ("id2".equals(idValue) || "id4".equals(idValue)) {
            // $NON-NLS-1$ //$NON-NLS-2$
            Assert.assertFalse(Boolean.parseBoolean(masterValue.toString()));
        } else {
            Assert.assertTrue(Boolean.parseBoolean(masterValue.toString()));
        }
    }
}
Also used : ExecuteMatchRuleHandler(org.talend.dq.analysis.match.ExecuteMatchRuleHandler) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) MatchRule(org.talend.dataquality.rules.MatchRule) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) AlgorithmDefinition(org.talend.dataquality.rules.AlgorithmDefinition) BlockKeyDefinition(org.talend.dataquality.rules.BlockKeyDefinition) Test(org.junit.Test)

Example 8 with BlockKeyIndicator

use of org.talend.dataquality.indicators.columnset.BlockKeyIndicator in project tdq-studio-se by Talend.

the class AbstractMatchKeyWithChartTableSection method computeMatchResult.

protected TypedReturnCode<RecordMatchingIndicator> computeMatchResult() {
    TypedReturnCode<RecordMatchingIndicator> rc = new TypedReturnCode<RecordMatchingIndicator>(false);
    final Object[] IndicatorList = MatchRuleAnlaysisUtils.getNeedIndicatorFromAna(analysis);
    final RecordMatchingIndicator recordMatchingIndicator = EcoreUtil.copy((RecordMatchingIndicator) IndicatorList[0]);
    BlockKeyIndicator blockKeyIndicator = EcoreUtil.copy((BlockKeyIndicator) IndicatorList[1]);
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(recordMatchingIndicator);
    // Set match key schema to the record matching indicator.
    MetadataColumn[] completeColumnSchema = AnalysisRecordGroupingUtils.getCompleteColumnSchema(columnMap);
    String[] colSchemaString = new String[completeColumnSchema.length];
    int idx = 0;
    for (MetadataColumn metadataCol : completeColumnSchema) {
        colSchemaString[idx++] = metadataCol.getName();
    }
    recordMatchingIndicator.setMatchRowSchema(colSchemaString);
    recordMatchingIndicator.reset();
    TypedReturnCode<MatchGroupResultConsumer> execute = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);
    if (!execute.isOk()) {
        rc.setMessage(DefaultMessagesImpl.getString("RunAnalysisAction.failRunAnalysis", analysis.getName(), // $NON-NLS-1$
        execute.getMessage()));
        return rc;
    } else {
        if (execute.getObject().getFullMatchResult() == null) {
            return rc;
        }
        // TDQ-9741, the "chart" result must be stored for hiding group(not compute again)
        tableResult = execute.getObject().getFullMatchResult();
        groupSize2groupFrequency = recordMatchingIndicator.getGroupSize2groupFrequency();
    }
    rc.setOk(true);
    rc.setObject(recordMatchingIndicator);
    return rc;
}
Also used : ExecuteMatchRuleHandler(org.talend.dq.analysis.match.ExecuteMatchRuleHandler) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) TypedReturnCode(org.talend.utils.sugars.TypedReturnCode)

Aggregations

BlockKeyIndicator (org.talend.dataquality.indicators.columnset.BlockKeyIndicator)8 RecordMatchingIndicator (org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)7 MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn)6 MatchGroupResultConsumer (org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer)6 ExecuteMatchRuleHandler (org.talend.dq.analysis.match.ExecuteMatchRuleHandler)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 Test (org.junit.Test)4 AlgorithmDefinition (org.talend.dataquality.rules.AlgorithmDefinition)4 MatchKeyDefinition (org.talend.dataquality.rules.MatchKeyDefinition)4 MatchRule (org.talend.dataquality.rules.MatchRule)4 MatchRuleDefinition (org.talend.dataquality.rules.MatchRuleDefinition)4 BlockKeyDefinition (org.talend.dataquality.rules.BlockKeyDefinition)3 Indicator (org.talend.dataquality.indicators.Indicator)2 TypedReturnCode (org.talend.utils.sugars.TypedReturnCode)2 File (java.io.File)1 SQLException (java.sql.SQLException)1 NullProgressMonitor (org.eclipse.core.runtime.NullProgressMonitor)1 BusinessException (org.talend.commons.exception.BusinessException)1 ISQLExecutor (org.talend.cwm.db.connection.ISQLExecutor)1