Search in sources :

Example 6 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

From the class ExecuteMatchRuleHandlerTest, method testExecute1.

/**
 * Test method for
 * {@link org.talend.dq.analysis.match.ExecuteMatchRuleHandler#execute}.
 *
 * Scenario: no block key, one match rule holding a single EXACT match key on {@code columnName0}.
 * The four input rows all carry a different first value, so the test expects four result rows, each
 * of them flagged as master data.
 */
@Test
public void testExecute1() {
    // Register the four columns. The string value is presumably the column's index inside each
    // input row -- TODO confirm against ExecuteMatchRuleHandler.
    Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
    MetadataColumn col0 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col0.setName(columnName0);
    columnMap.put(col0, "0"); //$NON-NLS-1$
    MetadataColumn col1 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col1.setName(columnName1);
    columnMap.put(col1, "1"); //$NON-NLS-1$
    MetadataColumn col2 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col2.setName(columnName2);
    columnMap.put(col2, "2"); //$NON-NLS-1$
    MetadataColumn col3 = ConnectionFactory.eINSTANCE.createMetadataColumn();
    col3.setName(columnName3);
    columnMap.put(col3, "3"); //$NON-NLS-1$

    // create match key
    RecordMatchingIndicator recordMatchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition matchRuleDef = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    recordMatchingIndicator.setBuiltInMatchRuleDefinition(matchRuleDef);
    MatchRule createMatchRule1 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition createMatchKeyDefinition1 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    createMatchRule1.getMatchKeys().add(createMatchKeyDefinition1);
    createMatchKeyDefinition1.setColumn(columnName0);
    createMatchKeyDefinition1.setConfidenceWeight(1);
    createMatchKeyDefinition1.setName("rule1.matchkey1"); //$NON-NLS-1$
    createMatchKeyDefinition1.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition createAlgorithmDefinition1 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    createAlgorithmDefinition1.setAlgorithmType(AttributeMatcherType.EXACT.name());
    createMatchKeyDefinition1.setAlgorithm(createAlgorithmDefinition1);
    matchRuleDef.getMatchRules().add(createMatchRule1);

    // input data
    List<Object[]> matchRows = new ArrayList<Object[]>();
    matchRows.add(new String[] { "id1", "name1", "number1", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id2", "name2", "number2", "date2" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id3", "name1", "number3", "date3" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    matchRows.add(new String[] { "id4", "name4", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

    // The block key indicator is left without any key definition: this is the "no block key" case.
    BlockKeyIndicator blockKeyIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler execHandler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(columnMap, recordMatchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> executeResult = execHandler.execute(columnMap, recordMatchingIndicator, matchRows, blockKeyIndicator, matchResultConsumer);

    // Dedicated assertions report the offending value when they fail.
    Assert.assertTrue(executeResult.isOk());
    Assert.assertNull(executeResult.getMessage());
    Assert.assertNotNull(executeResult.getObject());
    MatchGroupResultConsumer resultConsumer = executeResult.getObject();
    List<Object[]> fullMatchResult = resultConsumer.getFullMatchResult();
    Assert.assertEquals(4, fullMatchResult.size());

    // every input data is master data
    for (Object[] resultRow : fullMatchResult) {
        // Index 7 is presumably the "master" flag appended to the 4 input columns -- TODO confirm.
        // String.valueOf turns a null cell into an assertion failure instead of an NPE.
        Assert.assertTrue(Boolean.parseBoolean(String.valueOf(resultRow[7])));
    }
}
Also used : ExecuteMatchRuleHandler(org.talend.dq.analysis.match.ExecuteMatchRuleHandler) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) MatchRule(org.talend.dataquality.rules.MatchRule) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) MatchKeyDefinition(org.talend.dataquality.rules.MatchKeyDefinition) AlgorithmDefinition(org.talend.dataquality.rules.AlgorithmDefinition) Test(org.junit.Test)

Example 7 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

From the class MatchAnalysisExecutorTest, method assertScenario1.

/**
 * Runs the match analysis with a single match rule and checks the group statistics expected for
 * scenario 1.
 *
 * @param matchAnalysisExecutor executor handed to {@code executeAnalysis}
 * @param analysis analysis handed to {@code executeAnalysis}
 * @param name column set as the analyzed element of the indicator
 * @param nameVar forwarded to {@code createMatchIndicatorWithOneMathRule}
 * @param groupQualityThreshold forwarded to {@code createMatchIndicatorWithOneMathRule}
 * @param matchInterval forwarded to {@code createMatchIndicatorWithOneMathRule}
 */
private void assertScenario1(MatchAnalysisExecutor matchAnalysisExecutor, Analysis analysis, MetadataColumn name, String nameVar, double groupQualityThreshold, double matchInterval) {
    // Indicator that receives the analysis result.
    RecordMatchingIndicator indicator = ColumnsetPackage.eINSTANCE.getColumnsetFactory().createRecordMatchingIndicator();
    indicator.setAnalyzedElement(name);
    // Setup as noted by the original author: match key "name", no block key, levenshtein
    // attribute algorithm.
    createMatchIndicatorWithOneMathRule(nameVar, indicator, groupQualityThreshold, matchInterval);
    executeAnalysis(matchAnalysisExecutor, analysis, indicator);

    // Group size -> number of groups of that size; entries are looked up by the size as a string.
    Map<Object, Long> frequencyBySize = indicator.getGroupSize2groupFrequency();
    // One group of 4: "seb".
    long groupsOfFour = frequencyBySize.get(String.valueOf(4));
    assertTrue(groupsOfFour == 1L);
    // Four groups of 1: "Sebastião", "babass", "nico", "nicola".
    long groupsOfOne = frequencyBySize.get(String.valueOf(1));
    assertTrue(groupsOfOne == 4L);
    // Three groups of 2: "sebas", "nicolas", "nigula".
    long groupsOfTwo = frequencyBySize.get(String.valueOf(2));
    assertTrue(groupsOfTwo == 3L);

    // Row count, matched records and suspect records.
    assertTrue(indicator.getCount() == 14);
    assertTrue(indicator.getMatchedRecordCount() == 10);
    assertTrue(indicator.getSuspectRecordCount() == 0);
}
Also used : RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)

Example 8 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

From the class MatchAnalysisExecutorTest, method assertScenario2.

/**
 * Runs the match analysis with a single match rule and checks the group statistics expected for
 * scenario 2.
 *
 * @param matchAnalysisExecutor executor handed to {@code executeAnalysis}
 * @param analysis analysis handed to {@code executeAnalysis}
 * @param name column set as the analyzed element of the indicator
 * @param nameVar forwarded to {@code createMatchIndicatorWithOneMathRule}
 * @param groupQualityThreshold forwarded to {@code createMatchIndicatorWithOneMathRule}
 * @param matchInterval forwarded to {@code createMatchIndicatorWithOneMathRule}
 */
private void assertScenario2(MatchAnalysisExecutor matchAnalysisExecutor, Analysis analysis, MetadataColumn name, String nameVar, double groupQualityThreshold, double matchInterval) {
    // Indicator that receives the analysis result.
    RecordMatchingIndicator indicator = ColumnsetPackage.eINSTANCE.getColumnsetFactory().createRecordMatchingIndicator();
    indicator.setAnalyzedElement(name);
    // Setup as noted by the original author: match key "name", no block key, levenshtein
    // attribute algorithm.
    createMatchIndicatorWithOneMathRule(nameVar, indicator, groupQualityThreshold, matchInterval);
    executeAnalysis(matchAnalysisExecutor, analysis, indicator);

    // Group size -> number of groups of that size; entries are looked up by the size as a string.
    Map<Object, Long> frequencyBySize = indicator.getGroupSize2groupFrequency();
    // One group of 4: "seb".
    long groupsOfFour = frequencyBySize.get(String.valueOf(4));
    assertTrue(groupsOfFour == 1L);
    // Three groups of 1: "Sebastião", "babass", "nico".
    long groupsOfOne = frequencyBySize.get(String.valueOf(1));
    assertTrue(groupsOfOne == 3L);
    // One group of 3: "nicolas" (with "nicola").
    long groupsOfThree = frequencyBySize.get(String.valueOf(3));
    assertTrue(groupsOfThree == 1L);
    // Two groups of 2: "sebas", "nigula".
    long groupsOfTwo = frequencyBySize.get(String.valueOf(2));
    assertTrue(groupsOfTwo == 2L);

    // Row count, matched records and suspect records.
    assertTrue(indicator.getCount() == 14);
    assertTrue(indicator.getMatchedRecordCount() == 11);
    assertTrue(indicator.getSuspectRecordCount() == 0);
}
Also used : RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)

Example 9 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

From the class MatchAnalysisExecutorTest, method assertScenario3.

/**
 * Runs the match analysis with a single match rule and checks the group statistics expected for
 * scenario 3, in which one group is reported as suspect.
 *
 * @param matchAnalysisExecutor executor handed to {@code executeAnalysis}
 * @param analysis analysis handed to {@code executeAnalysis}
 * @param name column set as the analyzed element of the indicator
 * @param nameVar forwarded to {@code createMatchIndicatorWithOneMathRule}
 * @param groupQualityThreshold forwarded to {@code createMatchIndicatorWithOneMathRule}
 * @param matchInterval forwarded to {@code createMatchIndicatorWithOneMathRule}
 */
private void assertScenario3(MatchAnalysisExecutor matchAnalysisExecutor, Analysis analysis, MetadataColumn name, String nameVar, double groupQualityThreshold, double matchInterval) {
    // Indicator that receives the analysis result.
    RecordMatchingIndicator indicator = ColumnsetPackage.eINSTANCE.getColumnsetFactory().createRecordMatchingIndicator();
    indicator.setAnalyzedElement(name);
    // Setup as noted by the original author: match key "name", no block key, levenshtein
    // attribute algorithm.
    createMatchIndicatorWithOneMathRule(nameVar, indicator, groupQualityThreshold, matchInterval);
    executeAnalysis(matchAnalysisExecutor, analysis, indicator);

    // Group size -> number of groups of that size; entries are looked up by the size as a string.
    Map<Object, Long> frequencyBySize = indicator.getGroupSize2groupFrequency();
    // One group of 4: "seb".
    long groupsOfFour = frequencyBySize.get(String.valueOf(4));
    assertTrue(groupsOfFour == 1L);
    // Three groups of 1: "Sebastião", "babass", "nico".
    long groupsOfOne = frequencyBySize.get(String.valueOf(1));
    assertTrue(groupsOfOne == 3L);
    // One group of 3: "nicolas" (with "nicola").
    long groupsOfThree = frequencyBySize.get(String.valueOf(3));
    assertTrue(groupsOfThree == 1L);
    // Two groups of 2: "sebas", "nigula".
    long groupsOfTwo = frequencyBySize.get(String.valueOf(2));
    assertTrue(groupsOfTwo == 2L);

    // Row count, matched records and suspect records.
    assertTrue(indicator.getCount() == 14);
    assertTrue(indicator.getMatchedRecordCount() == 8);
    // The group of 3 around "nicolas" ("nicola") is suspect: group score 0.9 < 0.95.
    assertTrue(indicator.getSuspectRecordCount() == 3);
}
Also used : RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)

Example 10 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

From the class ItemRecord, method includeCustomMatcherJarDependencies.

/**
 * Looks up the record-matching indicator of the given analysis, takes its built-in match rule
 * definition and delegates to the {@code includeCustomMatcherJarDependencies} overload that accepts
 * a {@code MatchRuleDefinition}.
 *
 * @param matchAnalysis the analysis whose record-matching indicator is read
 */
private void includeCustomMatcherJarDependencies(Analysis matchAnalysis) {
    // The local keeps the declared MatchRuleDefinition type so the same overload is selected.
    MatchRuleDefinition ruleDefinition = AnalysisHelper.getRecordMatchIndicatorFromAna(matchAnalysis)
            .getBuiltInMatchRuleDefinition();
    includeCustomMatcherJarDependencies(ruleDefinition);
}
Also used : MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)

Aggregations

RecordMatchingIndicator (org.talend.dataquality.indicators.columnset.RecordMatchingIndicator): 32; ArrayList (java.util.ArrayList): 9; MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn): 8; MatchRuleDefinition (org.talend.dataquality.rules.MatchRuleDefinition): 8; BlockKeyIndicator (org.talend.dataquality.indicators.columnset.BlockKeyIndicator): 7; MatchGroupResultConsumer (org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer): 7; BlockKeyDefinition (org.talend.dataquality.rules.BlockKeyDefinition): 7; HashMap (java.util.HashMap): 6; AlgorithmDefinition (org.talend.dataquality.rules.AlgorithmDefinition): 6; MatchRule (org.talend.dataquality.rules.MatchRule): 6; Test (org.junit.Test): 5; Indicator (org.talend.dataquality.indicators.Indicator): 5; MatchKeyDefinition (org.talend.dataquality.rules.MatchKeyDefinition): 5; ExecuteMatchRuleHandler (org.talend.dq.analysis.match.ExecuteMatchRuleHandler): 5; ReturnCode (org.talend.utils.sugars.ReturnCode): 3; TypedReturnCode (org.talend.utils.sugars.TypedReturnCode): 3; GridLayout (org.eclipse.swt.layout.GridLayout): 2; Composite (org.eclipse.swt.widgets.Composite): 2; Analysis (org.talend.dataquality.analysis.Analysis): 2; KeyDefinition (org.talend.dataquality.rules.KeyDefinition): 2