Use of org.talend.dataquality.indicators.columnset.BlockKeyIndicator in project tdq-studio-se by Talend.
Class ExecuteMatchRuleHandlerTest, method testExecute4.
/**
 * Test method for {@link org.talend.dq.analysis.ExecuteMatchRuleHandler}: one block key combined with two
 * single-key match rules.
 *
 * Same as case 3, but the order of the two match rules is exchanged: the rule keyed on {@code columnName3} is
 * registered before the rule keyed on {@code columnName2}. The block key is defined on {@code columnName1}.
 * The rows with ids "id2" and "id4" are expected to be reported as non-master records; every other row is
 * expected to be a master record.
 */
@Test
public void testExecute4() {
    // Map each analysed column to its position inside an input row.
    Map<MetadataColumn, String> column2Index = new HashMap<MetadataColumn, String>();
    MetadataColumn firstCol = ConnectionFactory.eINSTANCE.createMetadataColumn();
    firstCol.setName(columnName0);
    column2Index.put(firstCol, "0"); //$NON-NLS-1$
    MetadataColumn secondCol = ConnectionFactory.eINSTANCE.createMetadataColumn();
    secondCol.setName(columnName1);
    column2Index.put(secondCol, "1"); //$NON-NLS-1$
    MetadataColumn thirdCol = ConnectionFactory.eINSTANCE.createMetadataColumn();
    thirdCol.setName(columnName2);
    column2Index.put(thirdCol, "2"); //$NON-NLS-1$
    MetadataColumn fourthCol = ConnectionFactory.eINSTANCE.createMetadataColumn();
    fourthCol.setName(columnName3);
    column2Index.put(fourthCol, "3"); //$NON-NLS-1$

    // Indicator carrying the built-in match rule definition under test.
    RecordMatchingIndicator matchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition ruleDefinition = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    matchingIndicator.setBuiltInMatchRuleDefinition(ruleDefinition);

    // First registered rule: exact match on columnName3.
    MatchRule ruleOnColumn3 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition keyOnColumn3 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    ruleOnColumn3.getMatchKeys().add(keyOnColumn3);
    keyOnColumn3.setColumn(columnName3);
    keyOnColumn3.setConfidenceWeight(1);
    keyOnColumn3.setName("rule1.matchkey1"); //$NON-NLS-1$
    keyOnColumn3.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition exactOnColumn3 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    exactOnColumn3.setAlgorithmType(AttributeMatcherType.EXACT.name());
    keyOnColumn3.setAlgorithm(exactOnColumn3);
    ruleDefinition.getMatchRules().add(ruleOnColumn3);

    // Second registered rule: exact match on columnName2.
    MatchRule ruleOnColumn2 = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition keyOnColumn2 = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    ruleOnColumn2.getMatchKeys().add(keyOnColumn2);
    keyOnColumn2.setColumn(columnName2);
    keyOnColumn2.setConfidenceWeight(1);
    keyOnColumn2.setName("rule1.matchkey1"); //$NON-NLS-1$
    keyOnColumn2.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition exactOnColumn2 = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    exactOnColumn2.setAlgorithmType(AttributeMatcherType.EXACT.name());
    keyOnColumn2.setAlgorithm(exactOnColumn2);
    ruleDefinition.getMatchRules().add(ruleOnColumn2);

    // Block key on columnName1 with no pre/post processing and an exact key algorithm.
    BlockKeyDefinition blockKey = RulesFactory.eINSTANCE.createBlockKeyDefinition();
    blockKey.setColumn(columnName1);
    blockKey.setName("blockKey1"); //$NON-NLS-1$
    AlgorithmDefinition preAlgo = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    preAlgo.setAlgorithmType(BlockingKeyPreAlgorithmEnum.NON_ALGO.getValue());
    blockKey.setPreAlgorithm(preAlgo);
    AlgorithmDefinition keyAlgo = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    keyAlgo.setAlgorithmType(BlockingKeyAlgorithmEnum.EXACT.getValue());
    blockKey.setAlgorithm(keyAlgo);
    AlgorithmDefinition postAlgo = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    postAlgo.setAlgorithmType(BlockingKeyPostAlgorithmEnum.NON_ALGO.getValue());
    blockKey.setPostAlgorithm(postAlgo);
    ruleDefinition.getBlockKeys().add(blockKey);

    // Input data: four rows of four values each.
    List<Object[]> inputRows = new ArrayList<Object[]>();
    inputRows.add(new String[] { "id1", "name1", "number1", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    inputRows.add(new String[] { "id2", "name1", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    inputRows.add(new String[] { "id3", "name2", "number2", "date3" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    inputRows.add(new String[] { "id4", "name2", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

    BlockKeyIndicator blockingIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler handler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer consumer = createMatchGroupResultConsumer(column2Index, matchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> outcome = handler.execute(column2Index, matchingIndicator, inputRows,
            blockingIndicator, consumer);

    Assert.assertTrue(outcome.isOk());
    Assert.assertNull(outcome.getMessage());
    Assert.assertNotNull(outcome.getObject());

    List<Object[]> resultRows = outcome.getObject().getFullMatchResult();
    Assert.assertEquals(4, resultRows.size());
    for (Object[] resultRow : resultRows) {
        // Index 7 is read as the master flag and index 0 as the row id.
        Object masterFlag = resultRow[7];
        Object rowId = resultRow[0];
        // "id2" and "id4" are each expected to be matched by one of the two rules, so they are not masters.
        boolean expectedDuplicate = "id2".equals(rowId) || "id4".equals(rowId); //$NON-NLS-1$ //$NON-NLS-2$
        if (expectedDuplicate) {
            Assert.assertFalse(Boolean.parseBoolean(masterFlag.toString()));
        } else {
            Assert.assertTrue(Boolean.parseBoolean(masterFlag.toString()));
        }
    }
}
Use of org.talend.dataquality.indicators.columnset.BlockKeyIndicator in project tdq-studio-se by Talend.
Class ExecuteMatchRuleHandlerTest, method testExecute1.
/**
 * Test method for {@link org.talend.dq.analysis.ExecuteMatchRuleHandler}: no block key and a single match key.
 *
 * The only match rule is an exact match on {@code columnName0}. The four input rows all carry a different
 * value at index 0, and the test expects every returned row to be flagged as a master record.
 */
@Test
public void testExecute1() {
    // Map each analysed column to its position inside an input row.
    Map<MetadataColumn, String> column2Index = new HashMap<MetadataColumn, String>();
    MetadataColumn firstCol = ConnectionFactory.eINSTANCE.createMetadataColumn();
    firstCol.setName(columnName0);
    column2Index.put(firstCol, "0"); //$NON-NLS-1$
    MetadataColumn secondCol = ConnectionFactory.eINSTANCE.createMetadataColumn();
    secondCol.setName(columnName1);
    column2Index.put(secondCol, "1"); //$NON-NLS-1$
    MetadataColumn thirdCol = ConnectionFactory.eINSTANCE.createMetadataColumn();
    thirdCol.setName(columnName2);
    column2Index.put(thirdCol, "2"); //$NON-NLS-1$
    MetadataColumn fourthCol = ConnectionFactory.eINSTANCE.createMetadataColumn();
    fourthCol.setName(columnName3);
    column2Index.put(fourthCol, "3"); //$NON-NLS-1$

    // Single match rule holding one exact-match key on columnName0.
    RecordMatchingIndicator matchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    MatchRuleDefinition ruleDefinition = RulesPackage.eINSTANCE.getRulesFactory().createMatchRuleDefinition();
    matchingIndicator.setBuiltInMatchRuleDefinition(ruleDefinition);
    MatchRule onlyRule = RulesFactory.eINSTANCE.createMatchRule();
    MatchKeyDefinition onlyKey = RulesFactory.eINSTANCE.createMatchKeyDefinition();
    onlyRule.getMatchKeys().add(onlyKey);
    onlyKey.setColumn(columnName0);
    onlyKey.setConfidenceWeight(1);
    onlyKey.setName("rule1.matchkey1"); //$NON-NLS-1$
    onlyKey.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
    AlgorithmDefinition exactAlgo = RulesFactory.eINSTANCE.createAlgorithmDefinition();
    exactAlgo.setAlgorithmType(AttributeMatcherType.EXACT.name());
    onlyKey.setAlgorithm(exactAlgo);
    ruleDefinition.getMatchRules().add(onlyRule);

    // Input data: four rows of four values each.
    List<Object[]> inputRows = new ArrayList<Object[]>();
    inputRows.add(new String[] { "id1", "name1", "number1", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    inputRows.add(new String[] { "id2", "name2", "number2", "date2" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    inputRows.add(new String[] { "id3", "name1", "number3", "date3" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    inputRows.add(new String[] { "id4", "name4", "number2", "date1" }); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

    BlockKeyIndicator blockingIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    ExecuteMatchRuleHandler handler = new ExecuteMatchRuleHandler();
    MatchGroupResultConsumer consumer = createMatchGroupResultConsumer(column2Index, matchingIndicator);
    TypedReturnCode<MatchGroupResultConsumer> outcome = handler.execute(column2Index, matchingIndicator, inputRows,
            blockingIndicator, consumer);

    Assert.assertTrue(outcome.isOk());
    Assert.assertNull(outcome.getMessage());
    Assert.assertNotNull(outcome.getObject());

    List<Object[]> resultRows = outcome.getObject().getFullMatchResult();
    Assert.assertEquals(4, resultRows.size());
    // Every input row must come back as a master record (flag read from index 7).
    for (Object[] resultRow : resultRows) {
        Object masterFlag = resultRow[7];
        Assert.assertTrue(Boolean.parseBoolean(masterFlag.toString()));
    }
}
Use of org.talend.dataquality.indicators.columnset.BlockKeyIndicator in project tdq-studio-se by Talend.
Class MatchAnalysisExecutorTest, method executeAnalysis.
/**
 * Attaches a fresh analysis result to the analysis, registers the given match indicator plus a newly created
 * block key indicator on it, and then runs the executor with a {@link NullProgressMonitor}.
 *
 * @param matchAnalysisExecutor the executor to run
 * @param analysis the analysis that receives the new result and is then executed
 * @param matchIndicator the record matching indicator to register on the analysis result
 */
private void executeAnalysis(MatchAnalysisExecutor matchAnalysisExecutor, Analysis analysis, RecordMatchingIndicator matchIndicator) {
    // Fresh result object with its execution metadata in place.
    AnalysisResult freshResult = AnalysisPackage.eINSTANCE.getAnalysisFactory().createAnalysisResult();
    freshResult.setResultMetadata(AnalysisPackage.eINSTANCE.getAnalysisFactory().createExecutionInformations());
    analysis.setResults(freshResult);

    // Register the match indicator first, then the block key indicator.
    BlockKeyIndicator blockingIndicator = ColumnsetPackage.eINSTANCE.getColumnsetFactory().createBlockKeyIndicator();
    analysis.getResults().getIndicators().add(matchIndicator);
    analysis.getResults().getIndicators().add(blockingIndicator);

    matchAnalysisExecutor.setMonitor(new NullProgressMonitor());
    matchAnalysisExecutor.execute(analysis);
}
Use of org.talend.dataquality.indicators.columnset.BlockKeyIndicator in project tdq-studio-se by Talend.
Class MatchAnalysisExecutor, method execute.
/**
 * Executes a match analysis.
 *
 * The method validates the analysis, looks up its {@link RecordMatchingIndicator} and {@link BlockKeyIndicator},
 * optionally validates the store-on-disk directory, obtains a SQL executor and then runs blocking and matching
 * either through the store-on-disk handler or through an in-memory result set iterator. Whatever the outcome,
 * the ok flag and message are recorded in the analysis result before returning.
 *
 * @param analysis the analysis to execute; must not be null
 * @return the return code: not ok when a precondition fails, when querying the data source fails, or when
 * the store-on-disk execution reports a failure. When the low-memory flag is set, only the message is replaced.
 * @see org.talend.dq.analysis.IAnalysisExecutor#execute(org.talend.dataquality.analysis.Analysis)
 */
public ReturnCode execute(Analysis analysis) {
    assert analysis != null;
    // --- preconditions
    ReturnCode rc = AnalysisExecutorHelper.check(analysis);
    if (!rc.isOk()) {
        AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
        return rc;
    }
    // --- creation time
    final long startime = AnalysisExecutorHelper.setExecutionDateInAnalysisResult(analysis);

    // Both indicators are required; when several of one kind exist, the last one wins.
    EList<Indicator> indicators = analysis.getResults().getIndicators();
    RecordMatchingIndicator recordMatchingIndicator = null;
    BlockKeyIndicator blockKeyIndicator = null;
    for (Indicator ind : indicators) {
        if (ind instanceof RecordMatchingIndicator) {
            recordMatchingIndicator = (RecordMatchingIndicator) ind;
        } else if (ind instanceof BlockKeyIndicator) {
            blockKeyIndicator = (BlockKeyIndicator) ind;
        }
    }
    if (recordMatchingIndicator == null || blockKeyIndicator == null) {
        return failAnalysisExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.noIndicators")); //$NON-NLS-1$
    }

    List<ModelElement> analyzedElements = analysis.getContext().getAnalysedElements();
    if (analyzedElements == null || analyzedElements.size() == 0) {
        return failAnalysisExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.EmptyAnalyzedElement")); //$NON-NLS-1$
    }

    // TDQ-9664 msjian: check the store on disk path.
    Boolean isStoreOnDisk = TaggedValueHelper.getValueBoolean(SQLExecutor.STORE_ON_DISK_KEY, analysis);
    // Boolean.TRUE.equals avoids a NullPointerException from auto-unboxing should the helper return null.
    if (Boolean.TRUE.equals(isStoreOnDisk)) {
        String tempDataPath = TaggedValueHelper.getValueString(SQLExecutor.TEMP_DATA_DIR, analysis);
        File file = new File(tempDataPath);
        if (!file.exists() || !file.isDirectory()) {
            return failAnalysisExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.InvalidPath", file.getPath())); //$NON-NLS-1$
        }
    }
    // TDQ-9664~

    Map<MetadataColumn, String> columnMap = getColumn2IndexMap(analyzedElements);
    ISQLExecutor sqlExecutor = getSQLExectutor(analysis, recordMatchingIndicator, columnMap);
    if (sqlExecutor == null) {
        return failAnalysisExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.noSqlExecutor")); //$NON-NLS-1$
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }

    // Set schema for match key.
    TypedReturnCode<MatchGroupResultConsumer> returnCode = new TypedReturnCode<MatchGroupResultConsumer>();
    MetadataColumn[] completeColumnSchema = AnalysisRecordGroupingUtils.getCompleteColumnSchema(columnMap);
    String[] colSchemaString = new String[completeColumnSchema.length];
    int idx = 0;
    for (MetadataColumn metadataCol : completeColumnSchema) {
        colSchemaString[idx++] = metadataCol.getName();
    }
    recordMatchingIndicator.setMatchRowSchema(colSchemaString);
    recordMatchingIndicator.reset();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(recordMatchingIndicator);

    if (sqlExecutor.isStoreOnDisk()) {
        // need to execute the query
        try {
            sqlExecutor.executeQuery(analysis.getContext().getConnection(), analysis.getContext().getAnalysedElements());
        } catch (SQLException e) {
            log.error(e, e);
            return failAnalysisExecution(analysis, rc, e.getMessage());
        }
        // A failure here does not return early: it is stored in returnCode and copied into rc below.
        try {
            TypedReturnCode<Object> result = StoreOnDiskUtils.getDefault().executeWithStoreOnDisk(columnMap,
                    recordMatchingIndicator, blockKeyIndicator, sqlExecutor.getStoreOnDiskHandler(), matchResultConsumer);
            if (result != null) {
                returnCode.setObject((MatchGroupResultConsumer) result.getObject());
                returnCode.setOk(result.isOk());
                returnCode.setMessage(result.getMessage());
            }
        } catch (Exception e) {
            log.error(e, e);
            returnCode.setMessage(e.getMessage());
            returnCode.setOk(false);
        }
    } else {
        // Added TDQ-9320 , use the result set iterator to replace the list of result in the memory.
        try {
            Iterator<Record> resultSetIterator = sqlExecutor.getResultSetIterator(analysis.getContext().getConnection(),
                    analyzedElements);
            BlockAndMatchManager bAndmManager = new BlockAndMatchManager(resultSetIterator, matchResultConsumer, columnMap,
                    recordMatchingIndicator, blockKeyIndicator);
            bAndmManager.run();
        } catch (SQLException e) {
            log.error(e, e);
            return failAnalysisExecution(analysis, rc, e.getMessage());
        } catch (BusinessException e) {
            log.error(e, e);
            return failAnalysisExecution(analysis, rc, e.getMessage());
        }
    }

    // returnCode is only written by the store-on-disk branch above.
    if (!returnCode.isOk()) {
        rc.setOk(returnCode.isOk());
        rc.setMessage(returnCode.getMessage());
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    if (isLowMemory) {
        // Only the message is replaced; the ok flag is left as it is.
        rc.setMessage(Messages.getString("Evaluator.OutOfMomory", usedMemory)); //$NON-NLS-1$
    }
    // notify the master page
    refreshTableWithMatchFullResult(analysis);
    // --- set metadata information of analysis
    AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
    // --- compute execution duration
    if (this.continueRun()) {
        long endtime = System.currentTimeMillis();
        final ExecutionInformations resultMetadata = analysis.getResults().getResultMetadata();
        resultMetadata.setExecutionDuration((int) (endtime - startime));
        resultMetadata.setOutThreshold(false);
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    return rc;
}

/**
 * Marks the return code as failed with the given message and records that outcome in the analysis result.
 *
 * @param analysis the analysis whose execution information is updated
 * @param rc the return code to flag as not ok
 * @param message the failure message to store in the return code
 * @return the same {@code rc} instance, so callers can return it directly
 */
private ReturnCode failAnalysisExecution(Analysis analysis, ReturnCode rc, String message) {
    rc.setOk(Boolean.FALSE);
    rc.setMessage(message);
    AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
    return rc;
}
Use of org.talend.dataquality.indicators.columnset.BlockKeyIndicator in project tdq-studio-se by Talend.
Class MatchRuleAnlaysisUtils, method getNeedIndicatorFromAna.
/**
 * Looks up the record matching indicator and the block key indicator of an analysis, creating a default
 * instance for whichever one the analysis result does not contain.
 *
 * When the analysis holds several indicators of the same kind, the last one encountered is returned.
 *
 * @param analysis the analysis whose result indicators are inspected
 * @return a two-element array: index 0 is the RecordMatchingIndicator and index 1 is the BlockKeyIndicator;
 * neither element is null
 */
public static Object[] getNeedIndicatorFromAna(Analysis analysis) {
    RecordMatchingIndicator matchingIndicator = null;
    BlockKeyIndicator blockingIndicator = null;
    for (Indicator candidate : analysis.getResults().getIndicators()) {
        if (candidate instanceof RecordMatchingIndicator) {
            matchingIndicator = (RecordMatchingIndicator) candidate;
        } else if (candidate instanceof BlockKeyIndicator) {
            blockingIndicator = (BlockKeyIndicator) candidate;
        }
    }
    // No record matching indicator in the analysis: fall back to a default one.
    if (matchingIndicator == null) {
        matchingIndicator = ColumnsetPackage.eINSTANCE.getColumnsetFactory().createRecordMatchingIndicator();
    }
    // No block key indicator in the analysis: fall back to a default one.
    if (blockingIndicator == null) {
        blockingIndicator = ColumnsetPackage.eINSTANCE.getColumnsetFactory().createBlockKeyIndicator();
    }
    return new Object[] { matchingIndicator, blockingIndicator };
}
Aggregations