use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class MatchAnalysisExecutor method execute.
/**
 * Executes the match analysis and records the outcome in the analysis result.
 * <p>
 * The method checks the analysis, looks up its {@link RecordMatchingIndicator} and {@link BlockKeyIndicator},
 * validates the analyzed elements and (when the store-on-disk tagged value is set) the temporary data directory,
 * then runs the blocking/matching either through the store-on-disk handler or through a result set iterator.
 * Every early failure is written to the analysis result before the method returns.
 *
 * @param analysis the analysis to run; asserted to be non-null
 * @return the return code of the run; not ok (with a message) when a check or the execution failed
 * @see org.talend.dq.analysis.IAnalysisExecutor#execute(org.talend.dataquality.analysis.Analysis)
 */
public ReturnCode execute(Analysis analysis) {
    assert analysis != null;
    // --- preconditions
    ReturnCode rc = AnalysisExecutorHelper.check(analysis);
    if (!rc.isOk()) {
        // rc already carries the failure state produced by the check, so it is recorded unchanged.
        AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
        return rc;
    }

    // --- creation time
    final long startime = AnalysisExecutorHelper.setExecutionDateInAnalysisResult(analysis);

    // Both a record matching indicator and a block key indicator are required; if several of one kind
    // exist, the last one found wins.
    RecordMatchingIndicator recordMatchingIndicator = null;
    BlockKeyIndicator blockKeyIndicator = null;
    for (Indicator ind : analysis.getResults().getIndicators()) {
        if (ind instanceof RecordMatchingIndicator) {
            recordMatchingIndicator = (RecordMatchingIndicator) ind;
        } else if (ind instanceof BlockKeyIndicator) {
            blockKeyIndicator = (BlockKeyIndicator) ind;
        }
    }
    if (recordMatchingIndicator == null || blockKeyIndicator == null) {
        return failExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.noIndicators")); //$NON-NLS-1$
    }

    List<ModelElement> analyzedElements = analysis.getContext().getAnalysedElements();
    if (analyzedElements == null || analyzedElements.size() == 0) {
        return failExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.EmptyAnalyzedElement")); //$NON-NLS-1$
    }

    // TDQ-9664 msjian: check the store on disk path.
    // Boolean.TRUE.equals(..) keeps a missing (null) flag from failing on unboxing.
    if (Boolean.TRUE.equals(TaggedValueHelper.getValueBoolean(SQLExecutor.STORE_ON_DISK_KEY, analysis))) {
        String tempDataPath = TaggedValueHelper.getValueString(SQLExecutor.TEMP_DATA_DIR, analysis);
        // new File((String) null) throws NullPointerException, so a missing path is handled as an invalid one.
        File tempDataDir = tempDataPath == null ? null : new File(tempDataPath);
        if (tempDataDir == null || !tempDataDir.exists() || !tempDataDir.isDirectory()) {
            String reportedPath = tempDataDir == null ? String.valueOf(tempDataPath) : tempDataDir.getPath();
            return failExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.InvalidPath", reportedPath)); //$NON-NLS-1$
        }
    }
    // TDQ-9664~

    Map<MetadataColumn, String> columnMap = getColumn2IndexMap(analyzedElements);
    ISQLExecutor sqlExecutor = getSQLExectutor(analysis, recordMatchingIndicator, columnMap);
    if (sqlExecutor == null) {
        return failExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.noSqlExecutor")); //$NON-NLS-1$
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }

    // Set schema for match key: the indicator receives the names of all columns of the complete schema.
    TypedReturnCode<MatchGroupResultConsumer> returnCode = new TypedReturnCode<MatchGroupResultConsumer>();
    MetadataColumn[] completeColumnSchema = AnalysisRecordGroupingUtils.getCompleteColumnSchema(columnMap);
    String[] colSchemaString = new String[completeColumnSchema.length];
    for (int idx = 0; idx < completeColumnSchema.length; idx++) {
        colSchemaString[idx] = completeColumnSchema[idx].getName();
    }
    recordMatchingIndicator.setMatchRowSchema(colSchemaString);
    recordMatchingIndicator.reset();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(recordMatchingIndicator);

    if (sqlExecutor.isStoreOnDisk()) {
        // need to execute the query
        try {
            sqlExecutor.executeQuery(analysis.getContext().getConnection(), analysis.getContext().getAnalysedElements());
        } catch (SQLException e) {
            log.error(e, e);
            return failExecution(analysis, rc, e.getMessage());
        }
        try {
            TypedReturnCode<Object> result = StoreOnDiskUtils.getDefault().executeWithStoreOnDisk(columnMap,
                    recordMatchingIndicator, blockKeyIndicator, sqlExecutor.getStoreOnDiskHandler(), matchResultConsumer);
            // NOTE(review): a null result leaves returnCode in its freshly constructed state - confirm that
            // this is meant to count as success.
            if (result != null) {
                returnCode.setObject((MatchGroupResultConsumer) result.getObject());
                returnCode.setOk(result.isOk());
                returnCode.setMessage(result.getMessage());
            }
        } catch (Exception e) {
            // Unlike the other failures, this one does not return early: the state is copied into rc below
            // and the remaining steps (table refresh, result metadata) still run.
            log.error(e, e);
            returnCode.setMessage(e.getMessage());
            returnCode.setOk(false);
        }
    } else {
        // Added TDQ-9320 , use the result set iterator to replace the list of result in the memory.
        try {
            Iterator<Record> resultSetIterator = sqlExecutor.getResultSetIterator(analysis.getContext().getConnection(),
                    analyzedElements);
            BlockAndMatchManager bAndmManager = new BlockAndMatchManager(resultSetIterator, matchResultConsumer, columnMap,
                    recordMatchingIndicator, blockKeyIndicator);
            bAndmManager.run();
        } catch (SQLException e) {
            log.error(e, e);
            return failExecution(analysis, rc, e.getMessage());
        } catch (BusinessException e) {
            log.error(e, e);
            return failExecution(analysis, rc, e.getMessage());
        }
    }

    // Propagate a failure reported by the store-on-disk run.
    if (!returnCode.isOk()) {
        rc.setOk(returnCode.isOk());
        rc.setMessage(returnCode.getMessage());
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    if (isLowMemory) {
        // Only the message is replaced here; the ok state of rc is left as it is.
        rc.setMessage(Messages.getString("Evaluator.OutOfMomory", usedMemory)); //$NON-NLS-1$
    }

    // notify the master page
    refreshTableWithMatchFullResult(analysis);

    // --- set metadata information of analysis
    AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());

    // --- compute execution duration
    if (this.continueRun()) {
        long endtime = System.currentTimeMillis();
        final ExecutionInformations resultMetadata = analysis.getResults().getResultMetadata();
        resultMetadata.setExecutionDuration((int) (endtime - startime));
        resultMetadata.setOutThreshold(false);
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    return rc;
}

/**
 * Marks the given return code as failed with the given message and records that state in the analysis result.
 *
 * @param analysis the analysis whose result receives the execution information
 * @param rc the return code to flag as not ok
 * @param message the failure message to store; may be null
 * @return the same {@code rc} instance, so that callers can return it directly
 */
private ReturnCode failExecution(Analysis analysis, ReturnCode rc, String message) {
    rc.setOk(Boolean.FALSE);
    rc.setMessage(message);
    AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
    return rc;
}
use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class MatchWizard method initCWMResourceBuilder.
/**
 * Builds the analysis through the parent wizard and completes it for match analysis: a block key indicator
 * and a record matching indicator (carrying a new built-in match rule definition) are added to the results,
 * and the preview row number tagged value is set from the {@code MAX_NB_ROWS_ANALYSIS_EDITOR} preference.
 *
 * @return the analysis created by the parent wizard, with the indicators and the tagged value added
 * @see org.talend.dataprofiler.core.ui.wizard.analysis.column.ColumnSetWizard#initCWMResourceBuilder()
 */
@Override
public ModelElement initCWMResourceBuilder() {
    final Analysis matchAnalysis = (Analysis) super.initCWMResourceBuilder();

    // The blocking key indicator is registered first.
    final Indicator blockingIndicator = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    matchAnalysis.getResults().getIndicators().add(blockingIndicator);

    // The match rule indicator gets its own, newly created built-in rule definition.
    final RecordMatchingIndicator matchingIndicator = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    matchingIndicator.setBuiltInMatchRuleDefinition(RulesFactory.eINSTANCE.createMatchRuleDefinition());
    matchAnalysis.getResults().getIndicators().add(matchingIndicator);

    // Default loaded row count, taken from the plugin preferences.
    final int defaultRowCount = CorePlugin.getDefault().getPreferenceStore()
            .getInt(PluginConstant.MAX_NB_ROWS_ANALYSIS_EDITOR);
    TaggedValueHelper.setTaggedValue(matchAnalysis, TaggedValueHelper.PREVIEW_ROW_NUMBER,
            String.valueOf(defaultRowCount));
    return matchAnalysis;
}
use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class ExportMatchRuleAction method run.
/**
 * Cheat sheet entry point: when the active editor is a {@code MatchAnalysisEditor}, takes the built-in match
 * rule definition of the analysis shown on its master page, stores it in {@code matchRule} and delegates to
 * the parameterless {@code run()}. Does nothing for any other editor.
 *
 * @param arg0 cheat sheet parameters; not used
 * @param arg1 cheat sheet manager; not used
 * @see org.eclipse.ui.cheatsheets.ICheatSheetAction#run(java.lang.String[],
 * org.eclipse.ui.cheatsheets.ICheatSheetManager)
 */
public void run(String[] arg0, ICheatSheetManager arg1) {
    // NOTE(review): the window/page lookups are chained without null checks - confirm that this action is
    // only ever triggered while a workbench window with an active page exists.
    IEditorPart activeEditor = PlatformUI.getWorkbench().getActiveWorkbenchWindow().getActivePage().getActiveEditor();
    if (!(activeEditor instanceof MatchAnalysisEditor)) {
        return;
    }
    MatchAnalysisEditor matchEditor = (MatchAnalysisEditor) activeEditor;
    MatchAnalysisDetailsPage detailsPage = (MatchAnalysisDetailsPage) matchEditor.getMasterPage();
    RecordMatchingIndicator indicator = MatchRuleAnlaysisUtils
            .getRecordMatchIndicatorFromAna(detailsPage.getCurrentModelElement());
    this.matchRule = indicator.getBuiltInMatchRuleDefinition();
    this.run();
}
use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class BlockingKeySection method isKeyDefinitionAdded.
/**
 * Tells whether one of the block keys of the analysis' built-in match rule definition already uses the given
 * name.
 *
 * @param columnName the name to look for; it is compared with each block key's name
 * @return {@code Boolean.TRUE} when a block key with that name exists, {@code Boolean.FALSE} otherwise
 * @see org.talend.dataquality.record.linkage.ui.section.AbstractMatchAnaysisTableSection#isKeyDefinitionAdded(java.lang.String)
 */
@Override
public Boolean isKeyDefinitionAdded(String columnName) {
    RecordMatchingIndicator indicator = MatchRuleAnlaysisUtils.getRecordMatchIndicatorFromAna(analysis);
    for (KeyDefinition blockKey : indicator.getBuiltInMatchRuleDefinition().getBlockKeys()) {
        // the key's name can NOT be same, the column can be same
        if (StringUtils.equals(columnName, blockKey.getName())) {
            return Boolean.TRUE;
        }
    }
    return Boolean.FALSE;
}
use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.
the class BlockingKeySection method getSelectedColumnAsBlockKeys.
/**
 * Collects the column of every block key defined in the analysis' built-in match rule definition.
 *
 * @return a new list holding one column entry per block key, in the order of the block keys; empty when no
 * block key is defined
 */
public List<String> getSelectedColumnAsBlockKeys() {
    RecordMatchingIndicator indicator = MatchRuleAnlaysisUtils.getRecordMatchIndicatorFromAna(analysis);
    List<BlockKeyDefinition> blockKeys = indicator.getBuiltInMatchRuleDefinition().getBlockKeys();
    List<String> blockKeyColumns = new ArrayList<String>(blockKeys.size());
    for (BlockKeyDefinition blockKey : blockKeys) {
        blockKeyColumns.add(blockKey.getColumn());
    }
    return blockKeyColumns;
}
Aggregations