Search in sources :

Example 11 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

In the class MatchAnalysisExecutor, the method execute.

/**
 * Runs a match analysis and records the execution status in the analysis result.
 * <p>
 * The method validates the analysis, looks up the {@link RecordMatchingIndicator} and the
 * {@link BlockKeyIndicator} among the analysis indicators, checks the analysed elements and (when the
 * store-on-disk option is set) the temporary data directory, then reads the data through an
 * {@link ISQLExecutor} and runs blocking and matching on it. Every failure path stores the failed status
 * and its message in the analysis result before returning.
 *
 * @param analysis the analysis to execute; only guarded by an {@code assert}, so it must not be null
 * @return the return code of the execution; not OK, with a message, when a precondition or the execution
 * itself failed
 * @see org.talend.dq.analysis.IAnalysisExecutor#execute(org.talend.dataquality.analysis.Analysis)
 */
public ReturnCode execute(Analysis analysis) {
    assert analysis != null;
    // --- preconditions
    ReturnCode rc = AnalysisExecutorHelper.check(analysis);
    if (!rc.isOk()) {
        AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
        return rc;
    }
    // --- creation time
    final long startime = AnalysisExecutorHelper.setExecutionDateInAnalysisResult(analysis);
    EList<Indicator> indicators = analysis.getResults().getIndicators();
    RecordMatchingIndicator recordMatchingIndicator = null;
    BlockKeyIndicator blockKeyIndicator = null;
    // When several indicators of the same kind are present, the last one wins.
    for (Indicator ind : indicators) {
        if (ind instanceof RecordMatchingIndicator) {
            recordMatchingIndicator = (RecordMatchingIndicator) ind;
        } else if (ind instanceof BlockKeyIndicator) {
            blockKeyIndicator = (BlockKeyIndicator) ind;
        }
    }
    if (recordMatchingIndicator == null || blockKeyIndicator == null) {
        return failExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.noIndicators")); //$NON-NLS-1$
    }
    List<ModelElement> anlayzedElements = analysis.getContext().getAnalysedElements();
    if (anlayzedElements == null || anlayzedElements.isEmpty()) {
        return failExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.EmptyAnalyzedElement")); //$NON-NLS-1$
    }
    // TDQ-9664 msjian: check the store on disk path.
    Boolean isStoreOnDisk = TaggedValueHelper.getValueBoolean(SQLExecutor.STORE_ON_DISK_KEY, analysis);
    // Boolean.TRUE.equals(..) avoids a NullPointerException from auto-unboxing should the helper return null.
    if (Boolean.TRUE.equals(isStoreOnDisk)) {
        String tempDataPath = TaggedValueHelper.getValueString(SQLExecutor.TEMP_DATA_DIR, analysis);
        // new File(null) throws a NullPointerException; a missing path is mapped to the empty path, which
        // never exists and is therefore reported as an invalid path like any other bad directory.
        File file = new File(tempDataPath == null ? "" : tempDataPath); //$NON-NLS-1$
        if (!file.exists() || !file.isDirectory()) {
            return failExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.InvalidPath", file.getPath())); //$NON-NLS-1$
        }
    }
    // TDQ-9664~
    Map<MetadataColumn, String> columnMap = getColumn2IndexMap(anlayzedElements);
    ISQLExecutor sqlExecutor = getSQLExectutor(analysis, recordMatchingIndicator, columnMap);
    if (sqlExecutor == null) {
        return failExecution(analysis, rc, Messages.getString("MatchAnalysisExecutor.noSqlExecutor")); //$NON-NLS-1$
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    // Set schema for match key.
    TypedReturnCode<MatchGroupResultConsumer> returnCode = new TypedReturnCode<MatchGroupResultConsumer>();
    MetadataColumn[] completeColumnSchema = AnalysisRecordGroupingUtils.getCompleteColumnSchema(columnMap);
    String[] colSchemaString = new String[completeColumnSchema.length];
    int idx = 0;
    for (MetadataColumn metadataCol : completeColumnSchema) {
        colSchemaString[idx++] = metadataCol.getName();
    }
    recordMatchingIndicator.setMatchRowSchema(colSchemaString);
    recordMatchingIndicator.reset();
    MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(recordMatchingIndicator);
    if (sqlExecutor.isStoreOnDisk()) {
        // need to execute the query
        try {
            sqlExecutor.executeQuery(analysis.getContext().getConnection(), analysis.getContext().getAnalysedElements());
        } catch (SQLException e) {
            log.error(e, e);
            return failExecution(analysis, rc, e.getMessage());
        }
        try {
            TypedReturnCode<Object> result = StoreOnDiskUtils.getDefault().executeWithStoreOnDisk(columnMap, recordMatchingIndicator, blockKeyIndicator, sqlExecutor.getStoreOnDiskHandler(), matchResultConsumer);
            if (result != null) {
                returnCode.setObject((MatchGroupResultConsumer) result.getObject());
                returnCode.setOk(result.isOk());
                returnCode.setMessage(result.getMessage());
            }
        } catch (Exception e) {
            // A failure here does not abort: it is copied into rc below and the remaining steps still run.
            log.error(e, e);
            returnCode.setMessage(e.getMessage());
            returnCode.setOk(false);
        }
    } else {
        // Added TDQ-9320 , use the result set iterator to replace the list of result in the memory.
        try {
            Iterator<Record> resultSetIterator = sqlExecutor.getResultSetIterator(analysis.getContext().getConnection(), anlayzedElements);
            BlockAndMatchManager bAndmManager = new BlockAndMatchManager(resultSetIterator, matchResultConsumer, columnMap, recordMatchingIndicator, blockKeyIndicator);
            bAndmManager.run();
        } catch (SQLException e) {
            log.error(e, e);
            return failExecution(analysis, rc, e.getMessage());
        } catch (BusinessException e) {
            log.error(e, e);
            return failExecution(analysis, rc, e.getMessage());
        }
    }
    if (!returnCode.isOk()) {
        rc.setOk(returnCode.isOk());
        rc.setMessage(returnCode.getMessage());
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    // isLowMemory and usedMemory are declared outside this method; when the flag is set the message of rc
    // is replaced, its OK status is left untouched.
    if (isLowMemory) {
        rc.setMessage(Messages.getString("Evaluator.OutOfMomory", usedMemory)); //$NON-NLS-1$
    }
    // notify the master page
    refreshTableWithMatchFullResult(analysis);
    // --- set metadata information of analysis
    AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
    // --- compute execution duration
    if (this.continueRun()) {
        long endtime = System.currentTimeMillis();
        final ExecutionInformations resultMetadata = analysis.getResults().getResultMetadata();
        resultMetadata.setExecutionDuration((int) (endtime - startime));
        resultMetadata.setOutThreshold(false);
    }
    if (getMonitor() != null) {
        getMonitor().worked(20);
    }
    return rc;
}

/**
 * Marks {@code rc} as failed with the given message, stores that status in the analysis result and returns
 * {@code rc} so that a failure path can be written as a single {@code return} statement.
 *
 * @param analysis the analysis whose result receives the failed status
 * @param rc the return code to update
 * @param message the failure message to set on {@code rc}
 * @return {@code rc}, now not OK
 */
private ReturnCode failExecution(Analysis analysis, ReturnCode rc, String message) {
    rc.setOk(Boolean.FALSE);
    rc.setMessage(message);
    AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
    return rc;
}
Also used : BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) ISQLExecutor(org.talend.cwm.db.connection.ISQLExecutor) SQLException(java.sql.SQLException) ModelElement(orgomg.cwm.objectmodel.core.ModelElement) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) BusinessException(org.talend.commons.exception.BusinessException) Record(org.talend.dataquality.matchmerge.Record) TypedReturnCode(org.talend.utils.sugars.TypedReturnCode) ReturnCode(org.talend.utils.sugars.ReturnCode) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) Indicator(org.talend.dataquality.indicators.Indicator) BlockKeyIndicator(org.talend.dataquality.indicators.columnset.BlockKeyIndicator) SQLException(java.sql.SQLException) BusinessException(org.talend.commons.exception.BusinessException) ExecutionInformations(org.talend.dataquality.analysis.ExecutionInformations) MatchGroupResultConsumer(org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer) TypedReturnCode(org.talend.utils.sugars.TypedReturnCode) BlockAndMatchManager(org.talend.dq.analysis.match.BlockAndMatchManager) File(java.io.File)

Example 12 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

In the class MatchWizard, the method initCWMResourceBuilder.

/**
 * Builds the analysis through the super implementation, then adds a block key indicator and a record
 * matching indicator (carrying a freshly created built-in match rule definition) to its results, and tags
 * the analysis with the preview row number read from the preference store.
 *
 * @return the analysis created by the super implementation, completed as described above
 * @see org.talend.dataprofiler.core.ui.wizard.analysis.column.ColumnSetWizard#initCWMResourceBuilder()
 */
@Override
public ModelElement initCWMResourceBuilder() {
    final Analysis matchAnalysis = (Analysis) super.initCWMResourceBuilder();

    // The blocking key indicator is registered first.
    final Indicator blockingKey = ColumnsetFactory.eINSTANCE.createBlockKeyIndicator();
    matchAnalysis.getResults().getIndicators().add(blockingKey);

    // Then the match rule indicator, with an empty built-in rule definition attached.
    final RecordMatchingIndicator recordMatching = ColumnsetFactory.eINSTANCE.createRecordMatchingIndicator();
    recordMatching.setBuiltInMatchRuleDefinition(RulesFactory.eINSTANCE.createMatchRuleDefinition());
    matchAnalysis.getResults().getIndicators().add(recordMatching);

    // Default number of rows loaded, taken from the plugin preferences.
    final int previewRows = CorePlugin.getDefault().getPreferenceStore().getInt(PluginConstant.MAX_NB_ROWS_ANALYSIS_EDITOR);
    TaggedValueHelper.setTaggedValue(matchAnalysis, TaggedValueHelper.PREVIEW_ROW_NUMBER, Integer.toString(previewRows));
    return matchAnalysis;
}
Also used : Analysis(org.talend.dataquality.analysis.Analysis) MatchRuleDefinition(org.talend.dataquality.rules.MatchRuleDefinition) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) IPreferenceStore(org.eclipse.jface.preference.IPreferenceStore) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) Indicator(org.talend.dataquality.indicators.Indicator)

Example 13 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

In the class ExportMatchRuleAction, the method run.

/**
 * Cheat sheet entry point: when the active editor is a {@link MatchAnalysisEditor}, takes the built-in
 * match rule definition of the analysis shown on its master page, stores it in {@code matchRule} and
 * delegates to the no-argument {@code run()}. Does nothing for any other editor. Neither argument is used.
 *
 * @param arg0 unused
 * @param arg1 unused
 * @see org.eclipse.ui.cheatsheets.ICheatSheetAction#run(java.lang.String[],
 * org.eclipse.ui.cheatsheets.ICheatSheetManager)
 */
public void run(String[] arg0, ICheatSheetManager arg1) {
    final IEditorPart activeEditor = PlatformUI.getWorkbench().getActiveWorkbenchWindow().getActivePage().getActiveEditor();
    if (!(activeEditor instanceof MatchAnalysisEditor)) {
        return;
    }
    final MatchAnalysisEditor matchEditor = (MatchAnalysisEditor) activeEditor;
    final MatchAnalysisDetailsPage detailsPage = (MatchAnalysisDetailsPage) matchEditor.getMasterPage();
    final RecordMatchingIndicator matchingIndicator = MatchRuleAnlaysisUtils.getRecordMatchIndicatorFromAna(detailsPage.getCurrentModelElement());
    this.matchRule = matchingIndicator.getBuiltInMatchRuleDefinition();
    this.run();
}
Also used : IEditorPart(org.eclipse.ui.IEditorPart) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) MatchAnalysisDetailsPage(org.talend.dataprofiler.core.ui.editor.analysis.MatchAnalysisDetailsPage) MatchAnalysisEditor(org.talend.dataprofiler.core.ui.editor.analysis.MatchAnalysisEditor)

Example 14 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

In the class BlockingKeySection, the method isKeyDefinitionAdded.

/**
 * Tells whether one of the block keys of the analysis' built-in match rule definition has a name equal to
 * the given value. Despite the parameter's name, the comparison is made against the key's name (key names
 * must be unique, whereas several keys may use the same column).
 *
 * @param columnName the name to look for among the block key names
 * @return {@code Boolean.TRUE} when a block key with that name exists, {@code Boolean.FALSE} otherwise
 * @see org.talend.dataquality.record.linkage.ui.section.AbstractMatchAnaysisTableSection#isKeyDefinitionAdded(java.lang
 * .String)
 */
@Override
public Boolean isKeyDefinitionAdded(String columnName) {
    final RecordMatchingIndicator matchingIndicator = MatchRuleAnlaysisUtils.getRecordMatchIndicatorFromAna(analysis);
    final List<BlockKeyDefinition> blockKeys = matchingIndicator.getBuiltInMatchRuleDefinition().getBlockKeys();
    for (final KeyDefinition candidate : blockKeys) {
        if (StringUtils.equals(columnName, candidate.getName())) {
            // First match is enough.
            return Boolean.TRUE;
        }
    }
    return Boolean.FALSE;
}
Also used : BlockKeyDefinition(org.talend.dataquality.rules.BlockKeyDefinition) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) BlockKeyDefinition(org.talend.dataquality.rules.BlockKeyDefinition) KeyDefinition(org.talend.dataquality.rules.KeyDefinition)

Example 15 with RecordMatchingIndicator

use of org.talend.dataquality.indicators.columnset.RecordMatchingIndicator in project tdq-studio-se by Talend.

In the class BlockingKeySection, the method getSelectedColumnAsBlockKeys.

/**
 * Collects the column of every block key defined in the analysis' built-in match rule definition, in the
 * order of the block keys.
 *
 * @return a new list holding one column entry per block key; empty when there is no block key
 */
public List<String> getSelectedColumnAsBlockKeys() {
    final RecordMatchingIndicator matchingIndicator = MatchRuleAnlaysisUtils.getRecordMatchIndicatorFromAna(analysis);
    final List<BlockKeyDefinition> blockKeys = matchingIndicator.getBuiltInMatchRuleDefinition().getBlockKeys();
    final List<String> selectedColumns = new ArrayList<String>();
    // Iterating over an empty list is a no-op, so no size check is needed.
    for (final KeyDefinition blockKey : blockKeys) {
        selectedColumns.add(blockKey.getColumn());
    }
    return selectedColumns;
}
Also used : ArrayList(java.util.ArrayList) BlockKeyDefinition(org.talend.dataquality.rules.BlockKeyDefinition) RecordMatchingIndicator(org.talend.dataquality.indicators.columnset.RecordMatchingIndicator) BlockKeyDefinition(org.talend.dataquality.rules.BlockKeyDefinition) KeyDefinition(org.talend.dataquality.rules.KeyDefinition)

Aggregations

RecordMatchingIndicator (org.talend.dataquality.indicators.columnset.RecordMatchingIndicator)32 ArrayList (java.util.ArrayList)9 MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn)8 MatchRuleDefinition (org.talend.dataquality.rules.MatchRuleDefinition)8 BlockKeyIndicator (org.talend.dataquality.indicators.columnset.BlockKeyIndicator)7 MatchGroupResultConsumer (org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer)7 BlockKeyDefinition (org.talend.dataquality.rules.BlockKeyDefinition)7 HashMap (java.util.HashMap)6 AlgorithmDefinition (org.talend.dataquality.rules.AlgorithmDefinition)6 MatchRule (org.talend.dataquality.rules.MatchRule)6 Test (org.junit.Test)5 Indicator (org.talend.dataquality.indicators.Indicator)5 MatchKeyDefinition (org.talend.dataquality.rules.MatchKeyDefinition)5 ExecuteMatchRuleHandler (org.talend.dq.analysis.match.ExecuteMatchRuleHandler)5 ReturnCode (org.talend.utils.sugars.ReturnCode)3 TypedReturnCode (org.talend.utils.sugars.TypedReturnCode)3 GridLayout (org.eclipse.swt.layout.GridLayout)2 Composite (org.eclipse.swt.widgets.Composite)2 Analysis (org.talend.dataquality.analysis.Analysis)2 KeyDefinition (org.talend.dataquality.rules.KeyDefinition)2