Search in sources :

Example 86 with Indicator

use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.

the class DelimitedFileIndicatorEvaluator method executeSqlQuery.

/**
 * Evaluates the indicators of the analysis against the delimited file behind the connection.
 * <p>
 * The connection is taken from the analysis context when it has not been set yet, and is replaced by a clone
 * carrying the original values when it is in context mode. The file is then parsed either with the CSV reader or
 * with the delimited reader, depending on the escape type of the connection. A parsing problem only raises a
 * warning dialog; it does not change the returned code.
 *
 * @param sqlStatement not read by this implementation
 * @return a failed ReturnCode carrying a message when the file does not exist, otherwise an ok ReturnCode
 */
@Override
protected ReturnCode executeSqlQuery(String sqlStatement) {
    ReturnCode result = new ReturnCode(true);
    if (delimitedFileconnection == null) {
        delimitedFileconnection = (DelimitedFileConnection) analysis.getContext().getConnection();
    }
    if (delimitedFileconnection.isContextMode()) {
        IRepositoryContextService contextService = CoreRuntimePlugin.getInstance().getRepositoryContextService();
        delimitedFileconnection = (DelimitedFileConnection) contextService.cloneOriginalValueConnection(delimitedFileconnection);
    }

    // Resolve the connection URL to a file on disk; without it nothing can be analysed.
    IPath location = new Path(JavaSqlFactory.getURL(delimitedFileconnection));
    File dataFile = location.toFile();
    if (!dataFile.exists()) {
        // $NON-NLS-1$
        result.setReturnCode(Messages.getString("DelimitedFileIndicatorEvaluator.CanNotFindFile"), false);
        return result;
    }

    List<ModelElement> analysedElements = this.analysis.getContext().getAnalysedElements();
    EMap<Indicator, AnalyzedDataSet> rowsByIndicator = analysis.getResults().getIndicToRowMap();
    rowsByIndicator.clear();

    // Take the columns of the first analysed element whose owning table has a non-empty column list.
    List<MetadataColumn> tableColumns = new ArrayList<MetadataColumn>();
    for (ModelElement element : analysedElements) {
        MetadataTable owner = ColumnHelper.getColumnOwnerAsMetadataTable((MetadataColumn) element);
        if (owner != null) {
            tableColumns = owner.getColumns();
        }
        if (!tableColumns.isEmpty()) {
            break;
        }
    }

    // use CsvReader to parse when the escape type is CSV, the delimited reader otherwise.
    ReturnCode readResult = Escape.CSV.equals(delimitedFileconnection.getEscapeType())
            ? useCsvReader(dataFile, analysedElements, tableColumns, rowsByIndicator)
            : useDelimitedReader(analysedElements, tableColumns, rowsByIndicator);

    // handle error message: the dialog is opened asynchronously through the display.
    if (!readResult.isOk()) {
        Display.getDefault().asyncExec(new Runnable() {

            public void run() {
                MessageDialog.openWarning(PlatformUI.getWorkbench().getActiveWorkbenchWindow().getShell(), // $NON-NLS-1$
                Messages.getString("DelimitedFileIndicatorEvaluator.badlyForm.Title"), // $NON-NLS-1$
                Messages.getString("DelimitedFileIndicatorEvaluator.badlyForm.Message"));
            }
        });
    }

    // Added yyin 20120608 TDQ-3589
    // Duplicate count indicators get a data set (created on demand) and their result is added to the map.
    for (MetadataColumn column : tableColumns) {
        for (Indicator candidate : getIndicators(column.getLabel())) {
            if (!(candidate instanceof DuplicateCountIndicator)) {
                continue;
            }
            if (rowsByIndicator.get(candidate) == null) {
                AnalyzedDataSet freshDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                rowsByIndicator.put(candidate, freshDataSet);
                freshDataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
                freshDataSet.setRecordSize(0);
            }
            addResultToIndicatorToRowMap(candidate, rowsByIndicator);
        }
    }
    return result;
}
Also used : IPath(org.eclipse.core.runtime.IPath) Path(org.eclipse.core.runtime.Path) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) ReturnCode(org.talend.utils.sugars.ReturnCode) IPath(org.eclipse.core.runtime.IPath) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) ArrayList(java.util.ArrayList) IRepositoryContextService(org.talend.core.IRepositoryContextService) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) ModelElement(orgomg.cwm.objectmodel.core.ModelElement) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MetadataTable(org.talend.core.model.metadata.builder.connection.MetadataTable) File(java.io.File)

Example 87 with Indicator

use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.

the class DelimitedFileIndicatorEvaluator method handleByARow.

/**
 * Feeds one parsed row of the delimited file to the indicators of every analysed column and, when data storage is
 * enabled in the analysis parameters, keeps the stored rows of the indicators up to date.
 *
 * @param rowValues raw cell values of the current row
 * @param currentRow number of the current row; only used in the warning message
 * @param analysisElementList analysed elements; each one is cast to MetadataColumn
 * @param columnElementList columns used to locate an indicator's column inside a stored row
 * @param indicToRowMap data set of each indicator; a missing entry is created here
 * @return a ReturnCode that is not ok when at least one analysed column had no usable position in this row
 */
private ReturnCode handleByARow(String[] rowValues, long currentRow, List<ModelElement> analysisElementList, List<MetadataColumn> columnElementList, EMap<Indicator, AnalyzedDataSet> indicToRowMap) {
    ReturnCode returnCode = new ReturnCode(true);
    Object object = null;
    int maxNumberRows = analysis.getParameters().getMaxNumberRows();
    int recordIncrement = 0;
    element: for (int i = 0; i < analysisElementList.size(); i++) {
        MetadataColumn mColumn = (MetadataColumn) analysisElementList.get(i);
        Integer position = ColumnHelper.getColumnIndex(mColumn);
        // warning with a file of badly form
        if (position == null || position >= rowValues.length) {
            log.warn(// $NON-NLS-1$
            Messages.getString(// $NON-NLS-1$
            "DelimitedFileIndicatorEvaluator.incorrectData", mColumn.getLabel(), currentRow, delimitedFileconnection.getFilePath()));
            returnCode.setOk(false);
            continue;
        }
        object = TalendTypeConvert.convertToObject(mColumn.getTalendType(), rowValues[position], mColumn.getPattern());
        List<Indicator> indicators = getIndicators(mColumn.getLabel());
        for (Indicator indicator : indicators) {
            if (!continueRun()) {
                break element;
            }
            // bug 19036,to irregularly data,still compute for RowCountIndicator
            if (object == null && !(indicator instanceof RowCountIndicator)) {
                continue element;
            }
            // Added yyin 20120608 TDQ-3589
            if (indicator instanceof DuplicateCountIndicator) {
                ((DuplicateCountIndicator) indicator).handle(object, rowValues);
            } else {
                // ~
                indicator.handle(object);
            }
            AnalyzedDataSet analyzedDataSet = indicToRowMap.get(indicator);
            if (analyzedDataSet == null) {
                analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                indicToRowMap.put(indicator, analyzedDataSet);
                analyzedDataSet.setDataCount(maxNumberRows);
                analyzedDataSet.setRecordSize(0);
            }
            // see IndicatorEvaluator line 166, the logic is almost the same
            if (analysis.getParameters().isStoreData()) {
                if (indicator.mustStoreRow()) {
                    List<Object[]> valueObjectList = initDataSet(indicator, indicToRowMap, object);
                    recordIncrement = valueObjectList.size();
                    List<Object> inputRowList = new ArrayList<Object>();
                    for (int j = 0; j < rowValues.length; j++) {
                        Object newobject = rowValues[j];
                        if (indicator.isUsedMapDBMode()) {
                            inputRowList.add(newobject == null ? PluginConstant.NULL_STRING : newobject);
                            continue;
                        } else {
                            if (recordIncrement < maxNumberRows) {
                                // The first cell appends a new row; the following cells find it at recordIncrement.
                                if (recordIncrement < valueObjectList.size()) {
                                    valueObjectList.get(recordIncrement)[j] = newobject;
                                } else {
                                    Object[] valueObject = new Object[rowValues.length];
                                    valueObject[j] = newobject;
                                    valueObjectList.add(valueObject);
                                }
                            } else {
                                break;
                            }
                        }
                    }
                    if (indicator.isUsedMapDBMode()) {
                        MapDBUtils.handleDrillDownData(object, inputRowList, indicator);
                    }
                } else if (indicator instanceof UniqueCountIndicator && analysis.getResults().getIndicToRowMap().get(indicator).getData() != null) {
                    List<Object[]> removeValueObjectList = analysis.getResults().getIndicToRowMap().get(indicator).getData();
                    // indexOf yields -1 for an empty list or an unknown element: nothing can be matched then.
                    int offsetting = columnElementList.indexOf(indicator.getAnalyzedElement());
                    if (offsetting < 0) {
                        continue;
                    }
                    for (Object[] dataObject : removeValueObjectList) {
                        // Stored rows have the length of the row they came from, so a short row may lack this cell.
                        if (offsetting < dataObject.length && storedCellMatches(dataObject[offsetting], object)) {
                            // Removing while iterating is safe only because the loop is left immediately.
                            removeValueObjectList.remove(dataObject);
                            break;
                        }
                    }
                }
            }
        }
    }
    return returnCode;
}

/**
 * Tells whether a cell of a stored row holds the given converted value; a null cell never matches.
 */
private static boolean storedCellMatches(Object cell, Object value) {
    if (cell == null) {
        return false;
    }
    // Added yyin 20120611 TDQ5279
    if (value instanceof Integer && cell instanceof String) {
        try {
            if (value.equals(Integer.parseInt((String) cell))) {
                return true;
            }
        } catch (NumberFormatException ignored) {
            // The stored text is not an integer; fall back to the plain comparison below.
        }
    }
    // ~
    return cell.equals(value);
}
Also used : DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) ReturnCode(org.talend.utils.sugars.ReturnCode) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) ArrayList(java.util.ArrayList) List(java.util.List) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator)

Example 88 with Indicator

use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.

the class ColumnSetIndicatorEvaluator method handleObjects.

/**
 * Hands the values of one row of a delimited file to every column-set multi-value indicator of the analysis and,
 * for the leaf indicators of simple statistics indicators, stores the raw row while data storage is enabled and
 * the maximum number of rows has not been reached.
 *
 * @param objectLs values passed to each matching indicator; nothing is done when the list is empty
 * @param rowValues raw cell values of the current row, copied into the stored row
 * @param columnList columns whose count gives the width of a stored row; cells beyond the end of
 * {@code rowValues} are stored as an empty string
 */
private void handleObjects(EList<Object> objectLs, String[] rowValues, List<MetadataColumn> columnList) {
    if (objectLs.isEmpty()) {
        return;
    }
    EList<Indicator> allIndicators = analysis.getResults().getIndicators();
    EMap<Indicator, AnalyzedDataSet> rowsByIndicator = analysis.getResults().getIndicToRowMap();
    if (allIndicators == null) {
        return;
    }
    for (Indicator current : allIndicators) {
        if (!this.continueRun()) {
            return;
        }
        if (!ColumnsetPackage.eINSTANCE.getColumnSetMultiValueIndicator().isSuperTypeOf(current.eClass())) {
            continue;
        }
        current.handle(objectLs);
        // feature 19192,store all rows value for RowCountIndicator
        if (!(current instanceof SimpleStatIndicator)) {
            continue;
        }
        for (Indicator leaf : ((SimpleStatIndicator) current).getLeafIndicators()) {
            if (!this.continueRun()) {
                break;
            }
            // Rows are only kept for the leaves (distinct count, unique count, duplicate count, etc)
            // when the analysis asks for the data to be stored.
            if (!analysis.getParameters().isStoreData()) {
                continue;
            }
            List<Object[]> storedRows = initDataSet(leaf, rowsByIndicator);
            int nextRowIndex = storedRows.size();
            if (nextRowIndex >= analysis.getParameters().getMaxNumberRows()) {
                continue;
            }
            Object[] newRow = new Object[columnList.size()];
            for (int col = 0; col < columnList.size(); col++) {
                if (!this.continueRun()) {
                    break;
                }
                Object cell = col < rowValues.length ? rowValues[col] : PluginConstant.EMPTY_STRING;
                // The first cell appends the new row; later cells find it at nextRowIndex.
                if (nextRowIndex < storedRows.size()) {
                    storedRows.get(nextRowIndex)[col] = cell;
                } else {
                    newRow[col] = cell;
                    storedRows.add(newRow);
                }
            }
        }
    }
}
Also used : AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator)

Example 89 with Indicator

use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.

the class AnalysisExecutorHelper method checkDependentFiles.

/**
 * Checks the indicators of an analysis before execution.
 * <p>
 * An analysis without any indicator is rejected with a message naming the analysis. Otherwise every indicator
 * that has no built-in indicator definition goes through the pattern matching check and then the indicator
 * check; the first failing check ends the loop and its code is returned.
 *
 * @param analysis the analysis whose indicators are checked
 * @return the code of the first failing check, or the code of the last check performed (an ok code when no
 * check was needed)
 */
private static ReturnCode checkDependentFiles(Analysis analysis) {
    ReturnCode outcome = new ReturnCode(Boolean.TRUE);
    List<Indicator> allIndicators = analysis.getResults().getIndicators();
    if (allIndicators.isEmpty()) {
        outcome.setOk(false);
        // $NON-NLS-1$
        outcome.setMessage(Messages.getString("AnalysisExecutor.AnalysisNoIndicators", analysis.getName()));
        return outcome;
    }
    // Loop over the indicators; those with a built-in definition need no check.
    for (Indicator current : allIndicators) {
        if (current.getBuiltInIndicatorDefinition() == null) {
            // check pattern matching indicator first, then the indicator itself
            outcome = checkPatternMatchingIndicator(current);
            if (outcome.isOk()) {
                outcome = checkIndicator(current);
            }
            if (!outcome.isOk()) {
                return outcome;
            }
        }
    }
    return outcome;
}
Also used : ReturnCode(org.talend.utils.sugars.ReturnCode) RegexpMatchingIndicator(org.talend.dataquality.indicators.RegexpMatchingIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) UserDefIndicator(org.talend.dataquality.indicators.sql.UserDefIndicator) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) Indicator(org.talend.dataquality.indicators.Indicator) CompositeIndicator(org.talend.dataquality.indicators.CompositeIndicator)

Example 90 with Indicator

use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.

the class FrequencyTypeStateUtil method isWithRowCountIndicator.

/**
 * Tells whether the container of the given indicator also holds a row count for the same analysed element.
 * <p>
 * The indicators of the container are scanned in order. The first one on the same analysed element (compared by
 * reference) that is a RowCountIndicator yields {@code true}; the first one that is a CountsIndicator decides
 * the result by whether its row count indicator is set.
 *
 * @param indicator the indicator whose analysed element and container are inspected
 * @return {@code true} when a row count is found as described above; {@code false} otherwise, including when
 * the indicator has no container
 */
public static boolean isWithRowCountIndicator(Indicator indicator) {
    ModelElement targetElement = indicator.getAnalyzedElement();
    AnalysisResult owner = (AnalysisResult) ((InternalEObject) indicator).eContainer();
    // MOD msjian TDQ-5960: fix a NPE
    if (owner == null) {
        return false;
    }
    EList<Indicator> siblings = owner.getIndicators();
    if (siblings == null) {
        return false;
    }
    for (Indicator candidate : siblings) {
        if (candidate.getAnalyzedElement() != targetElement) {
            continue;
        }
        if (candidate instanceof RowCountIndicator) {
            return true;
        }
        if (candidate instanceof CountsIndicator) {
            return ((CountsIndicator) candidate).getRowCountIndicator() != null;
        }
    }
    return false;
}
Also used : ModelElement(orgomg.cwm.objectmodel.core.ModelElement) CountsIndicator(org.talend.dataquality.indicators.CountsIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) InternalEObject(org.eclipse.emf.ecore.InternalEObject) AnalysisResult(org.talend.dataquality.analysis.AnalysisResult) CountsIndicator(org.talend.dataquality.indicators.CountsIndicator) BenfordLawFrequencyIndicator(org.talend.dataquality.indicators.BenfordLawFrequencyIndicator) Indicator(org.talend.dataquality.indicators.Indicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator)

Aggregations

Indicator (org.talend.dataquality.indicators.Indicator)178 ArrayList (java.util.ArrayList)58 RowCountIndicator (org.talend.dataquality.indicators.RowCountIndicator)40 CompositeIndicator (org.talend.dataquality.indicators.CompositeIndicator)31 ModelElement (orgomg.cwm.objectmodel.core.ModelElement)31 ModelElementIndicator (org.talend.dataprofiler.core.model.ModelElementIndicator)28 Analysis (org.talend.dataquality.analysis.Analysis)27 DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator)27 UniqueCountIndicator (org.talend.dataquality.indicators.UniqueCountIndicator)27 AllMatchIndicator (org.talend.dataquality.indicators.columnset.AllMatchIndicator)26 ReturnCode (org.talend.utils.sugars.ReturnCode)25 SimpleStatIndicator (org.talend.dataquality.indicators.columnset.SimpleStatIndicator)24 UserDefIndicator (org.talend.dataquality.indicators.sql.UserDefIndicator)24 PatternMatchingIndicator (org.talend.dataquality.indicators.PatternMatchingIndicator)23 DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator)22 NullCountIndicator (org.talend.dataquality.indicators.NullCountIndicator)22 TdColumn (org.talend.cwm.relational.TdColumn)20 WhereRuleIndicator (org.talend.dataquality.indicators.sql.WhereRuleIndicator)18 FrequencyIndicator (org.talend.dataquality.indicators.FrequencyIndicator)17 Test (org.junit.Test)16