Search in sources :

Example 6 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

The class DrillDownUtils, method createDrillDownMenuForJava.

/**
 * Creates the drill-down menu for an analysis, but only when rows have been stored for the indicator behind
 * the given chart entity.
 * <p>
 * The stored rows are looked up in the analysis result's indicator-to-rows map. The menu is built (by
 * delegating to {@code createDrillDownMenu}) if at least one of the plain data list, the frequency data map or
 * the pattern data list is non-null and non-empty; otherwise nothing happens.
 *
 * @param dataEntity the chart entity whose indicator is used as lookup key; when {@code null} the lookup is
 * done with a {@code null} indicator
 * @param menu the menu handed on to {@code createDrillDownMenu}
 * @param itemEntities the menu item descriptions handed on to {@code createDrillDownMenu}
 * @param analysis the analysis whose results hold the indicator-to-rows map
 */
public static void createDrillDownMenuForJava(final ChartDataEntity dataEntity, Menu menu, MenuItemEntity[] itemEntities, final Analysis analysis) {
    final Indicator indicator = dataEntity == null ? null : dataEntity.getIndicator();
    final AnalyzedDataSet storedRows = analysis.getResults().getIndicToRowMap().get(indicator);
    if (storedRows == null) {
        // nothing was stored for this indicator, so there is nothing to drill down into
        return;
    }

    // check the three possible storages in the same order as before, stopping at the first non-empty one
    boolean hasData = storedRows.getData() != null && storedRows.getData().size() > 0;
    if (!hasData) {
        hasData = storedRows.getFrequencyData() != null && storedRows.getFrequencyData().size() > 0;
    }
    if (!hasData) {
        hasData = storedRows.getPatternData() != null && storedRows.getPatternData().size() > 0;
    }

    if (hasData) {
        createDrillDownMenu(dataEntity, menu, itemEntities, analysis);
    }
}
Also used : AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) LengthIndicator(org.talend.dataquality.indicators.LengthIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) Indicator(org.talend.dataquality.indicators.Indicator)

Example 7 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

The class IndicatorEvaluator, method initDataSet.

/**
 * Returns the list into which the rows belonging to the given indicator and value must be stored, creating
 * the {@link AnalyzedDataSet} entry and the underlying containers on first use.
 * <ul>
 * <li>Frequency, min-length and max-length indicators: rows are grouped in the frequency map under a key
 * derived from {@code object}; the list for that key is returned.</li>
 * <li>Indicators flagged as valid-row or invalid-row: the pattern data holds two lists (valid, invalid) and
 * the one matching the indicator is returned.</li>
 * <li>Any other indicator: the plain data list is returned.</li>
 * </ul>
 *
 * @param indicator the indicator the rows belong to
 * @param indicToRowMap the indicator-to-rows map; a new entry is added when the indicator has none yet
 * @param object the analysed value of the current row, used to build the frequency key; may be {@code null}
 * @return the target row list, or {@code null} when the pattern data slot does not hold an {@code ArrayList}
 */
@SuppressWarnings("unchecked")
protected List<Object[]> initDataSet(Indicator indicator, EMap<Indicator, AnalyzedDataSet> indicToRowMap, Object object) {
    AnalyzedDataSet dataSet = indicToRowMap.get(indicator);
    if (dataSet == null) {
        dataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
        indicToRowMap.put(indicator, dataSet);
        dataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
        dataSet.setRecordSize(0);
    }

    final boolean lengthBased = indicator instanceof MinLengthIndicator || indicator instanceof MaxLengthIndicator;

    if (indicator instanceof FrequencyIndicator || lengthBased) {
        Map<Object, List<Object[]>> rowsByKey = dataSet.getFrequencyData();
        if (rowsByKey == null) {
            rowsByKey = new HashMap<Object, List<Object[]>>();
            dataSet.setFrequencyData(rowsByKey);
        }

        // the order of these tests matters: null and length handling win over the pattern conversions
        final String key;
        if (object == null) {
            key = SpecialValueDisplay.NULL_FIELD;
        } else if (lengthBased) {
            key = String.valueOf(object.toString().length());
        } else if (object.equals(PluginConstant.EMPTY_STRING)) {
            key = SpecialValueDisplay.EMPTY_FIELD;
        } else if (indicator instanceof PatternLowFreqIndicator) {
            key = ((PatternLowFreqIndicator) indicator).convertCharacters(object.toString());
        } else if (indicator instanceof PatternFreqIndicator) {
            key = ((PatternFreqIndicator) indicator).convertCharacters(object.toString());
        } else if (indicator instanceof FormatFreqPieIndicator) {
            // MOD qiongli 2011-8-26,feature TDQ-3253.
            key = ((FormatFreqPieIndicator) indicator).getCurrentKey();
        } else {
            key = object.toString();
        }

        List<Object[]> rowsForKey = rowsByKey.get(key);
        if (rowsForKey == null) {
            rowsForKey = new ArrayList<Object[]>();
            rowsByKey.put(key, rowsForKey);
        }
        return rowsForKey;
    }

    if (indicator.isInValidRow() || indicator.isValidRow()) {
        List<Object> patternData = dataSet.getPatternData();
        if (patternData == null) {
            patternData = new ArrayList<Object>();
            // slot matching AnalyzedDataSetImpl.VALID_VALUE
            patternData.add(new ArrayList<Object[]>());
            // slot matching AnalyzedDataSetImpl.INVALID_VALUE
            patternData.add(new ArrayList<Object[]>());
            dataSet.setPatternData(patternData);
        }
        final Object slot = indicator.isInValidRow() ? patternData.get(AnalyzedDataSetImpl.INVALID_VALUE)
                : patternData.get(AnalyzedDataSetImpl.VALID_VALUE);
        if (slot instanceof ArrayList<?>) {
            return (ArrayList<Object[]>) slot;
        }
        return null;
    }

    List<Object[]> plainRows = dataSet.getData();
    if (plainRows == null) {
        plainRows = new ArrayList<Object[]>();
        dataSet.setData(plainRows);
    }
    return plainRows;
}
Also used : MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) FormatFreqPieIndicator(org.talend.dataquality.indicators.FormatFreqPieIndicator) ArrayList(java.util.ArrayList) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) PatternLowFreqIndicator(org.talend.dataquality.indicators.PatternLowFreqIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) ArrayList(java.util.ArrayList) List(java.util.List) PatternFreqIndicator(org.talend.dataquality.indicators.PatternFreqIndicator)

Example 8 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

The class IndicToRowsMapImpl, method setTypedValue.

/**
 * <!-- begin-user-doc -->
 * Replaces the value held by this map entry with {@code newValue}. When notification is required, a
 * {@code Notification.SET} event for the feature {@code AnalysisPackage.INDIC_TO_ROWS_MAP__VALUE} is sent,
 * carrying the previous and the new value.
 * <!-- end-user-doc -->
 * @generated
 */
public void setTypedValue(AnalyzedDataSet newValue) {
    AnalyzedDataSet oldValue = value;
    value = newValue;
    if (eNotificationRequired())
        eNotify(new ENotificationImpl(this, Notification.SET, AnalysisPackage.INDIC_TO_ROWS_MAP__VALUE, oldValue, value));
}
Also used : AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) ENotificationImpl(org.eclipse.emf.ecore.impl.ENotificationImpl)

Example 9 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

The class DelimitedFileIndicatorEvaluator, method executeSqlQuery.

/**
 * Runs the analysis against the delimited file behind the analysis connection. The {@code sqlStatement}
 * argument is not read by this implementation.
 * <p>
 * Steps: resolve the connection (cloning it with original values when it is in context mode), check that the
 * file exists, clear the indicator-to-rows map, find the column list of the first analysed column that has an
 * owning table, read the file with either the CSV reader or the delimited reader, show a warning dialog
 * asynchronously when reading reported a problem, and finally make sure every duplicate-count indicator has a
 * data set entry and has its result added to the map.
 *
 * @param sqlStatement unused here
 * @return a return code that is not OK only when the file cannot be found; a reading problem is reported
 * through the dialog and does not change the returned code
 */
@Override
protected ReturnCode executeSqlQuery(String sqlStatement) {
    final ReturnCode result = new ReturnCode(true);

    if (delimitedFileconnection == null) {
        delimitedFileconnection = (DelimitedFileConnection) analysis.getContext().getConnection();
    }
    if (delimitedFileconnection.isContextMode()) {
        IRepositoryContextService contextService = CoreRuntimePlugin.getInstance().getRepositoryContextService();
        delimitedFileconnection = (DelimitedFileConnection) contextService.cloneOriginalValueConnection(delimitedFileconnection);
    }

    final IPath filePath = new Path(JavaSqlFactory.getURL(delimitedFileconnection));
    final File file = filePath.toFile();
    if (!file.exists()) {
        result.setReturnCode(Messages.getString("DelimitedFileIndicatorEvaluator.CanNotFindFile"), false); //$NON-NLS-1$
        return result;
    }

    final List<ModelElement> analysedElements = this.analysis.getContext().getAnalysedElements();
    final EMap<Indicator, AnalyzedDataSet> rowsByIndicator = analysis.getResults().getIndicToRowMap();
    rowsByIndicator.clear();

    // take the columns of the first analysed column whose owning table yields a non-empty column list
    List<MetadataColumn> fileColumns = new ArrayList<MetadataColumn>();
    for (ModelElement element : analysedElements) {
        MetadataTable ownerTable = ColumnHelper.getColumnOwnerAsMetadataTable((MetadataColumn) element);
        if (ownerTable != null) {
            fileColumns = ownerTable.getColumns();
        }
        if (!fileColumns.isEmpty()) {
            break;
        }
    }

    // CSV escaping needs the CsvReader based parser, everything else uses the delimited reader
    final ReturnCode readStatus = Escape.CSV.equals(delimitedFileconnection.getEscapeType())
            ? useCsvReader(file, analysedElements, fileColumns, rowsByIndicator)
            : useDelimitedReader(analysedElements, fileColumns, rowsByIndicator);

    // tell the user about badly formed content; done asynchronously on the display thread
    if (!readStatus.isOk()) {
        Display.getDefault().asyncExec(new Runnable() {

            public void run() {
                MessageDialog.openWarning(PlatformUI.getWorkbench().getActiveWorkbenchWindow().getShell(),
                        Messages.getString("DelimitedFileIndicatorEvaluator.badlyForm.Title"), //$NON-NLS-1$
                        Messages.getString("DelimitedFileIndicatorEvaluator.badlyForm.Message")); //$NON-NLS-1$
            }
        });
    }

    // Added yyin 20120608 TDQ-3589
    for (MetadataColumn column : fileColumns) {
        for (Indicator indicator : getIndicators(column.getLabel())) {
            if (!(indicator instanceof DuplicateCountIndicator)) {
                continue;
            }
            if (rowsByIndicator.get(indicator) == null) {
                AnalyzedDataSet created = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                rowsByIndicator.put(indicator, created);
                created.setDataCount(analysis.getParameters().getMaxNumberRows());
                created.setRecordSize(0);
            }
            addResultToIndicatorToRowMap(indicator, rowsByIndicator);
        }
    }
    return result;
}
Also used : IPath(org.eclipse.core.runtime.IPath) Path(org.eclipse.core.runtime.Path) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) ReturnCode(org.talend.utils.sugars.ReturnCode) IPath(org.eclipse.core.runtime.IPath) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) ArrayList(java.util.ArrayList) IRepositoryContextService(org.talend.core.IRepositoryContextService) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) ModelElement(orgomg.cwm.objectmodel.core.ModelElement) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) MetadataTable(org.talend.core.model.metadata.builder.connection.MetadataTable) File(java.io.File)

Example 10 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

The class DelimitedFileIndicatorEvaluator, method handleByARow.

/**
 * Feeds one parsed row of the delimited file to the indicators of every analysed column and, when the
 * analysis stores data, records the row for drill-down.
 * <p>
 * For each analysed column the cell at the column's index is converted to its Talend type and handed to each
 * indicator of that column. If data storing is enabled, indicators that must store rows get the whole row
 * appended to the list returned by {@code initDataSet} (up to the configured maximum number of rows) or
 * passed to MapDB when the indicator uses MapDB mode. For a unique-count indicator that does not store rows,
 * the first stored row whose cell matches the current value is removed from the indicator's stored data.
 *
 * @param rowValues the cells of the current row
 * @param currentRow the row number, used only in the warning message
 * @param analysisElementList the analysed columns (each element is cast to {@code MetadataColumn})
 * @param columnElementList the columns of the file; used to locate the analysed column inside stored rows
 * @param indicToRowMap the indicator-to-rows map that receives missing data set entries
 * @return a return code that is not OK when at least one analysed column had no usable position in this row
 */
private ReturnCode handleByARow(String[] rowValues, long currentRow, List<ModelElement> analysisElementList, List<MetadataColumn> columnElementList, EMap<Indicator, AnalyzedDataSet> indicToRowMap) {
    ReturnCode returnCode = new ReturnCode(true);
    Object object = null;
    int maxNumberRows = analysis.getParameters().getMaxNumberRows();
    int recordIncrement = 0;
    element: for (int i = 0; i < analysisElementList.size(); i++) {
        MetadataColumn mColumn = (MetadataColumn) analysisElementList.get(i);
        Integer position = ColumnHelper.getColumnIndex(mColumn);
        // badly formed file: the row has no cell for this column (a negative index is treated the same way
        // instead of failing on the array access below)
        if (position == null || position < 0 || position >= rowValues.length) {
            log.warn(Messages.getString("DelimitedFileIndicatorEvaluator.incorrectData", mColumn.getLabel(), currentRow, delimitedFileconnection.getFilePath())); //$NON-NLS-1$
            returnCode.setOk(false);
            continue;
        }
        object = TalendTypeConvert.convertToObject(mColumn.getTalendType(), rowValues[position], mColumn.getPattern());
        List<Indicator> indicators = getIndicators(mColumn.getLabel());
        for (Indicator indicator : indicators) {
            if (!continueRun()) {
                break element;
            }
            // bug 19036,to irregularly data,still compute for RowCountIndicator
            if (object == null && !(indicator instanceof RowCountIndicator)) {
                continue element;
            }
            // Added yyin 20120608 TDQ-3589
            if (indicator instanceof DuplicateCountIndicator) {
                ((DuplicateCountIndicator) indicator).handle(object, rowValues);
            } else {
                // ~
                indicator.handle(object);
            }
            AnalyzedDataSet analyzedDataSet = indicToRowMap.get(indicator);
            if (analyzedDataSet == null) {
                analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                indicToRowMap.put(indicator, analyzedDataSet);
                analyzedDataSet.setDataCount(maxNumberRows);
                analyzedDataSet.setRecordSize(0);
            }
            // NOTE(review): the original comment says this mirrors the logic in IndicatorEvaluator - confirm
            if (analysis.getParameters().isStoreData()) {
                if (indicator.mustStoreRow()) {
                    List<Object[]> valueObjectList = initDataSet(indicator, indicToRowMap, object);
                    // index of the row about to be stored: the first cell appends a new array, the following
                    // cells fill that same array
                    recordIncrement = valueObjectList.size();
                    List<Object> inputRowList = new ArrayList<Object>();
                    for (int j = 0; j < rowValues.length; j++) {
                        Object newobject = rowValues[j];
                        if (indicator.isUsedMapDBMode()) {
                            inputRowList.add(newobject == null ? PluginConstant.NULL_STRING : newobject);
                            continue;
                        } else {
                            if (recordIncrement < maxNumberRows) {
                                if (recordIncrement < valueObjectList.size()) {
                                    valueObjectList.get(recordIncrement)[j] = newobject;
                                } else {
                                    Object[] valueObject = new Object[rowValues.length];
                                    valueObject[j] = newobject;
                                    valueObjectList.add(valueObject);
                                }
                            } else {
                                break;
                            }
                        }
                    }
                    if (indicator.isUsedMapDBMode()) {
                        MapDBUtils.handleDrillDownData(object, inputRowList, indicator);
                    }
                } else if (indicator instanceof UniqueCountIndicator) {
                    List<Object[]> removeValueObjectList = analysis.getResults().getIndicToRowMap().get(indicator).getData();
                    if (removeValueObjectList == null || columnElementList.size() == 0) {
                        continue;
                    }
                    int offsetting = columnElementList.indexOf(indicator.getAnalyzedElement());
                    if (offsetting < 0) {
                        // the analysed element is not one of the file columns: no stored cell can be compared
                        continue;
                    }
                    for (Object[] dataObject : removeValueObjectList) {
                        // skip stored rows that have no (or an empty) cell for this column
                        if (offsetting >= dataObject.length || dataObject[offsetting] == null) {
                            continue;
                        }
                        Object storedValue = dataObject[offsetting];
                        boolean sameValue = storedValue.equals(object);
                        // Added yyin 20120611 TDQ5279: stored cells are raw strings, so an Integer value has
                        // to be compared numerically
                        if (!sameValue && object instanceof Integer && storedValue instanceof String) {
                            try {
                                sameValue = object.equals(Integer.parseInt((String) storedValue));
                            } catch (NumberFormatException ignored) {
                                // a stored cell that is not a number simply cannot match an Integer value
                                sameValue = false;
                            }
                        }
                        if (sameValue) {
                            // safe although we are iterating: the loop is left right after the removal
                            removeValueObjectList.remove(dataObject);
                            break;
                        }
                    }
                }
            }
        }
    }
    return returnCode;
}
Also used : DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) ReturnCode(org.talend.utils.sugars.ReturnCode) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) ArrayList(java.util.ArrayList) List(java.util.List) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator)

Aggregations

AnalyzedDataSet (org.talend.dataquality.analysis.AnalyzedDataSet)16 Indicator (org.talend.dataquality.indicators.Indicator)11 DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator)8 UniqueCountIndicator (org.talend.dataquality.indicators.UniqueCountIndicator)8 RowCountIndicator (org.talend.dataquality.indicators.RowCountIndicator)7 ArrayList (java.util.ArrayList)6 DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator)5 SimpleStatIndicator (org.talend.dataquality.indicators.columnset.SimpleStatIndicator)5 ReturnCode (org.talend.utils.sugars.ReturnCode)5 File (java.io.File)4 List (java.util.List)4 IPath (org.eclipse.core.runtime.IPath)4 MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn)4 AnalysisResult (org.talend.dataquality.analysis.AnalysisResult)4 AllMatchIndicator (org.talend.dataquality.indicators.columnset.AllMatchIndicator)4 ModelElement (orgomg.cwm.objectmodel.core.ModelElement)4 BasicEList (org.eclipse.emf.common.util.BasicEList)3 DelimitedFileConnection (org.talend.core.model.metadata.builder.connection.DelimitedFileConnection)3 MetadataTable (org.talend.core.model.metadata.builder.connection.MetadataTable)3 FrequencyIndicator (org.talend.dataquality.indicators.FrequencyIndicator)3