Search in sources :

Example 1 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

the class ColumnSetIndicatorEvaluator method evaluateBySql.

/**
 * Organizes the EList 'objectLs' by SQL: executes the given query and hands every returned row to
 * {@code handleObjects}.
 * <p>
 * The row map of the analysis result is cleared before rows are read. Reading stops early as soon as
 * {@code continueRun()} returns false. The result set, the statement and the connection are released in
 * every case, even when closing one of them fails.
 *
 * @param sqlStatement the SQL query to execute
 * @param ok the return code to update and return; it is set to failure when the statement yields no result set
 * @return the same {@code ok} instance that was passed in
 * @throws SQLException if executing the query, reading a row or closing a JDBC resource fails
 */
private ReturnCode evaluateBySql(String sqlStatement, ReturnCode ok) throws SQLException {
    Statement statement = null;
    ResultSet resultSet = null;
    try {
        statement = createStatement();
        if (continueRun()) {
            if (log.isInfoEnabled()) {
                // $NON-NLS-1$
                log.info("Executing query: " + sqlStatement);
            }
            statement.execute(sqlStatement);
        }
        // get the results
        // NOTE(review): when continueRun() is false the statement was never executed, so this presumably
        // yields null and the run is reported as "no result set" - confirm this is the intended outcome.
        resultSet = statement.getResultSet();
        List<String> columnNames = getAnalyzedElementsName();
        if (resultSet == null) {
            // $NON-NLS-1$
            String mess = Messages.getString("Evaluator.NoResultSet", sqlStatement);
            log.warn(mess);
            ok.setReturnCode(mess, false);
            return ok;
        }
        EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
        indicToRowMap.clear();
        while (resultSet.next()) {
            // MOD yyi 2012-04-11 TDQ-4916:Add memory control for java analysis.
            if (!continueRun()) {
                break;
            }
            EList<Object> objectLs = new BasicEList<Object>();
            for (String columnName : columnNames) {
                objectLs.add(ResultSetUtils.getBigObject(resultSet, columnName));
            }
            // no analyzed column means there is nothing to hand over for this row
            if (objectLs.isEmpty()) {
                continue;
            }
            handleObjects(objectLs, resultSet);
        }
    } finally {
        // The clean-up steps are nested so that a failure while closing one resource cannot prevent the
        // remaining ones from being released.
        try {
            if (resultSet != null) {
                resultSet.close();
            }
        } finally {
            try {
                if (statement != null) {
                    statement.close();
                }
            } finally {
                closeConnection();
            }
        }
    }
    return ok;
}
Also used : AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) Statement(java.sql.Statement) BasicEList(org.eclipse.emf.common.util.BasicEList) ResultSet(java.sql.ResultSet) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator)

Example 2 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

the class ColumnSetIndicatorEvaluator method executeSqlQuery.

/**
 * Empties the row map of the analysis result, then evaluates the statement either against the delimited
 * file or through SQL, depending on {@code isDelimitedFile}.
 *
 * @param sqlStatement the statement forwarded to the selected evaluation method
 * @return the return code produced by the selected evaluation method
 * @throws SQLException if the SQL based evaluation fails
 */
@Override
protected ReturnCode executeSqlQuery(String sqlStatement) throws SQLException {
    final ReturnCode initialCode = new ReturnCode(true);
    // start from an empty row map before any row is evaluated
    analysis.getResults().getIndicToRowMap().clear();
    if (!isDelimitedFile) {
        return evaluateBySql(sqlStatement, initialCode);
    }
    return evaluateByDelimitedFile(sqlStatement, initialCode);
}
Also used : ReturnCode(org.talend.utils.sugars.ReturnCode) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) AnalysisResult(org.talend.dataquality.analysis.AnalysisResult) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator)

Example 3 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

the class ColumnSetIndicatorEvaluator method evaluateByDelimitedFile.

/**
 * Organizes the EList 'objectLs' for a DelimitedFile connection: reads the file behind the connection of
 * the analysis row by row and hands each row over for evaluation.
 * <p>
 * When the escape type of the connection is CSV the file is parsed through a {@code CSVReader}, otherwise
 * through a {@code FileInputDelimited}. Whichever reader was opened is closed in the {@code finally}
 * block, so it is released even when reading a row fails. Any exception raised while reading is logged
 * and turned into a failed return code instead of being propagated.
 *
 * @param sqlStatement not used by this method
 * @param returnCode the return code to update and return; set to failure when the file does not exist or
 * when reading it raises an exception
 * @return the same {@code returnCode} instance that was passed in
 */
private ReturnCode evaluateByDelimitedFile(String sqlStatement, ReturnCode returnCode) {
    DelimitedFileConnection fileConnection = (DelimitedFileConnection) analysis.getContext().getConnection();
    String path = JavaSqlFactory.getURL(fileConnection);
    String rowSeparator = JavaSqlFactory.getRowSeparatorValue(fileConnection);
    IPath iPath = new Path(path);
    File file = iPath.toFile();
    if (!file.exists()) {
        // $NON-NLS-1$
        returnCode.setReturnCode(Messages.getString("ColumnSetIndicatorEvaluator.FileNotFound", file.getName()), false);
        return returnCode;
    }
    CSVReader csvReader = null;
    FileInputDelimited fileInputDelimited = null;
    try {
        List<ModelElement> analysisElementList = this.analysis.getContext().getAnalysedElements();
        EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
        indicToRowMap.clear();
        if (Escape.CSV.equals(fileConnection.getEscapeType())) {
            // use CsvReader to parse.
            csvReader = FileUtils.createCsvReader(file, fileConnection);
            this.useCsvReader(csvReader, file, fileConnection, analysisElementList);
        } else {
            // use TOSDelimitedReader in FileInputDelimited to parse.
            fileInputDelimited = AnalysisExecutorHelper.createFileInputDelimited(fileConnection);
            long currentRow = JavaSqlFactory.getHeadValue(fileConnection);
            int columsCount = 0;
            while (fileInputDelimited.nextRecord()) {
                if (!continueRun()) {
                    break;
                }
                currentRow++;
                // the column count of the first record read is reused for every following record
                if (columsCount == 0) {
                    columsCount = fileInputDelimited.getColumnsCountOfCurrentRow();
                }
                String[] rowValues = new String[columsCount];
                for (int i = 0; i < columsCount; i++) {
                    rowValues[i] = fileInputDelimited.get(i);
                }
                orgnizeObjectsToHandel(path, rowValues, currentRow, analysisElementList, rowSeparator);
            }
        }
    } catch (Exception e) {
        log.error(e, e);
        returnCode.setReturnCode(e.getMessage(), false);
    } finally {
        if (csvReader != null) {
            try {
                csvReader.close();
            } catch (IOException e) {
                log.error(e, e);
            }
        }
        // TDQ-5851~ closed here rather than after the loop so the reader is also released when a row fails
        if (fileInputDelimited != null) {
            try {
                fileInputDelimited.close();
            } catch (Exception e) {
                log.error(e, e);
            }
        }
    }
    return returnCode;
}
Also used : IPath(org.eclipse.core.runtime.IPath) Path(org.eclipse.core.runtime.Path) IPath(org.eclipse.core.runtime.IPath) CSVReader(com.talend.csv.CSVReader) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) DelimitedFileConnection(org.talend.core.model.metadata.builder.connection.DelimitedFileConnection) IOException(java.io.IOException) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) SQLException(java.sql.SQLException) IOException(java.io.IOException) ModelElement(orgomg.cwm.objectmodel.core.ModelElement) FileInputDelimited(org.talend.fileprocess.FileInputDelimited) File(java.io.File)

Example 4 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

the class ColumnSetIndicatorEvaluator method storeDataSet.

/**
 * Stores the rows returned by 'simpleIndicator.getListRows()' into the row map of the analysis result,
 * for every leaf indicator except RowCountIndicator.
 * <p>
 * For each leaf indicator the kept rows are:
 * <ul>
 * <li>DistinctCountIndicator: every row,</li>
 * <li>UniqueCountIndicator: rows whose count equals 1,</li>
 * <li>DuplicateCountIndicator: rows whose count is greater than 1.</li>
 * </ul>
 * The count is read from the last element of each row. Rows that are null or empty, and rows whose count
 * cannot be read as a number, are skipped instead of raising an exception. After usage the list of rows is
 * reset on the indicators which do not store data.
 */
private void storeDataSet() {
    EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
    for (Indicator indicator : analysis.getResults().getIndicators()) {
        if (indicator instanceof SimpleStatIndicator) {
            SimpleStatIndicator simpleIndicator = (SimpleStatIndicator) indicator;
            if (!analysis.getParameters().isStoreData()) {
                // NOTE(review): this ends the whole indicator loop, so the indicators which follow are not
                // visited at all - confirm that 'continue' is not what was intended.
                break;
            }
            if (simpleIndicator.isUsedMapDBMode() && AnalysisHelper.isJavaExecutionEngine(analysis)) {
                // nothing need to do
            } else {
                List<Object[]> listRows = simpleIndicator.getListRows();
                if (listRows == null || listRows.isEmpty()) {
                    // NOTE(review): same remark as above, the remaining indicators are skipped as well.
                    break;
                }
                for (Indicator leafIndicator : simpleIndicator.getLeafIndicators()) {
                    if (leafIndicator instanceof RowCountIndicator) {
                        continue;
                    }
                    List<Object[]> dataList = new ArrayList<Object[]>();
                    AnalyzedDataSet analyzedDataSet = indicToRowMap.get(leafIndicator);
                    if (analyzedDataSet == null) {
                        analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                        indicToRowMap.put(leafIndicator, analyzedDataSet);
                        analyzedDataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
                        analyzedDataSet.setRecordSize(0);
                    }
                    for (Object[] row : listRows) {
                        // a row without any element carries neither values nor a count
                        if (row == null || row.length == 0) {
                            continue;
                        }
                        if (leafIndicator instanceof DistinctCountIndicator) {
                            dataList.add(row);
                        } else if (leafIndicator instanceof UniqueCountIndicator) {
                            Long count = extractTrailingCount(row);
                            if (count != null && count.longValue() == 1) {
                                dataList.add(row);
                            }
                        } else if (leafIndicator instanceof DuplicateCountIndicator) {
                            Long count = extractTrailingCount(row);
                            if (count != null && count.longValue() > 1) {
                                dataList.add(row);
                            }
                        }
                    }
                    analyzedDataSet.setData(dataList);
                }
                // MOD sizhaoliu TDQ-7144 clear the listRows after usage for drill down
                if (!simpleIndicator.isStoreData()) {
                    simpleIndicator.setListRows(new ArrayList<Object[]>());
                }
            }
        }
        if (indicator instanceof AllMatchIndicator) {
            AllMatchIndicator allMatchIndicator = (AllMatchIndicator) indicator;
            if (!allMatchIndicator.isStoreData()) {
                allMatchIndicator.setListRows(new ArrayList<Object[]>());
            }
        }
    }
}

/**
 * Reads the count stored in the last element of a non-empty row.
 *
 * @param row a row with at least one element
 * @return the count as a Long, or null when the last element is null or is not a number which can be
 * expressed as a long
 */
private Long extractTrailingCount(Object[] row) {
    // the last element store the count value.
    Object count = row[row.length - 1];
    if (count == null) {
        return null;
    }
    String countText = count + PluginConstant.EMPTY_STRING;
    if (!NumberUtils.isNumber(countText)) {
        return null;
    }
    if (count instanceof Number) {
        // avoids parsing text such as "1.0" which NumberUtils accepts but Long.valueOf rejects
        return Long.valueOf(((Number) count).longValue());
    }
    try {
        return Long.valueOf(countText);
    } catch (NumberFormatException e) {
        // accepted by NumberUtils (decimal, hexadecimal, exponent...) but not a plain long: no usable count
        return null;
    }
}
Also used : DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) ArrayList(java.util.ArrayList) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) Indicator(org.talend.dataquality.indicators.Indicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) AllMatchIndicator(org.talend.dataquality.indicators.columnset.AllMatchIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) SimpleStatIndicator(org.talend.dataquality.indicators.columnset.SimpleStatIndicator)

Example 5 with AnalyzedDataSet

use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.

the class ColumnSetIndicatorEvaluator method initDataSet.

/**
 * Returns the data list of the data set mapped to the given indicator, creating and registering whatever
 * is missing.
 * <p>
 * When no data set is mapped yet, a new one is created, put into the map, given the maximum number of
 * rows of the analysis parameters as data count and a record size of 0. When the data set has no data
 * list yet, an empty one is attached to it.
 *
 * @param indicator the indicator used as key in the map
 * @param indicToRowMap the map from indicator to analyzed data set; a new entry may be added to it
 * @return the data list held by the data set of the indicator, never null
 */
protected List<Object[]> initDataSet(Indicator indicator, EMap<Indicator, AnalyzedDataSet> indicToRowMap) {
    AnalyzedDataSet dataSet = indicToRowMap.get(indicator);
    if (dataSet == null) {
        // first request for this indicator: register a fresh data set
        dataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
        indicToRowMap.put(indicator, dataSet);
        dataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
        dataSet.setRecordSize(0);
    }
    List<Object[]> existingRows = dataSet.getData();
    if (existingRows != null) {
        return existingRows;
    }
    List<Object[]> freshRows = new ArrayList<Object[]>();
    dataSet.setData(freshRows);
    return freshRows;
}
Also used : AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet)

Aggregations

AnalyzedDataSet (org.talend.dataquality.analysis.AnalyzedDataSet)16 Indicator (org.talend.dataquality.indicators.Indicator)11 DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator)8 UniqueCountIndicator (org.talend.dataquality.indicators.UniqueCountIndicator)8 RowCountIndicator (org.talend.dataquality.indicators.RowCountIndicator)7 ArrayList (java.util.ArrayList)6 DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator)5 SimpleStatIndicator (org.talend.dataquality.indicators.columnset.SimpleStatIndicator)5 ReturnCode (org.talend.utils.sugars.ReturnCode)5 File (java.io.File)4 List (java.util.List)4 IPath (org.eclipse.core.runtime.IPath)4 MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn)4 AnalysisResult (org.talend.dataquality.analysis.AnalysisResult)4 AllMatchIndicator (org.talend.dataquality.indicators.columnset.AllMatchIndicator)4 ModelElement (orgomg.cwm.objectmodel.core.ModelElement)4 BasicEList (org.eclipse.emf.common.util.BasicEList)3 DelimitedFileConnection (org.talend.core.model.metadata.builder.connection.DelimitedFileConnection)3 MetadataTable (org.talend.core.model.metadata.builder.connection.MetadataTable)3 FrequencyIndicator (org.talend.dataquality.indicators.FrequencyIndicator)3