use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.
the class ColumnSetIndicatorEvaluator method evaluateBySql.
/**
 * Executes the given SQL statement and organizes every returned row into an EList 'objectLs' which is then
 * passed to {@code handleObjects(objectLs, resultSet)}.
 * <p>
 * One value per analyzed element name is read from each row. Iteration stops as soon as
 * {@code continueRun()} returns false. The result set, the statement and the connection are always released,
 * even when closing one of them fails.
 *
 * @param sqlStatement the SQL query to execute
 * @param ok the return code to update; it is set to failure (with a message) when the statement yields no
 * result set
 * @return the given {@code ok} instance
 * @throws SQLException if executing the statement, reading the rows or closing the JDBC resources fails
 */
private ReturnCode evaluateBySql(String sqlStatement, ReturnCode ok) throws SQLException {
    Statement statement = null;
    ResultSet resultSet = null;
    try {
        statement = createStatement();
        if (continueRun()) {
            if (log.isInfoEnabled()) {
                log.info("Executing query: " + sqlStatement); // $NON-NLS-1$
            }
            statement.execute(sqlStatement);
        }
        // get the results
        // NOTE(review): when continueRun() was false above nothing has been executed, yet the result set is
        // still requested here - confirm that reporting "no result set" is the intended outcome in that case.
        resultSet = statement.getResultSet();
        List<String> columnNames = getAnalyzedElementsName();
        if (resultSet == null) {
            String mess = Messages.getString("Evaluator.NoResultSet", sqlStatement); // $NON-NLS-1$
            log.warn(mess);
            ok.setReturnCode(mess, false);
            return ok;
        }
        EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
        indicToRowMap.clear();
        while (resultSet.next()) {
            // MOD yyi 2012-04-11 TDQ-4916:Add memory control for java analysis.
            if (!continueRun()) {
                break;
            }
            EList<Object> objectLs = new BasicEList<Object>();
            for (String columnName : columnNames) {
                objectLs.add(ResultSetUtils.getBigObject(resultSet, columnName));
            }
            if (objectLs.isEmpty()) {
                continue;
            }
            handleObjects(objectLs, resultSet);
        }
    } finally {
        // Nested try/finally: a failure while closing one resource must not prevent the remaining ones
        // (statement, connection) from being released.
        try {
            if (resultSet != null) {
                resultSet.close();
            }
        } finally {
            try {
                if (statement != null) {
                    statement.close();
                }
            } finally {
                closeConnection();
            }
        }
    }
    return ok;
}
use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.
the class ColumnSetIndicatorEvaluator method executeSqlQuery.
/**
 * Empties the indicator-to-row map of the analysis results, then evaluates the statement either against the
 * delimited file (when {@code isDelimitedFile} is set) or through SQL.
 *
 * @param sqlStatement the statement handed over to the selected evaluation method
 * @return the return code produced by the selected evaluation method
 * @throws SQLException if the SQL based evaluation fails
 */
@Override
protected ReturnCode executeSqlQuery(String sqlStatement) throws SQLException {
    ReturnCode initialCode = new ReturnCode(true);
    // start from an empty indicator-to-row map
    analysis.getResults().getIndicToRowMap().clear();
    if (!isDelimitedFile) {
        return evaluateBySql(sqlStatement, initialCode);
    }
    return evaluateByDelimitedFile(sqlStatement, initialCode);
}
use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.
the class ColumnSetIndicatorEvaluator method evaluateByDelimitedFile.
/**
 * Organizes the rows of a DelimitedFile connection and hands them over for evaluation.
 * <p>
 * Files whose escape type is {@code Escape.CSV} are parsed through a CSVReader; every other file is read
 * record by record through a FileInputDelimited. Whichever reader was opened is closed in the
 * {@code finally} block, so it is released even when reading fails half way.
 *
 * @param sqlStatement not used by this method
 * @param returnCode the return code to update; it is set to failure when the file does not exist or when an
 * exception occurs while reading it
 * @return the given {@code returnCode} instance
 */
private ReturnCode evaluateByDelimitedFile(String sqlStatement, ReturnCode returnCode) {
    DelimitedFileConnection fileConnection = (DelimitedFileConnection) analysis.getContext().getConnection();
    String path = JavaSqlFactory.getURL(fileConnection);
    String rowSeparator = JavaSqlFactory.getRowSeparatorValue(fileConnection);
    IPath iPath = new Path(path);
    File file = iPath.toFile();
    if (!file.exists()) {
        returnCode.setReturnCode(Messages.getString("ColumnSetIndicatorEvaluator.FileNotFound", file.getName()), false); // $NON-NLS-1$
        return returnCode;
    }
    CSVReader csvReader = null;
    // declared outside the try block so that the finally block can release it
    FileInputDelimited fileInputDelimited = null;
    try {
        List<ModelElement> analysisElementList = this.analysis.getContext().getAnalysedElements();
        EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
        indicToRowMap.clear();
        if (Escape.CSV.equals(fileConnection.getEscapeType())) {
            // use CsvReader to parse.
            csvReader = FileUtils.createCsvReader(file, fileConnection);
            this.useCsvReader(csvReader, file, fileConnection, analysisElementList);
        } else {
            // use TOSDelimitedReader in FileInputDelimited to parse.
            fileInputDelimited = AnalysisExecutorHelper.createFileInputDelimited(fileConnection);
            // row numbering starts after the head value of the connection
            long currentRow = JavaSqlFactory.getHeadValue(fileConnection);
            int columsCount = 0;
            while (fileInputDelimited.nextRecord()) {
                if (!continueRun()) {
                    break;
                }
                currentRow++;
                // the column count is taken from the first record that reports a non-zero count and is then
                // reused for all following records
                if (columsCount == 0) {
                    columsCount = fileInputDelimited.getColumnsCountOfCurrentRow();
                }
                String[] rowValues = new String[columsCount];
                for (int i = 0; i < columsCount; i++) {
                    rowValues[i] = fileInputDelimited.get(i);
                }
                orgnizeObjectsToHandel(path, rowValues, currentRow, analysisElementList, rowSeparator);
            }
        }
    } catch (Exception e) {
        log.error(e, e);
        returnCode.setReturnCode(e.getMessage(), false);
    } finally {
        // TDQ-5851~ close the delimited reader on every path, not only after a fully successful read
        if (fileInputDelimited != null) {
            try {
                fileInputDelimited.close();
            } catch (Exception e) {
                log.error(e, e);
            }
        }
        if (csvReader != null) {
            try {
                csvReader.close();
            } catch (IOException e) {
                log.error(e, e);
            }
        }
    }
    return returnCode;
}
use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.
the class ColumnSetIndicatorEvaluator method storeDataSet.
/**
 * Stores the rows coming from 'simpleIndicator.getListRows()' into the indicator-to-row map of the analysis
 * results, for every leaf indicator except RowCountIndicator.
 * <p>
 * For each row the last element is read as the count value: DistinctCountIndicator keeps every row,
 * UniqueCountIndicator keeps the rows whose count equals 1 and DuplicateCountIndicator keeps the rows whose
 * count is greater than 1. Rows that are null or have no element carry no count and are skipped. The list
 * rows of SimpleStatIndicator and AllMatchIndicator instances are replaced by an empty list when the
 * indicator itself does not store data.
 */
private void storeDataSet() {
    EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
    for (Indicator indicator : analysis.getResults().getIndicators()) {
        if (indicator instanceof SimpleStatIndicator) {
            SimpleStatIndicator simpleIndicator = (SimpleStatIndicator) indicator;
            if (!analysis.getParameters().isStoreData()) {
                // NOTE(review): this leaves the whole indicator loop, so indicators that come later
                // (including AllMatchIndicator) are not visited - confirm this is intended.
                break;
            }
            if (simpleIndicator.isUsedMapDBMode() && AnalysisHelper.isJavaExecutionEngine(analysis)) {
                // nothing need to do
            } else {
                List<Object[]> listRows = simpleIndicator.getListRows();
                if (listRows == null || listRows.isEmpty()) {
                    // NOTE(review): same remark as above, this ends the loop over all indicators.
                    break;
                }
                for (Indicator leafIndicator : simpleIndicator.getLeafIndicators()) {
                    if (leafIndicator instanceof RowCountIndicator) {
                        continue;
                    }
                    List<Object[]> dataList = new ArrayList<Object[]>();
                    AnalyzedDataSet analyzedDataSet = indicToRowMap.get(leafIndicator);
                    if (analyzedDataSet == null) {
                        analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                        indicToRowMap.put(leafIndicator, analyzedDataSet);
                        analyzedDataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
                        analyzedDataSet.setRecordSize(0);
                    }
                    for (Object[] object : listRows) {
                        // a null or empty row has no count element; indexing it would throw
                        if (object == null || object.length == 0) {
                            continue;
                        }
                        // the last element store the count value.
                        Object count = object[object.length - 1];
                        if (leafIndicator instanceof DistinctCountIndicator) {
                            dataList.add(object);
                        } else if (leafIndicator instanceof UniqueCountIndicator) {
                            if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) {
                                if (Long.valueOf(count + PluginConstant.EMPTY_STRING).longValue() == 1) {
                                    dataList.add(object);
                                }
                            }
                        } else if (leafIndicator instanceof DuplicateCountIndicator) {
                            if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) {
                                if (Long.valueOf(count + PluginConstant.EMPTY_STRING).longValue() > 1) {
                                    dataList.add(object);
                                }
                            }
                        }
                    }
                    analyzedDataSet.setData(dataList);
                }
                // MOD sizhaoliu TDQ-7144 clear the listRows after usage for drill down
                if (!simpleIndicator.isStoreData()) {
                    simpleIndicator.setListRows(new ArrayList<Object[]>());
                }
            }
        }
        if (indicator instanceof AllMatchIndicator) {
            AllMatchIndicator allMatchIndicator = (AllMatchIndicator) indicator;
            if (!allMatchIndicator.isStoreData()) {
                allMatchIndicator.setListRows(new ArrayList<Object[]>());
            }
        }
    }
}
use of org.talend.dataquality.analysis.AnalyzedDataSet in project tdq-studio-se by Talend.
the class ColumnSetIndicatorEvaluator method initDataSet.
/**
 * Returns the data list of the AnalyzedDataSet mapped to the given indicator, creating and registering the
 * data set and/or its data list when either one is missing.
 *
 * @param indicator the indicator used as key in the map
 * @param indicToRowMap the map holding one AnalyzedDataSet per indicator
 * @return the data list attached to the indicator's data set, never null
 */
protected List<Object[]> initDataSet(Indicator indicator, EMap<Indicator, AnalyzedDataSet> indicToRowMap) {
    AnalyzedDataSet dataSet = indicToRowMap.get(indicator);
    if (dataSet == null) {
        // no data set registered yet for this indicator: create one and put it into the map
        dataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
        indicToRowMap.put(indicator, dataSet);
        dataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
        dataSet.setRecordSize(0);
    }
    List<Object[]> existingRows = dataSet.getData();
    if (existingRows != null) {
        return existingRows;
    }
    // the data set has no data list yet: attach a fresh one
    List<Object[]> freshRows = new ArrayList<Object[]>();
    dataSet.setData(freshRows);
    return freshRows;
}
Aggregations