use of org.talend.dataquality.indicators.columnset.SimpleStatIndicator in project tdq-studio-se by Talend.
the class DrillDownEditorInput method filterAdaptColumnHeader.
/**
 * Collect the names of the columns which will be displayed on the drill down editor.
 * <ul>
 * <li>When the current indicator has no analyzed element and is contained in a {@link SimpleStatIndicator}, the
 * headers are delegated to {@code columnHeaderForColumnSet}.</li>
 * <li>When the menu type is the "value" type, only the name of the analyzed element is returned.</li>
 * <li>When the analyzed element is a {@link TdColumn}, the names of the columns given by
 * {@code getColumnsByTdColumn} are returned.</li>
 * <li>When the analyzed element is another {@link MetadataColumn}, the labels of all columns of its owning
 * {@link MetadataTable} are returned.</li>
 * </ul>
 *
 * @return the column headers; an empty list when none of the cases above applies.
 */
public List<String> filterAdaptColumnHeader() {
    Indicator currentIndicator = this.getCurrIndicator();
    ModelElement analyzedElement = currentIndicator.getAnalyzedElement();
    String currentMenuType = this.getMenuType();

    // feature 19192: drill down for a column set with the java engine.
    if (analyzedElement == null && currentIndicator.eContainer() instanceof SimpleStatIndicator) {
        return columnHeaderForColumnSet((SimpleStatIndicator) currentIndicator.eContainer());
    }

    List<String> headers = new ArrayList<String>();

    // feature 16796: the "value" menu only shows the analyzed element itself.
    if (DrillDownUtils.judgeMenuType(currentMenuType, DrillDownUtils.MENU_VALUE_TYPE)) {
        headers.add(ModelElementHelper.getName(currentIndicator.getAnalyzedElement()));
        return headers;
    }

    // the TdColumn test must stay before the MetadataColumn test, exactly as in the original chain.
    if (analyzedElement instanceof TdColumn) {
        for (TdColumn tdColumn : getColumnsByTdColumn((TdColumn) analyzedElement)) {
            headers.add(tdColumn.getName());
        }
        return headers;
    }

    if (analyzedElement instanceof MetadataColumn) {
        MetadataTable ownerTable = ColumnHelper.getColumnOwnerAsMetadataTable((MetadataColumn) analyzedElement);
        for (MetadataColumn metadataColumn : ownerTable.getColumns()) {
            headers.add(metadataColumn.getLabel());
        }
    }
    return headers;
}
use of org.talend.dataquality.indicators.columnset.SimpleStatIndicator in project tdq-studio-se by Talend.
the class ColumnSetIndicatorEvaluator method storeDataSet.
/**
 * Store the drill down data coming from {@code simpleIndicator.getListRows()} for every leaf indicator except
 * {@link RowCountIndicator}.
 * <p>
 * For each leaf indicator the rows are filtered on the count value held in the last cell of the row:
 * <ul>
 * <li>{@link DistinctCountIndicator}: every row is kept;</li>
 * <li>{@link UniqueCountIndicator}: only rows whose count is exactly 1 are kept;</li>
 * <li>{@link DuplicateCountIndicator}: only rows whose count is greater than 1 are kept.</li>
 * </ul>
 * Rows without a readable count (empty row, null or non integral text) are skipped for the unique and duplicate
 * indicators instead of raising an exception. Nothing is stored when the indicator uses the MapDB mode together
 * with the java engine.
 */
private void storeDataSet() {
    EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
    for (Indicator indicator : analysis.getResults().getIndicators()) {
        if (indicator instanceof SimpleStatIndicator) {
            SimpleStatIndicator simpleIndicator = (SimpleStatIndicator) indicator;
            if (!analysis.getParameters().isStoreData()) {
                // NOTE(review): 'break' leaves the whole indicator loop, so an AllMatchIndicator coming later is
                // not reset below - confirm this is intended before changing it to 'continue'.
                break;
            }
            boolean handledByMapDB = simpleIndicator.isUsedMapDBMode() && AnalysisHelper.isJavaExecutionEngine(analysis);
            if (!handledByMapDB) {
                List<Object[]> listRows = simpleIndicator.getListRows();
                if (listRows == null || listRows.isEmpty()) {
                    // NOTE(review): same remark as above, this also ends the whole loop.
                    break;
                }
                for (Indicator leafIndicator : simpleIndicator.getLeafIndicators()) {
                    if (leafIndicator instanceof RowCountIndicator) {
                        continue;
                    }
                    AnalyzedDataSet analyzedDataSet = indicToRowMap.get(leafIndicator);
                    if (analyzedDataSet == null) {
                        analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                        indicToRowMap.put(leafIndicator, analyzedDataSet);
                        analyzedDataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
                        analyzedDataSet.setRecordSize(0);
                    }
                    List<Object[]> dataList = new ArrayList<Object[]>();
                    for (Object[] row : listRows) {
                        if (leafIndicator instanceof DistinctCountIndicator) {
                            dataList.add(row);
                        } else if (leafIndicator instanceof UniqueCountIndicator) {
                            Long count = extractRowCount(row);
                            if (count != null && count.longValue() == 1) {
                                dataList.add(row);
                            }
                        } else if (leafIndicator instanceof DuplicateCountIndicator) {
                            Long count = extractRowCount(row);
                            if (count != null && count.longValue() > 1) {
                                dataList.add(row);
                            }
                        }
                    }
                    analyzedDataSet.setData(dataList);
                }
                // MOD sizhaoliu TDQ-7144 clear the listRows after usage for drill down
                if (!simpleIndicator.isStoreData()) {
                    simpleIndicator.setListRows(new ArrayList<Object[]>());
                }
            }
        }
        if (indicator instanceof AllMatchIndicator) {
            AllMatchIndicator allMatchIndicator = (AllMatchIndicator) indicator;
            if (!allMatchIndicator.isStoreData()) {
                allMatchIndicator.setListRows(new ArrayList<Object[]>());
            }
        }
    }
}

/**
 * Read the count value stored in the last cell of a row.
 *
 * @param row a row of {@code getListRows()}; may be null or empty.
 * @return the count, or null when the row has no last cell or the cell does not hold an integral number.
 */
private Long extractRowCount(Object[] row) {
    if (row == null || row.length == 0) {
        return null;
    }
    Object rawCount = row[row.length - 1];
    if (rawCount == null) {
        return null;
    }
    if (rawCount instanceof Number) {
        return Long.valueOf(((Number) rawCount).longValue());
    }
    String countText = rawCount + PluginConstant.EMPTY_STRING;
    if (!NumberUtils.isNumber(countText)) {
        return null;
    }
    try {
        return Long.valueOf(countText);
    } catch (NumberFormatException e) {
        // NumberUtils.isNumber also accepts decimal, hexadecimal and type-suffixed text which Long cannot parse.
        return null;
    }
}
use of org.talend.dataquality.indicators.columnset.SimpleStatIndicator in project tdq-studio-se by Talend.
the class MultiColumnAnalysisExecutor method instantiateQuery.
/**
 * Build the SQL statement of a column set indicator and set it as the instantiated expression of the indicator.
 * <p>
 * The statement selects the grouping columns plus the computed columns (numeric functions, date functions and
 * the count-all expression), grouped by the grouping columns, with the data filter of the analysis added as a
 * where clause. When the indicator is a {@link SimpleStatIndicator}, an expression is also instantiated for each
 * of its leaf indicators so that drill down works with the sql engine.
 * <p>
 * For an "EXCEL" dbms using the null-count CASE expression, an error is set and a warning dialog is opened; the
 * query construction below is still executed afterwards.
 *
 * @param indicator the indicator to instantiate; nothing is done unless its EClass is a
 * ColumnSetMultiValueIndicator.
 */
private void instantiateQuery(Indicator indicator) {
    if (!ColumnsetPackage.eINSTANCE.getColumnSetMultiValueIndicator().isSuperTypeOf(indicator.eClass())) {
        return;
    }
    ColumnSetMultiValueIndicator multiValueIndicator = (ColumnSetMultiValueIndicator) indicator;
    final EList<ModelElement> analyzedColumns = multiValueIndicator.getAnalyzedColumns();
    final EList<String> numericFunctions = initializeNumericFunctions(multiValueIndicator);
    final EList<String> dateFunctions = initializeDateFunctions(multiValueIndicator);

    // ADD msjian 2011-5-30 17479: Excel Odbc connection can not run well on the correlation analysis
    // note: this feature is not supported now, if support, delete this
    final String nullCountCase = "SUM(CASE WHEN {0} IS NULL THEN 1 ELSE 0 END)"; //$NON-NLS-1$
    if ("EXCEL".equals(dbms().getDbmsName()) //$NON-NLS-1$
            && (dateFunctions.contains(nullCountCase) || numericFunctions.contains(nullCountCase))) {
        setError(Messages.getString("MultiColumnAnalysisExecutor.errMessage")); //$NON-NLS-1$
        Display.getDefault().syncExec(new Runnable() {

            public void run() {
                MessageDialog.openWarning(PlatformUI.getWorkbench().getActiveWorkbenchWindow().getShell(),
                        Messages.getString("MultiColumnAnalysisExecutor.errTitle"), //$NON-NLS-1$
                        getErrorMessage());
            }
        });
    }
    // ~

    // get indicator definition
    final Expression genericExpression = dbms().getSqlExpression(indicator.getIndicatorDefinition());

    // separate nominal from numeric columns
    List<String> nominalColumns = new ArrayList<String>();
    for (ModelElement nominalColumn : multiValueIndicator.getNominalColumns()) {
        nominalColumns.add(getQuotedColumnName(nominalColumn));
    }

    // apply every numeric function to every numeric column, then every date function to every date column
    List<String> computedColumns = new ArrayList<String>();
    for (ModelElement numericColumn : multiValueIndicator.getNumericColumns()) {
        for (String function : numericFunctions) {
            computedColumns.add(replaceVariablesLow(function, getQuotedColumnName(numericColumn)));
        }
    }
    for (ModelElement dateColumn : multiValueIndicator.getDateColumns()) {
        for (String function : dateFunctions) {
            computedColumns.add(replaceVariablesLow(function, getQuotedColumnName(dateColumn)));
        }
    }
    // add count(*)
    computedColumns.add(multiValueIndicator.getCountAll());

    // MOD msjian TDQ-7254: the columnset analysis does not need to consider the datamining type, so it groups on
    // all analyzed columns; any other analysis type groups on the nominal columns only.
    List<String> groupingColumns;
    if (AnalysisType.COLUMN_SET == cachedAnalysis.getParameters().getAnalysisType()) {
        groupingColumns = new ArrayList<String>();
        for (ModelElement analyzedColumn : analyzedColumns) {
            groupingColumns.add(getQuotedColumnName(analyzedColumn));
        }
    } else {
        groupingColumns = nominalColumns;
    }
    // TDQ-7254~

    String selectClause = createSelect(groupingColumns, computedColumns);
    String groupByClause = createGroupBy(groupingColumns);

    // all columns must belong to the same table
    TdColumn firstColumn = SwitchHelpers.COLUMN_SWITCH.doSwitch(analyzedColumns.get(0));
    String tableName = dbms().getQueryColumnSetWithPrefix(firstColumn);
    this.catalogOrSchema = dbms().getCatalogOrSchemaName(firstColumn);

    // definition is SELECT <%=__COLUMN_NAMES__%> FROM <%=__TABLE_NAME__%> GROUP BY <%=__GROUP_BY_ALIAS__%>
    String groupedQuery = dbms().fillGenericQueryWithColumnTableAndAlias(genericExpression.getBody(), selectClause,
            tableName, groupByClause);

    // handle data filter
    String dataFilter = ContextHelper.getDataFilterWithoutContext(cachedAnalysis);
    if (dataFilter == null) {
        dataFilter = PluginConstant.EMPTY_STRING;
    }
    String filteredQuery = dbms().addWhereToStatement(groupedQuery, dataFilter);
    indicator.setInstantiatedExpression(
            BooleanExpressionHelper.createTdExpression(genericExpression.getLanguage(), filteredQuery));

    // MOD qiongli 2011-3-30 feature 19192.allow drill down for sql engine.
    if (!ColumnsetPackage.eINSTANCE.getSimpleStatIndicator().isSuperTypeOf(indicator.eClass())) {
        return;
    }
    SimpleStatIndicator simpleStatIndicator = (SimpleStatIndicator) indicator;
    // MOD TDQ-7287 lost some columns(type!=norminal) when view values in column set ana. yyin 20130514
    String leafSelectClause = createSelect(groupingColumns, new ArrayList<String>());
    for (Indicator leafIndicator : simpleStatIndicator.getLeafIndicators()) {
        final Expression leafGenericExpression = dbms().getSqlExpression(leafIndicator.getIndicatorDefinition());
        String leafGroupedQuery = dbms().fillGenericQueryWithColumnTableAndAlias(leafGenericExpression.getBody(),
                leafSelectClause, tableName, groupByClause);
        String leafFilteredQuery = dbms().addWhereToStatement(leafGroupedQuery, dataFilter);
        leafIndicator.setInstantiatedExpression(
                BooleanExpressionHelper.createTdExpression(leafGenericExpression.getLanguage(), leafFilteredQuery));
    }
}
use of org.talend.dataquality.indicators.columnset.SimpleStatIndicator in project tdq-studio-se by Talend.
the class ColumnSetIndicatorEvaluator method handleObjects.
/**
 * Handle the objects of one row and store the row data for a delimited file.
 * <p>
 * Every column set multi-value indicator of the analysis handles {@code objectLs}. For a
 * {@link SimpleStatIndicator}, when the analysis stores data and the data set of a leaf indicator holds fewer
 * rows than the maximum number of rows, one new row of {@code columnList.size()} cells is appended to that data
 * set. The processing stops as soon as {@code continueRun()} returns false.
 *
 * @param objectLs the objects given to each indicator; nothing is done when this list is empty.
 * @param rowValues the raw values of the row; cells beyond its length are filled with the empty string.
 * @param columnList the columns of the row; only its size is used here, as the width of the stored row.
 */
private void handleObjects(EList<Object> objectLs, String[] rowValues, List<MetadataColumn> columnList) {
    if (objectLs.size() == 0) {
        return;
    }
    EList<Indicator> allIndicators = analysis.getResults().getIndicators();
    EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
    if (allIndicators == null) {
        return;
    }
    for (Indicator indicator : allIndicators) {
        if (!this.continueRun()) {
            break;
        }
        if (!ColumnsetPackage.eINSTANCE.getColumnSetMultiValueIndicator().isSuperTypeOf(indicator.eClass())) {
            continue;
        }
        indicator.handle(objectLs);
        // feature 19192,store all rows value for RowCountIndicator
        if (!(indicator instanceof SimpleStatIndicator)) {
            continue;
        }
        SimpleStatIndicator simpleStatIndicator = (SimpleStatIndicator) indicator;
        for (Indicator leafIndicator : simpleStatIndicator.getLeafIndicators()) {
            if (!this.continueRun()) {
                break;
            }
            // (distinct count, unique count, duplicate count, etc).
            if (!analysis.getParameters().isStoreData()) {
                continue;
            }
            List<Object[]> storedRows = initDataSet(leafIndicator, indicToRowMap);
            int newRowIndex = storedRows.size();
            Object[] newRow = new Object[columnList.size()];
            if (newRowIndex >= analysis.getParameters().getMaxNumberRows()) {
                continue;
            }
            for (int columnIndex = 0; columnIndex < columnList.size(); columnIndex++) {
                if (!this.continueRun()) {
                    break;
                }
                Object cellValue = columnIndex < rowValues.length ? rowValues[columnIndex] : PluginConstant.EMPTY_STRING;
                if (newRowIndex < storedRows.size()) {
                    // the row is already in the list: write the cell in place
                    storedRows.get(newRowIndex)[columnIndex] = cellValue;
                } else {
                    // first written cell: fill it, then register the new row in the list
                    newRow[columnIndex] = cellValue;
                    storedRows.add(newRow);
                }
            }
        }
    }
}
use of org.talend.dataquality.indicators.columnset.SimpleStatIndicator in project tdq-studio-se by Talend.
the class ColumnSetAnalysisDetailsPage method saveAnalysis.
/**
 * Save the column set analysis edited in this page.
 * <p>
 * The analysis is cleared and rebuilt from the current state of the page: rows loaded, sample data display,
 * execution language, data filter, connection and analyzed columns of the selected repository nodes. The simple
 * statistics indicator is always added when columns are selected; the all-match indicator is added only when at
 * least one regular expression matching indicator is attached to it. The analysis item is then written and the
 * dirty flags of the tree viewer and of the data filter are reset.
 *
 * @throws DataprofilerCoreException declared by the overridden method.
 */
@Override
public void saveAnalysis() throws DataprofilerCoreException {
    columnSetAnalysisHandler.changeDefaultRowLoaded(rowLoadedText.getText());
    columnSetAnalysisHandler.changeSampleDataShowWay(sampleDataShowWayCombo.getText());
    // columnSetAnalysisHandler.setName(columnSetAnalysisHandler.getName().replace(" ", ""));
    for (Domain filterDomain : getCurrentModelElement().getParameters().getDataFilter()) {
        filterDomain.setName(getCurrentModelElement().getName());
    }
    // ~

    // start from a clean analysis
    columnSetAnalysisHandler.clearAnalysis();
    simpleStatIndicator.getAnalyzedColumns().clear();
    allMatchIndicator.getAnalyzedColumns().clear();

    // set execute engine
    Analysis currentAnalysis = columnSetAnalysisHandler.getAnalysis();
    currentAnalysis.getParameters().setExecutionLanguage(ExecutionLanguage.get(execLang));

    // set data filter
    columnSetAnalysisHandler.setStringDataFilter(dataFilterComp.getDataFilterString());

    // save analysis
    List<IRepositoryNode> selectedNodes = treeViewer.getColumnSetMultiValueList();
    if (selectedNodes == null || selectedNodes.size() == 0) {
        currentAnalysis.getContext().setConnection(null);
    } else {
        // the connection is taken from the first selected node
        ConnectionItem connectionItem = (ConnectionItem) selectedNodes.get(0).getObject().getProperty().getItem();
        Connection connection = connectionItem.getConnection();
        if (connection.eIsProxy()) {
            // Resolve the connection again
            connection = (Connection) EObjectHelper.resolveObject(connection);
        }
        currentAnalysis.getContext().setConnection(connection);

        List<ModelElement> analyzedColumns = new ArrayList<ModelElement>();
        for (IRepositoryNode selectedNode : selectedNodes) {
            IRepositoryViewObject viewObject = selectedNode.getObject();
            analyzedColumns.add(((MetadataColumnRepositoryObject) viewObject).getTdColumn());
        }
        simpleStatIndicator.getAnalyzedColumns().addAll(analyzedColumns);
        columnSetAnalysisHandler.addIndicator(analyzedColumns, simpleStatIndicator);

        // ~ MOD mzhao feature 13040. 2010-05-21
        allMatchIndicator.getCompositeRegexMatchingIndicators().clear();
        ModelElementIndicator[] elementIndicators = treeViewer.getModelElementIndicator();
        if (elementIndicators != null) {
            for (ModelElementIndicator elementIndicator : elementIndicators) {
                Indicator[] patternIndicators = elementIndicator.getPatternIndicators();
                for (Indicator patternIndicator : patternIndicators) {
                    if (!(patternIndicator instanceof RegexpMatchingIndicator)) {
                        continue;
                    }
                    // MOD yyi 2011-06-15 22419:column set pattern for MDM
                    IRepositoryViewObject columnObject = elementIndicator.getModelElementRepositoryNode().getObject();
                    ModelElement analyzedElement = ((MetadataColumnRepositoryObject) columnObject).getTdColumn();
                    patternIndicator.setAnalyzedElement(analyzedElement);
                    allMatchIndicator.getCompositeRegexMatchingIndicators().add((RegexpMatchingIndicator) patternIndicator);
                }
            }
        }
        if (allMatchIndicator.getCompositeRegexMatchingIndicators().size() > 0) {
            allMatchIndicator.getAnalyzedColumns().addAll(analyzedColumns);
            columnSetAnalysisHandler.addIndicator(analyzedColumns, allMatchIndicator);
        }
        // ~
    }

    TaggedValueHelper.setTaggedValue(getCurrentModelElement(), TaggedValueHelper.IS_USE_SAMPLE_DATA,
            isRunWithSampleData.toString());
    // save the number of connections per analysis
    this.saveNumberOfConnectionsPerAnalysis();
    // 2011.1.12 MOD by zhsne to unify anlysis and connection id when saving.
    this.nameText.setText(columnSetAnalysisHandler.getName());

    // TDQ-5581,if has removed emlements(patten),should remove dependency each other before saving.
    // MOD yyi 2012-02-08 TDQ-4621:Explicitly set true for updating dependencies.
    ReturnCode saveResult = ElementWriterFactory.getInstance().createAnalysisWrite()
            .save(getCurrentRepNode().getObject().getProperty().getItem(), true);
    // MOD yyi 2012-02-03 TDQ-3602:Avoid to rewriting all analyzes after saving, no reason to update all analyzes
    // which is depended in the referred connection.
    // Extract saving log function.
    // @see org.talend.dataprofiler.core.ui.editor.analysis.AbstractAnalysisMetadataPage#logSaved(ReturnCode)
    logSaved(saveResult);

    treeViewer.setDirty(false);
    dataFilterComp.setDirty(false);
}
Aggregations