use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.
the class ColumnSetIndicatorEvaluator method evaluateByDelimitedFile.
/**
 * Reads the delimited file behind the analysis connection and feeds its rows to the indicator handling
 * code of this evaluator.
 * <p>
 * The analysis' indicator-to-row map is cleared first. When the connection's escape type is
 * {@code Escape.CSV} the file is parsed through a {@code CSVReader} (see {@code useCsvReader}); otherwise it
 * is parsed through a {@code FileInputDelimited} and every record is passed to
 * {@code orgnizeObjectsToHandel}. Both readers are closed in the {@code finally} block so that they are
 * released even when parsing fails half-way.
 *
 * @param sqlStatement not used by this method
 * @param returnCode result holder; it is flagged as failed (with a message) when the file does not exist or
 *        when an exception is raised while reading
 * @return the {@code returnCode} instance that was passed in
 */
private ReturnCode evaluateByDelimitedFile(String sqlStatement, ReturnCode returnCode) {
    DelimitedFileConnection fileConnection = (DelimitedFileConnection) analysis.getContext().getConnection();
    String path = JavaSqlFactory.getURL(fileConnection);
    String rowSeparator = JavaSqlFactory.getRowSeparatorValue(fileConnection);
    IPath iPath = new Path(path);
    File file = iPath.toFile();
    if (!file.exists()) {
        returnCode.setReturnCode(Messages.getString("ColumnSetIndicatorEvaluator.FileNotFound", file.getName()), false); //$NON-NLS-1$
        return returnCode;
    }

    CSVReader csvReader = null;
    FileInputDelimited fileInputDelimited = null;
    try {
        List<ModelElement> analysisElementList = this.analysis.getContext().getAnalysedElements();
        EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
        indicToRowMap.clear();
        if (Escape.CSV.equals(fileConnection.getEscapeType())) {
            // use CsvReader to parse.
            csvReader = FileUtils.createCsvReader(file, fileConnection);
            this.useCsvReader(csvReader, file, fileConnection, analysisElementList);
        } else {
            // use TOSDelimitedReader in FileInputDelimited to parse.
            fileInputDelimited = AnalysisExecutorHelper.createFileInputDelimited(fileConnection);
            // row numbering continues after the head value configured on the connection
            long currentRow = JavaSqlFactory.getHeadValue(fileConnection);
            // the column count is taken once, from the first record that reports a non-zero count
            int columsCount = 0;
            while (fileInputDelimited.nextRecord()) {
                if (!continueRun()) {
                    break;
                }
                currentRow++;
                if (columsCount == 0) {
                    columsCount = fileInputDelimited.getColumnsCountOfCurrentRow();
                }
                String[] rowValues = new String[columsCount];
                for (int i = 0; i < columsCount; i++) {
                    rowValues[i] = fileInputDelimited.get(i);
                }
                orgnizeObjectsToHandel(path, rowValues, currentRow, analysisElementList, rowSeparator);
            }
        }
    } catch (Exception e) {
        log.error(e, e);
        returnCode.setReturnCode(e.getMessage(), false);
    } finally {
        // TDQ-5851: always release the delimited reader, also when reading failed.
        if (fileInputDelimited != null) {
            try {
                fileInputDelimited.close();
            } catch (Exception e) {
                log.error(e, e);
            }
        }
        if (csvReader != null) {
            try {
                csvReader.close();
            } catch (IOException e) {
                log.error(e, e);
            }
        }
    }
    return returnCode;
}
use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.
the class ColumnSetIndicatorEvaluator method storeDataSet.
/**
 * Copies the rows held by each {@code SimpleStatIndicator} ({@code simpleIndicator.getListRows()}) into the
 * analysis' indicator-to-row map, one {@code AnalyzedDataSet} per leaf indicator, skipping
 * {@code RowCountIndicator}.
 * <p>
 * The last element of every row is read as the occurrence count of that row: distinct-count leaves keep
 * every row, unique-count leaves keep rows whose count is 1 and duplicate-count leaves keep rows whose count
 * is greater than 1. Null rows are skipped and an empty row is treated as having no count, instead of
 * failing with an exception.
 * <p>
 * After the copy, the row list of a {@code SimpleStatIndicator} or {@code AllMatchIndicator} that does not
 * store data is replaced by an empty list.
 */
private void storeDataSet() {
    EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap();
    for (Indicator indicator : analysis.getResults().getIndicators()) {
        if (indicator instanceof SimpleStatIndicator) {
            SimpleStatIndicator simpleIndicator = (SimpleStatIndicator) indicator;
            // NOTE(review): this break (and the one on an empty row list below) leaves the whole indicator
            // loop, so later indicators are not visited at all. Kept as-is; confirm it is intended.
            if (!analysis.getParameters().isStoreData()) {
                break;
            }
            boolean handledByMapDB = simpleIndicator.isUsedMapDBMode() && AnalysisHelper.isJavaExecutionEngine(analysis);
            if (!handledByMapDB) {
                List<Object[]> listRows = simpleIndicator.getListRows();
                if (listRows == null || listRows.isEmpty()) {
                    break;
                }
                for (Indicator leafIndicator : simpleIndicator.getLeafIndicators()) {
                    if (leafIndicator instanceof RowCountIndicator) {
                        continue;
                    }
                    List<Object[]> dataList = new ArrayList<Object[]>();
                    AnalyzedDataSet analyzedDataSet = indicToRowMap.get(leafIndicator);
                    if (analyzedDataSet == null) {
                        analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                        indicToRowMap.put(leafIndicator, analyzedDataSet);
                        analyzedDataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
                        analyzedDataSet.setRecordSize(0);
                    }
                    for (Object[] row : listRows) {
                        if (row == null) {
                            continue;
                        }
                        // the last element stores the count value; an empty row has none.
                        Object count = row.length > 0 ? row[row.length - 1] : null;
                        if (leafIndicator instanceof DistinctCountIndicator) {
                            dataList.add(row);
                        } else if (leafIndicator instanceof UniqueCountIndicator) {
                            if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) {
                                if (Long.parseLong(count + PluginConstant.EMPTY_STRING) == 1) {
                                    dataList.add(row);
                                }
                            }
                        } else if (leafIndicator instanceof DuplicateCountIndicator) {
                            if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) {
                                if (Long.parseLong(count + PluginConstant.EMPTY_STRING) > 1) {
                                    dataList.add(row);
                                }
                            }
                        }
                    }
                    analyzedDataSet.setData(dataList);
                }
                // MOD sizhaoliu TDQ-7144 clear the listRows after usage for drill down
                if (!simpleIndicator.isStoreData()) {
                    simpleIndicator.setListRows(new ArrayList<Object[]>());
                }
            }
        }
        if (indicator instanceof AllMatchIndicator) {
            AllMatchIndicator allMatchIndicator = (AllMatchIndicator) indicator;
            if (!allMatchIndicator.isStoreData()) {
                allMatchIndicator.setListRows(new ArrayList<Object[]>());
            }
        }
    }
}
use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.
the class RowMatchExplorer method getdataFilterIndex.
/**
 * Finds the position, in the indicator list of the analysis results, of the first indicator matching the
 * argument.
 *
 * @param nameOrIndicator either a {@code String}, which is compared with the name of each indicator's
 *        analyzed element, or any other object, which is compared with the indicators themselves; when
 *        {@code null} the current indicator of this explorer is used instead
 * @return the zero-based index of the first match, or -1 when nothing matches
 */
private int getdataFilterIndex(Object nameOrIndicator) {
    Object target = nameOrIndicator == null ? this.indicator : nameOrIndicator;
    if (target == null) {
        // nothing to look for
        return -1;
    }
    boolean matchByName = target instanceof String;
    int index = 0;
    for (Indicator candidate : this.analysis.getResults().getIndicators()) {
        Object current = candidate;
        if (matchByName) {
            // an indicator without analyzed element (or with an unnamed one) simply never matches
            current = candidate.getAnalyzedElement() == null ? null : candidate.getAnalyzedElement().getName();
        }
        if (target.equals(current)) {
            return index;
        }
        index++;
    }
    return -1;
}
use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.
the class ModelElementIndicatorRule method patternRule.
/**
 * Decides whether the given indicator type may be enabled for the given model element under the given
 * execution language.
 * <p>
 * The decision combines: the JDBC type of the element, its data-mining type, the kind of connection the
 * element belongs to (only relevant together with the SQL engine) and, for pattern matching and user defined
 * indicators, whether the {@code DbmsLanguage} can resolve an expression for the indicator of {@code node}.
 * <p>
 * Several case groups of the switch have no {@code break} between them and therefore fall through into the
 * next group; the statement order inside the switch matters.
 *
 * @param indicatorType the indicator type to test
 * @param me the analyzed element; only {@code TdColumn} and {@code MetadataColumn} are inspected, for any
 *        other type the java type stays 0 and the connection stays null
 * @param language the execution language (SQL or JAVA engine)
 * @param node the indicator node providing the indicator instance; may be null, in which case the SQL
 *        pattern, regexp and user defined indicator cases return false
 * @return true when the indicator type is allowed, false otherwise (including every unlisted type)
 */
public static boolean patternRule(IndicatorEnum indicatorType, ModelElement me, ExecutionLanguage language, IIndicatorNode node) {
int javaType = 0;
// a delimited-file column is a MetadataColumn that is not a TdColumn
boolean isDeliFileColumn = !(me instanceof TdColumn) && me instanceof MetadataColumn;
// stays -1 unless the element is a TdColumn
int isTeradataInterval = -1;
if (me instanceof TdColumn) {
javaType = ((TdColumn) me).getSqlDataType().getJavaDataType();
// Added yyin 20121211 TDQ-6099:
isTeradataInterval = Java2SqlType.isTeradataIntervalType(((TdColumn) me).getSqlDataType().getName());
// ~
} else if (isDeliFileColumn) {
javaType = TalendTypeConvert.convertToJDBCType(((MetadataColumn) me).getTalendType());
}
DataminingType dataminingType = MetadataHelper.getDataminingType(me);
// file columns always use the default data-mining type derived from the java type
if (dataminingType == null || isDeliFileColumn) {
dataminingType = MetadataHelper.getDefaultDataminingType(javaType);
}
// MOD qiongli 2012-4-25 TDQ-2699
Connection connection = null;
if (me instanceof TdColumn) {
connection = ConnectionHelper.getTdDataProvider((TdColumn) me);
} else if (me instanceof MetadataColumn) {
connection = ConnectionHelper.getTdDataProvider((MetadataColumn) me);
}
Indicator indicator = null;
if (node != null) {
indicator = node.getIndicatorInstance();
}
boolean isSQLEngine = ExecutionLanguage.SQL.equals(language);
boolean isJavaEngine = ExecutionLanguage.JAVA.equals(language);
// NOTE(review): connection may still be null here; presumably createDbmsLanguage copes with that - confirm
DbmsLanguage dbmsLanguage = DbmsLanguageFactory.createDbmsLanguage(connection, language);
// LONGVARCHAR columns of a DB2 connection under the SQL engine are decided entirely by enableLongVarchar
if (javaType == Types.LONGVARCHAR && isSQLEngine) {
if (connection != null && ConnectionHelper.isDb2(connection)) {
return enableLongVarchar(indicatorType, dataminingType, me);
}
}
// Each flag below is true only when a connection exists, is of the named database kind AND the SQL engine
// is used (the '&&' binds tighter than the conditional operator).
// MOD qiongli 2012-8-10 TDQ-5907 need to disabled indicators for hive with sql engine.
boolean isHiveSQL = connection == null ? false : ConnectionHelper.isHive(connection) && isSQLEngine;
// MOD msjian 2013-5-15 TDQ-7275 need to disabled indicators for teradata with sql engine.
boolean isTeradataSQL = connection == null ? false : ConnectionHelper.isTeradata(connection) && isSQLEngine;
// MOD msjian 2016-8-25 TDQ-12349 need to disabled indicators for ingres with sql engine.
boolean isIngres = connection == null ? false : ConnectionHelper.isIngress(connection) && isSQLEngine;
// MOD msjian 2016-8-25 TDQ-12464 need to disabled indicators for sybase with sql engine.
boolean isSybase = connection == null ? false : ConnectionHelper.isSybase(connection) && isSQLEngine;
// MOD qiongli 2013-8-27 TDQ-2104 disabled soundex indicators for hive with sql engine.
// (the flag computed here is the Vertica one; both flags are checked in the soundex case below)
boolean isVerticaSQL = connection == null ? false : ConnectionHelper.isVertica(connection) && isSQLEngine;
switch(indicatorType) {
// counting indicators are always allowed
case CountsIndicatorEnum:
case RowCountIndicatorEnum:
case NullCountIndicatorEnum:
case DistinctCountIndicatorEnum:
case UniqueIndicatorEnum:
case DuplicateCountIndicatorEnum:
// if (dataminingType == DataminingType.NOMINAL) {
return true;
case DefValueCountIndicatorEnum:
// only a TdColumn can provide an initial (default) value; without one the result is false
Expression initialValue = null;
if (me instanceof TdColumn) {
initialValue = ((TdColumn) me).getInitialValue();
}
if (initialValue != null && initialValue.getBody() != null) {
// non nullable numeric column give a non null default value as ''
return initialValue.getBody().length() != 0 || Java2SqlType.isTextInSQL(javaType);
}
break;
case BlankCountIndicatorEnum:
// MOD xwang 2011-07-29 bug TDQ-1731 disable blank count checkable for other data type but Text
if (!Java2SqlType.isTextInSQL(javaType)) {
return false;
} else if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
return false;
} else {
return true;
}
// text statistics: text java type, not a Teradata interval, nominal or unstructured-text data
case TextIndicatorEnum:
case MinLengthIndicatorEnum:
case MinLengthWithNullIndicatorEnum:
case MinLengthWithBlankIndicatorEnum:
case MinLengthWithBlankNullIndicatorEnum:
case MaxLengthIndicatorEnum:
case MaxLengthWithNullIndicatorEnum:
case MaxLengthWithBlankIndicatorEnum:
case MaxLengthWithBlankNullIndicatorEnum:
case AverageLengthIndicatorEnum:
case AverageLengthWithNullIndicatorEnum:
case AverageLengthWithBlankIndicatorEnum:
case AverageLengthWithNullBlankIndicatorEnum:
if (Java2SqlType.isTextInSQL(javaType)) {
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
if (isTeradataInterval > 0) {
return false;
}
if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.UNSTRUCTURED_TEXT) {
return true;
}
}
break;
// From here down to the frequency cases there is no break: each group adds its own veto and then falls
// through into the checks of the groups below it.
case EastAsiaPatternFreqIndicatorEnum:
case EastAsiaPatternLowFreqIndicatorEnum:
if (isSQLEngine && isEmpryExpression(indicator, dbmsLanguage)) {
return false;
} else if (isJavaEngine) {
return true;
}
// falls through when neither branch above returned
case BenfordLawFrequencyIndicatorEnum:
// disable the benford for interval type: both sql and java
if (isTeradataInterval > 0) {
return false;
}
// falls through
case PatternFreqIndicatorEnum:
case PatternLowFreqIndicatorEnum:
if (isTeradataSQL || isIngres || isSybase) {
return false;
}
// falls through
case ModeIndicatorEnum:
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
return false;
}
// falls through
case FrequencyIndicatorEnum:
case LowFrequencyIndicatorEnum:
if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL) {
return true;
}
break;
// MOD zshen 2010-01-27 Date Pattern frequency indicator
case DatePatternFreqIndicatorEnum:
// java engine only, on date or text columns
if (isJavaEngine && (Java2SqlType.isDateInSQL(javaType) || Java2SqlType.isTextInSQL(javaType))) {
return true;
}
break;
// MOD mzhao 2009-03-05 Soundex frequency indicator
case SoundexIndicatorEnum:
case SoundexLowIndicatorEnum:
if (!Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
if (isHiveSQL || isVerticaSQL) {
return false;
}
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
if (isTeradataInterval > 0 && isSQLEngine) {
return false;
}
return true;
}
break;
case MeanIndicatorEnum:
case MedianIndicatorEnum:
case IQRIndicatorEnum:
case LowerQuartileIndicatorEnum:
case UpperQuartileIndicatorEnum:
// graphics and database yet.
if (Java2SqlType.isNumbericInSQL(javaType)) /* || Java2SqlType.isDateInSQL(javaType) */
{
if (dataminingType == DataminingType.INTERVAL) {
// with Hive under the SQL engine only the mean indicator of this group is allowed
if (isHiveSQL && !(indicatorType == IndicatorEnum.MeanIndicatorEnum)) {
return false;
}
return true;
}
}
break;
case BoxIIndicatorEnum:
case RangeIndicatorEnum:
case MinValueIndicatorEnum:
case MaxValueIndicatorEnum:
// the graphics and database yet.
if (Java2SqlType.isNumbericInSQL(javaType) || Java2SqlType.isDateInSQL(javaType)) {
if (dataminingType == DataminingType.INTERVAL) {
return true;
}
}
break;
case DateFrequencyIndicatorEnum:
case WeekFrequencyIndicatorEnum:
case MonthFrequencyIndicatorEnum:
case QuarterFrequencyIndicatorEnum:
case YearFrequencyIndicatorEnum:
case DateLowFrequencyIndicatorEnum:
case WeekLowFrequencyIndicatorEnum:
case MonthLowFrequencyIndicatorEnum:
case QuarterLowFrequencyIndicatorEnum:
case YearLowFrequencyIndicatorEnum:
// Mod yyin 20120511 TDQ-5241
// date (but not time) columns with nominal or interval data
if (Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isTimeSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
return true;
}
break;
case BinFrequencyIndicatorEnum:
case BinLowFrequencyIndicatorEnum:
if (Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
return true;
}
break;
// phone number and format indicators: java engine only
case ValidPhoneCountIndicatorEnum:
case PossiblePhoneCountIndicatorEnum:
case ValidRegCodeCountIndicatorEnum:
case InvalidRegCodeCountIndicatorEnum:
case WellFormE164PhoneCountIndicatorEnum:
case WellFormIntePhoneCountIndicatorEnum:
case WellFormNationalPhoneCountIndicatorEnum:
case PhoneNumbStatisticsIndicatorEnum:
case FormatFreqPieIndictorEnum:
if (isJavaEngine && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
return true;
}
break;
case SqlPatternMatchingIndicatorEnum:
// needs an indicator node and the SQL engine
if (node == null) {
return false;
}
if (!isSQLEngine) {
return false;
}
// 'pattern' and 'returnExpression' are declared here and reused by the cases below (switch-block scope)
Pattern pattern = IndicatorHelper.getPattern(indicator);
Expression returnExpression = dbmsLanguage.getRegexp(pattern);
if (returnExpression != null) {
return true;
}
break;
case RegexpMatchingIndicatorEnum:
// same lookup as the SQL pattern case, but without the SQL engine restriction
if (node == null) {
return false;
}
pattern = IndicatorHelper.getPattern(indicator);
returnExpression = dbmsLanguage.getRegexp(pattern);
if (returnExpression != null) {
return true;
}
break;
case UserDefinedIndicatorEnum:
// judge language
if (node == null) {
return false;
}
Indicator judi = null;
try {
judi = UDIHelper.adaptToJavaUDI(indicator);
} catch (Throwable e) {
// any failure while adapting to a java UDI disables the indicator
return false;
}
if (judi != null) {
indicator = judi;
}
returnExpression = dbmsLanguage.getExpression(indicator);
// java engine needs a java UDI, SQL engine needs an expression for the dbms
if (isJavaEngine && judi == null || isSQLEngine && returnExpression == null) {
return false;
}
return true;
default:
return false;
}
// reached by every case that ended with 'break' without returning
return false;
}
use of org.talend.dataquality.indicators.Indicator in project tdq-studio-se by Talend.
the class UDIUtils method createIndicatorUnit.
/**
 * Creates a user defined indicator from the given definition, attaches it to the given model element
 * indicator and records the analysis' usage dependency on the definition.
 * <p>
 * {@code null} is returned, and nothing is attached, when:
 * <ul>
 * <li>the element already holds a {@code UserDefIndicator} with the definition's name (a warning is
 * shown);</li>
 * <li>the active editor is not an {@code AnalysisEditor};</li>
 * <li>no expression is available for the editor's execution engine (the user is asked whether the
 * definition should be opened in its editor).</li>
 * </ul>
 *
 * @param udid the user defined indicator definition
 * @param meIndicator the model element indicator receiving the new indicator
 * @param analysis the analysis the indicator is created for
 * @return a one-element array holding the created indicator unit, or {@code null} in the cases listed above
 * @throws Throwable whatever the called helpers raise
 */
public static IndicatorUnit[] createIndicatorUnit(IndicatorDefinition udid, ModelElementIndicator meIndicator, Analysis analysis) throws Throwable {
    // the same user defined indicator can't be added twice
    for (Indicator existing : meIndicator.getIndicators()) {
        // MOD xwang 2011-08-01 bug TDQ-2730
        if (udid.getName().equals(existing.getName()) && existing instanceof UserDefIndicator) {
            MessageUI.openWarning(DefaultMessagesImpl.getString("UDIUtils.UDISelected", udid.getName())); //$NON-NLS-1$
            return null;
        }
    }

    Indicator userIndicator = UDIFactory.createUserDefIndicator(udid);
    userIndicator.setIndicatorDefinition(udid);
    // MOD mzhao feature 11128, Handle Java User Defined Indicator.
    Indicator javaIndicator = UDIHelper.adaptToJavaUDI(userIndicator);
    if (javaIndicator != null) {
        userIndicator = javaIndicator;
    }

    IEditorPart currentEditor = CorePlugin.getDefault().getWorkbench().getActiveWorkbenchWindow().getActivePage().getActiveEditor();
    // instanceof is false for null, so this also covers "no active editor"
    if (!(currentEditor instanceof AnalysisEditor)) {
        return null;
    }

    ExecutionLanguage engine = ((AnalysisEditor) currentEditor).getUIExecuteEngin();
    boolean javaEngine = ExecutionLanguage.JAVA.equals(engine);
    DbmsLanguage dbms = DbmsLanguageFactory.createDbmsLanguage(analysis, engine);
    Expression sqlExpression = dbms.getExpression(userIndicator);
    String engineLabel;
    if (javaEngine) {
        engineLabel = engine.getName();
    } else {
        engineLabel = dbms.getDbmsName();
    }

    // MOD qiongli 2013.5.22 TDQ-7282.if don't find a valid java expression for JUDI,should also pop this dialog.
    boolean expressionMissing = javaEngine ? javaIndicator == null : sqlExpression == null;
    if (expressionMissing) {
        // offer to open the definition editor
        boolean openDefinition = MessageDialog.openQuestion(PlatformUI.getWorkbench().getActiveWorkbenchWindow().getShell(),
                DefaultMessagesImpl.getString("PatternUtilities.Warning"), //$NON-NLS-1$
                DefaultMessagesImpl.getString("UDIUtils.NoExpression", engineLabel)); //$NON-NLS-1$
        if (openDefinition) {
            RepositoryNode definitionNode = RepositoryNodeHelper.recursiveFind(udid);
            if (RepositoryNodeHelper.canOpenEditor(definitionNode)) {
                new OpenItemEditorAction(new IRepositoryNode[] { definitionNode }).run();
            }
        }
        return null;
    }

    // make sure the indicator has parameters and a valid domain to carry the definition's parameters
    IndicatorParameters indicatorParameters = userIndicator.getParameters();
    if (indicatorParameters == null) {
        indicatorParameters = IndicatorsFactory.eINSTANCE.createIndicatorParameters();
        userIndicator.setParameters(indicatorParameters);
    }
    Domain validDomain = indicatorParameters.getIndicatorValidDomain();
    if (validDomain == null) {
        validDomain = DomainHelper.createDomain("JAVA_UDI_PARAMETERS"); //$NON-NLS-1$
        indicatorParameters.setIndicatorValidDomain(validDomain);
    }
    List<IndicatorDefinitionParameter> definitionParameters = udid.getIndicatorDefinitionParameter();
    for (IndicatorDefinitionParameter definitionParameter : definitionParameters) {
        JavaUDIIndicatorParameter javaParameter = DomainHelper.createJavaUDIIndicatorParameter(definitionParameter.getKey(),
                definitionParameter.getValue());
        validDomain.getJavaUDIIndicatorParameter().add(javaParameter);
    }

    IndicatorEnum indicatorType = IndicatorEnum.findIndicatorEnum(userIndicator.eClass());
    IndicatorUnit createdUnit = meIndicator.addSpecialIndicator(indicatorType, userIndicator);
    DependenciesHandler.getInstance().setUsageDependencyOn(analysis, udid);
    return new IndicatorUnit[] { createdUnit };
}
Aggregations