use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.
the class ColumnSetAnalysisDetailsPage method recomputeIndicators.
/**
 * Rebuilds the analysis handler and everything derived from it: the data filter, the analyzed columns, the simple
 * statistics indicator, the all-match indicator and the per-column indicator wrappers.
 * <p>
 * Indicators already stored on the analysis are reused; missing ones are created from the EMF factories. Columns
 * without a datamining type are given {@link DataminingType#NOMINAL}.
 */
public void recomputeIndicators() {
    columnSetAnalysisHandler = new ColumnSetAnalysisHandler();
    columnSetAnalysisHandler.setAnalysis(getCurrentModelElement());
    stringDataFilter = columnSetAnalysisHandler.getStringDataFilter();
    analyzedColumns = columnSetAnalysisHandler.getAnalyzedColumns();

    // Reuse the stored simple statistics indicator only when it exists and is not an unresolved proxy.
    if (columnSetAnalysisHandler.getSimpleStatIndicator() != null
            && !columnSetAnalysisHandler.getSimpleStatIndicator().eIsProxy()) {
        simpleStatIndicator = (SimpleStatIndicator) columnSetAnalysisHandler.getSimpleStatIndicator();
    } else {
        simpleStatIndicator = ColumnsetFactory.eINSTANCE.createSimpleStatIndicator();
        IndicatorsFactory indicatorsFactory = IndicatorsFactory.eINSTANCE;
        simpleStatIndicator.setRowCountIndicator(indicatorsFactory.createRowCountIndicator());
        simpleStatIndicator.setDistinctCountIndicator(indicatorsFactory.createDistinctCountIndicator());
        simpleStatIndicator.setDuplicateCountIndicator(indicatorsFactory.createDuplicateCountIndicator());
        simpleStatIndicator.setUniqueCountIndicator(indicatorsFactory.createUniqueCountIndicator());
    }

    // Same for the all-match indicator; a freshly created one receives its default indicator definition.
    if (columnSetAnalysisHandler.getAllmatchIndicator() != null) {
        allMatchIndicator = (AllMatchIndicator) columnSetAnalysisHandler.getAllmatchIndicator();
    } else {
        allMatchIndicator = ColumnsetFactory.eINSTANCE.createAllMatchIndicator();
        DefinitionHandler.getInstance().setDefaultIndicatorDefinition(allMatchIndicator);
    }

    initializeIndicator(simpleStatIndicator);

    List<ModelElementIndicator> columnIndicators = new ArrayList<ModelElementIndicator>();
    for (ModelElement element : analyzedColumns) {
        // MOD yyi 2011-02-16 17871:delimitefile
        MetadataColumn mdColumn = SwitchHelpers.METADATA_COLUMN_SWITCH.doSwitch(element);
        TdColumn tdColumn = SwitchHelpers.COLUMN_SWITCH.doSwitch(element);
        if (tdColumn == null && mdColumn == null) {
            // neither kind of column: nothing to wrap
            continue;
        }

        ModelElementIndicator columnIndicator;
        if (tdColumn != null) {
            RepositoryNode columnNode = RepositoryNodeHelper.recursiveFind(tdColumn);
            if (columnNode == null) {
                columnNode = RepositoryNodeHelper.createRepositoryNode(tdColumn);
            }
            columnIndicator = ModelElementIndicatorHelper.createModelElementIndicator(columnNode);
        } else {
            // tdColumn is null here, so mdColumn is necessarily non-null
            columnIndicator = ModelElementIndicatorHelper.createDFColumnIndicator(RepositoryNodeHelper.recursiveFind(mdColumn));
        }

        // Fall back to NOMINAL when the element carries no datamining type yet.
        DataminingType effectiveType = MetadataHelper.getDataminingType(element);
        if (effectiveType == null) {
            effectiveType = DataminingType.NOMINAL;
        }
        MetadataHelper.setDataminingType(effectiveType, element);

        Collection<Indicator> regexIndicators = columnSetAnalysisHandler.getRegexMathingIndicators(element);
        if (columnIndicator != null) {
            columnIndicator.setIndicators(regexIndicators.toArray(new Indicator[regexIndicators.size()]));
            columnIndicators.add(columnIndicator);
        }
    }
    currentModelElementIndicators = columnIndicators.toArray(new ModelElementIndicator[columnIndicators.size()]);
}
use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.
the class IndicatorImpl method setDataminingType.
/**
 * <!-- begin-user-doc -->
 * Stores the datamining type of this indicator; a {@code null} argument is replaced by
 * {@code DATAMINING_TYPE_EDEFAULT}. A SET notification carrying the old and new values is sent when
 * notification is required.
 * <!-- end-user-doc -->
 * @generated
 */
@Override
public void setDataminingType(DataminingType newDataminingType) {
    final DataminingType previousType = dataminingType;
    if (newDataminingType == null) {
        dataminingType = DATAMINING_TYPE_EDEFAULT;
    } else {
        dataminingType = newDataminingType;
    }
    if (eNotificationRequired()) {
        eNotify(new ENotificationImpl(this, Notification.SET, IndicatorsPackage.INDICATOR__DATAMINING_TYPE,
                previousType, dataminingType));
    }
}
use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.
the class ModelElementIndicatorRule method patternRule.
/**
 * Decides whether the given indicator type may be enabled for a model element under the given execution language.
 * <p>
 * The decision combines the element's JDBC type, its datamining type, the kind of connection it belongs to and the
 * execution engine. Several switch cases below deliberately fall through to the next case group; the order of the
 * cases is therefore significant.
 *
 * @param indicatorType the kind of indicator to check
 * @param me the model element; only TdColumn and MetadataColumn instances contribute a type and a connection
 * @param language the execution language (SQL and JAVA are tested explicitly)
 * @param node the indicator node, may be null; when null no indicator instance is available and the SQL pattern,
 * regexp and user defined indicator cases return false
 * @return true when the indicator is allowed, false otherwise
 */
public static boolean patternRule(IndicatorEnum indicatorType, ModelElement me, ExecutionLanguage language, IIndicatorNode node) {
// stays 0 when me is neither a TdColumn nor a MetadataColumn
int javaType = 0;
// a MetadataColumn that is not a TdColumn (a delimited file column, going by the variable name)
boolean isDeliFileColumn = !(me instanceof TdColumn) && me instanceof MetadataColumn;
// stays -1 unless me is a TdColumn; later compared with "> 0" and with Java2SqlType.TERADATA_INTERVAL_TO
int isTeradataInterval = -1;
if (me instanceof TdColumn) {
javaType = ((TdColumn) me).getSqlDataType().getJavaDataType();
// Added yyin 20121211 TDQ-6099:
isTeradataInterval = Java2SqlType.isTeradataIntervalType(((TdColumn) me).getSqlDataType().getName());
// ~
} else if (isDeliFileColumn) {
javaType = TalendTypeConvert.convertToJDBCType(((MetadataColumn) me).getTalendType());
}
// the stored datamining type is ignored for file columns: they always use the default for their java type
DataminingType dataminingType = MetadataHelper.getDataminingType(me);
if (dataminingType == null || isDeliFileColumn) {
dataminingType = MetadataHelper.getDefaultDataminingType(javaType);
}
// MOD qiongli 2012-4-25 TDQ-2699
// connection stays null when me is not a column; every connection test below guards against that
Connection connection = null;
if (me instanceof TdColumn) {
connection = ConnectionHelper.getTdDataProvider((TdColumn) me);
} else if (me instanceof MetadataColumn) {
connection = ConnectionHelper.getTdDataProvider((MetadataColumn) me);
}
// indicator stays null when no node is given
Indicator indicator = null;
if (node != null) {
indicator = node.getIndicatorInstance();
}
boolean isSQLEngine = ExecutionLanguage.SQL.equals(language);
boolean isJavaEngine = ExecutionLanguage.JAVA.equals(language);
DbmsLanguage dbmsLanguage = DbmsLanguageFactory.createDbmsLanguage(connection, language);
// LONGVARCHAR columns on DB2 with the SQL engine are decided entirely by enableLongVarchar
if (javaType == Types.LONGVARCHAR && isSQLEngine) {
if (connection != null && ConnectionHelper.isDb2(connection)) {
return enableLongVarchar(indicatorType, dataminingType, me);
}
}
// each flag below is true only for the named database type combined with the SQL engine
// MOD qiongli 2012-8-10 TDQ-5907 need to disable indicators for hive with sql engine.
boolean isHiveSQL = connection == null ? false : ConnectionHelper.isHive(connection) && isSQLEngine;
// MOD msjian 2013-5-15 TDQ-7275 need to disable indicators for teradata with sql engine.
boolean isTeradataSQL = connection == null ? false : ConnectionHelper.isTeradata(connection) && isSQLEngine;
// MOD msjian 2016-8-25 TDQ-12349 need to disable indicators for ingres with sql engine.
boolean isIngres = connection == null ? false : ConnectionHelper.isIngress(connection) && isSQLEngine;
// MOD msjian 2016-8-25 TDQ-12464 need to disable indicators for sybase with sql engine.
boolean isSybase = connection == null ? false : ConnectionHelper.isSybase(connection) && isSQLEngine;
// MOD qiongli 2013-8-27 TDQ-2104 disable soundex indicators for vertica with sql engine (used together with
// isHiveSQL in the soundex case).
boolean isVerticaSQL = connection == null ? false : ConnectionHelper.isVertica(connection) && isSQLEngine;
switch(indicatorType) {
case CountsIndicatorEnum:
case RowCountIndicatorEnum:
case NullCountIndicatorEnum:
case DistinctCountIndicatorEnum:
case UniqueIndicatorEnum:
case DuplicateCountIndicatorEnum:
// count indicators are always allowed, whatever the datamining type
return true;
case DefValueCountIndicatorEnum:
// only TdColumn elements can provide an initial (default) value
Expression initialValue = null;
if (me instanceof TdColumn) {
initialValue = ((TdColumn) me).getInitialValue();
}
if (initialValue != null && initialValue.getBody() != null) {
// non nullable numeric column give a non null default value as ''
// so an empty body is accepted only for text types
return initialValue.getBody().length() != 0 || Java2SqlType.isTextInSQL(javaType);
}
break;
case BlankCountIndicatorEnum:
// MOD xwang 2011-07-29 bug TDQ-1731 disable blank count checkable for other data type but Text
if (!Java2SqlType.isTextInSQL(javaType)) {
return false;
} else if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
return false;
} else {
return true;
}
case TextIndicatorEnum:
case MinLengthIndicatorEnum:
case MinLengthWithNullIndicatorEnum:
case MinLengthWithBlankIndicatorEnum:
case MinLengthWithBlankNullIndicatorEnum:
case MaxLengthIndicatorEnum:
case MaxLengthWithNullIndicatorEnum:
case MaxLengthWithBlankIndicatorEnum:
case MaxLengthWithBlankNullIndicatorEnum:
case AverageLengthIndicatorEnum:
case AverageLengthWithNullIndicatorEnum:
case AverageLengthWithBlankIndicatorEnum:
case AverageLengthWithNullBlankIndicatorEnum:
// text statistics: text type required, with a NOMINAL or UNSTRUCTURED_TEXT datamining type
if (Java2SqlType.isTextInSQL(javaType)) {
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
if (isTeradataInterval > 0) {
return false;
}
if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.UNSTRUCTURED_TEXT) {
return true;
}
}
break;
case EastAsiaPatternFreqIndicatorEnum:
case EastAsiaPatternLowFreqIndicatorEnum:
if (isSQLEngine && isEmpryExpression(indicator, dbmsLanguage)) {
return false;
} else if (isJavaEngine) {
return true;
}
// no break: when neither branch returned, the checks of the following case groups apply as well
case BenfordLawFrequencyIndicatorEnum:
// disable the benford for interval type: both sql and java
if (isTeradataInterval > 0) {
return false;
}
// no break: falls through to the pattern frequency checks
case PatternFreqIndicatorEnum:
case PatternLowFreqIndicatorEnum:
if (isTeradataSQL || isIngres || isSybase) {
return false;
}
// no break: falls through to the mode check
case ModeIndicatorEnum:
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) {
return false;
}
// no break: falls through to the datamining type check shared with the frequency indicators
case FrequencyIndicatorEnum:
case LowFrequencyIndicatorEnum:
if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL) {
return true;
}
break;
// MOD zshen 2010-01-27 Date Pattern frequency indicator
case DatePatternFreqIndicatorEnum:
// java engine only, on date or text types
if (isJavaEngine && (Java2SqlType.isDateInSQL(javaType) || Java2SqlType.isTextInSQL(javaType))) {
return true;
}
break;
// MOD mzhao 2009-03-05 Soundex frequency indicator
case SoundexIndicatorEnum:
case SoundexLowIndicatorEnum:
// neither date nor numeric, with a NOMINAL or INTERVAL datamining type
if (!Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
if (isHiveSQL || isVerticaSQL) {
return false;
}
// Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type.
if (isTeradataInterval > 0 && isSQLEngine) {
return false;
}
return true;
}
break;
case MeanIndicatorEnum:
case MedianIndicatorEnum:
case IQRIndicatorEnum:
case LowerQuartileIndicatorEnum:
case UpperQuartileIndicatorEnum:
// graphics and database yet.
// numeric types only (the date alternative is commented out), with an INTERVAL datamining type
if (Java2SqlType.isNumbericInSQL(javaType)) /* || Java2SqlType.isDateInSQL(javaType) */
{
if (dataminingType == DataminingType.INTERVAL) {
// with hive and the SQL engine only the mean indicator of this group is allowed
if (isHiveSQL && !(indicatorType == IndicatorEnum.MeanIndicatorEnum)) {
return false;
}
return true;
}
}
break;
case BoxIIndicatorEnum:
case RangeIndicatorEnum:
case MinValueIndicatorEnum:
case MaxValueIndicatorEnum:
// the graphics and database yet.
// numeric or date types, with an INTERVAL datamining type
if (Java2SqlType.isNumbericInSQL(javaType) || Java2SqlType.isDateInSQL(javaType)) {
if (dataminingType == DataminingType.INTERVAL) {
return true;
}
}
break;
case DateFrequencyIndicatorEnum:
case WeekFrequencyIndicatorEnum:
case MonthFrequencyIndicatorEnum:
case QuarterFrequencyIndicatorEnum:
case YearFrequencyIndicatorEnum:
case DateLowFrequencyIndicatorEnum:
case WeekLowFrequencyIndicatorEnum:
case MonthLowFrequencyIndicatorEnum:
case QuarterLowFrequencyIndicatorEnum:
case YearLowFrequencyIndicatorEnum:
// Mod yyin 20120511 TDQ-5241
// date types that are not time types, with a NOMINAL or INTERVAL datamining type
if (Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isTimeSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
return true;
}
break;
case BinFrequencyIndicatorEnum:
case BinLowFrequencyIndicatorEnum:
// numeric types, with a NOMINAL or INTERVAL datamining type
if (Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
return true;
}
break;
case ValidPhoneCountIndicatorEnum:
case PossiblePhoneCountIndicatorEnum:
case ValidRegCodeCountIndicatorEnum:
case InvalidRegCodeCountIndicatorEnum:
case WellFormE164PhoneCountIndicatorEnum:
case WellFormIntePhoneCountIndicatorEnum:
case WellFormNationalPhoneCountIndicatorEnum:
case PhoneNumbStatisticsIndicatorEnum:
case FormatFreqPieIndictorEnum:
// java engine only, with a NOMINAL or INTERVAL datamining type
if (isJavaEngine && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) {
return true;
}
break;
case SqlPatternMatchingIndicatorEnum:
if (node == null) {
return false;
}
if (!isSQLEngine) {
return false;
}
// allowed only when the dbms language yields a regular expression for the indicator's pattern
// (pattern and returnExpression are declared here and reused by the following case groups)
Pattern pattern = IndicatorHelper.getPattern(indicator);
Expression returnExpression = dbmsLanguage.getRegexp(pattern);
if (returnExpression != null) {
return true;
}
break;
case RegexpMatchingIndicatorEnum:
if (node == null) {
return false;
}
// same test as the SQL pattern case, but without the SQL engine requirement
pattern = IndicatorHelper.getPattern(indicator);
returnExpression = dbmsLanguage.getRegexp(pattern);
if (returnExpression != null) {
return true;
}
break;
case UserDefinedIndicatorEnum:
// judge language
if (node == null) {
return false;
}
Indicator judi = null;
try {
judi = UDIHelper.adaptToJavaUDI(indicator);
} catch (Throwable e) {
// any failure while adapting the indicator disables it
return false;
}
// when an adapted java indicator exists, the expression lookup below uses it instead of the original
if (judi != null) {
indicator = judi;
}
returnExpression = dbmsLanguage.getExpression(indicator);
// "&&" binds tighter than "||": rejected when (java engine and no java UDI) or (sql engine and no expression)
if (isJavaEngine && judi == null || isSQLEngine && returnExpression == null) {
return false;
}
return true;
default:
return false;
}
// reached by every case that ended with "break" without having returned
return false;
}
use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.
the class ColumnCorrelationAnalysisHandler method setDatamingType.
/**
 * Method "setDatamingType". Resolves the datamining type from its literal, stores it on the column and flags the
 * column's resource, if any, as modified.
 *
 * @param dataminingTypeLiteral the literal expression of the datamining type used for the analysis
 * @param column a column
 */
public void setDatamingType(String dataminingTypeLiteral, TdColumn column) {
    // NOTE(review): the result of DataminingType.get is handed over without a null check.
    MetadataHelper.setDataminingType(DataminingType.get(dataminingTypeLiteral), column);

    final Resource owningResource = column.eResource();
    if (owningResource == null) {
        // column is not contained in a resource: nothing to flag
        return;
    }
    // tell that the resource has been modified.
    // it would be better to handle modifications with EMF Commands
    // this.modifiedResources.add(resource);
    owningResource.setModified(true);
}
use of org.talend.dataquality.indicators.DataminingType in project tdq-studio-se by Talend.
the class ModelElementAnalysisHandler method setDatamingType.
/**
 * Stores the datamining type designated by the given literal on the model element and flags the element's
 * resource, if any, as modified. Elements that are not a MetadataColumn are left untouched.
 *
 * @param dataminingTypeLiteral the literal of the datamining type to set
 * @param modelElement the element to update; ignored unless it is a MetadataColumn
 */
public void setDatamingType(String dataminingTypeLiteral, ModelElement modelElement) {
    final DataminingType resolvedType = DataminingType.get(dataminingTypeLiteral);
    if (!(modelElement instanceof MetadataColumn)) {
        // only metadata columns carry a datamining type here
        return;
    }
    MetadataHelper.setDataminingType(resolvedType, modelElement);

    final Resource owningResource = modelElement.eResource();
    if (owningResource == null) {
        return;
    }
    // tell that the resource has been modified.
    // it would be better to handle modifications with EMF Commands
    owningResource.setModified(true);
}
Aggregations