Search in sources :

Example 1 with MinLengthIndicator

use of org.talend.dataquality.indicators.MinLengthIndicator in project tdq-studio-se by Talend.

In class TextIndicatorImpl, method basicSetMinLengthIndicator.

/**
 * <!-- begin-user-doc -->
 * Stores {@code newMinLengthIndicator} in the {@code minLengthIndicator} field and, when
 * notification is required, records a {@code Notification.SET} event carrying the previous and
 * the new value.
 *
 * @param newMinLengthIndicator the indicator to store (the previous one is only reported in the
 *        notification)
 * @param msgs the notification chain built so far, may be {@code null}
 * @return {@code msgs} unchanged when no notification is required; otherwise {@code msgs} with
 *         the new notification added, or the notification itself when {@code msgs} was
 *         {@code null}
 * <!-- end-user-doc -->
 * @generated
 */
public NotificationChain basicSetMinLengthIndicator(MinLengthIndicator newMinLengthIndicator, NotificationChain msgs) {
    final MinLengthIndicator previous = minLengthIndicator;
    minLengthIndicator = newMinLengthIndicator;
    if (!eNotificationRequired()) {
        return msgs;
    }
    ENotificationImpl setEvent = new ENotificationImpl(this, Notification.SET, IndicatorsPackage.TEXT_INDICATOR__MIN_LENGTH_INDICATOR, previous, newMinLengthIndicator);
    if (msgs != null) {
        msgs.add(setEvent);
        return msgs;
    }
    // No chain yet: the notification itself becomes the chain.
    return setEvent;
}
Also used : ENotificationImpl(org.eclipse.emf.ecore.impl.ENotificationImpl) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator)

Example 2 with MinLengthIndicator

use of org.talend.dataquality.indicators.MinLengthIndicator in project tdq-studio-se by Talend.

In class IndicatorEvaluator, method initDataSet.

/**
 * Looks up (creating on demand) the {@link AnalyzedDataSet} registered for {@code indicator} and
 * returns the row list inside it that matches the indicator kind.
 * <ul>
 * <li>Frequency, min-length and max-length indicators: the list stored in the frequency map under
 * a key derived from {@code object}.</li>
 * <li>Indicators flagged as valid-row or invalid-row: the matching list inside the pattern
 * data.</li>
 * <li>Anything else: the plain data list.</li>
 * </ul>
 * Missing maps and lists are created and attached to the data set as a side effect.
 *
 * @param indicator the indicator whose rows are being collected
 * @param indicToRowMap indicator-to-data-set map; a new entry is added when none exists
 * @param object the current value, used only to build the frequency key; may be {@code null}
 * @return the list rows should be appended to; {@code null} only when the pattern-data slot does
 *         not hold an {@code ArrayList}
 */
@SuppressWarnings("unchecked")
protected List<Object[]> initDataSet(Indicator indicator, EMap<Indicator, AnalyzedDataSet> indicToRowMap, Object object) {
    AnalyzedDataSet dataSet = indicToRowMap.get(indicator);
    if (dataSet == null) {
        dataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
        indicToRowMap.put(indicator, dataSet);
        dataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
        dataSet.setRecordSize(0);
    }

    final boolean lengthIndicator = indicator instanceof MinLengthIndicator || indicator instanceof MaxLengthIndicator;

    // --- rows grouped by a key (frequency-like storage)
    if (lengthIndicator || indicator instanceof FrequencyIndicator) {
        Map<Object, List<Object[]>> rowsByKey = dataSet.getFrequencyData();
        if (rowsByKey == null) {
            rowsByKey = new HashMap<Object, List<Object[]>>();
            dataSet.setFrequencyData(rowsByKey);
        }

        // The order of these tests matters: null first, then length indicators, then empty string.
        final String key;
        if (object == null) {
            key = SpecialValueDisplay.NULL_FIELD;
        } else if (lengthIndicator) {
            // length indicators group rows by the length of the value's text
            key = Integer.toString(object.toString().length());
        } else if (object.equals(PluginConstant.EMPTY_STRING)) {
            key = SpecialValueDisplay.EMPTY_FIELD;
        } else if (indicator instanceof PatternLowFreqIndicator) {
            key = ((PatternLowFreqIndicator) indicator).convertCharacters(object.toString());
        } else if (indicator instanceof PatternFreqIndicator) {
            key = ((PatternFreqIndicator) indicator).convertCharacters(object.toString());
        } else if (indicator instanceof FormatFreqPieIndicator) {
            // MOD qiongli 2011-8-26, feature TDQ-3253.
            key = ((FormatFreqPieIndicator) indicator).getCurrentKey();
        } else {
            key = object.toString();
        }

        List<Object[]> keyedRows = rowsByKey.get(key);
        if (keyedRows == null) {
            keyedRows = new ArrayList<Object[]>();
            rowsByKey.put(key, keyedRows);
        }
        return keyedRows;
    }

    // --- rows split into valid / invalid lists (pattern storage)
    if (indicator.isInValidRow() || indicator.isValidRow()) {
        List<Object> patternData = dataSet.getPatternData();
        if (patternData == null) {
            patternData = new ArrayList<Object>();
            // slot for AnalyzedDataSetImpl.VALID_VALUE
            patternData.add(new ArrayList<Object[]>());
            // slot for AnalyzedDataSetImpl.INVALID_VALUE
            patternData.add(new ArrayList<Object[]>());
            dataSet.setPatternData(patternData);
        }
        final Object slot;
        if (indicator.isInValidRow()) {
            slot = patternData.get(AnalyzedDataSetImpl.INVALID_VALUE);
        } else {
            slot = patternData.get(AnalyzedDataSetImpl.VALID_VALUE);
        }
        return slot instanceof ArrayList<?> ? (ArrayList<Object[]>) slot : null;
    }

    // --- plain row storage
    List<Object[]> plainRows = dataSet.getData();
    if (plainRows == null) {
        plainRows = new ArrayList<Object[]>();
        dataSet.setData(plainRows);
    }
    return plainRows;
}
Also used : MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) FormatFreqPieIndicator(org.talend.dataquality.indicators.FormatFreqPieIndicator) ArrayList(java.util.ArrayList) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) PatternLowFreqIndicator(org.talend.dataquality.indicators.PatternLowFreqIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) ArrayList(java.util.ArrayList) List(java.util.List) PatternFreqIndicator(org.talend.dataquality.indicators.PatternFreqIndicator)

Example 3 with MinLengthIndicator

use of org.talend.dataquality.indicators.MinLengthIndicator in project tdq-studio-se by Talend.

In class IndicatorCommonUtil, method getIndicatorValue.

/**
 * Reads the result value of an indicator, dispatching on the {@link IndicatorEnum} found for its
 * EClass.
 * <p>
 * Range and IQR indicators always return their range and are marked as computed. Every other
 * indicator is only read when {@code indicator.isComputed()} is true; if the value read is
 * {@code null} or its text is "null" (ignoring case), the indicator is flagged as not computed.
 * Any exception raised while reading is logged and the value obtained so far is returned.
 *
 * @param indicator the indicator to read; must not be {@code null}
 * @return the indicator's value, or {@code null} when no enum matches the indicator, the
 *         indicator is not computed, its type has no case below, or reading failed
 */
public static Object getIndicatorValue(Indicator indicator) {
    Object value = null;
    IndicatorEnum type = IndicatorEnum.findIndicatorEnum(indicator.eClass());
    if (type == null) {
        return null;
    }
    try {
        if (type == IndicatorEnum.RangeIndicatorEnum || type == IndicatorEnum.IQRIndicatorEnum) {
            value = ((RangeIndicator) indicator).getRange();
            ((RangeIndicator) indicator).setComputed(true);
        } else if (indicator.isComputed()) {
            switch (type) {
                case RowCountIndicatorEnum:
                    value = ((RowCountIndicator) indicator).getCount();
                    break;
                case NullCountIndicatorEnum:
                    value = ((NullCountIndicator) indicator).getNullCount();
                    break;
                case DistinctCountIndicatorEnum:
                    value = ((DistinctCountIndicator) indicator).getDistinctValueCount();
                    break;
                case UniqueIndicatorEnum:
                    value = ((UniqueCountIndicator) indicator).getUniqueValueCount();
                    break;
                case DuplicateCountIndicatorEnum:
                    value = ((DuplicateCountIndicator) indicator).getDuplicateValueCount();
                    break;
                case BlankCountIndicatorEnum:
                    value = ((BlankCountIndicator) indicator).getBlankCount();
                    break;
                case DefValueCountIndicatorEnum:
                    value = ((DefValueCountIndicator) indicator).getDefaultValCount();
                    break;
                case MinLengthIndicatorEnum:
                    value = ((MinLengthIndicator) indicator).getLength();
                    break;
                case MinLengthWithNullIndicatorEnum:
                    value = ((MinLengthWithNullIndicator) indicator).getLength();
                    break;
                case MinLengthWithBlankIndicatorEnum:
                    value = ((MinLengthWithBlankIndicator) indicator).getLength();
                    break;
                case MinLengthWithBlankNullIndicatorEnum:
                    value = ((MinLengthWithBlankNullIndicator) indicator).getLength();
                    break;
                case MaxLengthIndicatorEnum:
                    value = ((MaxLengthIndicator) indicator).getLength();
                    break;
                case MaxLengthWithNullIndicatorEnum:
                    value = ((MaxLengthWithNullIndicator) indicator).getLength();
                    break;
                case MaxLengthWithBlankIndicatorEnum:
                    value = ((MaxLengthWithBlankIndicator) indicator).getLength();
                    break;
                case MaxLengthWithBlankNullIndicatorEnum:
                    value = ((MaxLengthWithBlankNullIndicator) indicator).getLength();
                    break;
                case AverageLengthIndicatorEnum:
                    value = ((AverageLengthIndicator) indicator).getAverageLength();
                    break;
                case AverageLengthWithNullIndicatorEnum:
                    value = ((AvgLengthWithNullIndicator) indicator).getAverageLength();
                    break;
                case AverageLengthWithBlankIndicatorEnum:
                    value = ((AvgLengthWithBlankIndicator) indicator).getAverageLength();
                    break;
                case AverageLengthWithNullBlankIndicatorEnum:
                    value = ((AvgLengthWithBlankNullIndicator) indicator).getAverageLength();
                    break;
                // every frequency-style indicator goes through the same handler
                case FrequencyIndicatorEnum:
                case DateFrequencyIndicatorEnum:
                case WeekFrequencyIndicatorEnum:
                case MonthFrequencyIndicatorEnum:
                case QuarterFrequencyIndicatorEnum:
                case YearFrequencyIndicatorEnum:
                case BinFrequencyIndicatorEnum:
                case LowFrequencyIndicatorEnum:
                case DateLowFrequencyIndicatorEnum:
                case WeekLowFrequencyIndicatorEnum:
                case MonthLowFrequencyIndicatorEnum:
                case QuarterLowFrequencyIndicatorEnum:
                case YearLowFrequencyIndicatorEnum:
                case BinLowFrequencyIndicatorEnum:
                case PatternFreqIndicatorEnum:
                case PatternLowFreqIndicatorEnum:
                case EastAsiaPatternFreqIndicatorEnum:
                case EastAsiaPatternLowFreqIndicatorEnum:
                case DatePatternFreqIndicatorEnum:
                case SoundexIndicatorEnum:
                case SoundexLowIndicatorEnum:
                case BenfordLawFrequencyIndicatorEnum:
                    value = handleFrequency(indicator);
                    break;
                case MeanIndicatorEnum:
                    value = ((MeanIndicator) indicator).getMean();
                    break;
                case MedianIndicatorEnum:
                    value = ((MedianIndicator) indicator).getMedian();
                    break;
                case MinValueIndicatorEnum:
                    value = ((MinValueIndicator) indicator).getValue();
                    break;
                case MaxValueIndicatorEnum:
                    value = ((MaxValueIndicator) indicator).getValue();
                    break;
                case LowerQuartileIndicatorEnum:
                    value = ((LowerQuartileIndicator) indicator).getValue();
                    break;
                case UpperQuartileIndicatorEnum:
                    value = ((UpperQuartileIndicator) indicator).getValue();
                    break;
                case RegexpMatchingIndicatorEnum:
                case SqlPatternMatchingIndicatorEnum:
                case AllMatchIndicatorEnum:
                    value = handleMatchingValue(indicator);
                    break;
                case ModeIndicatorEnum:
                    value = ((ModeIndicator) indicator).getMode();
                    break;
                case UserDefinedIndicatorEnum:
                    value = handleUDIValue(indicator);
                    break;
                case WhereRuleIndicatorEnum:
                    // a missing user count is reported as zero
                    Long userCount = ((WhereRuleIndicator) indicator).getUserCount();
                    value = userCount == null ? 0 : userCount;
                    break;
                // MOD qiongli 2011-7-21 feature 22362
                case ValidPhoneCountIndicatorEnum:
                    value = ((ValidPhoneCountIndicator) indicator).getValidPhoneNumCount();
                    break;
                case ValidRegCodeCountIndicatorEnum:
                    value = ((ValidRegCodeCountIndicator) indicator).getValidRegCount();
                    break;
                case InvalidRegCodeCountIndicatorEnum:
                    value = ((InvalidRegCodeCountIndicator) indicator).getInvalidRegCount();
                    break;
                case WellFormE164PhoneCountIndicatorEnum:
                    value = ((WellFormE164PhoneCountIndicator) indicator).getWellFormE164PhoneCount();
                    break;
                case WellFormIntePhoneCountIndicatorEnum:
                    value = ((WellFormIntePhoneCountIndicator) indicator).getWellFormIntePhoneCount();
                    break;
                case WellFormNationalPhoneCountIndicatorEnum:
                    value = ((WellFormNationalPhoneCountIndicator) indicator).getWellFormNatiPhoneCount();
                    break;
                case PossiblePhoneCountIndicatorEnum:
                    value = ((PossiblePhoneCountIndicator) indicator).getPossiblePhoneCount();
                    break;
                case FormatFreqPieIndictorEnum:
                    value = handleFreqPie(indicator);
                    // explicit break: a case added below must not be entered by fall-through
                    break;
                default:
                    // no value accessor for this indicator type here; value stays null
                    break;
            }
            // A computed indicator that yields no usable value is reset to "not computed".
            if (value == null || "null".equalsIgnoreCase(value.toString())) { //$NON-NLS-1$
                indicator.setComputed(false);
            }
        }
    } catch (Exception e) {
        // Best effort: log the failure and return whatever was read so far.
        log.error(Messages.getString("IndicatorCommonUtil.FailValue", e.getMessage()), e); //$NON-NLS-1$
    }
    return value;
}
Also used : IndicatorEnum(org.talend.dq.nodes.indicator.type.IndicatorEnum) MinLengthWithNullIndicator(org.talend.dataquality.indicators.MinLengthWithNullIndicator) BlankCountIndicator(org.talend.dataquality.indicators.BlankCountIndicator) MaxLengthWithBlankIndicator(org.talend.dataquality.indicators.MaxLengthWithBlankIndicator) MedianIndicator(org.talend.dataquality.indicators.MedianIndicator) ModeIndicator(org.talend.dataquality.indicators.ModeIndicator) MinLengthWithBlankIndicator(org.talend.dataquality.indicators.MinLengthWithBlankIndicator) AvgLengthWithNullIndicator(org.talend.dataquality.indicators.AvgLengthWithNullIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) WellFormIntePhoneCountIndicator(org.talend.dataquality.indicators.WellFormIntePhoneCountIndicator) MaxValueIndicator(org.talend.dataquality.indicators.MaxValueIndicator) MaxLengthWithBlankNullIndicator(org.talend.dataquality.indicators.MaxLengthWithBlankNullIndicator) LowerQuartileIndicator(org.talend.dataquality.indicators.LowerQuartileIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) MeanIndicator(org.talend.dataquality.indicators.MeanIndicator) WellFormE164PhoneCountIndicator(org.talend.dataquality.indicators.WellFormE164PhoneCountIndicator) PossiblePhoneCountIndicator(org.talend.dataquality.indicators.PossiblePhoneCountIndicator) ValidPhoneCountIndicator(org.talend.dataquality.indicators.ValidPhoneCountIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) MaxLengthWithNullIndicator(org.talend.dataquality.indicators.MaxLengthWithNullIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) MinLengthWithBlankNullIndicator(org.talend.dataquality.indicators.MinLengthWithBlankNullIndicator) AvgLengthWithBlankIndicator(org.talend.dataquality.indicators.AvgLengthWithBlankIndicator) 
ValidRegCodeCountIndicator(org.talend.dataquality.indicators.ValidRegCodeCountIndicator) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) InvalidRegCodeCountIndicator(org.talend.dataquality.indicators.InvalidRegCodeCountIndicator) DefValueCountIndicator(org.talend.dataquality.indicators.DefValueCountIndicator) AverageLengthIndicator(org.talend.dataquality.indicators.AverageLengthIndicator) AvgLengthWithBlankNullIndicator(org.talend.dataquality.indicators.AvgLengthWithBlankNullIndicator) RangeIndicator(org.talend.dataquality.indicators.RangeIndicator) UpperQuartileIndicator(org.talend.dataquality.indicators.UpperQuartileIndicator) WellFormNationalPhoneCountIndicator(org.talend.dataquality.indicators.WellFormNationalPhoneCountIndicator) MinValueIndicator(org.talend.dataquality.indicators.MinValueIndicator)

Example 4 with MinLengthIndicator

use of org.talend.dataquality.indicators.MinLengthIndicator in project tdq-studio-se by Talend.

In class IndicatorHelper, method getIndicatorValue.

/**
 * Renders the result value of an indicator as text.
 * <p>
 * The indicator is first dispatched through an {@link IndicatorsSwitch} covering the standard
 * indicator types. Only when that switch yields {@code null} is the indicator dispatched through
 * an {@link IndicatorSqlSwitch} covering where-rule and user-defined indicators.
 *
 * @param indicator the indicator to read
 * @return the value as a string; whatever the second switch returns when the first one returned
 *         {@code null}
 */
public static String getIndicatorValue(Indicator indicator) {
    // Standard indicators: one override per supported type.
    IndicatorsSwitch<String> standardSwitch = new IndicatorsSwitch<String>() {

        @Override
        public String caseAverageLengthIndicator(AverageLengthIndicator ind) {
            return createStandardNumber(ind.getAverageLength());
        }

        @Override
        public String caseBlankCountIndicator(BlankCountIndicator ind) {
            return String.valueOf(ind.getBlankCount());
        }

        @Override
        public String caseDefValueCountIndicator(DefValueCountIndicator ind) {
            return String.valueOf(ind.getDefaultValCount());
        }

        @Override
        public String caseDistinctCountIndicator(DistinctCountIndicator ind) {
            return String.valueOf(ind.getDistinctValueCount());
        }

        @Override
        public String caseDuplicateCountIndicator(DuplicateCountIndicator ind) {
            return String.valueOf(ind.getDuplicateValueCount());
        }

        @Override
        public String caseMaxLengthIndicator(MaxLengthIndicator ind) {
            return String.valueOf(ind.getLength());
        }

        @Override
        public String caseMeanIndicator(MeanIndicator ind) {
            return createStandardNumber(ind.getMean());
        }

        @Override
        public String caseMedianIndicator(MedianIndicator ind) {
            return createStandardNumber(ind.getMedian());
        }

        @Override
        public String caseMinLengthIndicator(MinLengthIndicator ind) {
            return String.valueOf(ind.getLength());
        }

        @Override
        public String caseModeIndicator(ModeIndicator ind) {
            return String.valueOf(ind.getMode());
        }

        @Override
        public String caseNullCountIndicator(NullCountIndicator ind) {
            return String.valueOf(ind.getNullCount());
        }

        @Override
        public String casePatternMatchingIndicator(PatternMatchingIndicator ind) {
            return String.valueOf(ind.getMatchingValueCount());
        }

        @Override
        public String caseRowCountIndicator(RowCountIndicator ind) {
            return String.valueOf(ind.getCount());
        }

        @Override
        public String caseUniqueCountIndicator(UniqueCountIndicator ind) {
            return String.valueOf(ind.getUniqueValueCount());
        }

        @Override
        public String caseValueIndicator(ValueIndicator ind) {
            return ind.getValue();
        }

        @Override
        public String caseValidPhoneCountIndicator(ValidPhoneCountIndicator ind) {
            return String.valueOf(ind.getValidPhoneNumCount());
        }

        @Override
        public String casePossiblePhoneCountIndicator(PossiblePhoneCountIndicator ind) {
            return String.valueOf(ind.getPossiblePhoneCount());
        }

        @Override
        public String caseValidRegCodeCountIndicator(ValidRegCodeCountIndicator ind) {
            return String.valueOf(ind.getValidRegCount());
        }

        @Override
        public String caseInvalidRegCodeCountIndicator(InvalidRegCodeCountIndicator ind) {
            return String.valueOf(ind.getInvalidRegCount());
        }

        @Override
        public String caseWellFormE164PhoneCountIndicator(WellFormE164PhoneCountIndicator ind) {
            return String.valueOf(ind.getWellFormE164PhoneCount());
        }

        @Override
        public String caseWellFormIntePhoneCountIndicator(WellFormIntePhoneCountIndicator ind) {
            return String.valueOf(ind.getWellFormIntePhoneCount());
        }

        @Override
        public String caseWellFormNationalPhoneCountIndicator(WellFormNationalPhoneCountIndicator ind) {
            return String.valueOf(ind.getWellFormNatiPhoneCount());
        }
    };

    // TDQ-11114: SQL-side indicators, including the user-defined (UDI) types.
    IndicatorSqlSwitch<String> sqlSwitch = new IndicatorSqlSwitch<String>() {

        @Override
        public String caseWhereRuleIndicator(WhereRuleIndicator rule) {
            return String.valueOf(rule.getUserCount());
        }

        @Override
        public String caseUserDefIndicator(UserDefIndicator udi) {
            // TDQ-11114: a UDI in the "real value" category reports its real value.
            if (udi != null) {
                IndicatorCategory udiCategory = IndicatorCategoryHelper.getCategory(udi.getIndicatorDefinition());
                if (IndicatorCategoryHelper.isUserDefRealValue(udiCategory)) {
                    return String.valueOf(udi.getRealValue());
                }
            }
            // TDQ-11485: every other UDI (e.g. match UDI) reports its integer value.
            return String.valueOf(udi.getIntegerValue());
        }

        @Override
        public String caseJavaUserDefIndicator(JavaUserDefIndicator javaUdi) {
            return String.valueOf(javaUdi.getUserCount());
        }
    };

    String standardValue = standardSwitch.doSwitch(indicator);
    if (standardValue != null) {
        return standardValue;
    }
    return sqlSwitch.doSwitch(indicator);
}
Also used : BlankCountIndicator(org.talend.dataquality.indicators.BlankCountIndicator) MedianIndicator(org.talend.dataquality.indicators.MedianIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) ModeIndicator(org.talend.dataquality.indicators.ModeIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) WellFormIntePhoneCountIndicator(org.talend.dataquality.indicators.WellFormIntePhoneCountIndicator) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) WhereRuleIndicator(org.talend.dataquality.indicators.sql.WhereRuleIndicator) IndicatorCategory(org.talend.dataquality.indicators.definition.IndicatorCategory) IndicatorsSwitch(org.talend.dataquality.indicators.util.IndicatorsSwitch) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) MeanIndicator(org.talend.dataquality.indicators.MeanIndicator) JavaUserDefIndicator(org.talend.dataquality.indicators.sql.JavaUserDefIndicator) UserDefIndicator(org.talend.dataquality.indicators.sql.UserDefIndicator) IndicatorSqlSwitch(org.talend.dataquality.indicators.sql.util.IndicatorSqlSwitch) PossiblePhoneCountIndicator(org.talend.dataquality.indicators.PossiblePhoneCountIndicator) WellFormE164PhoneCountIndicator(org.talend.dataquality.indicators.WellFormE164PhoneCountIndicator) ValidPhoneCountIndicator(org.talend.dataquality.indicators.ValidPhoneCountIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) MaxValueIndicator(org.talend.dataquality.indicators.MaxValueIndicator) MinValueIndicator(org.talend.dataquality.indicators.MinValueIndicator) ValueIndicator(org.talend.dataquality.indicators.ValueIndicator) ValidRegCodeCountIndicator(org.talend.dataquality.indicators.ValidRegCodeCountIndicator) 
MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) InvalidRegCodeCountIndicator(org.talend.dataquality.indicators.InvalidRegCodeCountIndicator) JavaUserDefIndicator(org.talend.dataquality.indicators.sql.JavaUserDefIndicator) DefValueCountIndicator(org.talend.dataquality.indicators.DefValueCountIndicator) AverageLengthIndicator(org.talend.dataquality.indicators.AverageLengthIndicator) WellFormNationalPhoneCountIndicator(org.talend.dataquality.indicators.WellFormNationalPhoneCountIndicator)

Example 5 with MinLengthIndicator

use of org.talend.dataquality.indicators.MinLengthIndicator in project tdq-studio-se by Talend.

In class IndicatorEvaluationMain, method main.

/**
 * Manual driver: connects to the database described in {@code db.properties}, registers a set of
 * indicators on the columns of table {@code my_test} in database {@code test}, evaluates them,
 * prints the results and saves them through {@link EMFUtil}.
 * <p>
 * Relies on the {@code evaluator} and {@code rContents} members and on the {@code addIndicator},
 * {@code sqlSelectColumns} and {@code printIndicators} helpers declared elsewhere in this class.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
    TypedProperties connectionParams = PropertiesLoader.getProperties(IndicatorEvaluator.class, "db.properties");
    String driverClassName = connectionParams.getProperty("driver");
    String dbUrl = connectionParams.getProperty("url");
    try {
        TimeTracer tracer = new TimeTracer("Indicator evaluation", null);
        tracer.start();

        // --- open the connection
        Connection connection = ConnectionUtils.createConnection(dbUrl, driverClassName, connectionParams);
        String database = "test";
        String tableName = "my_test";

        // --- columns to analyze (the trailing comment is the array index used below)
        String[] columnNames = {
            "my_int", // 0
            "my_double", // 1
            "my_text", // 2
            "my_date", // 3
            "my_string", // 4
            "my_int_null" // 5
        };
        List<String> columns = Arrays.asList(columnNames);

        // --- resource the results are stored in
        File outputFile = new File("out/columnTest_0.1.ana");
        EMFUtil emfUtil = new EMFUtil();
        Resource resource = emfUtil.getResourceSet().createResource(URI.createFileURI(outputFile.getAbsolutePath()));
        rContents = resource.getContents();
        evaluator.setConnection(connection);

        // --- create the indicators (several are created but not registered below)
        RowCountIndicator rowCountIndicator = IndicatorsFactory.eINSTANCE.createRowCountIndicator();
        NullCountIndicator nullCountIndicator = IndicatorsFactory.eINSTANCE.createNullCountIndicator();
        DistinctCountIndicator distinctCountIndicator = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        DistinctCountIndicator distinctCountIndicator2 = IndicatorsFactory.eINSTANCE.createDistinctCountIndicator();
        UniqueCountIndicator uniqueCountIndicator = IndicatorsFactory.eINSTANCE.createUniqueCountIndicator();
        DuplicateCountIndicator duplicateCountIndicator = IndicatorsFactory.eINSTANCE.createDuplicateCountIndicator();
        BlankCountIndicator blankCountIndicator = IndicatorsFactory.eINSTANCE.createBlankCountIndicator();
        MinLengthIndicator minLengthIndicator = IndicatorsFactory.eINSTANCE.createMinLengthIndicator();
        MaxLengthIndicator maxLengthIndicator = IndicatorsFactory.eINSTANCE.createMaxLengthIndicator();
        AverageLengthIndicator averageLengthIndicator = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        AverageLengthIndicator averageLengthIndicator2 = IndicatorsFactory.eINSTANCE.createAverageLengthIndicator();
        ModeIndicator modeIndicator = IndicatorsFactory.eINSTANCE.createModeIndicator();
        FrequencyIndicator textFrequencyIndicator = IndicatorsFactory.eINSTANCE.createFrequencyIndicator();
        // Disabled wiring of the sub-indicators into the frequency indicator:
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator);
        // textFrequencyIndicator.setDistinctCountIndicator(distinctCountIndicator2);
        // textFrequencyIndicator.setUniqueCountIndicator(uniqueCountIndicator);
        // textFrequencyIndicator.setDuplicateCountIndicator(duplicateCountIndicator);
        // textFrequencyIndicator.setModeIndicator(modeIndicator);
        MeanIndicator doubleMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MeanIndicator integerMeanIndicator = IndicatorsFactory.eINSTANCE.createMeanIndicator();
        MedianIndicator medianIndicator = IndicatorsFactory.eINSTANCE.createMedianIndicator();
        SumIndicator integerSumIndicator = IndicatorsFactory.eINSTANCE.createSumIndicator();

        // --- attach indicators to columns (registration order is kept as-is)
        addIndicator(columnNames[0], medianIndicator);
        addIndicator(columnNames[1], doubleMeanIndicator);
        addIndicator(columnNames[2], blankCountIndicator);
        addIndicator(columnNames[5], nullCountIndicator);
        // Disabled registrations ("probably not useful?" per the original author):
        // addIndicator(columnNames[2], textFrequencyIndicator);
        // addIndicator(columnNames[2], distinctCountIndicator);
        // addIndicator(columnNames[2], uniqueCountIndicator);
        // addIndicator(columnNames[2], duplicateCountIndicator);
        // addIndicator(columnNames[2], modeIndicator);
        addIndicator(columnNames[3], rowCountIndicator);
        addIndicator(columnNames[5], integerSumIndicator);
        addIndicator(columnNames[5], integerMeanIndicator);
        addIndicator(columnNames[2], averageLengthIndicator);
        addIndicator(columnNames[3], averageLengthIndicator2);
        addIndicator(columnNames[3], minLengthIndicator);
        addIndicator(columnNames[3], maxLengthIndicator);

        // --- build the select statement over the analyzed columns
        // TODO scorreia add filter somewhere here...
        String selectStatement = sqlSelectColumns(database, tableName, columns);

        // --- describe the column set as a query expression (not passed anywhere below)
        QueryExpression queryExpression = DatatypesFactory.eINSTANCE.createQueryExpression();
        queryExpression.setBody(selectStatement);
        // TODO scorreia externalize this as a constant
        queryExpression.setLanguage("SQL");

        // --- evaluate
        tracer.start("compute");
        evaluator.setFetchSize(10000);
        evaluator.evaluateIndicators(selectStatement, true);
        tracer.end("compute");

        // --- report
        System.out.println("Median=" + medianIndicator.getMedian());
        // NOTE(review): textFrequencyIndicator is never passed to addIndicator above — confirm
        // these two counts are meaningful.
        System.out.println("# Unique values= " + textFrequencyIndicator.getUniqueValueCount());
        System.out.println("# Distinct values= " + textFrequencyIndicator.getDistinctValueCount());
        for (String columnName : columns) {
            printIndicators(evaluator.getIndicators(columnName));
        }

        // --- persist
        tracer.start("save");
        emfUtil.save();
        tracer.end("saved in " + outputFile.getAbsolutePath());
        tracer.end();

        CwmResource cwmResource = (CwmResource) resource;
        String uuid = cwmResource.getID(medianIndicator);
        System.out.println("ecore util.getId= " + EcoreUtil.getID(medianIndicator));
        System.out.println("uuId= " + uuid);
        // test reload this file
        // LoadSerialData.main(args);
    } catch (SQLException e) {
        // failures are only logged; the driver then ends normally
        log.error(e, e);
    } catch (InstantiationException e) {
        log.error(e, e);
    } catch (IllegalAccessException e) {
        log.error(e, e);
    } catch (ClassNotFoundException e) {
        log.error(e, e);
    }
}
Also used : SumIndicator(org.talend.dataquality.indicators.SumIndicator) SQLException(java.sql.SQLException) BlankCountIndicator(org.talend.dataquality.indicators.BlankCountIndicator) MedianIndicator(org.talend.dataquality.indicators.MedianIndicator) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) ModeIndicator(org.talend.dataquality.indicators.ModeIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) QueryExpression(orgomg.cwm.foundation.datatypes.QueryExpression) MeanIndicator(org.talend.dataquality.indicators.MeanIndicator) CwmResource(org.talend.model.emf.CwmResource) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) Connection(java.sql.Connection) CwmResource(org.talend.model.emf.CwmResource) Resource(org.eclipse.emf.ecore.resource.Resource) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) TypedProperties(org.talend.utils.properties.TypedProperties) TimeTracer(org.talend.utils.time.TimeTracer) AverageLengthIndicator(org.talend.dataquality.indicators.AverageLengthIndicator) EMFUtil(org.talend.commons.emf.EMFUtil) File(java.io.File)

Aggregations

MinLengthIndicator (org.talend.dataquality.indicators.MinLengthIndicator): 5
MaxLengthIndicator (org.talend.dataquality.indicators.MaxLengthIndicator): 4
AverageLengthIndicator (org.talend.dataquality.indicators.AverageLengthIndicator): 3
BlankCountIndicator (org.talend.dataquality.indicators.BlankCountIndicator): 3
DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator): 3
DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator): 3
MeanIndicator (org.talend.dataquality.indicators.MeanIndicator): 3
MedianIndicator (org.talend.dataquality.indicators.MedianIndicator): 3
ModeIndicator (org.talend.dataquality.indicators.ModeIndicator): 3
NullCountIndicator (org.talend.dataquality.indicators.NullCountIndicator): 3
RowCountIndicator (org.talend.dataquality.indicators.RowCountIndicator): 3
DefValueCountIndicator (org.talend.dataquality.indicators.DefValueCountIndicator): 2
FrequencyIndicator (org.talend.dataquality.indicators.FrequencyIndicator): 2
InvalidRegCodeCountIndicator (org.talend.dataquality.indicators.InvalidRegCodeCountIndicator): 2
MaxValueIndicator (org.talend.dataquality.indicators.MaxValueIndicator): 2
MinValueIndicator (org.talend.dataquality.indicators.MinValueIndicator): 2
PossiblePhoneCountIndicator (org.talend.dataquality.indicators.PossiblePhoneCountIndicator): 2
ValidPhoneCountIndicator (org.talend.dataquality.indicators.ValidPhoneCountIndicator): 2
ValidRegCodeCountIndicator (org.talend.dataquality.indicators.ValidRegCodeCountIndicator): 2
WellFormE164PhoneCountIndicator (org.talend.dataquality.indicators.WellFormE164PhoneCountIndicator): 2