Search in sources :

Example 1 with FrequencyIndicator

use of org.talend.dataquality.indicators.FrequencyIndicator in project tdq-studio-se by Talend.

In the class ModelElementIndicatorImpl, the method createPlainIndicatorUnit:

/**
 * Builds the plain {@link IndicatorUnit} for the given enum entry and registers it in
 * {@code plainIndicatorUnitMap}. When no indicator is supplied, a new one is instantiated through the EMF factory
 * of the enum's indicator type and its default definition is set.
 *
 * @param indicatorEnum the kind of indicator the unit stands for; also the key under which the unit is stored
 * @param indicator an existing indicator to wrap, or {@code null} to have a fresh one created
 * @return the newly created unit wrapping the (given or created) indicator
 */
private IndicatorUnit createPlainIndicatorUnit(IndicatorEnum indicatorEnum, Indicator indicator) {
    Indicator unitIndicator = indicator;
    if (unitIndicator == null) {
        // No indicator supplied: instantiate one from the factory owning the enum's indicator type.
        EFactoryImpl typeFactory = (EFactoryImpl) indicatorEnum.getIndicatorType().getEPackage().getEFactoryInstance();
        unitIndicator = (Indicator) typeFactory.create(indicatorEnum.getIndicatorType());

        // Bug 5131: the definition must be attached as soon as the indicator is created.
        boolean definitionSet = DefinitionHandler.getInstance().setDefaultIndicatorDefinition(unitIndicator);
        if (!definitionSet) {
            log.error("Could not set the definition of the given indicator :" + unitIndicator.getName());
        }

        // Bug 4225: a frequency indicator applied to a SQL date type needs its parameters initialized.
        int columnSqlType = getJavaType();
        if (unitIndicator instanceof FrequencyIndicator && Java2SqlType.isDateInSQL(columnSqlType)) {
            IndicatorParameters indicatorParameters = unitIndicator.getParameters();
            if (indicatorParameters == null) {
                indicatorParameters = IndicatorsFactory.eINSTANCE.createIndicatorParameters();
                unitIndicator.setParameters(indicatorParameters);
            }
            // Date parameters are only created for enum kinds other than the plain/low frequency and
            // pattern frequency ones; the aggregation default already comes from the model.
            if (indicatorParameters.getDateParameters() == null
                    && indicatorEnum != IndicatorEnum.PatternFreqIndicatorEnum
                    && indicatorEnum != IndicatorEnum.PatternLowFreqIndicatorEnum
                    && indicatorEnum != IndicatorEnum.FrequencyIndicatorEnum
                    && indicatorEnum != IndicatorEnum.LowFrequencyIndicatorEnum) {
                indicatorParameters.setDateParameters(IndicatorsFactory.eINSTANCE.createDateParameters());
            }
        }
    }
    IndicatorUnit createdUnit = new ColumnIndicatorUnit(indicatorEnum, unitIndicator, this);
    this.plainIndicatorUnitMap.put(indicatorEnum, createdUnit);
    return createdUnit;
}
Also used : EFactoryImpl(org.eclipse.emf.ecore.impl.EFactoryImpl) IndicatorParameters(org.talend.dataquality.indicators.IndicatorParameters) IndicatorUnit(org.talend.dataprofiler.core.ui.editor.preview.IndicatorUnit) ColumnIndicatorUnit(org.talend.dataprofiler.core.ui.editor.preview.ColumnIndicatorUnit) DateParameters(org.talend.dataquality.indicators.DateParameters) ColumnIndicatorUnit(org.talend.dataprofiler.core.ui.editor.preview.ColumnIndicatorUnit) UniqueCountIndicator(org.talend.dataquality.indicators.UniqueCountIndicator) AvgLengthWithBlankIndicator(org.talend.dataquality.indicators.AvgLengthWithBlankIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) MaxValueIndicator(org.talend.dataquality.indicators.MaxValueIndicator) MaxLengthWithBlankNullIndicator(org.talend.dataquality.indicators.MaxLengthWithBlankNullIndicator) ValidRegCodeCountIndicator(org.talend.dataquality.indicators.ValidRegCodeCountIndicator) PhoneNumbStatisticsIndicator(org.talend.dataquality.indicators.PhoneNumbStatisticsIndicator) AvgLengthWithBlankNullIndicator(org.talend.dataquality.indicators.AvgLengthWithBlankNullIndicator) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) MinValueIndicator(org.talend.dataquality.indicators.MinValueIndicator) LowerQuartileIndicator(org.talend.dataquality.indicators.LowerQuartileIndicator) AverageLengthIndicator(org.talend.dataquality.indicators.AverageLengthIndicator) MaxLengthWithBlankIndicator(org.talend.dataquality.indicators.MaxLengthWithBlankIndicator) MeanIndicator(org.talend.dataquality.indicators.MeanIndicator) DefValueCountIndicator(org.talend.dataquality.indicators.DefValueCountIndicator) WellFormE164PhoneCountIndicator(org.talend.dataquality.indicators.WellFormE164PhoneCountIndicator) RangeIndicator(org.talend.dataquality.indicators.RangeIndicator) AvgLengthWithNullIndicator(org.talend.dataquality.indicators.AvgLengthWithNullIndicator) 
MaxLengthWithNullIndicator(org.talend.dataquality.indicators.MaxLengthWithNullIndicator) ModelElementIndicator(org.talend.dataprofiler.core.model.ModelElementIndicator) InvalidRegCodeCountIndicator(org.talend.dataquality.indicators.InvalidRegCodeCountIndicator) UpperQuartileIndicator(org.talend.dataquality.indicators.UpperQuartileIndicator) ValidPhoneCountIndicator(org.talend.dataquality.indicators.ValidPhoneCountIndicator) RowCountIndicator(org.talend.dataquality.indicators.RowCountIndicator) PossiblePhoneCountIndicator(org.talend.dataquality.indicators.PossiblePhoneCountIndicator) MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) CountsIndicator(org.talend.dataquality.indicators.CountsIndicator) MinLengthWithBlankNullIndicator(org.talend.dataquality.indicators.MinLengthWithBlankNullIndicator) NullCountIndicator(org.talend.dataquality.indicators.NullCountIndicator) MedianIndicator(org.talend.dataquality.indicators.MedianIndicator) Indicator(org.talend.dataquality.indicators.Indicator) BlankCountIndicator(org.talend.dataquality.indicators.BlankCountIndicator) TextIndicator(org.talend.dataquality.indicators.TextIndicator) DistinctCountIndicator(org.talend.dataquality.indicators.DistinctCountIndicator) DuplicateCountIndicator(org.talend.dataquality.indicators.DuplicateCountIndicator) IQRIndicator(org.talend.dataquality.indicators.IQRIndicator) MinLengthWithBlankIndicator(org.talend.dataquality.indicators.MinLengthWithBlankIndicator) MinLengthWithNullIndicator(org.talend.dataquality.indicators.MinLengthWithNullIndicator) BoxIndicator(org.talend.dataquality.indicators.BoxIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator)

Example 2 with FrequencyIndicator

use of org.talend.dataquality.indicators.FrequencyIndicator in project tdq-studio-se by Talend.

In the class LoadSerialDataMain, the method main:

/**
 * Loads the serialized analysis file {@code out/columnTest_0.1.ana} and prints, for every
 * {@link FrequencyIndicator} found among the resource's contents, its unique value count followed by each unique
 * value together with its count.
 *
 * @param args command line arguments (not used)
 */
public static void main(String[] args) {
    EMFUtil emfUtil = new EMFUtil();
    File analysisFile = new File("out/columnTest_0.1.ana");
    System.out.println("Loading file " + analysisFile.getAbsolutePath());

    ResourceSet resourceSet = emfUtil.getResourceSet();
    Resource resource = resourceSet.getResource(URI.createFileURI(analysisFile.getAbsolutePath()), true);
    EList<EObject> rootObjects = resource.getContents();
    if (rootObjects.isEmpty()) {
        System.err.println("No content in " + resource);
    }
    System.out.println("Nb elements in contents " + rootObjects.size());

    // The switch yields the object itself for frequency indicators and null for everything else.
    IndicatorsSwitch<FrequencyIndicator> frequencyFilter = new IndicatorsSwitch<FrequencyIndicator>() {

        @Override
        public FrequencyIndicator caseFrequencyIndicator(FrequencyIndicator object) {
            return object;
        }
    };

    for (EObject root : rootObjects) {
        FrequencyIndicator frequencyIndicator = frequencyFilter.doSwitch(root);
        if (frequencyIndicator == null) {
            continue;
        }
        System.out.println("nb unique values = " + frequencyIndicator.getUniqueValueCount());
        for (Object uniqueValue : frequencyIndicator.getUniqueValues()) {
            System.out.println("unique value= " + uniqueValue + " " + frequencyIndicator.getCount(uniqueValue));
        }
    }
}
Also used : EMFUtil(org.talend.commons.emf.EMFUtil) EObject(org.eclipse.emf.ecore.EObject) Resource(org.eclipse.emf.ecore.resource.Resource) IndicatorsSwitch(org.talend.dataquality.indicators.util.IndicatorsSwitch) EObject(org.eclipse.emf.ecore.EObject) ResourceSet(org.eclipse.emf.ecore.resource.ResourceSet) File(java.io.File) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator)

Example 3 with FrequencyIndicator

use of org.talend.dataquality.indicators.FrequencyIndicator in project tdq-studio-se by Talend.

In the class AbstractColumnDropTree, the method hasIndicatorParameters:

/**
 * Tells whether the indicator of the given unit carries parameters.
 * <p>
 * The answer is {@code false} when the indicator has no parameters object or when {@code hideParameters} says so.
 * For a {@link FrequencyIndicator} only the presence of bins counts. For any other indicator the answer is
 * {@code true} as soon as one of the following is present: text parameters (unless {@code hideTextParameters}
 * applies), date parameters, a valid domain with at least one range, or bins.
 *
 * @param indicatorUnit the unit whose indicator is inspected
 * @return {@code true} if the conditions above are met, {@code false} otherwise
 */
private boolean hasIndicatorParameters(IndicatorUnit indicatorUnit) {
    final IndicatorParameters params = indicatorUnit.getIndicator().getParameters();
    if (params == null || hideParameters(indicatorUnit)) {
        return false;
    }

    // Frequency indicators: only the bins are taken into account.
    if (indicatorUnit.getIndicator() instanceof FrequencyIndicator) {
        return params.getBins() != null;
    }

    if (params.getTextParameter() != null && !hideTextParameters(indicatorUnit)) {
        return true;
    }
    if (params.getDateParameters() != null) {
        return true;
    }

    final Domain validDomain = params.getIndicatorValidDomain();
    if (validDomain != null && validDomain.getRanges() != null && !validDomain.getRanges().isEmpty()) {
        return true;
    }

    return params.getBins() != null;
}
Also used : TextParameters(org.talend.dataquality.indicators.TextParameters) IndicatorParameters(org.talend.dataquality.indicators.IndicatorParameters) DateParameters(org.talend.dataquality.indicators.DateParameters) Domain(org.talend.dataquality.domain.Domain) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator)

Example 4 with FrequencyIndicator

use of org.talend.dataquality.indicators.FrequencyIndicator in project tdq-studio-se by Talend.

In the class IndicatorEvaluator, the method initDataSet:

/**
 * Looks up (creating it on demand) the {@link AnalyzedDataSet} registered for the indicator and returns the row
 * list inside it that matches the indicator kind:
 * <ul>
 * <li>frequency, min-length and max-length indicators: the list stored in the frequency data map under a key
 * derived from {@code object};</li>
 * <li>indicators reporting {@code isInValidRow()} or {@code isValidRow()}: the invalid or valid list of the
 * pattern data;</li>
 * <li>any other indicator: the plain data list.</li>
 * </ul>
 * Missing maps and lists are created and attached to the data set.
 *
 * @param indicator the indicator whose rows are collected
 * @param indicToRowMap the map from indicator to its analyzed data set; a new entry is added when none exists
 * @param object the current value used to derive the frequency key; may be {@code null}
 * @return the matching row list; {@code null} only when the pattern data entry is not an {@code ArrayList}
 */
@SuppressWarnings("unchecked")
protected List<Object[]> initDataSet(Indicator indicator, EMap<Indicator, AnalyzedDataSet> indicToRowMap, Object object) {
    AnalyzedDataSet dataSet = indicToRowMap.get(indicator);
    if (dataSet == null) {
        dataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
        indicToRowMap.put(indicator, dataSet);
        dataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
        dataSet.setRecordSize(0);
    }

    final boolean measuresLength = indicator instanceof MinLengthIndicator || indicator instanceof MaxLengthIndicator;
    if (indicator instanceof FrequencyIndicator || measuresLength) {
        Map<Object, List<Object[]>> rowsByKey = dataSet.getFrequencyData();
        if (rowsByKey == null) {
            rowsByKey = new HashMap<Object, List<Object[]>>();
            dataSet.setFrequencyData(rowsByKey);
        }

        // The order of these checks matters: null and length indicators take precedence over the
        // empty-string marker, which in turn takes precedence over the pattern/format conversions.
        final String bucketKey;
        if (object == null) {
            bucketKey = SpecialValueDisplay.NULL_FIELD;
        } else if (measuresLength) {
            bucketKey = String.valueOf(object.toString().length());
        } else if (object.equals(PluginConstant.EMPTY_STRING)) {
            bucketKey = SpecialValueDisplay.EMPTY_FIELD;
        } else if (indicator instanceof PatternLowFreqIndicator) {
            bucketKey = ((PatternLowFreqIndicator) indicator).convertCharacters(object.toString());
        } else if (indicator instanceof PatternFreqIndicator) {
            bucketKey = ((PatternFreqIndicator) indicator).convertCharacters(object.toString());
        } else if (indicator instanceof FormatFreqPieIndicator) {
            // feature TDQ-3253: the format indicator supplies its own key.
            bucketKey = ((FormatFreqPieIndicator) indicator).getCurrentKey();
        } else {
            bucketKey = object.toString();
        }

        List<Object[]> bucket = rowsByKey.get(bucketKey);
        if (bucket == null) {
            bucket = new ArrayList<Object[]>();
            rowsByKey.put(bucketKey, bucket);
        }
        return bucket;
    }

    if (indicator.isInValidRow() || indicator.isValidRow()) {
        List<Object> validityLists = dataSet.getPatternData();
        if (validityLists == null) {
            validityLists = new ArrayList<Object>();
            // first entry maps to AnalyzedDataSetImpl.VALID_VALUE
            validityLists.add(new ArrayList<Object[]>());
            // second entry maps to AnalyzedDataSetImpl.INVALID_VALUE
            validityLists.add(new ArrayList<Object[]>());
            dataSet.setPatternData(validityLists);
        }
        Object selected = validityLists
                .get(indicator.isInValidRow() ? AnalyzedDataSetImpl.INVALID_VALUE : AnalyzedDataSetImpl.VALID_VALUE);
        return selected instanceof ArrayList<?> ? (ArrayList<Object[]>) selected : null;
    }

    List<Object[]> plainRows = dataSet.getData();
    if (plainRows == null) {
        plainRows = new ArrayList<Object[]>();
        dataSet.setData(plainRows);
    }
    return plainRows;
}
Also used : MaxLengthIndicator(org.talend.dataquality.indicators.MaxLengthIndicator) AnalyzedDataSet(org.talend.dataquality.analysis.AnalyzedDataSet) FormatFreqPieIndicator(org.talend.dataquality.indicators.FormatFreqPieIndicator) ArrayList(java.util.ArrayList) MinLengthIndicator(org.talend.dataquality.indicators.MinLengthIndicator) PatternLowFreqIndicator(org.talend.dataquality.indicators.PatternLowFreqIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) ArrayList(java.util.ArrayList) List(java.util.List) PatternFreqIndicator(org.talend.dataquality.indicators.PatternFreqIndicator)

Example 5 with FrequencyIndicator

use of org.talend.dataquality.indicators.FrequencyIndicator in project tdq-studio-se by Talend.

In the class IndicatorCommonUtil, the method handleFrequency:

/**
 * Converts the frequency results of the given indicator into an array of {@link FrequencyExt}, one element per
 * distinct key, each carrying the key, its count value and its frequency.
 * <p>
 * Three kinds of indicator are handled:
 * <ul>
 * <li>user defined indicators (as decided by {@code UDIHelper.isUDI}), read through {@link UserDefIndicator};</li>
 * <li>date pattern frequency indicators, read from their result map;</li>
 * <li>everything else, which is cast to {@link FrequencyIndicator}. For the Soundex frequency and Soundex low
 * frequency classes the distinct count is used as value instead of the plain count.</li>
 * </ul>
 *
 * @param indicator the indicator to read; when it is neither a UDI nor a date pattern frequency indicator it must
 * be a {@link FrequencyIndicator}
 * @return a {@code FrequencyExt[]}, or {@code null} when the indicator exposes no value set / result map
 */
private static Object handleFrequency(Indicator indicator) {
    FrequencyExt[] frequencyExt = null;
    if (UDIHelper.isUDI(indicator)) {
        UserDefIndicator udi = (UserDefIndicator) indicator;
        Set<Object> valueSet = udi.getDistinctValues();
        if (valueSet == null) {
            return null;
        }
        frequencyExt = new FrequencyExt[valueSet.size()];
        int i = 0;
        for (Object o : valueSet) {
            frequencyExt[i] = new FrequencyExt();
            frequencyExt[i].setKey(o);
            frequencyExt[i].setValue(udi.getCount(o));
            frequencyExt[i].setFrequency(udi.getFrequency(o));
            i++;
        }
    } else if (IndicatorEnum.DatePatternFreqIndicatorEnum.getIndicatorType().isInstance(indicator)) {
        DatePatternFreqIndicator datePatternFrequency = (DatePatternFreqIndicator) indicator;
        Map<String, Long> results = datePatternFrequency.getResult();
        // Same contract as the other branches: no results means no frequency table.
        if (results == null) {
            return null;
        }
        frequencyExt = new FrequencyExt[results.size()];
        int i = 0;
        // Iterate over entries so that each value is read without a second map lookup.
        for (Map.Entry<String, Long> entry : results.entrySet()) {
            String key = entry.getKey();
            frequencyExt[i] = new FrequencyExt();
            frequencyExt[i].setKey(key);
            frequencyExt[i].setValue(entry.getValue());
            frequencyExt[i].setFrequency(datePatternFrequency.getFrequency(key));
            i++;
        }
    } else {
        FrequencyIndicator frequency = (FrequencyIndicator) indicator;
        Set<Object> valueSet = frequency.getDistinctValues();
        if (valueSet == null) {
            return null;
        }
        // The indicator's class does not change while iterating, so decide once whether the
        // Soundex distinct count has to be displayed (MOD scorreia 2009-03-23).
        final boolean useDistinctCount = IndicatorsPackage.eINSTANCE.getSoundexFreqIndicator().equals(frequency.eClass())
                || IndicatorsPackage.eINSTANCE.getSoundexLowFreqIndicator().equals(frequency.eClass());
        frequencyExt = new FrequencyExt[valueSet.size()];
        int i = 0;
        for (Object o : valueSet) {
            frequencyExt[i] = new FrequencyExt();
            frequencyExt[i].setKey(o);
            if (useDistinctCount) {
                frequencyExt[i].setValue(((SoundexFreqIndicator) frequency).getDistinctCount(o));
            } else {
                frequencyExt[i].setValue(frequency.getCount(o));
            }
            frequencyExt[i].setFrequency(frequency.getFrequency(o));
            i++;
        }
    }
    return frequencyExt;
}
Also used : Set(java.util.Set) SoundexFreqIndicator(org.talend.dataquality.indicators.SoundexFreqIndicator) FrequencyIndicator(org.talend.dataquality.indicators.FrequencyIndicator) DatePatternFreqIndicator(org.talend.dataquality.indicators.DatePatternFreqIndicator) FrequencyExt(org.talend.dq.indicators.ext.FrequencyExt) UserDefIndicator(org.talend.dataquality.indicators.sql.UserDefIndicator) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

FrequencyIndicator (org.talend.dataquality.indicators.FrequencyIndicator)9 DateParameters (org.talend.dataquality.indicators.DateParameters)3 IndicatorParameters (org.talend.dataquality.indicators.IndicatorParameters)3 MaxLengthIndicator (org.talend.dataquality.indicators.MaxLengthIndicator)3 MinLengthIndicator (org.talend.dataquality.indicators.MinLengthIndicator)3 File (java.io.File)2 Resource (org.eclipse.emf.ecore.resource.Resource)2 EMFUtil (org.talend.commons.emf.EMFUtil)2 ModelElementIndicator (org.talend.dataprofiler.core.model.ModelElementIndicator)2 Domain (org.talend.dataquality.domain.Domain)2 AverageLengthIndicator (org.talend.dataquality.indicators.AverageLengthIndicator)2 BlankCountIndicator (org.talend.dataquality.indicators.BlankCountIndicator)2 DatePatternFreqIndicator (org.talend.dataquality.indicators.DatePatternFreqIndicator)2 DistinctCountIndicator (org.talend.dataquality.indicators.DistinctCountIndicator)2 DuplicateCountIndicator (org.talend.dataquality.indicators.DuplicateCountIndicator)2 Indicator (org.talend.dataquality.indicators.Indicator)2 MeanIndicator (org.talend.dataquality.indicators.MeanIndicator)2 MedianIndicator (org.talend.dataquality.indicators.MedianIndicator)2 SoundexFreqIndicator (org.talend.dataquality.indicators.SoundexFreqIndicator)2 Connection (java.sql.Connection)1