Search in sources :

Example 1 with NumberHistogram

use of org.talend.dataprep.api.dataset.statistics.number.NumberHistogram in project data-prep by Talend.

From the class StatisticsAdapter, method injectNumberHistogram.

/**
 * Copies the streamed number histogram found in the analysis result into the statistics of the column.
 * <p>
 * Nothing happens unless the column type is assignable to {@code NUMERIC} and the result holds a
 * {@link StreamNumberHistogramStatistics}. Each histogram range becomes a {@link HistogramRange} whose bounds are
 * passed through a {@link Locale#US} number format and parsed back; when the formatted text cannot be parsed,
 * the raw bounds are used instead.
 *
 * @param column the column whose statistics receive the histogram.
 * @param result the analysis result to read the histogram from.
 */
private void injectNumberHistogram(final ColumnMetadata column, final Analyzers.Result result) {
    if (NUMERIC.isAssignableFrom(column.getType()) && result.exist(StreamNumberHistogramStatistics.class)) {
        final Statistics statistics = column.getStatistics();
        final Map<org.talend.dataquality.statistics.numeric.histogram.Range, Long> histogramStatistics = result.get(StreamNumberHistogramStatistics.class).getHistogram();
        // getInstance(Locale) is a static method declared on NumberFormat: call it on its declaring class.
        final NumberFormat format = NumberFormat.getInstance(Locale.US);
        // Set histogram ranges
        final Histogram histogram = new NumberHistogram();
        histogramStatistics.forEach((rangeValues, occurrence) -> {
            final HistogramRange range = new HistogramRange();
            try {
                // Double.valueOf(String) replaces the deprecated Double(String) constructor and throws the same
                // NumberFormatException. The default US format adds grouping separators (e.g. "1,234.5"), so such
                // values cannot be parsed back and are handled by the fallback below.
                range.getRange().setMax(Double.valueOf(format.format(rangeValues.getUpper())));
                range.getRange().setMin(Double.valueOf(format.format(rangeValues.getLower())));
            } catch (NumberFormatException e) {
                // Fallback to non formatted numbers (unable to parse numbers). Both bounds are overwritten so the
                // range never mixes a formatted bound with a raw one.
                range.getRange().setMax(rangeValues.getUpper());
                range.getRange().setMin(rangeValues.getLower());
            }
            range.setOccurrences(occurrence);
            histogram.getItems().add(range);
        });
        statistics.setHistogram(histogram);
    }
}
Also used : DateHistogram(org.talend.dataprep.api.dataset.statistics.date.DateHistogram) NumberHistogram(org.talend.dataprep.api.dataset.statistics.number.NumberHistogram) CardinalityStatistics(org.talend.dataquality.statistics.cardinality.CardinalityStatistics) DataTypeFrequencyStatistics(org.talend.dataquality.statistics.frequency.DataTypeFrequencyStatistics) StreamNumberHistogramStatistics(org.talend.dataprep.api.dataset.statistics.number.StreamNumberHistogramStatistics) ValueQualityStatistics(org.talend.dataquality.common.inference.ValueQualityStatistics) SummaryStatistics(org.talend.dataquality.statistics.numeric.summary.SummaryStatistics) StreamDateHistogramStatistics(org.talend.dataprep.api.dataset.statistics.date.StreamDateHistogramStatistics) TextLengthStatistics(org.talend.dataquality.statistics.text.TextLengthStatistics) PatternFrequencyStatistics(org.talend.dataquality.statistics.frequency.pattern.PatternFrequencyStatistics) QuantileStatistics(org.talend.dataquality.statistics.numeric.quantile.QuantileStatistics) NumberFormat(java.text.NumberFormat)

Example 2 with NumberHistogram

use of org.talend.dataprep.api.dataset.statistics.number.NumberHistogram in project data-prep by Talend.

From the class HistogramTest, method test_type.

@Test
public void test_type() throws IOException {
    String json = "{ \"_class\" : \"org.talend.dataprep.api.dataset.statistics.number.NumberHistogram\" , \"items\" : [ { \"occurrences\" : 16 , \"range\" : { \"min\" : 1.0 , \"max\" : 17.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 17.0 , \"max\" : 33.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 33.0 , \"max\" : 49.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 49.0 , \"max\" : 65.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 65.0 , \"max\" : 81.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 81.0 , \"max\" : 97.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 97.0 , \"max\" : 113.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 113.0 , \"max\" : 129.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 129.0 , \"max\" : 145.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 145.0 , \"max\" : 161.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 161.0 , \"max\" : 177.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 177.0 , \"max\" : 193.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 193.0 , \"max\" : 209.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 209.0 , \"max\" : 225.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 225.0 , \"max\" : 241.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 241.0 , \"max\" : 257.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 257.0 , \"max\" : 273.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 273.0 , \"max\" : 289.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 289.0 , \"max\" : 305.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 305.0 , \"max\" : 321.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 321.0 , \"max\" : 337.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 337.0 , \"max\" : 353.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 353.0 , \"max\" : 369.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 369.0 , \"max\" : 385.0}} , { \"occurrences\" : 16 , \"range\" : { 
\"min\" : 385.0 , \"max\" : 401.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 401.0 , \"max\" : 417.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 417.0 , \"max\" : 433.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 433.0 , \"max\" : 449.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 449.0 , \"max\" : 465.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 465.0 , \"max\" : 481.0}} , { \"occurrences\" : 16 , \"range\" : { \"min\" : 481.0 , \"max\" : 497.0}} , { \"occurrences\" : 4 , \"range\" : { \"min\" : 497.0 , \"max\" : 513.0}}]}";
    json = json.replace("_class", "type").replace("org.talend.dataprep.api.dataset.statistics.number.NumberHistogram", "number").replace("org.talend.dataprep.api.dataset.statistics.number.DateHistogram", "date");
    ObjectMapper mapper = new ObjectMapper();
    NumberHistogram histogram = (NumberHistogram) mapper.readValue(json, Histogram.class);
    assertEquals(histogram.getItems().size(), 32);
}
Also used : NumberHistogram(org.talend.dataprep.api.dataset.statistics.number.NumberHistogram) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Aggregations

NumberHistogram (org.talend.dataprep.api.dataset.statistics.number.NumberHistogram)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 NumberFormat (java.text.NumberFormat)1 Test (org.junit.Test)1 DateHistogram (org.talend.dataprep.api.dataset.statistics.date.DateHistogram)1 StreamDateHistogramStatistics (org.talend.dataprep.api.dataset.statistics.date.StreamDateHistogramStatistics)1 StreamNumberHistogramStatistics (org.talend.dataprep.api.dataset.statistics.number.StreamNumberHistogramStatistics)1 ValueQualityStatistics (org.talend.dataquality.common.inference.ValueQualityStatistics)1 CardinalityStatistics (org.talend.dataquality.statistics.cardinality.CardinalityStatistics)1 DataTypeFrequencyStatistics (org.talend.dataquality.statistics.frequency.DataTypeFrequencyStatistics)1 PatternFrequencyStatistics (org.talend.dataquality.statistics.frequency.pattern.PatternFrequencyStatistics)1 QuantileStatistics (org.talend.dataquality.statistics.numeric.quantile.QuantileStatistics)1 SummaryStatistics (org.talend.dataquality.statistics.numeric.summary.SummaryStatistics)1 TextLengthStatistics (org.talend.dataquality.statistics.text.TextLengthStatistics)1