Search in sources :

Example 11 with Statistics

use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.

the class CopyColumnTest method should_copy_semantic.

/**
 * Checks that the column produced by the action under test carries the same statistics and
 * semantic information (domain, domain label, domain frequency, semantic domains) as the
 * original column.
 */
@Test
public void should_copy_semantic() throws Exception {
    // given: a single column holding statistics and semantic domain information
    List<ColumnMetadata> input = new ArrayList<>();
    final ColumnMetadata original = createMetadata("0001", "column");
    original.setStatistics(new Statistics());
    SemanticDomain semanticDomain = new SemanticDomain("mountain_goat", "Mountain goat pale pale", 1);
    original.setDomain("beer");
    original.setDomainFrequency(1);
    original.setDomainLabel("the best beer");
    original.setSemanticDomains(Collections.singletonList(semanticDomain));
    input.add(original);
    RowMetadata rowMetadata = new RowMetadata(input);
    assertThat(rowMetadata.getColumns()).isNotNull().isNotEmpty().hasSize(1);

    // when: the action is applied on a row built from that metadata
    final DataSetRow row = new DataSetRow(rowMetadata);
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, parameters));

    // then: a second column exists and mirrors the original one
    List<ColumnMetadata> actual = row.getRowMetadata().getColumns();
    assertThat(actual).isNotNull().isNotEmpty().hasSize(2);
    final ColumnMetadata copy = actual.get(1);
    // JUnit expects (expected, actual): keeping this order makes failure messages accurate
    assertEquals(original.getStatistics(), copy.getStatistics());
    // the scalar semantic fields must match the original column
    assertThat(copy).isEqualToComparingOnlyGivenFields(original, "domain", "domainLabel", "domainFrequency");
    // the semantic domain list must still contain the original domain
    assertThat(copy.getSemanticDomains()).isNotNull().isNotEmpty().contains(semanticDomain);
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) SemanticDomain(org.talend.dataprep.api.dataset.statistics.SemanticDomain) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) Statistics(org.talend.dataprep.api.dataset.statistics.Statistics) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) Test(org.junit.Test) AbstractMetadataBaseTest(org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest)

Example 12 with Statistics

use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.

the class ActionMetadataTestUtils method setStatistics.

/**
 * Reads statistics from a JSON stream and attaches them to one column of a row.
 *
 * @param row the row whose metadata holds the column to update.
 * @param columnId the id of the column that receives the statistics.
 * @param statisticsContent the JSON content to map onto a {@link Statistics} instance.
 * @throws IOException if the content cannot be read or mapped.
 */
public static void setStatistics(DataSetRow row, String columnId, InputStream statisticsContent) throws IOException {
    // parse first, so the row is left untouched when the content is invalid
    final Statistics parsed = new ObjectMapper().readValue(statisticsContent, Statistics.class);
    row.getRowMetadata() //
            .getById(columnId) //
            .setStatistics(parsed);
}
Also used : Statistics(org.talend.dataprep.api.dataset.statistics.Statistics) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 13 with Statistics

use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.

the class AbstractMetadataBaseTest method createMetadata.

/**
 * Creates a column metadata and sets its statistics from a JSON classpath resource.
 *
 * @param id the column id, forwarded to {@code createMetadata(id, name, type)}.
 * @param name the column name, forwarded to {@code createMetadata(id, name, type)}.
 * @param type the column type, forwarded to {@code createMetadata(id, name, type)}.
 * @param statisticsFileName name of the statistics file, looked up under
 * {@code /org/talend/dataprep/transformation/actions/date/} on the classpath.
 * @return the created column with its statistics set.
 * @throws IOException if the resource does not exist or cannot be read as {@link Statistics}.
 */
protected ColumnMetadata createMetadata(String id, String name, Type type, String statisticsFileName) throws IOException {
    final String resourcePath = "/org/talend/dataprep/transformation/actions/date/" + statisticsFileName;
    ColumnMetadata column = createMetadata(id, name, type);
    ObjectMapper mapper = new ObjectMapper();
    // getResourceAsStream returns null when the resource is missing: fail with an explicit message
    // instead of handing a null stream to Jackson, and always release the stream.
    try (java.io.InputStream content = getClass().getResourceAsStream(resourcePath)) {
        if (content == null) {
            throw new IOException("Statistics resource not found on classpath: " + resourcePath);
        }
        final Statistics statistics = mapper.reader(Statistics.class).readValue(content);
        column.setStatistics(statistics);
    }
    return column;
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) Statistics(org.talend.dataprep.api.dataset.statistics.Statistics) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 14 with Statistics

use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.

the class ChangeDatePattern method compile.

/**
 * Compiles the action: creates the target column when requested, compiles the date pattern and,
 * if the action is still OK, registers that pattern in the target column's pattern frequencies.
 *
 * @param actionContext the context of the action being compiled.
 */
@Override
public void compile(ActionContext actionContext) {
    super.compile(actionContext);

    final boolean createsNewColumn = ActionsUtils.doesCreateNewColumn(actionContext.getParameters(), CREATE_NEW_COLUMN_DEFAULT);
    if (createsNewColumn) {
        ActionsUtils.createNewColumn(actionContext,
                singletonList(ActionsUtils.additionalColumn()
                        .withName(actionContext.getColumnName() + NEW_COLUMN_SUFFIX)
                        .withCopyMetadataFromId(actionContext.getColumnId())));
    }
    if (actionContext.getActionStatus() != OK) {
        return;
    }

    compileDatePattern(actionContext);
    if (actionContext.getActionStatus() != OK) {
        return;
    }

    // Register the new pattern in the column's statistics as the most used pattern,
    // so that later date actions can be processed more efficiently.
    final DatePattern compiledPattern = actionContext.get(COMPILED_DATE_PATTERN);
    final RowMetadata metadata = actionContext.getRowMetadata();

    // target column, then origin column
    final String targetId = ActionsUtils.getTargetColumnId(actionContext);
    final ColumnMetadata target = metadata.getById(targetId);
    final ColumnMetadata origin = metadata.getById(actionContext.getColumnId());

    // When the target column is not the original column, it must not share the same statistics object.
    final Statistics targetStatistics = createsNewColumn ? new Statistics(origin.getStatistics()) : target.getStatistics();
    if (createsNewColumn) {
        target.setStatistics(targetStatistics);
    }

    actionContext.get(FROM_DATE_PATTERNS, p -> compileFromDatePattern(actionContext));

    // Reuse the frequency entry of the compiled pattern if there is one, otherwise add a fresh entry.
    PatternFrequency registered = null;
    for (PatternFrequency candidate : targetStatistics.getPatternFrequencies()) {
        if (StringUtils.equals(candidate.getPattern(), compiledPattern.getPattern())) {
            registered = candidate;
            break;
        }
    }
    if (registered == null) {
        registered = new PatternFrequency(compiledPattern.getPattern(), 0);
        targetStatistics.getPatternFrequencies().add(registered);
    }

    // One more occurrence than the count reported for the origin column.
    final long topCount = getMostUsedPatternCount(origin);
    registered.setOccurrences(topCount + 1);
    metadata.update(targetId, target);
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) PatternFrequency(org.talend.dataprep.api.dataset.statistics.PatternFrequency) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) Statistics(org.talend.dataprep.api.dataset.statistics.Statistics)

Example 15 with Statistics

use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.

the class DateCalendarConverter method compile.

/**
 * Compiles the calendar conversion action.
 * <p>
 * When a new column is requested, it is created with the name of the current column plus
 * {@code NEW_COLUMN_SUFFIX}, and receives a new {@link Statistics} built from the original column's
 * statistics. If the action status is OK, the "from" and "to" calendar parameters are read and the
 * matching values are stored in the action context; when the source calendar is not a chronology,
 * a {@link JulianDayConverter} is also created and stored.
 *
 * @param actionContext the context of the action being compiled.
 */
@Override
public void compile(ActionContext actionContext) {
    super.compile(actionContext);
    if (ActionsUtils.doesCreateNewColumn(actionContext.getParameters(), CREATE_NEW_COLUMN_DEFAULT)) {
        ActionsUtils.createNewColumn(actionContext, singletonList(ActionsUtils.additionalColumn().withName(actionContext.getColumnName() + NEW_COLUMN_SUFFIX)));
        ColumnMetadata targetColumn = actionContext.getRowMetadata().getById(ActionsUtils.getTargetColumnId(actionContext));
        ColumnMetadata originalColumn = actionContext.getRowMetadata().getById(actionContext.getColumnId());
        // give the new column its own Statistics instance, built from the original column's statistics
        targetColumn.setStatistics(new Statistics(originalColumn.getStatistics()));
    }
    if (actionContext.getActionStatus() == OK) {
        // start from an empty converter map on each compilation
        dateCalendarConverterMap = new HashMap<>();
        String fromCalendarParameter = actionContext.getParameters().get(FROM_CALENDAR_TYPE_PARAMETER);
        String toCalendarParameter = actionContext.getParameters().get(TO_CALENDAR_TYPE_PARAMETER);
        // NOTE(review): actionContext.get(key, function) presumably computes and caches the value on
        // first access only -- confirm against ActionContext before reordering any of these calls.
        final boolean isFromChronology = actionContext.get(IS_FROM_CHRONOLOGY_INTERNAL_KEY, p -> valueOf(fromCalendarParameter).isChronology());
        final boolean isToChronology = actionContext.get(IS_TO_CHRONOLOGY_INTERNAL_KEY, p -> valueOf(toCalendarParameter).isChronology());
        if (isFromChronology) {
            // source is a chronology: store its calendar type, default locale and the input date patterns
            AbstractChronology fromCalendarType = valueOf(fromCalendarParameter).getCalendarType();
            Locale fromLocale = valueOf(fromCalendarParameter).getDefaultLocale();
            actionContext.get(FROM_CALENDAR_TYPE_KEY, p -> fromCalendarType);
            actionContext.get(FROM_LOCALE_KEY, p -> fromLocale);
            actionContext.get(FROM_DATE_PATTERNS_KEY, p -> compileFromDatePattern(actionContext));
        } else {
            // source is a Julian day: no input pattern or locale is needed, only the temporal field
            TemporalField fromTemporalField = valueOf(fromCalendarParameter).getTemporalField();
            actionContext.get(FROM_CALENDAR_TYPE_KEY, p -> fromTemporalField);
        }
        if (isToChronology) {
            // target is a chronology: store its calendar type and default locale
            AbstractChronology toCalendarType = valueOf(toCalendarParameter).getCalendarType();
            Locale toLocale = valueOf(toCalendarParameter).getDefaultLocale();
            actionContext.get(TO_CALENDAR_TYPE_KEY, p -> toCalendarType);
            actionContext.get(TO_LOCALE_KEY, p -> toLocale);
        } else {
            // target is a Julian day: no output pattern or locale is needed, only the temporal field
            TemporalField toTemporalField = valueOf(toCalendarParameter).getTemporalField();
            actionContext.get(TO_CALENDAR_TYPE_KEY, p -> toTemporalField);
        }
        // create a JulianDayConverter instance when converting from a Julian day;
        // this must come after both FROM_CALENDAR_TYPE_KEY and TO_CALENDAR_TYPE_KEY have been stored
        if (!isFromChronology) {
            JulianDayConverter julianDayConvert;
            if (isToChronology) {
                // Julian day to a chronology calendar: use the default output pattern and the ISO default locale
                julianDayConvert = new JulianDayConverter(actionContext.get(FROM_CALENDAR_TYPE_KEY), actionContext.get(TO_CALENDAR_TYPE_KEY), DEFAULT_OUTPUT_PATTERN, ISO.getDefaultLocale());
            } else {
                // Julian day to Julian day: both stored values are temporal fields
                julianDayConvert = new JulianDayConverter((TemporalField) actionContext.get(FROM_CALENDAR_TYPE_KEY), (TemporalField) actionContext.get(TO_CALENDAR_TYPE_KEY));
            }
            actionContext.get(JULIAN_DAY_CONVERT_KEY, p -> julianDayConvert);
        }
    }
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) TemporalField(java.time.temporal.TemporalField) JulianDayConverter(org.talend.dataquality.converters.JulianDayConverter) Statistics(org.talend.dataprep.api.dataset.statistics.Statistics)

Aggregations

Statistics (org.talend.dataprep.api.dataset.statistics.Statistics): 15, Test (org.junit.Test): 10, ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata): 10, RowMetadata (org.talend.dataprep.api.dataset.RowMetadata): 9, AbstractMetadataBaseTest (org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest): 7, DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow): 6, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 3, SemanticDomain (org.talend.dataprep.api.dataset.statistics.SemanticDomain): 3, HashMap (java.util.HashMap): 2, PatternFrequency (org.talend.dataprep.api.dataset.statistics.PatternFrequency): 2, ChangeDatePatternTest (org.talend.dataprep.transformation.actions.date.ChangeDatePatternTest): 2, TemporalField (java.time.temporal.TemporalField): 1, Matchers.containsString (org.hamcrest.Matchers.containsString): 1, Matchers.isEmptyString (org.hamcrest.Matchers.isEmptyString): 1, DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata): 1, Quality (org.talend.dataprep.api.dataset.Quality): 1, DataSetBaseTest (org.talend.dataprep.dataset.DataSetBaseTest): 1, ActionMetadataTestUtils.setStatistics (org.talend.dataprep.transformation.actions.ActionMetadataTestUtils.setStatistics): 1, JulianDayConverter (org.talend.dataquality.converters.JulianDayConverter): 1