use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.
the class CopyColumnTest method should_copy_semantic.
/**
 * Verifies that the second column present after running the action under test carries the same
 * statistics, the same selected semantic domain (domain, label, frequency) and the same candidate
 * semantic domains as the original column.
 */
@Test
public void should_copy_semantic() throws Exception {
    // given: a single column carrying statistics and semantic information
    final ColumnMetadata original = createMetadata("0001", "column");
    original.setStatistics(new Statistics());
    final SemanticDomain semanticDomain = new SemanticDomain("mountain_goat", "Mountain goat pale pale", 1);
    original.setDomain("beer");
    original.setDomainFrequency(1);
    original.setDomainLabel("the best beer");
    original.setSemanticDomains(Collections.singletonList(semanticDomain));

    final List<ColumnMetadata> input = new ArrayList<>();
    input.add(original);
    final RowMetadata rowMetadata = new RowMetadata(input);
    assertThat(rowMetadata.getColumns()).isNotNull().isNotEmpty().hasSize(1);

    // when: the action is applied to a row built on that metadata
    final DataSetRow row = new DataSetRow(rowMetadata);
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, parameters));

    // then: a second column exists and mirrors the original's statistics and semantic data
    final List<ColumnMetadata> actual = row.getRowMetadata().getColumns();
    assertThat(actual).isNotNull().isNotEmpty().hasSize(2);
    final ColumnMetadata copy = actual.get(1);
    // AssertJ keeps the actual/expected roles explicit (the previous JUnit call had them swapped)
    assertThat(copy.getStatistics()).isEqualTo(original.getStatistics());
    assertThat(copy).isEqualToComparingOnlyGivenFields(original, "domain", "domainLabel", "domainFrequency");
    assertThat(copy.getSemanticDomains()).isNotNull().isNotEmpty().contains(semanticDomain);
}
use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.
the class ActionMetadataTestUtils method setStatistics.
/**
 * Deserializes statistics from a JSON stream and attaches them to one column of the given row.
 *
 * @param row the row whose metadata holds the column to update.
 * @param columnId the id used to look the column up in the row metadata.
 * @param statisticsContent the statistics in JSON, as expected from the DQ library.
 * @throws IOException if the content cannot be read or mapped to {@link Statistics}.
 */
public static void setStatistics(DataSetRow row, String columnId, InputStream statisticsContent) throws IOException {
    // parse first, so that a malformed payload fails before the row metadata is touched
    final Statistics parsed = new ObjectMapper().readValue(statisticsContent, Statistics.class);
    row.getRowMetadata()
            .getById(columnId)
            .setStatistics(parsed);
}
use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.
the class AbstractMetadataBaseTest method createMetadata.
/**
 * Creates a column metadata and sets its statistics from a JSON classpath resource.
 *
 * @param id forwarded to {@code createMetadata(id, name, type)}.
 * @param name forwarded to {@code createMetadata(id, name, type)}.
 * @param type forwarded to {@code createMetadata(id, name, type)}.
 * @param statisticsFileName file name of the statistics resource, resolved under
 * {@code /org/talend/dataprep/transformation/actions/date/} on the classpath.
 * @return the created column, with the deserialized statistics set on it.
 * @throws IOException if the resource does not exist or cannot be deserialized into {@link Statistics}.
 */
protected ColumnMetadata createMetadata(String id, String name, Type type, String statisticsFileName) throws IOException {
    final ColumnMetadata column = createMetadata(id, name, type);
    final String resourcePath = "/org/talend/dataprep/transformation/actions/date/" + statisticsFileName;
    // try-with-resources tolerates a null resource, so the explicit check below can report the missing path
    try (java.io.InputStream statisticsContent = getClass().getResourceAsStream(resourcePath)) {
        if (statisticsContent == null) {
            // fail with the offending path instead of an obscure error from inside the JSON parser
            throw new java.io.FileNotFoundException("Statistics resource not found on classpath: " + resourcePath);
        }
        final Statistics statistics = new ObjectMapper().readValue(statisticsContent, Statistics.class);
        column.setStatistics(statistics);
    }
    return column;
}
use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.
the class ChangeDatePattern method compile.
/**
 * Compiles the action: optionally creates the new target column, compiles the requested date
 * pattern and, when the action status is still OK, records that pattern in the target column's
 * statistics with an occurrence count one above the origin column's most used pattern count.
 *
 * @param actionContext the context of the action being compiled.
 */
@Override
public void compile(ActionContext actionContext) {
    super.compile(actionContext);

    final boolean createsNewColumn = ActionsUtils.doesCreateNewColumn(actionContext.getParameters(), CREATE_NEW_COLUMN_DEFAULT);
    if (createsNewColumn) {
        ActionsUtils.createNewColumn(actionContext,
                singletonList(ActionsUtils.additionalColumn()
                        .withName(actionContext.getColumnName() + NEW_COLUMN_SUFFIX)
                        .withCopyMetadataFromId(actionContext.getColumnId())));
    }

    if (actionContext.getActionStatus() != OK) {
        return;
    }
    compileDatePattern(actionContext);
    // the status is checked again because it is read after compileDatePattern has run
    if (actionContext.getActionStatus() != OK) {
        return;
    }

    // Register the new pattern in the column's statistics as the most used pattern,
    // so that later date actions can be processed more efficiently.
    final DatePattern compiledPattern = actionContext.get(COMPILED_DATE_PATTERN);
    final RowMetadata metadata = actionContext.getRowMetadata();

    // target column first, then origin column
    final String targetColumnId = ActionsUtils.getTargetColumnId(actionContext);
    final ColumnMetadata target = metadata.getById(targetColumnId);
    final ColumnMetadata origin = metadata.getById(actionContext.getColumnId());

    // When the target is not the origin column, the two must not share the same statistics object:
    // the target receives a separate Statistics built from the origin's statistics.
    final Statistics targetStatistics;
    if (createsNewColumn) {
        targetStatistics = new Statistics(origin.getStatistics());
        target.setStatistics(targetStatistics);
    } else {
        targetStatistics = target.getStatistics();
    }

    actionContext.get(FROM_DATE_PATTERNS, p -> compileFromDatePattern(actionContext));

    // look for an existing frequency entry for the new pattern; first match wins
    PatternFrequency frequency = null;
    for (PatternFrequency candidate : targetStatistics.getPatternFrequencies()) {
        if (StringUtils.equals(candidate.getPattern(), compiledPattern.getPattern())) {
            frequency = candidate;
            break;
        }
    }
    if (frequency == null) {
        // no entry yet: add one, its occurrence count is set just below
        frequency = new PatternFrequency(compiledPattern.getPattern(), 0);
        targetStatistics.getPatternFrequencies().add(frequency);
    }

    final long mostUsedPatternCount = getMostUsedPatternCount(origin);
    frequency.setOccurrences(mostUsedPatternCount + 1);
    metadata.update(targetColumnId, target);
}
use of org.talend.dataprep.api.dataset.statistics.Statistics in project data-prep by Talend.
the class DateCalendarConverter method compile.
/**
 * Compiles the action: optionally creates the new target column (giving it a separate Statistics
 * built from the original column's statistics) and, when the action status is OK, stores in the
 * action context the source/target calendar settings derived from the action parameters.
 *
 * @param actionContext the context of the action being compiled.
 */
@Override
public void compile(ActionContext actionContext) {
    super.compile(actionContext);

    if (ActionsUtils.doesCreateNewColumn(actionContext.getParameters(), CREATE_NEW_COLUMN_DEFAULT)) {
        ActionsUtils.createNewColumn(actionContext,
                singletonList(ActionsUtils.additionalColumn().withName(actionContext.getColumnName() + NEW_COLUMN_SUFFIX)));
        final ColumnMetadata target = actionContext.getRowMetadata().getById(ActionsUtils.getTargetColumnId(actionContext));
        final ColumnMetadata origin = actionContext.getRowMetadata().getById(actionContext.getColumnId());
        target.setStatistics(new Statistics(origin.getStatistics()));
    }

    if (actionContext.getActionStatus() != OK) {
        return;
    }

    dateCalendarConverterMap = new HashMap<>();
    final String fromParameter = actionContext.getParameters().get(FROM_CALENDAR_TYPE_PARAMETER);
    final String toParameter = actionContext.getParameters().get(TO_CALENDAR_TYPE_PARAMETER);
    final boolean fromIsChronology = actionContext.get(IS_FROM_CHRONOLOGY_INTERNAL_KEY, p -> valueOf(fromParameter).isChronology());
    final boolean toIsChronology = actionContext.get(IS_TO_CHRONOLOGY_INTERNAL_KEY, p -> valueOf(toParameter).isChronology());

    // source side
    if (fromIsChronology) {
        final AbstractChronology sourceChronology = valueOf(fromParameter).getCalendarType();
        final Locale sourceLocale = valueOf(fromParameter).getDefaultLocale();
        actionContext.get(FROM_CALENDAR_TYPE_KEY, p -> sourceChronology);
        actionContext.get(FROM_LOCALE_KEY, p -> sourceLocale);
        actionContext.get(FROM_DATE_PATTERNS_KEY, p -> compileFromDatePattern(actionContext));
    } else {
        // converting from JulianDay: no input pattern and no Locale are needed
        final TemporalField sourceField = valueOf(fromParameter).getTemporalField();
        actionContext.get(FROM_CALENDAR_TYPE_KEY, p -> sourceField);
    }

    // target side
    if (toIsChronology) {
        final AbstractChronology destinationChronology = valueOf(toParameter).getCalendarType();
        final Locale destinationLocale = valueOf(toParameter).getDefaultLocale();
        actionContext.get(TO_CALENDAR_TYPE_KEY, p -> destinationChronology);
        actionContext.get(TO_LOCALE_KEY, p -> destinationLocale);
    } else {
        // converting to JulianDay: no output pattern and no Locale are needed
        final TemporalField destinationField = valueOf(toParameter).getTemporalField();
        actionContext.get(TO_CALENDAR_TYPE_KEY, p -> destinationField);
    }

    // a 'JulianDayConverter' instance is only needed when converting from JulianDay
    if (fromIsChronology) {
        return;
    }
    final JulianDayConverter julianConverter;
    if (toIsChronology) {
        // JulianDay to ISO calendar, using the default output pattern
        julianConverter = new JulianDayConverter(actionContext.get(FROM_CALENDAR_TYPE_KEY), actionContext.get(TO_CALENDAR_TYPE_KEY), DEFAULT_OUTPUT_PATTERN, ISO.getDefaultLocale());
    } else {
        julianConverter = new JulianDayConverter((TemporalField) actionContext.get(FROM_CALENDAR_TYPE_KEY), (TemporalField) actionContext.get(TO_CALENDAR_TYPE_KEY));
    }
    actionContext.get(JULIAN_DAY_CONVERT_KEY, p -> julianConverter);
}
Aggregations