use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.
the class DateCalendarConverterTest method testConversionJapaneseToISO_InvalidDateWithEra.
/**
 * A Japanese-calendar value that carries an era but names a day that does not exist
 * (February 30) is expected to come back unchanged from the JAPANESE to ISO conversion.
 */
@Test
public void testConversionJapaneseToISO_InvalidDateWithEra() {
    // given: one row whose second column holds the impossible date
    final String invalidDate = "0008/02/30 平成";
    final Map<String, String> values = new HashMap<>();
    values.put("0000", "Lucy");
    values.put("0001", invalidDate);
    final DataSetRow row = new DataSetRow(values);

    // register the era-aware pattern on the date column's statistics
    final PatternFrequency eraPattern = new PatternFrequency("yyyy/MM/dd G", 1);
    row.getRowMetadata().getColumns().get(1).getStatistics().getPatternFrequencies().add(eraPattern);

    final String sourceCalendar = DateCalendarConverter.CalendarUnit.JAPANESE.name();
    final String targetCalendar = DateCalendarConverter.CalendarUnit.ISO.name();
    final Map<String, String> actionParams = new HashMap<>();
    actionParams.put(ImplicitParameters.SCOPE.getKey().toLowerCase(), "column");
    actionParams.put(COLUMN_ID.getKey(), "0001");
    actionParams.put(FROM_CALENDAR_TYPE_PARAMETER, sourceCalendar);
    actionParams.put(TO_CALENDAR_TYPE_PARAMETER, targetCalendar);

    // when
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, actionParams));

    // then: the era is present, yet February 30 is not a real day, so the value is kept as is
    assertEquals(invalidDate, row.get("0001"));
}
use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.
the class DateCalendarConverterTest method testConversionJapaneseToISO_ValidDateWithoutEra.
/**
 * A value without an era, described by a pattern without {@code G}, is expected to come
 * back unchanged from the JAPANESE to ISO conversion.
 */
@Test
public void testConversionJapaneseToISO_ValidDateWithoutEra() {
    // given: one row whose second column holds a date with no era marker
    final String eralessDate = "0008/10/29";
    final Map<String, String> values = new HashMap<>();
    values.put("0000", "John");
    values.put("0001", eralessDate);
    final DataSetRow row = new DataSetRow(values);

    // the only known pattern for the date column has no era field
    final PatternFrequency plainPattern = new PatternFrequency("yyyy/MM/dd", 1);
    row.getRowMetadata().getColumns().get(1).getStatistics().getPatternFrequencies().add(plainPattern);

    final String sourceCalendar = DateCalendarConverter.CalendarUnit.JAPANESE.name();
    final String targetCalendar = DateCalendarConverter.CalendarUnit.ISO.name();
    final Map<String, String> actionParams = new HashMap<>();
    actionParams.put(ImplicitParameters.SCOPE.getKey().toLowerCase(), "column");
    actionParams.put(COLUMN_ID.getKey(), "0001");
    actionParams.put(FROM_CALENDAR_TYPE_PARAMETER, sourceCalendar);
    actionParams.put(TO_CALENDAR_TYPE_PARAMETER, targetCalendar);

    // when
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, actionParams));

    // then: without G in the pattern the input cannot be parsed, so it is left untouched
    assertEquals(eralessDate, row.get("0001"));
}
use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.
the class DateCalendarConverterTest method test_apply_in_newcolumn.
/**
 * Converts an ISO date column to Julian day numbers with "create_new_column" enabled and
 * checks that the source column "0001" keeps its values while the results land in "0002".
 */
@Test
public void test_apply_in_newcolumn() {
    // given
    Map<String, String> rowContent = new HashMap<>();
    // row 1: the only row that gets an explicit date pattern registered on its column statistics
    rowContent.put("0000", "David");
    rowContent.put("0001", "1970-01-01");
    final DataSetRow row1 = new DataSetRow(rowContent);
    row1.getRowMetadata().getColumns().get(1).getStatistics().getPatternFrequencies().add(new PatternFrequency("yyyy-MM-dd", 1));
    // row 2
    rowContent = new HashMap<>();
    rowContent.put("0000", "John");
    rowContent.put("0001", "0001-01-01");
    final DataSetRow row2 = new DataSetRow(rowContent);
    // row 3: empty date value
    rowContent = new HashMap<>();
    rowContent.put("0000", "Michel");
    rowContent.put("0001", "");
    final DataSetRow row3 = new DataSetRow(rowContent);

    final Map<String, String> parameters = new HashMap<>();
    parameters.put(ImplicitParameters.SCOPE.getKey().toLowerCase(), "column");
    // use the shared parameter constant, as the other tests of this class do, instead of a raw key
    parameters.put(COLUMN_ID.getKey(), "0001");
    parameters.put(FROM_CALENDAR_TYPE_PARAMETER, DateCalendarConverter.CalendarUnit.ISO.name());
    parameters.put(TO_CALENDAR_TYPE_PARAMETER, DateCalendarConverter.CalendarUnit.JULIAN_DAY.name());
    parameters.put("create_new_column", "true");

    // when
    ActionTestWorkbench.test(Arrays.asList(row1, row2, row3), actionRegistry, factory.create(action, parameters));

    // then
    // assert that original column is unchanged:
    assertEquals("1970-01-01", row1.get("0001"));
    assertEquals("0001-01-01", row2.get("0001"));
    assertEquals("", row3.get("0001"));
    // assert that new column is created:
    assertEquals("2440588", row1.get("0002"));
    assertEquals("1721426", row2.get("0002"));
    assertEquals("", row3.get("0002"));
}
use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.
the class DateParserTest method shouldUpdateColumnStatisticsWithNewDatePattern.
/**
 * Parsing a value whose pattern is not yet listed in the column statistics is expected to
 * append that pattern, with an occurrence count of 1, after the existing entry.
 */
@Test
public void shouldUpdateColumnStatisticsWithNewDatePattern() {
    // given: a date column that only knows the "yyyy" pattern
    final ColumnMetadata dateColumn = ActionMetadataTestUtils.getColumn(Type.DATE);
    final PatternFrequency knownPattern = new PatternFrequency("yyyy", 19);
    dateColumn.getStatistics().getPatternFrequencies().add(knownPattern);

    // when
    action.guessAndParse("01/02/2015", dateColumn);

    // then: the statistics now hold two patterns, the new one in second position
    final List<PatternFrequency> patterns = dateColumn.getStatistics().getPatternFrequencies();
    assertEquals(2, patterns.size());
    final PatternFrequency expectedPattern = new PatternFrequency("dd/MM/yyyy", 1);
    assertEquals(expectedPattern, patterns.get(1));
}
use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.
the class SplitTest method test_TDP_876.
/**
 * Regression test for TDP-876: after the action runs with analysis enabled, the pattern
 * frequencies of columns "0003" and "0004" must differ from those of column "0001".
 */
@Test
public void test_TDP_876() {
    // given: a single row of three STRING-typed values
    final DataSetRow row = builder()
            .with(value("lorem bacon").type(Type.STRING))
            .with(value("Bacon ipsum dolor amet swine leberkas pork belly").type(Type.STRING))
            .with(value("01/01/2015").type(Type.STRING))
            .build();

    // when: the analyzer service is passed because the assertions below rely on statistics
    ActionTestWorkbench.test(Collections.singletonList(row), analyzerService, actionRegistry,
            factory.create(action, parameters));

    // then
    final RowMetadata metadata = row.getRowMetadata();
    final List<PatternFrequency> sourcePatterns = metadata.getById("0001").getStatistics().getPatternFrequencies();

    final List<PatternFrequency> patternsOf0003 = metadata.getById("0003").getStatistics().getPatternFrequencies();
    assertFalse(sourcePatterns.equals(patternsOf0003));

    final List<PatternFrequency> patternsOf0004 = metadata.getById("0004").getStatistics().getPatternFrequencies();
    assertFalse(sourcePatterns.equals(patternsOf0004));
}
Aggregations