Search in sources :

Example 1 with PatternFrequency

use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.

the class DateCalendarConverterTest method testConversionJapaneseToISO_InvalidDateWithEra.

/**
 * A Japanese-calendar value carrying an era marker but an impossible day (February 30th) must be left
 * untouched by the Japanese-to-ISO conversion.
 */
@Test
public void testConversionJapaneseToISO_InvalidDateWithEra() {
    // given: a two-column row whose second column holds the impossible date
    final String invalidDate = "0008/02/30 平成";
    final Map<String, String> values = new HashMap<>();
    values.put("0000", "Lucy");
    values.put("0001", invalidDate);
    final DataSetRow row = new DataSetRow(values);

    // register the era-aware date pattern on the column at index 1
    final PatternFrequency eraPattern = new PatternFrequency("yyyy/MM/dd G", 1);
    row.getRowMetadata().getColumns().get(1).getStatistics().getPatternFrequencies().add(eraPattern);

    // action parameters: column scope on "0001", converting JAPANESE -> ISO
    final Map<String, String> parameters = new HashMap<>();
    parameters.put(ImplicitParameters.SCOPE.getKey().toLowerCase(), "column");
    parameters.put(COLUMN_ID.getKey(), "0001");
    parameters.put(FROM_CALENDAR_TYPE_PARAMETER, DateCalendarConverter.CalendarUnit.JAPANESE.name());
    parameters.put(TO_CALENDAR_TYPE_PARAMETER, DateCalendarConverter.CalendarUnit.ISO.name());

    // when
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, parameters));

    // then: February 30th does not exist, so the value cannot be parsed even though the era is
    // present; the cell keeps its original content
    assertEquals(invalidDate, row.get("0001"));
}
Also used : PatternFrequency(org.talend.dataprep.api.dataset.statistics.PatternFrequency) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) Test(org.junit.Test)

Example 2 with PatternFrequency

use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.

the class DateCalendarConverterTest method testConversionJapaneseToISO_ValidDateWithoutEra.

/**
 * A value whose registered pattern has no era designator ({@code G}) must be left untouched by the
 * Japanese-to-ISO conversion.
 */
@Test
public void testConversionJapaneseToISO_ValidDateWithoutEra() {
    // given: a two-column row whose second column holds a date written without any era
    final String dateWithoutEra = "0008/10/29";
    final Map<String, String> values = new HashMap<>();
    values.put("0000", "John");
    values.put("0001", dateWithoutEra);
    final DataSetRow row = new DataSetRow(values);

    // register the era-less date pattern on the column at index 1
    final PatternFrequency eraLessPattern = new PatternFrequency("yyyy/MM/dd", 1);
    row.getRowMetadata().getColumns().get(1).getStatistics().getPatternFrequencies().add(eraLessPattern);

    // action parameters: column scope on "0001", converting JAPANESE -> ISO
    final Map<String, String> parameters = new HashMap<>();
    parameters.put(ImplicitParameters.SCOPE.getKey().toLowerCase(), "column");
    parameters.put(COLUMN_ID.getKey(), "0001");
    parameters.put(FROM_CALENDAR_TYPE_PARAMETER, DateCalendarConverter.CalendarUnit.JAPANESE.name());
    parameters.put(TO_CALENDAR_TYPE_PARAMETER, DateCalendarConverter.CalendarUnit.ISO.name());

    // when
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, parameters));

    // then: the pattern does not contain G, so the input cannot be parsed and stays as it was
    assertEquals(dateWithoutEra, row.get("0001"));
}
Also used : PatternFrequency(org.talend.dataprep.api.dataset.statistics.PatternFrequency) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) Test(org.junit.Test)

Example 3 with PatternFrequency

use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.

the class DateCalendarConverterTest method test_apply_in_newcolumn.

/**
 * With {@code create_new_column} enabled, the ISO to Julian-day conversion must write its result into
 * a new column "0002" and leave the source column "0001" unchanged.
 */
@Test
public void test_apply_in_newcolumn() {
    // given: three rows, each backed by its own map
    final String epochDate = "1970-01-01";
    final String firstDayOfYearOne = "0001-01-01";
    final String emptyDate = "";

    // row 1: the only row that gets a date pattern registered on its column at index 1
    final Map<String, String> firstValues = new HashMap<>();
    firstValues.put("0000", "David");
    firstValues.put("0001", epochDate);
    final DataSetRow row1 = new DataSetRow(firstValues);
    final PatternFrequency isoPattern = new PatternFrequency("yyyy-MM-dd", 1);
    row1.getRowMetadata().getColumns().get(1).getStatistics().getPatternFrequencies().add(isoPattern);

    // row 2
    final Map<String, String> secondValues = new HashMap<>();
    secondValues.put("0000", "John");
    secondValues.put("0001", firstDayOfYearOne);
    final DataSetRow row2 = new DataSetRow(secondValues);

    // row 3: empty date cell
    final Map<String, String> thirdValues = new HashMap<>();
    thirdValues.put("0000", "Michel");
    thirdValues.put("0001", emptyDate);
    final DataSetRow row3 = new DataSetRow(thirdValues);

    // action parameters: column scope on "0001", ISO -> JULIAN_DAY, output in a new column
    final Map<String, String> parameters = new HashMap<>();
    parameters.put(ImplicitParameters.SCOPE.getKey().toLowerCase(), "column");
    parameters.put("column_id", "0001");
    parameters.put(FROM_CALENDAR_TYPE_PARAMETER, DateCalendarConverter.CalendarUnit.ISO.name());
    parameters.put(TO_CALENDAR_TYPE_PARAMETER, DateCalendarConverter.CalendarUnit.JULIAN_DAY.name());
    parameters.put("create_new_column", "true");

    // when
    ActionTestWorkbench.test(Arrays.asList(row1, row2, row3), actionRegistry, factory.create(action, parameters));

    // then: the source column keeps its original values...
    assertEquals(epochDate, row1.get("0001"));
    assertEquals(firstDayOfYearOne, row2.get("0001"));
    assertEquals(emptyDate, row3.get("0001"));

    // ...and the new column holds the converted values (empty input stays empty)
    assertEquals("2440588", row1.get("0002"));
    assertEquals("1721426", row2.get("0002"));
    assertEquals("", row3.get("0002"));
}
Also used : PatternFrequency(org.talend.dataprep.api.dataset.statistics.PatternFrequency) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) Test(org.junit.Test)

Example 4 with PatternFrequency

use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.

the class DateParserTest method shouldUpdateColumnStatisticsWithNewDatePattern.

/**
 * Checks that {@code guessAndParse} on a value not covered by the column's single known pattern
 * leaves the column with two pattern frequencies, the second being {@code dd/MM/yyyy} with count 1.
 */
@Test
public void shouldUpdateColumnStatisticsWithNewDatePattern() {
    // given: a DATE column that only knows the "yyyy" pattern
    final ColumnMetadata dateColumn = ActionMetadataTestUtils.getColumn(Type.DATE);
    final PatternFrequency knownPattern = new PatternFrequency("yyyy", 19);
    dateColumn.getStatistics().getPatternFrequencies().add(knownPattern);

    // when
    action.guessAndParse("01/02/2015", dateColumn);

    // then: the column statistics now hold a second pattern entry
    final List<PatternFrequency> patterns = dateColumn.getStatistics().getPatternFrequencies();
    assertEquals(2, patterns.size());
    final PatternFrequency expectedPattern = new PatternFrequency("dd/MM/yyyy", 1);
    assertEquals(expectedPattern, patterns.get(1));
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) PatternFrequency(org.talend.dataprep.api.dataset.statistics.PatternFrequency) Test(org.junit.Test)

Example 5 with PatternFrequency

use of org.talend.dataprep.api.dataset.statistics.PatternFrequency in project data-prep by Talend.

the class SplitTest method test_TDP_876.

/**
 * Regression test for TDP-876: after the action runs, the pattern frequencies of columns "0003" and
 * "0004" must differ from those of column "0001".
 */
@Test
public void test_TDP_876() {
    // given: a row made of three STRING-typed values
    final DataSetRow row = builder()
            .with(value("lorem bacon").type(Type.STRING))
            .with(value("Bacon ipsum dolor amet swine leberkas pork belly").type(Type.STRING))
            .with(value("01/01/2015").type(Type.STRING))
            .build();

    // when: the analyzer service is passed in because the assertions below rely on statistics
    ActionTestWorkbench.test(Collections.singletonList(row), analyzerService, actionRegistry,
            factory.create(action, parameters));

    // then
    final RowMetadata metadata = row.getRowMetadata();
    final Statistics sourceStatistics = metadata.getById("0001").getStatistics();
    final List<PatternFrequency> sourcePatterns = sourceStatistics.getPatternFrequencies();

    final List<PatternFrequency> patternsOf0003 = metadata.getById("0003").getStatistics().getPatternFrequencies();
    assertFalse(sourcePatterns.equals(patternsOf0003));

    final List<PatternFrequency> patternsOf0004 = metadata.getById("0004").getStatistics().getPatternFrequencies();
    assertFalse(sourcePatterns.equals(patternsOf0004));
}
Also used : PatternFrequency(org.talend.dataprep.api.dataset.statistics.PatternFrequency) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) Statistics(org.talend.dataprep.api.dataset.statistics.Statistics) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) AbstractMetadataBaseTest(org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest) Test(org.junit.Test)

Aggregations

PatternFrequency (org.talend.dataprep.api.dataset.statistics.PatternFrequency): 25, Test (org.junit.Test): 14, DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow): 11, ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata): 8, Before (org.junit.Before): 7, RowMetadata (org.talend.dataprep.api.dataset.RowMetadata): 4, HashMap (java.util.HashMap): 3, List (java.util.List): 3, StringUtils (org.apache.commons.lang.StringUtils): 3, CoreMatchers.is (org.hamcrest.CoreMatchers.is): 3, Type (org.talend.dataprep.api.type.Type): 3, IOException (java.io.IOException): 2, InputStream (java.io.InputStream): 2, Arrays (java.util.Arrays): 2, Collections (java.util.Collections): 2, Locale (java.util.Locale): 2, Map (java.util.Map): 2, Optional (java.util.Optional): 2, Assert.assertEquals (org.junit.Assert.assertEquals): 2, Assert.assertFalse (org.junit.Assert.assertFalse): 2