use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class StandardizeInvalidTest method should_accept_column.
/**
 * Verifies that the action under test accepts a STRING column that is tagged with the
 * COUNTRY semantic domain.
 */
@Test
public void should_accept_column() {
    // semantic domain candidates attached to the column
    final List<SemanticDomain> countryDomains = new ArrayList<>();
    countryDomains.add(new SemanticDomain("COUNTRY", "Country", 0.85f));
    final String domainName = SemanticCategoryEnum.COUNTRY.name();

    final ColumnMetadata countryColumn = ColumnMetadata.Builder.column()
            .id(0)
            .name("name")
            .type(Type.STRING)
            .semanticDomains(countryDomains)
            .domain(domainName)
            .build();

    assertTrue(action.acceptField(countryColumn));
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class ChangeDatePatternTest method test_apply_in_newcolumn.
/**
 * Runs the action with {@code CREATE_NEW_COLUMN} set to {@code "true"} on three rows and checks
 * both the resulting row values and the pattern statistics of the columns at index 1 and 2.
 *
 * <p>The first two rows are expected to gain a fourth value holding the reformatted date; the
 * third row ("22/01/2018") is expected to keep only its three original values.
 *
 * @throws Exception if the test fixtures cannot be loaded or the action fails
 */
@Test
public void test_apply_in_newcolumn() throws Exception {
    // given
    final DataSetRow row1 = builder()
            .with(value("toto").type(Type.STRING).name("recipe"))
            .with(value("04/25/1999").type(Type.DATE).name("last update")
                    .statistics(getDateTestJsonAsStream("statistics_MM_dd_yyyy.json")))
            .with(value("tata").type(Type.STRING).name("recipe"))
            .build();
    final DataSetRow row2 = builder()
            .with(value("tata").type(Type.STRING).name("recipe"))
            .with(value("01/22/2018").type(Type.DATE).name("last update")
                    .statistics(getDateTestJsonAsStream("statistics_MM_dd_yyyy.json")))
            .with(value("toto").type(Type.STRING).name("recipe"))
            .build();
    final DataSetRow row3 = builder()
            .with(value("tata").type(Type.STRING).name("recipe"))
            .with(value("22/01/2018").type(Type.DATE).name("last update")
                    .statistics(getDateTestJsonAsStream("statistics_MM_dd_yyyy.json")))
            .with(value("toto").type(Type.STRING).name("recipe"))
            .build();
    parameters.put(CREATE_NEW_COLUMN, "true");

    // precondition: the date column starts with 7 known patterns
    assertEquals(7, row1.getRowMetadata().getColumns().get(1).getStatistics().getPatternFrequencies().size());

    // when
    ActionTestWorkbench.test(Arrays.asList(row1, row2, row3), actionRegistry, factory.create(action, parameters));

    // then
    final DataSetRow expectedRow1 = getRow("toto", "04/25/1999", "tata", "25 - Apr - 1999");
    final DataSetRow expectedRow2 = getRow("tata", "01/22/2018", "toto", "22 - Jan - 2018");
    final DataSetRow expectedRow3 = getRow("tata", "22/01/2018", "toto");
    assertEquals(expectedRow1.values(), row1.values());
    assertEquals(expectedRow2.values(), row2.values());
    assertEquals(expectedRow3.values(), row3.values());

    final List<ColumnMetadata> columns = row1.getRowMetadata().getColumns();
    // index 1 is treated as the "from" column and index 2 as the target column by the checks below
    final List<PatternFrequency> sourcePatterns = columns.get(1).getStatistics().getPatternFrequencies();
    final List<PatternFrequency> targetPatterns = columns.get(2).getStatistics().getPatternFrequencies();

    // check that the stats on the from column are not changed
    assertEquals(7, sourcePatterns.size());
    // NOTE(review): this reads the target column's list although it sits with the from-column
    // check -- confirm which column was intended
    assertEquals("MM/dd/yyyy", targetPatterns.get(0).getPattern());

    // check that the stats on the target column are changed, and the new target pattern is added to the known ones
    assertEquals(8, targetPatterns.size());
    assertEquals("dd - MMM - yyyy", targetPatterns.get(7).getPattern());

    // the newly added pattern should have the biggest frequency: the old most used pattern count + 1
    // (expected value first, measured value second, so a failure message reads correctly)
    assertEquals(targetPatterns.get(0).getOccurrences() + 1, targetPatterns.get(7).getOccurrences());
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class ChangeDatePatternTest method testAdapt.
/**
 * Checks that {@code adapt} hands back the same action both for a {@code null} column and for
 * a STRING column.
 *
 * @throws Exception declared by the test signature
 */
@Test
public void testAdapt() throws Exception {
    // null column
    assertThat(action.adapt((ColumnMetadata) null), is(action));

    // plain STRING column, built inline
    assertThat(
            action.adapt(column().name("myColumn").id(0).type(Type.STRING).build()),
            is(action));
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class CompareDatesTest method testAdapt.
/**
 * Ensures {@code adapt} yields the action itself, first with a {@code null} column and then
 * with a STRING column named "myColumn".
 *
 * @throws Exception declared by the test signature
 */
@Test
public void testAdapt() throws Exception {
    final ColumnMetadata noColumn = null;
    assertThat(action.adapt(noColumn), is(action));

    final ColumnMetadata stringColumn = column()
            .name("myColumn")
            .id(0)
            .type(Type.STRING)
            .build();
    assertThat(action.adapt(stringColumn), is(action));
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class CompareDatesTest method simple_greater_result_with_constant.
/**
 * Compares a single date value against a constant using the {@code GT} compare mode and checks
 * that a second value "true" appears under id "0001" together with matching column metadata.
 *
 * @throws Exception if the fixture metadata cannot be created or the action fails
 */
@Test
public void simple_greater_result_with_constant() throws Exception {
    // given: one DATE column "0000" holding a single value
    final RowMetadata metadata = new RowMetadata();
    metadata.addColumn(createMetadata("0000", "last update", Type.DATE, "statistics_MM_dd_yyyy.json"));

    final Map<String, String> cells = new HashMap<>();
    cells.put("0000", "02/01/2012");

    final DataSetRow row = new DataSetRow(metadata, cells);

    parameters.put(CompareDates.CONSTANT_VALUE, "02/21/2008");
    parameters.put(CompareDates.MODE_PARAMETER, OtherColumnParameters.CONSTANT_MODE);
    parameters.put(CompareDates.COMPARE_MODE, CompareDates.GT);

    // when
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, parameters));

    // then: the row holds the original value plus the comparison result
    Assertions.assertThat(row.values())
            .hasSize(2)
            .containsExactly(
                    MapEntry.entry("0000", "02/01/2012"),
                    MapEntry.entry("0001", "true"));

    // and the metadata registered under "0001" equals the expected BOOLEAN column
    final ColumnMetadata expected = ColumnMetadata.Builder.column()
            .id(1)
            .name("last update_gt_02/21/2008?")
            .type(Type.BOOLEAN)
            .build();
    final ColumnMetadata created = row.getRowMetadata().getById("0001");
    assertEquals(expected, created);
}
Aggregations