Use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.
Class FuzzyMatching, method applyOnColumn.
@Override
public void applyOnColumn(DataSetRow row, ActionContext context) {
    Map<String, String> parameters = context.getParameters();
    int sensitivity = NumberUtils.toInt(parameters.get(SENSITIVITY));
    // create new column and append it after current column
    RowMetadata rowMetadata = context.getRowMetadata();
    String value = row.get(context.getColumnId());
    String referenceValue;
    if (parameters.get(OtherColumnParameters.MODE_PARAMETER).equals(OtherColumnParameters.CONSTANT_MODE)) {
        referenceValue = parameters.get(VALUE_PARAMETER);
    } else {
        final ColumnMetadata selectedColumn =
                rowMetadata.getById(parameters.get(OtherColumnParameters.SELECTED_COLUMN_PARAMETER));
        referenceValue = row.get(selectedColumn.getId());
    }
    final String columnValue = toStringTrueFalse(fuzzyMatches(value, referenceValue, sensitivity));
    row.set(ActionsUtils.getTargetColumnId(context), columnValue);
}
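The helper fuzzyMatches is not shown in this excerpt. A minimal sketch of how such a check could work, assuming "fuzzy match" means an edit distance at most equal to the sensitivity threshold (the names fuzzyMatches and levenshtein below are illustrative, not the project's actual implementation):

    // Illustrative sketch only: treats a match as "edit distance within the sensitivity threshold".
    private boolean fuzzyMatches(String value, String reference, int sensitivity) {
        if (value == null || reference == null) {
            return false;
        }
        return levenshtein(value, reference) <= sensitivity;
    }

    // Plain dynamic-programming Levenshtein distance, two rolling rows.
    private int levenshtein(String a, String b) {
        int[] previous = new int[b.length() + 1];
        int[] current = new int[b.length() + 1];
        for (int j = 0; j <= b.length(); j++) {
            previous[j] = j; // distance from the empty prefix of a to each prefix of b
        }
        for (int i = 1; i <= a.length(); i++) {
            current[0] = i;
            for (int j = 1; j <= b.length(); j++) {
                int substitutionCost = a.charAt(i - 1) == b.charAt(j - 1) ? 0 : 1;
                current[j] = Math.min(Math.min(current[j - 1] + 1, previous[j] + 1),
                        previous[j - 1] + substitutionCost);
            }
            int[] swap = previous;
            previous = current;
            current = swap;
        }
        return previous[b.length()];
    }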
Use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.
Class FilterNodeTest, method receive_should_filter_with_simple_predicate.
@Test
public void receive_should_filter_with_simple_predicate() throws Exception {
    // given
    final RowMetadata metadata0 = new RowMetadata();
    final DataSetRow row0 = new DataSetRow(new HashMap<>());
    // does not pass the predicate
    row0.setTdpId(0L);
    final RowMetadata metadata1 = new RowMetadata();
    final DataSetRow row1 = new DataSetRow(new HashMap<>());
    // passes the predicate
    row1.setTdpId(1L);
    final TestLink link = new TestLink(new BasicNode());
    final FilterNode node = new FilterNode((row, metadata) -> row.getTdpId() == 1);
    node.setLink(link);
    // when
    node.receive(row0, metadata0);
    node.receive(row1, metadata1);
    // then
    assertThat(link.getEmittedRows(), hasSize(1));
    assertThat(link.getEmittedRows(), contains(row1));
    assertThat(link.getEmittedMetadata(), hasSize(1));
    assertThat(link.getEmittedMetadata(), contains(metadata1));
}
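The FilterNode behaviour exercised by this test could be sketched as follows, assuming it holds a predicate over the row and its metadata and only forwards matching rows to its link (an illustration under those assumptions, not the project's actual source):

    import java.util.function.BiPredicate;

    // Illustrative sketch: only rows accepted by the predicate are forwarded downstream.
    public class FilterNode extends BasicNode {

        private final BiPredicate<DataSetRow, RowMetadata> filter;

        public FilterNode(BiPredicate<DataSetRow, RowMetadata> filter) {
            this.filter = filter;
        }

        @Override
        public void receive(DataSetRow row, RowMetadata metadata) {
            if (filter.test(row, metadata)) {
                // assumes BasicNode#receive passes the row on to the configured link
                super.receive(row, metadata);
            }
        }
    }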
Use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.
Class PreparationAPITest, method test_add_preparation_TDP3927.
/**
 * Test for a bug that allowed the same column ID to be reused twice in the same preparation.
 * <p>
 * The bug was made possible by OptimizedStrategy, which does not apply some actions on RowMetadata, so
 * RowMetadata.nextId is not properly updated.
 * </p>
 */
@Test
public void test_add_preparation_TDP3927() throws Exception {
    // given
    final String preparationId = testClient.createPreparationFromFile(
            "/org/talend/dataprep/api/service/dataset/bug_TDP-3927_import-col-not-deleted_truncated.csv",
            "bug_TDP-3927_import-col-not-deleted", home.getId());
    Map<String, String> copyIdParameters = new HashMap<>();
    copyIdParameters.put("column_id", "0000");
    copyIdParameters.put("column_name", "id");
    copyIdParameters.put("scope", "column");
    testClient.applyAction(preparationId, "copy", copyIdParameters);
    InputStream inputStream = testClient.getPreparation(preparationId).asInputStream();
    mapper.getDeserializationConfig().without(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
    RowMetadata preparationContent = mapper.readValue(inputStream, Data.class).metadata;
    ColumnMetadata idCopyColumn = getColumnByName(preparationContent, "id_copy");
    Map<String, String> deleteIdCopyParameters = new HashMap<>();
    deleteIdCopyParameters.put("column_id", "0008");
    deleteIdCopyParameters.put("column_name", "id_copy");
    deleteIdCopyParameters.put("scope", "column");
    testClient.applyAction(preparationId, "delete_column", deleteIdCopyParameters);
    // force export to update cache
    testClient.getPreparation(preparationId);
    // when
    Map<String, String> copyFirstNameParameters = new HashMap<>();
    copyFirstNameParameters.put("column_id", "0001");
    copyFirstNameParameters.put("column_name", "first_name");
    copyFirstNameParameters.put("scope", "column");
    testClient.applyAction(preparationId, "copy", copyFirstNameParameters);
    // then
    inputStream = testClient.getPreparation(preparationId).asInputStream();
    mapper.getDeserializationConfig().without(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
    preparationContent = mapper.readValue(inputStream, Data.class).metadata;
    assertNotNull(preparationContent);
    ColumnMetadata firstNameColumn = getColumnByName(preparationContent, "first_name_copy");
    assertNotEquals(idCopyColumn.getId(), firstNameColumn.getId());
}
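The helper getColumnByName is not shown in this excerpt. A minimal sketch, assuming it simply looks a column up by name in the RowMetadata (the null return on a missing column is an assumption):

    // Illustrative sketch: return the first column with the given name, or null when absent.
    private static ColumnMetadata getColumnByName(RowMetadata rowMetadata, String columnName) {
        return rowMetadata.getColumns().stream() //
                .filter(column -> columnName.equals(column.getName())) //
                .findFirst() //
                .orElse(null);
    }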
Use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.
Class PreparationAPITest, method testPreparationPreviewOnPreparationWithTrimAction_TDP_5057.
/**
 * Verify a "calculate time since" preview after a trim step on a preparation.
 * See <a href="https://jira.talendforge.org/browse/TDP-5057">TDP-5057</a>.
 */
@Test
public void testPreparationPreviewOnPreparationWithTrimAction_TDP_5057() throws IOException {
    // Create a dataset from a CSV file
    final String datasetId = testClient.createDataset("preview/best_sad_songs_of_all_time.csv", "testPreview");
    // Create a preparation
    String preparationId = testClient.createPreparationFromDataset(datasetId, "testPrep", home.getId());
    // Apply a trim action on column 0008 so that it becomes a valid date column
    Map<String, String> trimParameters = new HashMap<>();
    trimParameters.put("create_new_column", "false");
    trimParameters.put("padding_character", "whitespace");
    trimParameters.put("scope", "column");
    trimParameters.put("column_id", "0008");
    trimParameters.put("column_name", "Added At");
    trimParameters.put("row_id", "null");
    testClient.applyAction(preparationId, Trim.TRIM_ACTION_NAME, trimParameters);
    // Check that the column holds valid dates after the trim action
    InputStream inputStream = testClient.getPreparation(preparationId).asInputStream();
    mapper.getDeserializationConfig().without(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
    RowMetadata preparationContent = mapper.readValue(inputStream, Data.class).metadata;
    List<PatternFrequency> patternFrequencies =
            preparationContent.getColumns().get(8).getStatistics().getPatternFrequencies();
    assertTrue(patternFrequencies.stream() //
            .map(PatternFrequency::getPattern) //
            .anyMatch("yyyy-MM-dd"::equals));
    // Create a preview of the "calculate time since" action
    PreviewAddParameters previewAddParameters = new PreviewAddParameters();
    previewAddParameters.setDatasetId(datasetId);
    previewAddParameters.setPreparationId(preparationId);
    previewAddParameters.setTdpIds(Arrays.asList(1, 2, 3, 4, 5, 6, 7));
    Action calculateTimeUntilAction = new Action();
    calculateTimeUntilAction.setName(ComputeTimeSince.TIME_SINCE_ACTION_NAME);
    MixedContentMap actionParameters = new MixedContentMap();
    actionParameters.put("create_new_column", "true");
    actionParameters.put("time_unit", "HOURS");
    actionParameters.put("since_when", "now_server_side");
    actionParameters.put("scope", "column");
    actionParameters.put("column_id", "0008");
    actionParameters.put("column_name", "Added At");
    calculateTimeUntilAction.setParameters(actionParameters);
    previewAddParameters.setActions(Collections.singletonList(calculateTimeUntilAction));
    JsonPath jsonPath = given() //
            .contentType(ContentType.JSON) //
            .body(previewAddParameters) //
            .expect().statusCode(200).log().ifError() //
            .when().post("/api/preparations/preview/add") //
            .jsonPath();
    // Check that the new preview column contains no empty values
    assertEquals("new preview column should contain values according to the calculate time since action", //
            0, jsonPath.getList("records.0009").stream().map(String::valueOf).filter(StringUtils::isBlank).count());
}
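Both API tests above deserialize the preparation content through a Data wrapper class that is not shown here. A minimal sketch, assuming it is just a Jackson DTO exposing the row metadata (the real class may carry more fields):

    import com.fasterxml.jackson.annotation.JsonIgnoreProperties;

    // Illustrative sketch of the wrapper used with mapper.readValue(inputStream, Data.class).
    @JsonIgnoreProperties(ignoreUnknown = true)
    public static class Data {

        // Row metadata of the exported preparation content.
        public RowMetadata metadata;
    }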
Use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.
Class PipelineTest, method testCompileAction.
@Test
public void testCompileAction() throws Exception {
    // Given
    final RunnableAction mockAction = new RunnableAction() {

        @Override
        public DataSetRowAction getRowAction() {
            return new DataSetRowAction() {

                @Override
                public void compile(ActionContext actionContext) {
                    actionContext.get("ExecutedCompile", p -> true);
                }

                @Override
                public DataSetRow apply(DataSetRow dataSetRow, ActionContext context) {
                    return dataSetRow;
                }
            };
        }
    };
    final ActionContext actionContext = new ActionContext(new TransformationContext());
    final Node node = NodeBuilder.source().to(new CompileNode(mockAction, actionContext)).to(output).build();
    final RowMetadata rowMetadata = new RowMetadata();
    final DataSetRow row = new DataSetRow(rowMetadata);
    // when
    assertFalse(actionContext.has("ExecutedCompile"));
    node.exec().receive(row, rowMetadata);
    // then
    assertTrue(actionContext.has("ExecutedCompile"));
    assertTrue(actionContext.get("ExecutedCompile"));
}
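In this test, actionContext.get("ExecutedCompile", p -> true) computes and caches the value under the key, which is why actionContext.has("ExecutedCompile") flips to true once compile runs. The CompileNode behaviour being exercised could be sketched as follows, assuming it compiles the action when a row arrives and then forwards the row unchanged to its link (an illustration under those assumptions, not the project's actual source; the action, actionContext, and link fields are assumed to be set by the constructor):

    // Illustrative sketch: compile the wrapped action, then pass the row downstream unchanged.
    @Override
    public void receive(DataSetRow row, RowMetadata metadata) {
        action.getRowAction().compile(actionContext); // fills "ExecutedCompile" in the test above
        link.emit(row, metadata);
    }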