Search in sources :

Example 1 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class StandardizeInvalid method applyOnColumn.

/**
 * Replaces the value of the current column with the most similar value found for the column's domain.
 * Rows for which {@code isApplicable} returns {@code false} are left untouched, and so are rows for which
 * no similar value is returned.
 *
 * @param row the row whose current column value may be replaced.
 * @param context the action context providing the column id, the row metadata and the match threshold
 * (stored under {@code MATCH_THRESHOLD_KEY}).
 */
@Override
public void applyOnColumn(DataSetRow row, ActionContext context) {
    if (!isApplicable(row, context)) {
        return;
    }
    final String columnId = context.getColumnId();
    final String originalValue = row.get(columnId);
    final ColumnMetadata column = context.getRowMetadata().getById(columnId);
    final Double threshold = context.get(MATCH_THRESHOLD_KEY);
    final String replacement = CategoryRegistryManager.getInstance() //
            .findMostSimilarValue(originalValue, column.getDomain(), threshold);
    // When no similar value is found, keep the original value.
    if (StringUtils.isEmpty(replacement)) {
        return;
    }
    row.set(columnId, replacement);
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata)

Example 2 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class ComputeTimeSince method applyOnColumn.

/**
 * Computes the amount of time between the date held by the current column and a reference date, expressed in
 * the unit given by {@code TIME_UNIT_PARAMETER}, and writes it to the target column.
 * <p>
 * The reference date depends on the mode stored in the context under {@code SINCE_WHEN_PARAMETER}: in
 * {@code OTHER_COLUMN_MODE} it is parsed from the column designated by {@code SELECTED_COLUMN_PARAMETER};
 * in every other mode it is read from the context under {@code SINCE_DATE_PARAMETER}.
 * <p>
 * The target column receives an empty string when there is no reference date, when the selected column does
 * not exist, or when a date cannot be parsed.
 *
 * @param row the row to read the dates from and to write the result to.
 * @param context the action context providing parameters, row metadata and the reference date.
 */
@Override
public void applyOnColumn(DataSetRow row, ActionContext context) {
    RowMetadata rowMetadata = context.getRowMetadata();
    Map<String, String> parameters = context.getParameters();
    String columnId = context.getColumnId();
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale (e.g. the Turkish dotted 'İ'),
    // which would otherwise make ChronoUnit.valueOf reject an otherwise valid unit name.
    TemporalUnit unit = ChronoUnit.valueOf(parameters.get(TIME_UNIT_PARAMETER).toUpperCase(java.util.Locale.ROOT));
    String newValue;
    try {
        String mode = context.get(SINCE_WHEN_PARAMETER);
        LocalDateTime since;
        switch(mode) {
            case OTHER_COLUMN_MODE:
                ColumnMetadata selectedColumn = rowMetadata.getById(parameters.get(SELECTED_COLUMN_PARAMETER));
                if (selectedColumn == null) {
                    // Unknown column: nothing to compare with, handled below like a missing reference date.
                    since = null;
                } else {
                    String dateToCompare = row.get(selectedColumn.getId());
                    since = Providers.get().parse(dateToCompare, selectedColumn);
                }
                break;
            case SPECIFIC_DATE_MODE:
            case NOW_SERVER_SIDE_MODE:
            default:
                since = context.get(SINCE_DATE_PARAMETER);
                break;
        }
        if (since == null) {
            newValue = StringUtils.EMPTY;
        } else {
            // parse the date of the current column and measure the time elapsed up to the reference date
            String value = row.get(columnId);
            LocalDateTime valueAsDate = Providers.get().parse(value, rowMetadata.getById(columnId));
            newValue = String.valueOf(unit.between(valueAsDate, since));
        }
    } catch (DateTimeException e) {
        LOGGER.trace("Error on dateTime parsing", e);
        // A date could not be parsed: the result is left empty
        newValue = StringUtils.EMPTY;
    }
    row.set(ActionsUtils.getTargetColumnId(context), newValue);
}
Also used : LocalDateTime(java.time.LocalDateTime) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DateTimeException(java.time.DateTimeException) Temporal(java.time.temporal.Temporal) TemporalUnit(java.time.temporal.TemporalUnit) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata)

Example 3 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class DomainChange method applyOnColumn.

/**
 * Applies the domain requested in the action parameters to the current column. When a non-empty domain id is
 * supplied under {@code NEW_DOMAIN_ID_PARAMETER_KEY}, the column receives that domain id and the label found
 * under {@code NEW_DOMAIN_LABEL_PARAMETER_KEY}, its domain frequency is reset to 0 and the domain is flagged
 * as forced. In all cases the column is pushed back to the row metadata and the action status is set to
 * {@code DONE}.
 *
 * @see ColumnAction#applyOnColumn(DataSetRow, ActionContext)
 */
@Override
public void applyOnColumn(DataSetRow row, ActionContext context) {
    final String columnId = context.getColumnId();
    final Map<String, String> parameters = context.getParameters();
    LOGGER.debug("DomainChange for columnId {} with parameters {} ", columnId, parameters);

    final RowMetadata metadata = context.getRowMetadata();
    final ColumnMetadata target = metadata.getById(columnId);
    final String requestedDomainId = parameters.get(NEW_DOMAIN_ID_PARAMETER_KEY);

    final boolean domainRequested = !StringUtils.isEmpty(requestedDomainId);
    if (domainRequested) {
        final String requestedLabel = parameters.get(NEW_DOMAIN_LABEL_PARAMETER_KEY);
        target.setDomain(requestedDomainId);
        target.setDomainLabel(requestedLabel);
        target.setDomainFrequency(0);
        target.setDomainForced(true);
    }

    // The metadata update and the DONE status are applied whether or not a domain was requested.
    metadata.update(columnId, target);
    context.setActionStatus(ActionContext.ActionStatus.DONE);
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata)

Example 4 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class Swap method applyOnColumn.

/**
 * Exchanges, for the given row, the value of the current column with the value of the column designated by
 * {@code SELECTED_COLUMN_PARAMETER}. A {@code null} value on either side is written as an empty string.
 * Nothing happens when the selected column cannot be found in the row metadata.
 *
 * @param row the row whose two values are exchanged.
 * @param context the action context providing the current column id, the parameters and the row metadata.
 */
@Override
public void applyOnColumn(DataSetRow row, ActionContext context) {
    final RowMetadata metadata = context.getRowMetadata();
    final String otherColumnParameter = context.getParameters().get(SELECTED_COLUMN_PARAMETER);
    final ColumnMetadata otherColumn = metadata.getById(otherColumnParameter);
    if (otherColumn != null) {
        final String columnId = context.getColumnId();
        final String otherColumnId = otherColumn.getId();
        LOGGER.debug("swapping columns {} <-> {}", columnId, otherColumnId);

        // Read both values before writing any of them, so that neither one is lost.
        String currentValue = row.get(columnId);
        String otherValue = row.get(otherColumnId);
        if (currentValue == null) {
            currentValue = StringUtils.EMPTY;
        }
        if (otherValue == null) {
            otherValue = StringUtils.EMPTY;
        }
        row.set(columnId, otherValue);
        row.set(otherColumnId, currentValue);
    }
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata)

Example 5 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class AbstractActionMetadata method compile.

/**
 * Invoked by the transformation process <b>before</b> the first transformation takes place, so that an action
 * implementation can prepare objects it will reuse during the actual transformation. An implementation may
 * also signal that the action is not applicable and must be discarded (see
 * {@link ActionContext.ActionStatus#CANCELED}).
 * <p>
 * This implementation cancels the action when its scope is {@code CELL} or {@code COLUMN}, the row metadata
 * holds at least one column and the column targeted by the action is not among them. In every other case the
 * status is set to {@link ActionContext.ActionStatus#OK}.
 *
 * @param actionContext The action context that holds the parameters and lets the compile step change the
 * action status.
 * @see ActionContext#setActionStatus(ActionContext.ActionStatus)
 */
@Override
public void compile(ActionContext actionContext) {
    final RowMetadata input = actionContext.getRowMetadata();
    final ScopeCategory scope = actionContext.getScope();

    // Only cell and column scoped actions depend on the presence of a specific column.
    final boolean columnScoped = scope == ScopeCategory.CELL || scope == ScopeCategory.COLUMN;

    // Cancel if: there is actual column information in input AND the targeted column is not found.
    final boolean targetColumnMissing = columnScoped //
            && input != null //
            && !input.getColumns().isEmpty() //
            && input.getById(actionContext.getColumnId()) == null;

    final ActionContext.ActionStatus status = targetColumnMissing //
            ? ActionContext.ActionStatus.CANCELED //
            : ActionContext.ActionStatus.OK;
    actionContext.setActionStatus(status);
}
Also used : ScopeCategory(org.talend.dataprep.transformation.actions.category.ScopeCategory) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata)

Aggregations

RowMetadata (org.talend.dataprep.api.dataset.RowMetadata): 199; Test (org.junit.Test): 130; DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow): 112; ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata): 87; AbstractMetadataBaseTest (org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest): 68; HashMap (java.util.HashMap): 48; ActionContext (org.talend.dataprep.transformation.api.action.context.ActionContext): 21; DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata): 20; RunnableAction (org.talend.dataprep.transformation.actions.common.RunnableAction): 19; TransformationContext (org.talend.dataprep.transformation.api.action.context.TransformationContext): 18; ArrayList (java.util.ArrayList): 16; DataSet (org.talend.dataprep.api.dataset.DataSet): 11; List (java.util.List): 9; Statistics (org.talend.dataprep.api.dataset.statistics.Statistics): 9; Before (org.junit.Before): 7; SemanticDomain (org.talend.dataprep.api.dataset.statistics.SemanticDomain): 6; Preparation (org.talend.dataprep.api.preparation.Preparation): 6; Response (com.jayway.restassured.response.Response): 5; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5; Logger (org.slf4j.Logger): 5