Search in sources:

Example 1 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

The method applyOnColumn of the class StandardizeInvalid.

/**
 * Replaces the value of the current column with the most similar value found for the column's
 * domain, when such a value exists.
 *
 * <p>Nothing happens when {@code isApplicable(row, context)} is false, or when the lookup yields a
 * null or empty result; in both cases the original cell value is kept.
 *
 * @param row the row whose cell is read and possibly overwritten
 * @param context the action context providing the column id, the row metadata and the match threshold
 */
@Override
public void applyOnColumn(DataSetRow row, ActionContext context) {
    if (!isApplicable(row, context)) {
        return;
    }

    final String targetColumnId = context.getColumnId();
    final String currentValue = row.get(targetColumnId);
    final ColumnMetadata columnMetadata = context.getRowMetadata().getById(targetColumnId);
    final Double matchThreshold = context.get(MATCH_THRESHOLD_KEY);

    final CategoryRegistryManager registry = CategoryRegistryManager.getInstance();
    final String mostSimilar = registry.findMostSimilarValue(currentValue, columnMetadata.getDomain(), matchThreshold);

    // No similar value found: keep the original value in place.
    if (StringUtils.isEmpty(mostSimilar)) {
        return;
    }
    row.set(targetColumnId, mostSimilar);
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata)

Example 2 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

The method applyOnColumn of the class ComputeTimeSince.

/**
 * Computes the amount of time, expressed in the configured unit, between the date held in the
 * current column and a reference date, and writes the result to the target column.
 *
 * <p>The reference date depends on the {@code SINCE_WHEN_PARAMETER} mode read from the context:
 * in {@code OTHER_COLUMN_MODE} it is parsed from the column designated by
 * {@code SELECTED_COLUMN_PARAMETER}; in every other mode it is read from the context under
 * {@code SINCE_DATE_PARAMETER}.
 *
 * <p>An empty string is written when no reference date is available (including when the selected
 * column cannot be found in the row metadata) or when a {@link DateTimeException} is raised.
 *
 * @param row the row to read the dates from and to write the result to
 * @param context the action context providing parameters, row metadata and the target column
 * @throws IllegalArgumentException if the configured time unit is not a {@link ChronoUnit} name
 */
@Override
public void applyOnColumn(DataSetRow row, ActionContext context) {
    RowMetadata rowMetadata = context.getRowMetadata();
    Map<String, String> parameters = context.getParameters();
    String columnId = context.getColumnId();
    // Locale.ROOT keeps the enum lookup independent of the JVM default locale: with a Turkish
    // default locale, "minutes".toUpperCase() yields "MİNUTES", which is not a ChronoUnit name.
    TemporalUnit unit = ChronoUnit.valueOf(parameters.get(TIME_UNIT_PARAMETER).toUpperCase(java.util.Locale.ROOT));
    String newValue;
    try {
        String mode = context.get(SINCE_WHEN_PARAMETER);
        LocalDateTime since;
        switch (mode) {
            case OTHER_COLUMN_MODE:
                ColumnMetadata selectedColumn = rowMetadata.getById(parameters.get(SELECTED_COLUMN_PARAMETER));
                if (selectedColumn == null) {
                    // Unknown column: treated like a missing reference date rather than failing with an NPE.
                    since = null;
                } else {
                    String dateToCompare = row.get(selectedColumn.getId());
                    since = Providers.get().parse(dateToCompare, selectedColumn);
                }
                break;
            case SPECIFIC_DATE_MODE:
            case NOW_SERVER_SIDE_MODE:
            default:
                since = context.get(SINCE_DATE_PARAMETER);
                break;
        }
        if (since == null) {
            newValue = StringUtils.EMPTY;
        } else {
            // Parse the date held in the current column and measure the distance to the reference date.
            String value = row.get(columnId);
            LocalDateTime valueAsDate = Providers.get().parse(value, rowMetadata.getById(columnId));
            newValue = String.valueOf(unit.between(valueAsDate, since));
        }
    } catch (DateTimeException e) {
        LOGGER.trace("Error on dateTime parsing", e);
        // A date could not be handled: fall back to an empty result.
        newValue = StringUtils.EMPTY;
    }
    row.set(ActionsUtils.getTargetColumnId(context), newValue);
}
Also used : LocalDateTime(java.time.LocalDateTime) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DateTimeException(java.time.DateTimeException) Temporal(java.time.temporal.Temporal) TemporalUnit(java.time.temporal.TemporalUnit) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata)

Example 3 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

The method applyOnColumn of the class DomainChange.

/**
 * Applies the domain requested in the action parameters to the metadata of the current column.
 *
 * <p>When {@code NEW_DOMAIN_ID_PARAMETER_KEY} holds a non-empty value, the column's domain and
 * domain label are replaced, its domain frequency is reset to 0 and the domain is flagged as
 * forced. The column metadata is then pushed back into the row metadata and the action status is
 * set to {@code DONE}, whether or not a new domain was provided.
 *
 * @param row the current row (not read by this method)
 * @param context the action context providing the column id, the parameters and the row metadata
 * @see ColumnAction#applyOnColumn(DataSetRow, ActionContext)
 */
@Override
public void applyOnColumn(DataSetRow row, ActionContext context) {
    final String targetColumnId = context.getColumnId();
    final Map<String, String> actionParameters = context.getParameters();
    LOGGER.debug("DomainChange for columnId {} with parameters {} ", targetColumnId, actionParameters);

    final RowMetadata metadata = context.getRowMetadata();
    final ColumnMetadata column = metadata.getById(targetColumnId);

    final String requestedDomainId = actionParameters.get(NEW_DOMAIN_ID_PARAMETER_KEY);
    final boolean domainRequested = StringUtils.isNotEmpty(requestedDomainId);
    if (domainRequested) {
        final String requestedDomainLabel = actionParameters.get(NEW_DOMAIN_LABEL_PARAMETER_KEY);
        column.setDomain(requestedDomainId);
        column.setDomainLabel(requestedDomainLabel);
        column.setDomainFrequency(0);
        column.setDomainForced(true);
    }

    // Always executed, even when no new domain was requested.
    metadata.update(targetColumnId, column);
    context.setActionStatus(ActionContext.ActionStatus.DONE);
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata)

Example 4 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

The method swapColumnMetadata of the class ReorderColumn.

/**
 * Exchanges the contents of two column metadata objects in place, using snapshots taken before
 * any property is overwritten.
 *
 * <p>The columns themselves are exchanged first, then their statistics and finally their quality,
 * each through {@code BeanUtils.copyProperties}. The statement order matters because every copy
 * overwrites state that a later statement reads.
 *
 * <p>NOTE(review): the descriptions below assume the {@code (destination, source)} argument order
 * of Apache Commons BeanUtils, which the broad {@code throws Exception} clause suggests. Confirm
 * which {@code BeanUtils} is imported.
 *
 * @param originColumn the first column, which is meant to receive the target column's state
 * @param targetColumn the second column, which is meant to receive the origin column's state
 * @throws Exception if a property copy fails
 */
protected void swapColumnMetadata(ColumnMetadata originColumn, ColumnMetadata targetColumn) throws Exception {
    // Snapshots of both columns, taken before either one is modified.
    ColumnMetadata targetSnapshot = ColumnMetadata.Builder.column().copy(targetColumn).build();
    ColumnMetadata originSnapshot = ColumnMetadata.Builder.column().copy(originColumn).build();

    // Exchange the column-level properties.
    BeanUtils.copyProperties(targetColumn, originColumn);
    BeanUtils.copyProperties(originColumn, targetSnapshot);

    // Exchange the statistics, reading them from the snapshots.
    Statistics originStatisticsSnapshot = originSnapshot.getStatistics();
    Statistics targetStatisticsSnapshot = targetSnapshot.getStatistics();
    BeanUtils.copyProperties(targetColumn.getStatistics(), originStatisticsSnapshot);
    BeanUtils.copyProperties(originColumn.getStatistics(), targetStatisticsSnapshot);

    // Exchange the quality, reading it from the snapshots.
    Quality originQualitySnapshot = originSnapshot.getQuality();
    Quality targetQualitySnapshot = targetSnapshot.getQuality();
    BeanUtils.copyProperties(targetColumn.getQuality(), originQualitySnapshot);
    BeanUtils.copyProperties(originColumn.getQuality(), targetQualitySnapshot);
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) Quality(org.talend.dataprep.api.dataset.Quality) Statistics(org.talend.dataprep.api.dataset.statistics.Statistics)

Example 5 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

The method applyOnColumn of the class Swap.

/**
 * Exchanges, within the given row, the value of the current column with the value of the column
 * designated by {@code SELECTED_COLUMN_PARAMETER}.
 *
 * <p>The row is left untouched when the selected column is not found in the row metadata. A
 * {@code null} value on either side is written as an empty string on the other side.
 *
 * @param row the row whose two cells are exchanged
 * @param context the action context providing the current column id, the parameters and the row metadata
 */
@Override
public void applyOnColumn(DataSetRow row, ActionContext context) {
    final RowMetadata metadata = context.getRowMetadata();
    final Map<String, String> actionParameters = context.getParameters();
    final ColumnMetadata otherColumn = metadata.getById(actionParameters.get(SELECTED_COLUMN_PARAMETER));
    if (otherColumn == null) {
        // Nothing to swap with.
        return;
    }

    final String currentColumnId = context.getColumnId();
    final String otherColumnId = otherColumn.getId();
    LOGGER.debug("swapping columns {} <-> {}", currentColumnId, otherColumnId);

    // Read both cells before writing either one.
    String valueForOtherColumn = row.get(currentColumnId);
    String valueForCurrentColumn = row.get(otherColumnId);
    if (valueForCurrentColumn == null) {
        valueForCurrentColumn = StringUtils.EMPTY;
    }
    if (valueForOtherColumn == null) {
        valueForOtherColumn = StringUtils.EMPTY;
    }

    row.set(currentColumnId, valueForCurrentColumn);
    row.set(otherColumnId, valueForOtherColumn);
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata)

Aggregations

ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata): 320, Test (org.junit.Test): 217, AbstractMetadataBaseTest (org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest): 115, RowMetadata (org.talend.dataprep.api.dataset.RowMetadata): 86, DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow): 80, DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata): 48, InputStream (java.io.InputStream): 25, Type (org.talend.dataprep.api.type.Type): 23, DataSetBaseTest (org.talend.dataprep.dataset.DataSetBaseTest): 22, ArrayList (java.util.ArrayList): 19, HashMap (java.util.HashMap): 17, IOException (java.io.IOException): 14, TDPException (org.talend.dataprep.exception.TDPException): 14, Schema (org.talend.dataprep.schema.Schema): 14, Autowired (org.springframework.beans.factory.annotation.Autowired): 13, Logger (org.slf4j.Logger): 12, LoggerFactory (org.slf4j.LoggerFactory): 12, SemanticDomain (org.talend.dataprep.api.dataset.statistics.SemanticDomain): 12, DataSetServiceTest (org.talend.dataprep.dataset.service.DataSetServiceTest): 11, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 10