use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class ActionsUtils method createNewColumnsImpl.
/**
 * Creates one column per requested additional column and inserts them, one after the other, into
 * {@code rowMetadata}, starting right after {@code columnId}.
 *
 * @param context the action context; its row metadata is used to look up columns to copy from
 * @param additionalColumns descriptions of the columns to create
 * @param columnId id of the column after which the first new column is inserted
 * @param rowMetadata the row metadata receiving the new columns
 * @return a map from each additional column key to the id of the column created for it
 */
private static Map<String, String> createNewColumnsImpl(ActionContext context, List<AdditionalColumn> additionalColumns, String columnId, RowMetadata rowMetadata) {
    final Map<String, String> createdIdsByKey = new HashMap<>();
    // each new column is chained after the previous one; the chain starts at the current column
    String insertionPoint = columnId;
    for (AdditionalColumn requested : additionalColumns) {
        final ColumnMetadata.Builder builder = ColumnMetadata.Builder.column();
        final String templateId = requested.getCopyMetadataFromId();
        if (templateId == null) {
            builder.type(requested.getType());
        } else {
            // copying the template column metadata matters for actions that rely on statistics
            final ColumnMetadata template = context.getRowMetadata().getById(templateId);
            builder.copy(template).computedId(StringUtils.EMPTY);
            builder.type(Type.get(template.getType()));
        }
        builder.name(requested.getName());

        final ColumnMetadata created = builder.build();
        rowMetadata.insertAfter(insertionPoint, created);

        // read the id only after insertion, then use it as the anchor for the next column
        final String createdId = created.getId();
        insertionPoint = createdId;
        createdIdsByKey.put(requested.getKey(), createdId);
    }
    return createdIdsByKey;
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class ClearMatching method toClear.
/**
 * Tells whether the cell of {@code columnId} in the given row matches the value configured in the
 * {@code VALUE_PARAMETER} action parameter.
 *
 * @param dataSetRow the row holding the cell to test
 * @param columnId id of the column to test
 * @param actionContext source of the action parameters and of the row metadata
 * @return {@code true} when the cell value matches the configured value
 */
@Override
public boolean toClear(DataSetRow dataSetRow, String columnId, ActionContext actionContext) {
    final Map<String, String> actionParameters = actionContext.getParameters();
    final ColumnMetadata column = actionContext.getRowMetadata().getById(columnId);
    final String cellValue = dataSetRow.get(columnId);
    final String expectedValue = actionParameters.get(VALUE_PARAMETER);

    if (Type.get(column.getType()) != Type.BOOLEAN) {
        // every non-boolean column delegates the comparison to the replace-on-value helper
        return new ReplaceOnValueHelper().build(expectedValue, true).matches(cellValue);
    }
    // booleans are compared without regard to case, so "True" matches "true"
    return StringUtils.equalsIgnoreCase(cellValue, expectedValue);
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class Concat method getAdditionalColumns.
/**
 * Describes the single column created by this action by computing its name.
 * <p>
 * In {@code OTHER_COLUMN_MODE} the name is the source column name, {@code COLUMN_NAMES_SEPARATOR},
 * then the name of the column designated by {@code SELECTED_COLUMN_PARAMETER}. In any other case
 * (including a missing mode parameter) the name is the source column name surrounded by the
 * optional prefix and suffix parameters.
 *
 * @param context the action context providing the parameters, the row metadata and the source column name
 * @return a singleton list holding the additional column carrying the computed name
 */
protected List<ActionsUtils.AdditionalColumn> getAdditionalColumns(ActionContext context) {
    final Map<String, String> parameters = context.getParameters();
    final String sourceColumnName = context.getColumnName();
    final String prefix = getParameter(parameters, PREFIX_PARAMETER, StringUtils.EMPTY);
    final String suffix = getParameter(parameters, SUFFIX_PARAMETER, StringUtils.EMPTY);

    final String result;
    // constant-first comparison: a missing mode parameter must not raise a NullPointerException
    if (OTHER_COLUMN_MODE.equals(parameters.get(MODE_PARAMETER))) {
        // the selected column is only needed (and only resolved) in this mode
        final ColumnMetadata selectedColumn = context.getRowMetadata().getById(parameters.get(SELECTED_COLUMN_PARAMETER));
        result = sourceColumnName + COLUMN_NAMES_SEPARATOR + selectedColumn.getName();
    } else {
        result = prefix + sourceColumnName + suffix;
    }
    return singletonList(ActionsUtils.additionalColumn().withName(result));
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class MakeLineHeader method setRemainingRowColumnsNames.
/**
 * Renames the columns of the context row metadata, in iteration order, with the names stored in
 * the context under each column id. Stops at the first column for which the context holds no entry.
 *
 * @param context the action context holding both the row metadata and the new names keyed by column id
 */
private void setRemainingRowColumnsNames(ActionContext context) {
    for (ColumnMetadata current : context.getRowMetadata().getColumns()) {
        final String id = current.getId();
        if (context.has(id)) {
            final String header = context.get(id);
            current.setName(header);
        } else {
            // no header recorded for this column: the action hasn't found the new headers yet
            break;
        }
    }
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class StatisticsAdapter method injectSemanticTypes.
/**
 * Updates the domain information of {@code column} from the semantic type found in {@code result}.
 * Nothing happens when the result holds no semantic type or when the column domain is forced.
 *
 * @param column the column metadata to update
 * @param result the analysis result that may contain a {@link SemanticType}
 */
private void injectSemanticTypes(final ColumnMetadata column, final Analyzers.Result result) {
    if (!result.exist(SemanticType.class) || column.isDomainForced()) {
        return;
    }

    final List<CategoryFrequency> candidates = result.get(SemanticType.class).getSuggestedCategories();

    // TDP-471: Don't pick semantic type if lower than a threshold.
    final Optional<CategoryFrequency> firstNamed = candidates.stream() //
            .filter(candidate -> !candidate.getCategoryName().isEmpty()) //
            .findFirst();

    if (!firstNamed.isPresent()) {
        if (StringUtils.isNotEmpty(column.getDomain())) {
            // Column *had* a domain but the new analysis no longer suggests one.
            resetDomain(column);
        }
    } else {
        // TODO (TDP-734) Take into account limit of the semantic analyzer.
        final CategoryFrequency best = firstNamed.get();
        final float score = best.getScore();
        if (score > semanticThreshold) {
            updateMetadataWithCategoryInfo(column, best);
        } else {
            // Clear the domain when the score is not above the threshold: an earlier analysis
            // (e.g. on the first 20 lines) may have been over it, but the full scan decides otherwise.
            resetDomain(column);
        }
    }

    // Keep the suggested semantic categories (at most 10, score of at least 1) in the column metadata
    final List<SemanticDomain> keptDomains = candidates.stream() //
            .map(this::toSemanticDomain) //
            .filter(domain -> domain != null) //
            .filter(domain -> domain.getScore() >= 1) //
            .limit(10) //
            .collect(Collectors.toList());
    column.setSemanticDomains(keptDomains);
}
Aggregations