use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class DataSetRow method order.
/**
 * Builds a row whose values follow the iteration order of <code>columns</code>.
 * <p>
 * For a non-empty <code>columns</code> list a new {@link DataSetRow} is created on the same row metadata as this
 * row, so callers do not need to call {@link #clone()} beforehand. An empty list is a no-op: this very instance
 * is returned, not a copy.
 *
 * @param columns The columns that dictate the order of the values, must not be <code>null</code>.
 * @return This row when <code>columns</code> is empty, otherwise a new data set row mapping each column id to this
 * row's value for that id (<code>null</code> when this row has no value for the id), in the order of
 * <code>columns</code>.
 * @throws IllegalArgumentException If <code>columns</code> is <code>null</code>, or if it holds fewer entries than
 * this row has values and the row either has no <code>TDP_INVALID</code> entry or
 * <code>columns.size() + 1</code> is still lower than <code>values().size()</code>.
 */
public DataSetRow order(List<ColumnMetadata> columns) {
    if (columns == null) {
        throw new IllegalArgumentException("Columns cannot be null.");
    }
    if (columns.isEmpty()) {
        // Nothing to order by: hand back this instance as is.
        return this;
    }

    final int columnCount = columns.size();
    if (columnCount < values.size()) {
        // A row carrying a TDP_INVALID entry gets one extra slot of tolerance.
        // NOTE(review): the tolerance check reads values() rather than the values field - keep it that way
        // unless both are confirmed to report the same size.
        final boolean tooFewColumns = !values.containsKey(TDP_INVALID) || columnCount + 1 < values().size();
        if (tooFewColumns) {
            throw new IllegalArgumentException("Expected " + values.size() + " columns but got " + columnCount);
        }
    }

    // LinkedHashMap keeps the insertion order, i.e. the order of the supplied columns.
    final Map<String, String> reordered = new LinkedHashMap<>();
    columns.forEach(column -> {
        final String columnId = column.getId();
        reordered.put(columnId, values.get(columnId));
    });

    final DataSetRow orderedRow = new DataSetRow(rowMetadata);
    orderedRow.values = reordered;
    return orderedRow;
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class ColumnContextDeserializer method deserialize.
/**
 * Reads a list of {@link ColumnMetadata} from the parser and publishes it as a deserialization context
 * attribute, keyed by the name of {@link ColumnContextDeserializer}, before returning it.
 *
 * @param jsonParser The parser positioned on the column list; its codec performs the actual read.
 * @param deserializationContext The context that receives the parsed columns as an attribute.
 * @return The parsed columns (the same list instance that was stored in the context).
 * @throws IOException If the codec fails to read the column list.
 */
@Override
public List<ColumnMetadata> deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException {
    final TypeReference<List<ColumnMetadata>> columnListType = new TypeReference<List<ColumnMetadata>>() {
    };
    final List<ColumnMetadata> parsedColumns = jsonParser.getCodec().readValue(jsonParser, columnListType);
    // Make the columns available to whoever looks them up later under this class' name.
    deserializationContext.setAttribute(ColumnContextDeserializer.class.getName(), parsedColumns);
    return parsedColumns;
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class DataSetRowStreamDeserializer method deserialize.
/**
 * Exposes the rows readable from <code>jp</code> as a sequential stream of {@link DataSetRow}.
 * <p>
 * The row metadata is built from the column list found in the context under the name of
 * {@link ColumnContextDeserializer}; when no such attribute is present an empty {@link RowMetadata} is used.
 *
 * @param jp The parser handed to the {@link DataSetRowIterator} that produces the rows.
 * @param context The deserialization context that may carry the previously parsed columns.
 * @return A non-parallel stream backed by a {@link DataSetRowIterator}.
 */
@Override
public Stream<DataSetRow> deserialize(JsonParser jp, DeserializationContext context) {
    // The attribute is untyped in the context, hence the unchecked cast.
    final List<ColumnMetadata> contextColumns = (List<ColumnMetadata>) context.getAttribute(ColumnContextDeserializer.class.getName());
    final RowMetadata metadata = contextColumns != null ? new RowMetadata(contextColumns) : new RowMetadata();
    final Iterable<DataSetRow> rows = () -> new DataSetRowIterator(jp, metadata);
    return StreamSupport.stream(rows.spliterator(), false);
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class SimpleFilterService method createDateRangePredicate.
/**
 * Create a predicate that checks if the date value is within a range [min, max[
 *
 * @param columnId The column id
 * @param start The start value (inclusive), parsed as a <code>long</code> timestamp
 * @param end The end value (exclusive), parsed as a <code>long</code> timestamp
 * @param rowMetadata The row metadata used to look up the metadata of <code>columnId</code> when a row value is
 * parsed
 * @return The date range predicate
 * @throws IllegalArgumentException If the predicate cannot be created, e.g. because <code>start</code> or
 * <code>end</code> is not a valid <code>long</code>; the original failure is attached as the cause
 */
private Predicate<DataSetRow> createDateRangePredicate(final String columnId, final String start, final String end, final RowMetadata rowMetadata) {
    try {
        final long minTimestamp = Long.parseLong(start);
        final long maxTimestamp = Long.parseLong(end);
        final LocalDateTime minDate = DateManipulator.fromEpochMillisecondsWithSystemOffset(minTimestamp);
        final LocalDateTime maxDate = DateManipulator.fromEpochMillisecondsWithSystemOffset(maxTimestamp);
        return safeDate(r -> {
            // Column metadata is resolved per evaluated row, not once at predicate creation.
            final ColumnMetadata columnMetadata = rowMetadata.getById(columnId);
            final LocalDateTime columnValue = getDateParser().parse(r.get(columnId), columnMetadata);
            // Lower bound is inclusive, upper bound is exclusive.
            return minDate.compareTo(columnValue) == 0 || (minDate.isBefore(columnValue) && maxDate.isAfter(columnValue));
        });
    } catch (Exception e) {
        LOGGER.debug("Unable to create date range predicate.", e);
        // Chain the cause so the root failure stays visible even when debug logging is off.
        throw new IllegalArgumentException("Unsupported query, malformed date 'range' (expected timestamps in min and max properties).", e);
    }
}
use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.
the class AnalyzerService method buildPatternAnalyzer.
/**
 * Assembles the pattern frequency analyzer used for the given columns.
 * <p>
 * The most used date pattern of every column (when not blank) is registered as a custom pattern on the date/time
 * recognizer, so that specific and custom date patterns are dealt with. The recognizers are then chained in a
 * fixed order: empty values, date/time, Latin extended characters.
 *
 * @param columns The columns to build the analyzer for, must not be <code>null</code>.
 * @return A {@link CompositePatternFrequencyAnalyzer} over the recognizers and the converted column types.
 */
private static AbstractFrequencyAnalyzer buildPatternAnalyzer(List<ColumnMetadata> columns) {
    // Collect the non-blank date patterns reported for the columns.
    final List<String> customDatePatterns = new ArrayList<>(columns.size());
    for (ColumnMetadata columnMetadata : columns) {
        final String mostUsedPattern = RowMetadataUtils.getMostUsedDatePattern(columnMetadata);
        if (StringUtils.isNotBlank(mostUsedPattern)) {
            customDatePatterns.add(mostUsedPattern);
        }
    }

    // deal with specific date, even custom date pattern
    final DateTimePatternRecognizer dateTimeRecognizer = new DateTimePatternRecognizer();
    dateTimeRecognizer.addCustomDateTimePatterns(customDatePatterns);

    // warning, the order is important
    final List<AbstractPatternRecognizer> recognizers = new ArrayList<>();
    recognizers.add(new EmptyPatternRecognizer());
    recognizers.add(dateTimeRecognizer);
    recognizers.add(new LatinExtendedCharPatternRecognizer());

    return new CompositePatternFrequencyAnalyzer(recognizers, TypeUtils.convert(columns));
}
Aggregations