Search in sources:

Example 11 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

the class DataSetRow method order.

/**
 * Builds a row whose values follow the order of <code>columns</code>.
 * <p>
 * When <code>columns</code> is empty, this very instance is returned and no copy is made. In every other accepted
 * case a new row is created on the same row metadata, so there is no need to call {@link #clone()} beforehand.
 * A column id that has no value in this row is mapped to <code>null</code> in the result.
 *
 * @param columns The columns that define the order of the values, must not be <code>null</code>.
 * @return This row if <code>columns</code> is empty, otherwise a new data set row whose values are keyed by the
 * ids of <code>columns</code>, in the iteration order of <code>columns</code>.
 * @throws IllegalArgumentException If <code>columns</code> is <code>null</code>, or if it holds fewer entries than
 * this row has values and the shortfall is not tolerated (see the size check in the method body).
 */
public DataSetRow order(List<ColumnMetadata> columns) {
    if (columns == null) {
        throw new IllegalArgumentException("Columns cannot be null.");
    }
    if (columns.isEmpty()) {
        // Nothing to order by: hand back this instance as is.
        return this;
    }
    final int columnCount = columns.size();
    if (columnCount < values.size()) {
        // Fewer columns than stored values is only accepted when the row carries a TDP_INVALID entry and the
        // columns are at most one short of values().
        if (!values.containsKey(TDP_INVALID) || columnCount + 1 < values().size()) {
            throw new IllegalArgumentException("Expected " + values.size() + " columns but got " + columns.size());
        }
    }
    // LinkedHashMap keeps the insertion order, i.e. the order dictated by 'columns'.
    final Map<String, String> reordered = new LinkedHashMap<>();
    for (ColumnMetadata current : columns) {
        final String columnId = current.getId();
        reordered.put(columnId, values.get(columnId));
    }
    final DataSetRow result = new DataSetRow(rowMetadata);
    result.values = reordered;
    return result;
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata)

Example 12 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

the class ColumnContextDeserializer method deserialize.

/**
 * Reads a list of {@link ColumnMetadata} from the parser and publishes it as a context attribute.
 * <p>
 * The list is stored in the deserialization context under the name of {@link ColumnContextDeserializer}, so that
 * code running later with the same context can look it up with that key.
 *
 * @param jsonParser The parser positioned on the column list.
 * @param deserializationContext The context that receives the parsed columns as an attribute.
 * @return The parsed columns (the same list instance that was stored in the context).
 * @throws IOException If the codec fails to read the value.
 */
@Override
public List<ColumnMetadata> deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException {
    final ObjectCodec codec = jsonParser.getCodec();
    // Anonymous subclass captures the generic element type for the codec.
    final TypeReference<List<ColumnMetadata>> columnListType = new TypeReference<List<ColumnMetadata>>() {
    };
    final List<ColumnMetadata> parsedColumns = codec.readValue(jsonParser, columnListType);
    final String attributeKey = ColumnContextDeserializer.class.getName();
    deserializationContext.setAttribute(attributeKey, parsedColumns);
    return parsedColumns;
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) List(java.util.List) ObjectCodec(com.fasterxml.jackson.core.ObjectCodec)

Example 13 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

the class DataSetRowStreamDeserializer method deserialize.

/**
 * Exposes the rows read from the parser as a sequential {@link Stream}.
 * <p>
 * The row metadata is built from the column list found in the context under the name of
 * {@link ColumnContextDeserializer}; when no such attribute exists, an empty {@link RowMetadata} is used.
 *
 * @param jp The parser the rows are read from.
 * @param context The deserialization context, optionally holding the columns attribute.
 * @return A non-parallel stream backed by a {@link DataSetRowIterator} over <code>jp</code>.
 */
@Override
public Stream<DataSetRow> deserialize(JsonParser jp, DeserializationContext context) {
    final String attributeKey = ColumnContextDeserializer.class.getName();
    final List<ColumnMetadata> contextColumns = (List<ColumnMetadata>) context.getAttribute(attributeKey);
    final RowMetadata metadata = contextColumns == null ? new RowMetadata() : new RowMetadata(contextColumns);
    // Each call to iterator() creates a fresh DataSetRowIterator over the same parser.
    final Iterable<DataSetRow> rows = () -> new DataSetRowIterator(jp, metadata);
    return StreamSupport.stream(rows.spliterator(), false);
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) List(java.util.List) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow)

Example 14 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

the class SimpleFilterService method createDateRangePredicate.

/**
 * Create a predicate that checks if the date value is within a range [min, max[
 * <p>
 * A row matches when its parsed date equals the lower bound, or lies strictly after the lower bound and strictly
 * before the upper bound.
 *
 * @param columnId The column id
 * @param start The start value, a string parsable as a <code>long</code> (presumably epoch milliseconds)
 * @param end The end value, a string parsable as a <code>long</code> (presumably epoch milliseconds)
 * @param rowMetadata The row metadata used to look up the column metadata of <code>columnId</code> for each row
 * @return The date range predicate
 * @throws IllegalArgumentException If the bounds cannot be turned into dates; the original failure is attached as
 * the cause.
 */
private Predicate<DataSetRow> createDateRangePredicate(final String columnId, final String start, final String end, final RowMetadata rowMetadata) {
    try {
        final long minTimestamp = Long.parseLong(start);
        final long maxTimestamp = Long.parseLong(end);
        final LocalDateTime minDate = DateManipulator.fromEpochMillisecondsWithSystemOffset(minTimestamp);
        final LocalDateTime maxDate = DateManipulator.fromEpochMillisecondsWithSystemOffset(maxTimestamp);
        return safeDate(r -> {
            // Column metadata is resolved at evaluation time, for every tested row.
            final ColumnMetadata columnMetadata = rowMetadata.getById(columnId);
            final LocalDateTime columnValue = getDateParser().parse(r.get(columnId), columnMetadata);
            // Lower bound is inclusive, upper bound is exclusive.
            return minDate.compareTo(columnValue) == 0 || (minDate.isBefore(columnValue) && maxDate.isAfter(columnValue));
        });
    } catch (Exception e) {
        LOGGER.debug("Unable to create date range predicate.", e);
        // Keep the root failure as the cause so it is not lost when debug logging is disabled.
        throw new IllegalArgumentException("Unsupported query, malformed date 'range' (expected timestamps in min and max properties).", e);
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) ParseException(java.text.ParseException) DateTimeException(java.time.DateTimeException) TalendRuntimeException(org.talend.daikon.exception.TalendRuntimeException)

Example 15 with ColumnMetadata

use of org.talend.dataprep.api.dataset.ColumnMetadata in project data-prep by Talend.

the class AnalyzerService method buildPatternAnalyzer.

/**
 * Builds the composite pattern frequency analyzer for the given columns.
 * <p>
 * The most used date pattern of each column (when not blank) is registered as a custom pattern on the date/time
 * recognizer. Recognizers are then chained in a fixed order: empty values, date/time, Latin extended characters.
 *
 * @param columns The columns to analyze.
 * @return A {@link CompositePatternFrequencyAnalyzer} configured with the recognizers and the converted column types.
 */
private static AbstractFrequencyAnalyzer buildPatternAnalyzer(List<ColumnMetadata> columns) {
    // deal with specific date, even custom date pattern
    final DateTimePatternRecognizer dateTimeRecognizer = new DateTimePatternRecognizer();
    final List<String> customDatePatterns = new ArrayList<>(columns.size());
    for (ColumnMetadata current : columns) {
        final String mostUsedPattern = RowMetadataUtils.getMostUsedDatePattern(current);
        if (!StringUtils.isNotBlank(mostUsedPattern)) {
            // Column has no usable date pattern: nothing to register.
            continue;
        }
        customDatePatterns.add(mostUsedPattern);
    }
    dateTimeRecognizer.addCustomDateTimePatterns(customDatePatterns);

    // warning, the order is important
    final List<AbstractPatternRecognizer> recognizers = new ArrayList<>();
    recognizers.add(new EmptyPatternRecognizer());
    recognizers.add(dateTimeRecognizer);
    recognizers.add(new LatinExtendedCharPatternRecognizer());
    return new CompositePatternFrequencyAnalyzer(recognizers, TypeUtils.convert(columns));
}
Also used : CompositePatternFrequencyAnalyzer(org.talend.dataquality.statistics.frequency.pattern.CompositePatternFrequencyAnalyzer) AbstractPatternRecognizer(org.talend.dataquality.statistics.frequency.recognition.AbstractPatternRecognizer) DateTimePatternRecognizer(org.talend.dataquality.statistics.frequency.recognition.DateTimePatternRecognizer) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) LatinExtendedCharPatternRecognizer(org.talend.dataquality.statistics.frequency.recognition.LatinExtendedCharPatternRecognizer) EmptyPatternRecognizer(org.talend.dataquality.statistics.frequency.recognition.EmptyPatternRecognizer)

Aggregations

ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata)320 Test (org.junit.Test)217 AbstractMetadataBaseTest (org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest)115 RowMetadata (org.talend.dataprep.api.dataset.RowMetadata)86 DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow)80 DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata)48 InputStream (java.io.InputStream)25 Type (org.talend.dataprep.api.type.Type)23 DataSetBaseTest (org.talend.dataprep.dataset.DataSetBaseTest)22 ArrayList (java.util.ArrayList)19 HashMap (java.util.HashMap)17 IOException (java.io.IOException)14 TDPException (org.talend.dataprep.exception.TDPException)14 Schema (org.talend.dataprep.schema.Schema)14 Autowired (org.springframework.beans.factory.annotation.Autowired)13 Logger (org.slf4j.Logger)12 LoggerFactory (org.slf4j.LoggerFactory)12 SemanticDomain (org.talend.dataprep.api.dataset.statistics.SemanticDomain)12 DataSetServiceTest (org.talend.dataprep.dataset.service.DataSetServiceTest)11 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)10