Search in sources :

Example 1 with AggregationResult

use of org.talend.dataprep.transformation.aggregation.api.AggregationResult in project data-prep by Talend.

the class AggregationService method aggregate.

/**
 * Runs an aggregation over the records of a dataset.
 *
 * The records are first narrowed down with the filter described by the parameters, then each
 * remaining row is handed to the aggregator, and the result is finally normalized.
 *
 * @param parameters the aggregation parameters (operations, group by and optional filter).
 * @param dataset the dataset whose records are aggregated.
 * @return the normalized aggregation result.
 * @throws TDPException with {@code BAD_AGGREGATION_PARAMETERS} when no operation or no group by is given.
 */
public AggregationResult aggregate(AggregationParameters parameters, DataSet dataset) {
    // Guard clauses: an aggregation needs at least one operation...
    if (parameters.getOperations().isEmpty()) {
        throw new TDPException(CommonErrorCodes.BAD_AGGREGATION_PARAMETERS);
    }
    // ... and at least one group by entry.
    if (parameters.getGroupBy().isEmpty()) {
        throw new TDPException(CommonErrorCodes.BAD_AGGREGATION_PARAMETERS);
    }

    // The result is typed after the operator of the first operation.
    final AggregationResult aggregation = new AggregationResult(parameters.getOperations().get(0).getOperator());
    final Aggregator rowAggregator = factory.get(parameters);

    // Fall back on an empty row metadata when the dataset carries no metadata.
    final DataSetMetadata datasetMetadata = dataset.getMetadata();
    final RowMetadata columns;
    if (datasetMetadata != null) {
        columns = datasetMetadata.getRowMetadata();
    } else {
        columns = new RowMetadata();
    }
    final Predicate<DataSetRow> rowFilter = filterService.build(parameters.getFilter(), columns);

    // Feed every row accepted by the filter to the aggregator.
    dataset.getRecords() //
            .filter(rowFilter) //
            .forEach(record -> rowAggregator.accept(record, aggregation));

    // All input was processed: let the aggregator clean up / optimize the result.
    rowAggregator.normalize(aggregation);
    return aggregation;
}
Also used : TDPException(org.talend.dataprep.exception.TDPException) AggregationResult(org.talend.dataprep.transformation.aggregation.api.AggregationResult) Aggregator(org.talend.dataprep.transformation.aggregation.operation.Aggregator) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow)

Example 2 with AggregationResult

use of org.talend.dataprep.transformation.aggregation.api.AggregationResult in project data-prep by Talend.

the class AverageTest method shouldRemoveEmptyDuringAverageNormalization.

@Test
public void shouldRemoveEmptyDuringAverageNormalization() {
    // given: one group with numeric values and one group with only empty values
    AggregationResult aggregation = new AggregationResult(Operator.AVERAGE);
    aggregator.accept(getRow("toto", "10"), aggregation);
    aggregator.accept(getRow("toto", "0"), aggregation);
    aggregator.accept(getRow("empty", ""), aggregation);
    aggregator.accept(getRow("empty", ""), aggregation);

    // then: before normalization both groups are present, the empty one yielding NaN
    final Average.AverageContext totoContext = (Average.AverageContext) aggregation.get("toto");
    assertEquals(5d, totoContext.getValue(), 0d);
    final Average.AverageContext emptyContext = (Average.AverageContext) aggregation.get("empty");
    assertEquals(Double.NaN, emptyContext.getValue(), 0d);

    // when: normalize() is called, groups without any value must be dropped from the result
    aggregator.normalize(aggregation);
    assertEquals(null, aggregation.get("empty"));
}
Also used : AggregationResult(org.talend.dataprep.transformation.aggregation.api.AggregationResult) Test(org.junit.Test)

Example 3 with AggregationResult

use of org.talend.dataprep.transformation.aggregation.api.AggregationResult in project data-prep by Talend.

the class MaxTest method shouldComputeMin.

/**
 * Checks that the aggregator keeps the highest value seen for each group and that a row holding
 * a non numeric value does not create a group in the result.
 *
 * NOTE(review): despite its name this test exercises the MAX operator; the name is kept so that
 * existing references to the test stay valid.
 */
@Test
public void shouldComputeMin() {
    // given when
    AggregationResult result = new AggregationResult(Operator.MAX);
    aggregator.accept(getRow("toto", "5123.4"), result);
    aggregator.accept(getRow("tata", "-50.2"), result);
    aggregator.accept(getRow("toto", "786.884"), result);
    aggregator.accept(getRow("tata", "-0.2"), result);
    // <-- max here for toto
    aggregator.accept(getRow("toto", "41843.453"), result);
    aggregator.accept(getRow("toto", "0"), result);
    aggregator.accept(getRow("tata", "20"), result);
    aggregator.accept(getRow("toto", "-1"), result);
    aggregator.accept(getRow("toto", "8.87"), result);
    // <-- max here for tata
    aggregator.accept(getRow("tata", "875"), result);
    aggregator.accept(getRow("toto", "-0.01"), result);
    // <-- should not be part of the result
    aggregator.accept(getRow("tutu", "sdfs"), result);
    // then (expected value first, so that failure messages read correctly)
    Assert.assertEquals(41843.453d, result.get("toto").getValue(), 0d);
    Assert.assertEquals(875d, result.get("tata").getValue(), 0d);
    // the group key of the invalid row is "tutu" ("sdfs" is only its value)
    Assert.assertNull(result.get("tutu"));
}
Also used : AggregationResult(org.talend.dataprep.transformation.aggregation.api.AggregationResult) Test(org.junit.Test)

Example 4 with AggregationResult

use of org.talend.dataprep.transformation.aggregation.api.AggregationResult in project data-prep by Talend.

the class MaxTest method shouldNormalizeHaveNoEffect.

/**
 * Checks that calling normalize() after a MAX aggregation leaves the result untouched: the
 * highest value of each group is still reported and the row holding a non numeric value still
 * has no group in the result.
 */
@Test
public void shouldNormalizeHaveNoEffect() {
    // given when
    AggregationResult result = new AggregationResult(Operator.MAX);
    aggregator.accept(getRow("toto", "5123.4"), result);
    aggregator.accept(getRow("tata", "-50.2"), result);
    aggregator.accept(getRow("toto", "786.884"), result);
    aggregator.accept(getRow("tata", "-0.2"), result);
    // <-- max here for toto
    aggregator.accept(getRow("toto", "41843.453"), result);
    aggregator.accept(getRow("toto", "0"), result);
    aggregator.accept(getRow("tata", "20"), result);
    aggregator.accept(getRow("toto", "-1"), result);
    aggregator.accept(getRow("toto", "8.87"), result);
    // <-- max here for tata
    aggregator.accept(getRow("tata", "875"), result);
    aggregator.accept(getRow("toto", "-0.01"), result);
    // <-- should not be part of the result
    aggregator.accept(getRow("tutu", "sdfs"), result);
    // No effect for operation
    aggregator.normalize(result);
    // then (expected value first, so that failure messages read correctly)
    Assert.assertEquals(41843.453d, result.get("toto").getValue(), 0d);
    Assert.assertEquals(875d, result.get("tata").getValue(), 0d);
    // the group key of the invalid row is "tutu" ("sdfs" is only its value)
    Assert.assertNull(result.get("tutu"));
}
Also used : AggregationResult(org.talend.dataprep.transformation.aggregation.api.AggregationResult) Test(org.junit.Test)

Example 5 with AggregationResult

use of org.talend.dataprep.transformation.aggregation.api.AggregationResult in project data-prep by Talend.

the class MinTest method shouldComputeMin.

/**
 * Checks that the aggregator keeps the lowest value seen for each group and that a row holding
 * a non numeric value does not create a group in the result.
 */
@Test
public void shouldComputeMin() {
    // given when
    AggregationResult result = new AggregationResult(Operator.MIN);
    aggregator.accept(getRow("toto", "5123.4"), result);
    // <-- min here for tata
    aggregator.accept(getRow("tata", "-50.2"), result);
    aggregator.accept(getRow("toto", "786.884"), result);
    aggregator.accept(getRow("tata", "-0.2"), result);
    aggregator.accept(getRow("toto", "41843.453"), result);
    aggregator.accept(getRow("toto", "0"), result);
    aggregator.accept(getRow("tata", "20"), result);
    // <-- min here for toto
    aggregator.accept(getRow("toto", "-1"), result);
    aggregator.accept(getRow("toto", "8.87"), result);
    aggregator.accept(getRow("tata", "875"), result);
    aggregator.accept(getRow("toto", "-0.01"), result);
    // <-- should not be part of the result
    aggregator.accept(getRow("tutu", "dqsfqs"), result);
    // then (expected value first, so that failure messages read correctly)
    Assert.assertEquals(-1d, result.get("toto").getValue(), 0d);
    Assert.assertEquals(-50.2d, result.get("tata").getValue(), 0d);
    Assert.assertNull(result.get("tutu"));
}
Also used : AggregationResult(org.talend.dataprep.transformation.aggregation.api.AggregationResult) Test(org.junit.Test)

Aggregations

AggregationResult (org.talend.dataprep.transformation.aggregation.api.AggregationResult) 9, Test (org.junit.Test) 8, DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata) 1, RowMetadata (org.talend.dataprep.api.dataset.RowMetadata) 1, DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow) 1, TDPException (org.talend.dataprep.exception.TDPException) 1, Aggregator (org.talend.dataprep.transformation.aggregation.operation.Aggregator) 1