
Example 16 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

In class DefaultLimitSpec, the method makeComparator:

private Ordering<Row> makeComparator(List<DimensionSpec> dimensions, List<AggregatorFactory> aggs, List<PostAggregator> postAggs) {
    Ordering<Row> ordering = new Ordering<Row>() {

        @Override
        public int compare(Row left, Row right) {
            return Longs.compare(left.getTimestampFromEpoch(), right.getTimestampFromEpoch());
        }
    };
    Map<String, DimensionSpec> dimensionsMap = Maps.newHashMap();
    for (DimensionSpec spec : dimensions) {
        dimensionsMap.put(spec.getOutputName(), spec);
    }
    Map<String, AggregatorFactory> aggregatorsMap = Maps.newHashMap();
    for (final AggregatorFactory agg : aggs) {
        aggregatorsMap.put(agg.getName(), agg);
    }
    Map<String, PostAggregator> postAggregatorsMap = Maps.newHashMap();
    for (PostAggregator postAgg : postAggs) {
        postAggregatorsMap.put(postAgg.getName(), postAgg);
    }
    for (OrderByColumnSpec columnSpec : columns) {
        String columnName = columnSpec.getDimension();
        Ordering<Row> nextOrdering = null;
        if (postAggregatorsMap.containsKey(columnName)) {
            nextOrdering = metricOrdering(columnName, postAggregatorsMap.get(columnName).getComparator());
        } else if (aggregatorsMap.containsKey(columnName)) {
            nextOrdering = metricOrdering(columnName, aggregatorsMap.get(columnName).getComparator());
        } else if (dimensionsMap.containsKey(columnName)) {
            nextOrdering = dimensionOrdering(columnName, columnSpec.getDimensionComparator());
        }
        if (nextOrdering == null) {
            throw new ISE("Unknown column in order clause[%s]", columnSpec);
        }
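        // Only DESCENDING needs handling here; ASCENDING keeps nextOrdering's natural direction.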
        switch (columnSpec.getDirection()) {
            case DESCENDING:
                nextOrdering = nextOrdering.reverse();
        }
        ordering = ordering.compound(nextOrdering);
    }
    return ordering;
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) Ordering(com.google.common.collect.Ordering) ISE(io.druid.java.util.common.ISE) Row(io.druid.data.input.Row) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
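
The resulting comparator is a Guava compound ordering: the timestamp ordering is applied first, and each order-by column refines it in turn. A minimal standalone sketch of the same compound/reverse mechanics (the long[] rows and field layout are invented for illustration; DefaultLimitSpec builds its orderings from row columns instead):

import com.google.common.collect.Ordering;
import com.google.common.primitives.Longs;

import java.util.Arrays;
import java.util.List;

public class CompoundOrderingSketch {
    public static void main(String[] args) {
        // Primary ordering: by the first field, mirroring the timestamp comparison above.
        Ordering<long[]> primary = new Ordering<long[]>() {
            @Override
            public int compare(long[] left, long[] right) {
                return Longs.compare(left[0], right[0]);
            }
        };
        // Secondary ordering: by the second field, reversed as for a DESCENDING column.
        Ordering<long[]> secondaryDesc = new Ordering<long[]>() {
            @Override
            public int compare(long[] left, long[] right) {
                return Longs.compare(left[1], right[1]);
            }
        }.reverse();
        // compound(): ties in the primary ordering fall through to the secondary one.
        List<long[]> rows = Arrays.asList(new long[] { 1, 10 }, new long[] { 1, 20 }, new long[] { 0, 5 });
        rows.sort(primary.compound(secondaryDesc));
        for (long[] r : rows) {
            System.out.println(Arrays.toString(r));  // [0, 5], then [1, 20], then [1, 10]
        }
    }
}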

Example 17 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

In class GroupByStrategyV2, the method mergeResults:

@Override
public Sequence<Row> mergeResults(final QueryRunner<Row> baseRunner, final GroupByQuery query, final Map<String, Object> responseContext) {
    // Merge streams using ResultMergeQueryRunner, then apply postaggregators, then apply limit (which may
    // involve materialization)
    final ResultMergeQueryRunner<Row> mergingQueryRunner = new ResultMergeQueryRunner<Row>(baseRunner) {

        @Override
        protected Ordering<Row> makeOrdering(Query<Row> queryParam) {
            return ((GroupByQuery) queryParam).getRowOrdering(true);
        }

        @Override
        protected BinaryFn<Row, Row, Row> createMergeFn(Query<Row> queryParam) {
            return new GroupByBinaryFnV2((GroupByQuery) queryParam);
        }
    };
    // Fudge timestamp, maybe.
    final DateTime fudgeTimestamp = getUniversalTimestamp(query);
    return query.applyLimit(Sequences.map(
        mergingQueryRunner.run(
            new GroupByQuery(
                query.getDataSource(),
                query.getQuerySegmentSpec(),
                query.getVirtualColumns(),
                query.getDimFilter(),
                query.getGranularity(),
                query.getDimensions(),
                query.getAggregatorSpecs(),
                // Don't do post aggs until the end of this method.
                ImmutableList.<PostAggregator>of(),
                // Don't do "having" clause until the end of this method.
                null,
                null,
                query.getContext()
            ).withOverriddenContext(ImmutableMap.<String, Object>of(
                "finalize", false,
                GroupByQueryConfig.CTX_KEY_STRATEGY, GroupByStrategySelector.STRATEGY_V2,
                CTX_KEY_FUDGE_TIMESTAMP, fudgeTimestamp == null ? "" : String.valueOf(fudgeTimestamp.getMillis()),
                CTX_KEY_OUTERMOST, false
            )),
            responseContext
        ),
        new Function<Row, Row>() {

        @Override
        public Row apply(final Row row) {
            if (!query.getContextBoolean(CTX_KEY_OUTERMOST, true)) {
                return row;
            }
            if (query.getPostAggregatorSpecs().isEmpty() && fudgeTimestamp == null) {
                return row;
            }
            final Map<String, Object> newMap;
            if (query.getPostAggregatorSpecs().isEmpty()) {
                newMap = ((MapBasedRow) row).getEvent();
            } else {
                newMap = Maps.newLinkedHashMap(((MapBasedRow) row).getEvent());
                for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) {
                    newMap.put(postAggregator.getName(), postAggregator.compute(newMap));
                }
            }
            return new MapBasedRow(fudgeTimestamp != null ? fudgeTimestamp : row.getTimestamp(), newMap);
        }
    }));
}
Also used : ResultMergeQueryRunner(io.druid.query.ResultMergeQueryRunner) GroupByBinaryFnV2(io.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) Query(io.druid.query.Query) GroupByQuery(io.druid.query.groupby.GroupByQuery) PostAggregator(io.druid.query.aggregation.PostAggregator) DateTime(org.joda.time.DateTime) MapBasedRow(io.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) GroupByQuery(io.druid.query.groupby.GroupByQuery) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow)
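
Note the order of operations in the mapping function: post-aggregators are computed into the same mutable copy of the event map, so a later post-aggregator can read the output of an earlier one. A minimal sketch of that fold, using plain java.util.function.Function stand-ins rather than Druid's PostAggregator (the field names and aggregator logic are invented):

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Function;

public class PostAggSketch {
    public static void main(String[] args) {
        // A row's event map after aggregation (field names invented).
        Map<String, Object> event = new LinkedHashMap<>();
        event.put("rows", 4L);
        event.put("sum", 20L);

        // Ordered post-aggregators; insertion order matters, as in the loop above.
        Map<String, Function<Map<String, Object>, Object>> postAggs = new LinkedHashMap<>();
        postAggs.put("avg", m -> ((Long) m.get("sum")).doubleValue() / (Long) m.get("rows"));
        postAggs.put("avgPlusOne", m -> (Double) m.get("avg") + 1.0);  // reads the earlier post-agg

        // Mirror of the else-branch above: copy the event, then fold post-aggs into the copy.
        Map<String, Object> newMap = new LinkedHashMap<>(event);
        for (Map.Entry<String, Function<Map<String, Object>, Object>> e : postAggs.entrySet()) {
            newMap.put(e.getKey(), e.getValue().apply(newMap));
        }
        System.out.println(newMap);  // {rows=4, sum=20, avg=5.0, avgPlusOne=6.0}
    }
}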

Example 18 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

In class SpatialDimensionRowTransformer, the method apply:

@Override
public InputRow apply(final InputRow row) {
    final Map<String, List<String>> spatialLookup = Maps.newHashMap();
    // remove all spatial dimensions
    final List<String> finalDims = Lists.newArrayList(
        Iterables.filter(
            row.getDimensions(),
            new Predicate<String>() {
                @Override
                public boolean apply(String input) {
                    return !spatialDimensionMap.containsKey(input) && !spatialPartialDimNames.contains(input);
                }
            }
        )
    );
    InputRow retVal = new InputRow() {

        @Override
        public List<String> getDimensions() {
            return finalDims;
        }

        @Override
        public long getTimestampFromEpoch() {
            return row.getTimestampFromEpoch();
        }

        @Override
        public DateTime getTimestamp() {
            return row.getTimestamp();
        }

        @Override
        public List<String> getDimension(String dimension) {
            List<String> retVal = spatialLookup.get(dimension);
            return (retVal == null) ? row.getDimension(dimension) : retVal;
        }

        @Override
        public Object getRaw(String dimension) {
            List<String> retVal = spatialLookup.get(dimension);
            return (retVal == null) ? row.getRaw(dimension) : retVal;
        }

        @Override
        public long getLongMetric(String metric) {
            try {
                return row.getLongMetric(metric);
            } catch (ParseException e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public float getFloatMetric(String metric) {
            try {
                return row.getFloatMetric(metric);
            } catch (ParseException e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public String toString() {
            return row.toString();
        }

        @Override
        public int compareTo(Row o) {
            return getTimestamp().compareTo(o.getTimestamp());
        }
    };
    for (Map.Entry<String, SpatialDimensionSchema> entry : spatialDimensionMap.entrySet()) {
        final String spatialDimName = entry.getKey();
        final SpatialDimensionSchema spatialDim = entry.getValue();
        List<String> dimVals = row.getDimension(spatialDimName);
        if (dimVals != null && !dimVals.isEmpty()) {
            if (dimVals.size() != 1) {
                throw new ISE("Spatial dimension value must be in an array!");
            }
            if (isJoinedSpatialDimValValid(dimVals.get(0))) {
                spatialLookup.put(spatialDimName, dimVals);
                finalDims.add(spatialDimName);
            }
        } else {
            List<String> spatialDimVals = Lists.newArrayList();
            for (String dim : spatialDim.getDims()) {
                List<String> partialDimVals = row.getDimension(dim);
                if (isSpatialDimValsValid(partialDimVals)) {
                    spatialDimVals.addAll(partialDimVals);
                }
            }
            if (spatialDimVals.size() == spatialDim.getDims().size()) {
                spatialLookup.put(spatialDimName, Arrays.asList(JOINER.join(spatialDimVals)));
                finalDims.add(spatialDimName);
            }
        }
    }
    return retVal;
}
Also used : SpatialDimensionSchema(io.druid.data.input.impl.SpatialDimensionSchema) InputRow(io.druid.data.input.InputRow) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ISE(io.druid.java.util.common.ISE) ParseException(io.druid.java.util.common.parsers.ParseException) InputRow(io.druid.data.input.InputRow) Row(io.druid.data.input.Row) Map(java.util.Map) Predicate(com.google.common.base.Predicate)
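
When the spatial dimension is not stored pre-joined, the else-branch fuses the partial coordinate dimensions into one value with the class's JOINER. A standalone sketch of that path (the "lat"/"lon" schema and the comma delimiter are assumptions; the real validity check is isSpatialDimValsValid):

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;

import java.util.Arrays;
import java.util.List;

public class SpatialJoinSketch {
    // Assumption: the class's JOINER uses a comma delimiter.
    private static final Joiner JOINER = Joiner.on(",");

    public static void main(String[] args) {
        // Hypothetical schema: a spatial dimension built from "lat" and "lon".
        List<String> partialDims = Arrays.asList("lat", "lon");
        List<String> spatialDimVals = Lists.newArrayList();
        for (String dim : partialDims) {
            // Stand-in for row.getDimension(dim) plus the validity check.
            List<String> partialDimVals = dim.equals("lat") ? Arrays.asList("37.77") : Arrays.asList("-122.42");
            if (!partialDimVals.isEmpty()) {
                spatialDimVals.addAll(partialDimVals);
            }
        }
        // Join only when every partial dimension contributed a value.
        if (spatialDimVals.size() == partialDims.size()) {
            System.out.println(JOINER.join(spatialDimVals));  // 37.77,-122.42
        }
    }
}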

Example 19 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

In class MultiValuedDimensionTest, the method testGroupByNoFilter:

@Test
public void testGroupByNoFilter() throws Exception {
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource("xx")
        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
        .setGranularity(Granularities.ALL)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("tags", "tags")))
        .setAggregatorSpecs(Arrays.asList(new AggregatorFactory[] { new CountAggregatorFactory("count") }))
        .build();
    Sequence<Row> result = helper.runQueryOnSegmentsObjs(
        ImmutableList.<Segment>of(
            new QueryableIndexSegment("sid1", queryableIndex),
            new IncrementalIndexSegment(incrementalIndex, "sid2")
        ),
        query
    );
    List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", null, "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 4L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t6", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t7", "count", 2L)
    );
    TestHelper.assertExpectedObjects(expectedResults, Sequences.toList(result, new ArrayList<Row>()), "");
}
Also used : QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) GroupByQuery(io.druid.query.groupby.GroupByQuery) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) ArrayList(java.util.ArrayList) Row(io.druid.data.input.Row) LegacySegmentSpec(io.druid.query.spec.LegacySegmentSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest) Test(org.junit.Test)
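
The createExpectedRow helper takes a timestamp string followed by alternating column name/value pairs. A sketch of what such a helper can look like, built directly on MapBasedRow (an assumed reconstruction of the contract, not the helper's actual source):

import io.druid.data.input.MapBasedRow;
import io.druid.data.input.Row;
import org.joda.time.DateTime;

import java.util.LinkedHashMap;
import java.util.Map;

public class ExpectedRowSketch {
    // Assumed varargs contract: (timestamp, name1, value1, name2, value2, ...).
    static Row expectedRow(String timestamp, Object... pairs) {
        Map<String, Object> event = new LinkedHashMap<>();
        for (int i = 0; i < pairs.length; i += 2) {
            event.put((String) pairs[i], pairs[i + 1]);
        }
        return new MapBasedRow(new DateTime(timestamp), event);
    }

    public static void main(String[] args) {
        Row row = expectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L);
        System.out.println(row);
    }
}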

Example 20 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

In class MultiValuedDimensionTest, the method testGroupByWithDimFilter:

@Test
public void testGroupByWithDimFilter() throws Exception {
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource("xx")
        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
        .setGranularity(Granularities.ALL)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("tags", "tags")))
        .setAggregatorSpecs(Arrays.asList(new AggregatorFactory[] { new CountAggregatorFactory("count") }))
        .setDimFilter(new SelectorDimFilter("tags", "t3", null))
        .build();
    Sequence<Row> result = helper.runQueryOnSegmentsObjs(
        ImmutableList.<Segment>of(
            new QueryableIndexSegment("sid1", queryableIndex),
            new IncrementalIndexSegment(incrementalIndex, "sid2")
        ),
        query
    );
    List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 2L)
    );
    TestHelper.assertExpectedObjects(expectedResults, Sequences.toList(result, new ArrayList<Row>()), "");
}
Also used : QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) ListFilteredDimensionSpec(io.druid.query.dimension.ListFilteredDimensionSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) RegexFilteredDimensionSpec(io.druid.query.dimension.RegexFilteredDimensionSpec) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) ArrayList(java.util.ArrayList) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LegacySegmentSpec(io.druid.query.spec.LegacySegmentSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) GroupByQuery(io.druid.query.groupby.GroupByQuery) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) Row(io.druid.data.input.Row) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest) Test(org.junit.Test)
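
The interesting part of this test is the multi-value semantics: SelectorDimFilter("tags", "t3", null) selects every row whose tags list contains "t3", and grouping then counts all tags of the surviving rows, which is why t1, t2, t4, and t5 appear alongside t3 (the expected counts are doubled because the query runs over two segments). A standalone sketch of that filter-then-explode behavior with invented rows:

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class MultiValueFilterSketch {
    public static void main(String[] args) {
        // Hypothetical multi-valued "tags" per row.
        List<List<String>> rows = Arrays.asList(
            Arrays.asList("t1", "t2", "t3"),
            Arrays.asList("t3", "t4", "t5"),
            Arrays.asList("t5", "t6", "t7")  // filtered out: contains no "t3"
        );
        Map<String, Long> counts = new TreeMap<>();
        for (List<String> tags : rows) {
            if (!tags.contains("t3")) {
                continue;  // analogue of SelectorDimFilter("tags", "t3", null)
            }
            for (String tag : tags) {
                counts.merge(tag, 1L, Long::sum);  // group by each tag of surviving rows
            }
        }
        System.out.println(counts);  // {t1=1, t2=1, t3=2, t4=1, t5=1}
    }
}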

Aggregations

Row (io.druid.data.input.Row): 167 usages
Test (org.junit.Test): 123 usages
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 105 usages
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 91 usages
DimensionSpec (io.druid.query.dimension.DimensionSpec): 64 usages
ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec): 59 usages
ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec): 56 usages
RegexFilteredDimensionSpec (io.druid.query.dimension.RegexFilteredDimensionSpec): 56 usages
InputRow (io.druid.data.input.InputRow): 28 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 24 usages
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 24 usages
SelectorDimFilter (io.druid.query.filter.SelectorDimFilter): 22 usages
LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn): 22 usages
Benchmark (org.openjdk.jmh.annotations.Benchmark): 21 usages
BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode): 21 usages
OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit): 21 usages
GroupByQuery (io.druid.query.groupby.GroupByQuery): 20 usages
MapBasedRow (io.druid.data.input.MapBasedRow): 19 usages
OrderByColumnSpec (io.druid.query.groupby.orderby.OrderByColumnSpec): 19 usages
QueryableIndexSegment (io.druid.segment.QueryableIndexSegment): 19 usages