
Example 11 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

The class GroupByTypeInterfaceBenchmark, method querySingleQueryableIndexNumericOnly.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleQueryableIndexNumericOnly(Blackhole blackhole) throws Exception {
    QueryRunner<Row> runner = QueryBenchmarkUtil.makeQueryRunner(factory, "qIndex", new QueryableIndexSegment("qIndex", queryableIndexes.get(0)));
    List<Row> results = GroupByTypeInterfaceBenchmark.runQuery(factory, runner, longFloatQuery);
    for (Row result : results) {
        blackhole.consume(result);
    }
}
Also used: QueryableIndexSegment (io.druid.segment.QueryableIndexSegment), InputRow (io.druid.data.input.InputRow), Row (io.druid.data.input.Row), BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode), Benchmark (org.openjdk.jmh.annotations.Benchmark), OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit)
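
As a usage note, a JMH benchmark like the one above is typically launched through the JMH Runner API. The following is a minimal sketch, assuming GroupByTypeInterfaceBenchmark is on the classpath and JMH's annotation processing has generated the benchmark harness; the launcher class name, the single fork, and the include pattern are illustrative choices, not part of the original example.

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

public class BenchmarkLauncher {

    public static void main(String[] args) throws RunnerException {
        // Match benchmarks by the class's simple name and run them in a single fork.
        Options opt = new OptionsBuilder()
                .include(GroupByTypeInterfaceBenchmark.class.getSimpleName())
                .forks(1)
                .build();
        new Runner(opt).run();
    }
}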

Example 12 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

The class GroupByRowProcessor, method process.

public static Sequence<Row> process(
        final Query queryParam,
        final Sequence<Row> rows,
        final Map<String, ValueType> rowSignature,
        final GroupByQueryConfig config,
        final GroupByQueryResource resource,
        final ObjectMapper spillMapper,
        final String processingTmpDir) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final AggregatorFactory[] aggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
    for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
        aggregatorFactories[i] = query.getAggregatorSpecs().get(i);
    }
    final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
    final List<Interval> queryIntervals = query.getIntervals();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimFilter()));
    final SettableSupplier<Row> rowSupplier = new SettableSupplier<>();
    final RowBasedColumnSelectorFactory columnSelectorFactory = RowBasedColumnSelectorFactory.create(rowSupplier, rowSignature);
    final ValueMatcher filterMatcher = filter == null ? BooleanValueMatcher.of(true) : filter.makeMatcher(columnSelectorFactory);
    final FilteredSequence<Row> filteredSequence = new FilteredSequence<>(rows, new Predicate<Row>() {

        @Override
        public boolean apply(Row input) {
            boolean inInterval = false;
            DateTime rowTime = input.getTimestamp();
            for (Interval queryInterval : queryIntervals) {
                if (queryInterval.contains(rowTime)) {
                    inInterval = true;
                    break;
                }
            }
            if (!inInterval) {
                return false;
            }
            rowSupplier.set(input);
            return filterMatcher.matches();
        }
    });
    return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {

        @Override
        public CloseableGrouperIterator<RowBasedKey, Row> make() {
            // This list holds all closeable objects, which are closed once the returned iterator has been
            // fully consumed or an exception is thrown. The objects are closed in reverse order.
            final List<Closeable> closeOnExit = Lists.newArrayList();
            try {
                final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
                closeOnExit.add(temporaryStorage);
                Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, true, rowSignature, querySpecificConfig, new Supplier<ByteBuffer>() {

                    @Override
                    public ByteBuffer get() {
                        final ResourceHolder<ByteBuffer> mergeBufferHolder = resource.getMergeBuffer();
                        closeOnExit.add(mergeBufferHolder);
                        return mergeBufferHolder.get();
                    }
                }, -1, temporaryStorage, spillMapper, aggregatorFactories);
                final Grouper<RowBasedKey> grouper = pair.lhs;
                final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
                closeOnExit.add(grouper);
                final Grouper<RowBasedKey> retVal = filteredSequence.accumulate(grouper, accumulator);
                if (retVal != grouper) {
                    throw GroupByQueryHelper.throwAccumulationResourceLimitExceededException();
                }
                return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {

                    @Override
                    public void close() throws IOException {
                        for (Closeable closeable : Lists.reverse(closeOnExit)) {
                            CloseQuietly.close(closeable);
                        }
                    }
                });
            } catch (Throwable e) {
                // Exception caught while setting up the iterator; release resources.
                for (Closeable closeable : Lists.reverse(closeOnExit)) {
                    CloseQuietly.close(closeable);
                }
                throw e;
            }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
            iterFromMake.close();
        }
    });
}
Also used: Accumulator (io.druid.java.util.common.guava.Accumulator), Closeable (java.io.Closeable), RowBasedColumnSelectorFactory (io.druid.query.groupby.RowBasedColumnSelectorFactory), DateTime (org.joda.time.DateTime), GroupByQuery (io.druid.query.groupby.GroupByQuery), List (java.util.List), Supplier (com.google.common.base.Supplier), SettableSupplier (io.druid.common.guava.SettableSupplier), Pair (io.druid.java.util.common.Pair), BooleanValueMatcher (io.druid.segment.filter.BooleanValueMatcher), ValueMatcher (io.druid.query.filter.ValueMatcher), GroupByQueryConfig (io.druid.query.groupby.GroupByQueryConfig), FilteredSequence (io.druid.java.util.common.guava.FilteredSequence), RowBasedKey (io.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), ByteBuffer (java.nio.ByteBuffer), BaseSequence (io.druid.java.util.common.guava.BaseSequence), Filter (io.druid.query.filter.Filter), Row (io.druid.data.input.Row), File (java.io.File), Interval (org.joda.time.Interval)
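
The predicate in the example above does two things: it drops rows whose timestamp falls outside every query interval, and it routes surviving rows through the value matcher. The interval check can be illustrated in isolation. Here is a minimal, self-contained sketch using Joda-Time directly; the class name, method name, and interval values are made up for illustration.

import java.util.Arrays;
import java.util.List;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class IntervalFilterSketch {

    // Returns true if the timestamp falls inside any of the query intervals,
    // mirroring the short-circuiting loop in the predicate above.
    static boolean inAnyInterval(DateTime rowTime, List<Interval> queryIntervals) {
        for (Interval queryInterval : queryIntervals) {
            if (queryInterval.contains(rowTime)) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        List<Interval> intervals = Arrays.asList(
                Interval.parse("2017-01-01T00:00:00Z/2017-01-02T00:00:00Z"),
                Interval.parse("2017-02-01T00:00:00Z/2017-02-02T00:00:00Z"));
        System.out.println(inAnyInterval(DateTime.parse("2017-01-01T12:00:00Z"), intervals)); // true
        System.out.println(inAnyInterval(DateTime.parse("2017-03-01T00:00:00Z"), intervals)); // false
    }
}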

Example 13 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

The class GroupByQueryQueryToolChest, method getCacheStrategy.

@Override
public CacheStrategy<Row, Object, GroupByQuery> getCacheStrategy(final GroupByQuery query) {
    return new CacheStrategy<Row, Object, GroupByQuery>() {

        private static final byte CACHE_STRATEGY_VERSION = 0x1;

        private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();

        private final List<DimensionSpec> dims = query.getDimensions();

        @Override
        public boolean isCacheable(GroupByQuery query, boolean willMergeRunners) {
            return strategySelector.strategize(query).isCacheable(willMergeRunners);
        }

        @Override
        public byte[] computeCacheKey(GroupByQuery query) {
            return new CacheKeyBuilder(GROUPBY_QUERY)
                    .appendByte(CACHE_STRATEGY_VERSION)
                    .appendCacheable(query.getGranularity())
                    .appendCacheable(query.getDimFilter())
                    .appendCacheablesIgnoringOrder(query.getAggregatorSpecs())
                    .appendCacheablesIgnoringOrder(query.getDimensions())
                    .appendCacheable(query.getVirtualColumns())
                    .build();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Row, Object> prepareForCache() {
            return new Function<Row, Object>() {

                @Override
                public Object apply(Row input) {
                    if (input instanceof MapBasedRow) {
                        final MapBasedRow row = (MapBasedRow) input;
                        final List<Object> retVal = Lists.newArrayListWithCapacity(1 + dims.size() + aggs.size());
                        retVal.add(row.getTimestamp().getMillis());
                        Map<String, Object> event = row.getEvent();
                        for (DimensionSpec dim : dims) {
                            retVal.add(event.get(dim.getOutputName()));
                        }
                        for (AggregatorFactory agg : aggs) {
                            retVal.add(event.get(agg.getName()));
                        }
                        return retVal;
                    }
                    throw new ISE("Don't know how to cache input rows of type[%s]", input.getClass());
                }
            };
        }

        @Override
        public Function<Object, Row> pullFromCache() {
            return new Function<Object, Row>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Row apply(Object input) {
                    Iterator<Object> results = ((List<Object>) input).iterator();
                    DateTime timestamp = granularity.toDateTime(((Number) results.next()).longValue());
                    Map<String, Object> event = Maps.newLinkedHashMap();
                    Iterator<DimensionSpec> dimsIter = dims.iterator();
                    while (dimsIter.hasNext() && results.hasNext()) {
                        final DimensionSpec factory = dimsIter.next();
                        event.put(factory.getOutputName(), results.next());
                    }
                    Iterator<AggregatorFactory> aggsIter = aggs.iterator();
                    while (aggsIter.hasNext() && results.hasNext()) {
                        final AggregatorFactory factory = aggsIter.next();
                        event.put(factory.getName(), factory.deserialize(results.next()));
                    }
                    if (dimsIter.hasNext() || aggsIter.hasNext() || results.hasNext()) {
                        throw new ISE("Found left over objects while reading from cache!! dimsIter[%s] aggsIter[%s] results[%s]", dimsIter.hasNext(), aggsIter.hasNext(), results.hasNext());
                    }
                    return new MapBasedRow(timestamp, event);
                }
            };
        }
    };
}
Also used: DimensionSpec (io.druid.query.dimension.DimensionSpec), DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec), CacheKeyBuilder (io.druid.query.cache.CacheKeyBuilder), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), Granularity (io.druid.java.util.common.granularity.Granularity), DateTime (org.joda.time.DateTime), MapBasedRow (io.druid.data.input.MapBasedRow), Function (com.google.common.base.Function), ArrayList (java.util.ArrayList), List (java.util.List), ISE (io.druid.java.util.common.ISE), Row (io.druid.data.input.Row), CacheStrategy (io.druid.query.CacheStrategy)
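
The cache strategy above stores each row positionally: the timestamp first, then dimension values in dimension order, then aggregator values in aggregator order, and pullFromCache reads them back in exactly the same order. The sketch below reproduces that round trip with plain collections and made-up field names and timestamp; the aggregator deserialization and granularity bucketing done by the real code are deliberately omitted.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class RowCacheRoundTripSketch {

    public static void main(String[] args) {
        List<String> dims = Arrays.asList("country", "device");
        List<String> aggs = Arrays.asList("rows", "revenue");

        Map<String, Object> event = new LinkedHashMap<>();
        event.put("country", "US");
        event.put("device", "phone");
        event.put("rows", 10L);
        event.put("revenue", 42.5);

        // prepareForCache: flatten to [timestampMillis, dim values..., agg values...].
        List<Object> cached = new ArrayList<>(1 + dims.size() + aggs.size());
        cached.add(1485907200000L);
        for (String dim : dims) {
            cached.add(event.get(dim));
        }
        for (String agg : aggs) {
            cached.add(event.get(agg));
        }

        // pullFromCache: rebuild the event by reading values back in the same order.
        Iterator<Object> results = cached.iterator();
        long timestampMillis = ((Number) results.next()).longValue();
        Map<String, Object> rebuilt = new LinkedHashMap<>();
        for (String dim : dims) {
            rebuilt.put(dim, results.next());
        }
        for (String agg : aggs) {
            rebuilt.put(agg, results.next());
        }
        System.out.println(timestampMillis + " " + rebuilt);
    }
}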

Example 14 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

The class GroupByQueryQueryToolChest, method makePreComputeManipulatorFn.

@Override
public Function<Row, Row> makePreComputeManipulatorFn(final GroupByQuery query, final MetricManipulationFn fn) {
    if (MetricManipulatorFns.identity().equals(fn)) {
        return Functions.identity();
    }
    return new Function<Row, Row>() {

        @Override
        public Row apply(Row input) {
            if (input instanceof MapBasedRow) {
                final MapBasedRow inputRow = (MapBasedRow) input;
                final Map<String, Object> values = Maps.newHashMap(inputRow.getEvent());
                for (AggregatorFactory agg : query.getAggregatorSpecs()) {
                    values.put(agg.getName(), fn.manipulate(agg, inputRow.getEvent().get(agg.getName())));
                }
                return new MapBasedRow(inputRow.getTimestamp(), values);
            }
            return input;
        }
    };
}
Also used: MapBasedRow (io.druid.data.input.MapBasedRow), Function (com.google.common.base.Function), Row (io.druid.data.input.Row), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)
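
The manipulator is applied to every aggregated value through the supplied MetricManipulationFn. As a sketch of what such a function can look like, here is one that finalizes each intermediate aggregate, in the spirit of Druid's finalizing manipulator; treat the exact interface shape and the finalizeComputation call as assumptions to verify against your Druid version.

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.MetricManipulationFn;

public class FinalizingManipulatorSketch {

    // Assumed shape of MetricManipulationFn: a single manipulate(factory, value) method.
    static MetricManipulationFn finalizingFn() {
        return new MetricManipulationFn() {
            @Override
            public Object manipulate(AggregatorFactory factory, Object object) {
                // Let the factory convert its intermediate representation into the final value.
                return factory.finalizeComputation(object);
            }
        };
    }
}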

Example 15 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

The class GroupByQueryQueryToolChest, method makePostComputeManipulatorFn.

@Override
public Function<Row, Row> makePostComputeManipulatorFn(final GroupByQuery query, final MetricManipulationFn fn) {
    final Set<String> optimizedDims = ImmutableSet.copyOf(Iterables.transform(extractionsToRewrite(query), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec input) {
            return input.getOutputName();
        }
    }));
    final Function<Row, Row> preCompute = makePreComputeManipulatorFn(query, fn);
    if (optimizedDims.isEmpty()) {
        return preCompute;
    }
    // If we have optimizations that can be done at this level, we apply them here
    final Map<String, ExtractionFn> extractionFnMap = new HashMap<>();
    for (DimensionSpec dimensionSpec : query.getDimensions()) {
        final String dimension = dimensionSpec.getOutputName();
        if (optimizedDims.contains(dimension)) {
            extractionFnMap.put(dimension, dimensionSpec.getExtractionFn());
        }
    }
    return new Function<Row, Row>() {

        @Nullable
        @Override
        public Row apply(Row input) {
            Row preRow = preCompute.apply(input);
            if (preRow instanceof MapBasedRow) {
                MapBasedRow preMapRow = (MapBasedRow) preRow;
                Map<String, Object> event = Maps.newHashMap(preMapRow.getEvent());
                for (String dim : optimizedDims) {
                    final Object eventVal = event.get(dim);
                    event.put(dim, extractionFnMap.get(dim).apply(eventVal));
                }
                return new MapBasedRow(preMapRow.getTimestamp(), event);
            } else {
                return preRow;
            }
        }
    };
}
Also used: MapBasedRow (io.druid.data.input.MapBasedRow), Function (com.google.common.base.Function), DimensionSpec (io.druid.query.dimension.DimensionSpec), DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec), ExtractionFn (io.druid.query.extraction.ExtractionFn), HashMap (java.util.HashMap), Row (io.druid.data.input.Row)
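
The post-compute step defers dimension extraction functions to the merged results: instead of running an ExtractionFn per input row during grouping, it rewrites the already-grouped dimension values once. The sketch below shows that rewrite with java.util.function.Function standing in for ExtractionFn; the dimension names and the uppercasing function are invented for illustration.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Function;

public class PostAggregationExtractionSketch {

    public static void main(String[] args) {
        // Stand-ins for the extractionFnMap built in the example above.
        Map<String, Function<Object, String>> extractionFnMap = new LinkedHashMap<>();
        extractionFnMap.put("country", value -> String.valueOf(value).toUpperCase());

        Map<String, Object> event = new LinkedHashMap<>();
        event.put("country", "us");
        event.put("rows", 10L);

        // Rewrite only the optimized dimensions; all other event fields pass through untouched.
        for (Map.Entry<String, Function<Object, String>> entry : extractionFnMap.entrySet()) {
            event.put(entry.getKey(), entry.getValue().apply(event.get(entry.getKey())));
        }
        System.out.println(event); // {country=US, rows=10}
    }
}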

Aggregations

Row (io.druid.data.input.Row): 167
Test (org.junit.Test): 123
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 105
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 91
DimensionSpec (io.druid.query.dimension.DimensionSpec): 64
ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec): 59
ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec): 56
RegexFilteredDimensionSpec (io.druid.query.dimension.RegexFilteredDimensionSpec): 56
InputRow (io.druid.data.input.InputRow): 28
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 24
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 24
SelectorDimFilter (io.druid.query.filter.SelectorDimFilter): 22
LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn): 22
Benchmark (org.openjdk.jmh.annotations.Benchmark): 21
BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode): 21
OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit): 21
GroupByQuery (io.druid.query.groupby.GroupByQuery): 20
MapBasedRow (io.druid.data.input.MapBasedRow): 19
OrderByColumnSpec (io.druid.query.groupby.orderby.OrderByColumnSpec): 19
QueryableIndexSegment (io.druid.segment.QueryableIndexSegment): 19