
Example 6 with SelectQuery

Use of io.druid.query.select.SelectQuery in project hive by apache.

From class DruidQueryBasedInputFormat, method getInputSplits.

@SuppressWarnings("deprecation")
private HiveDruidSplit[] getInputSplits(Configuration conf) throws IOException {
    String address = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
    if (StringUtils.isEmpty(address)) {
        throw new IOException("Druid broker address not specified in configuration");
    }
    String druidQuery = StringEscapeUtils.unescapeJava(conf.get(Constants.DRUID_QUERY_JSON));
    String druidQueryType;
    if (StringUtils.isEmpty(druidQuery)) {
        // No Druid query in the configuration; fall back to a full select-star query
        if (LOG.isWarnEnabled()) {
            LOG.warn("Druid query is empty; creating Select query");
        }
        String dataSource = conf.get(Constants.DRUID_DATA_SOURCE);
        if (dataSource == null) {
            throw new IOException("Druid data source cannot be empty");
        }
        druidQuery = createSelectStarQuery(dataSource);
        druidQueryType = Query.SELECT;
    } else {
        druidQueryType = conf.get(Constants.DRUID_QUERY_TYPE);
        if (druidQueryType == null) {
            throw new IOException("Druid query type not recognized");
        }
    }
    // Hive depends on FileSplits, so pull the input paths from the job context.
    Job job = new Job(conf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] paths = FileInputFormat.getInputPaths(jobContext);
    // Then, create splits with the Druid queries.
    switch(druidQueryType) {
        case Query.TIMESERIES:
        case Query.TOPN:
        case Query.GROUP_BY:
            return new HiveDruidSplit[] { new HiveDruidSplit(deserializeSerialize(druidQuery), paths[0], new String[] { address }) };
        case Query.SELECT:
            SelectQuery selectQuery = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery, SelectQuery.class);
            boolean distributed = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_DRUID_SELECT_DISTRIBUTE);
            if (distributed) {
                return distributeSelectQuery(conf, address, selectQuery, paths[0]);
            } else {
                return splitSelectQuery(conf, address, selectQuery, paths[0]);
            }
        default:
            throw new IOException("Druid query type not recognized");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SelectQuery(io.druid.query.select.SelectQuery) IOException(java.io.IOException) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job)
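
The fallback branch above calls createSelectStarQuery, which is not shown. Below is a minimal sketch of what such a helper could look like, not the actual Hive implementation: the constructor shape follows Example 8 further down, and the TableDataSource, the open-ended interval, and the page threshold of 1000 are illustrative assumptions.

private static String createSelectStarQuery(String dataSource) throws IOException {
    // Empty dimension and metric lists mean "all columns" in a Druid Select query.
    SelectQuery query = new SelectQuery(
        new TableDataSource(dataSource),
        new MultipleIntervalSegmentSpec(ImmutableList.of(Interval.parse("1900-01-01/3000-01-01"))), // assumed interval
        false, // ascending time order
        null, // no filter
        Granularities.ALL,
        ImmutableList.<DimensionSpec>of(),
        ImmutableList.<String>of(),
        null,
        new PagingSpec(null, 1000), // assumed page threshold
        null);
    return DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query);
}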

Example 7 with SelectQuery

Use of io.druid.query.select.SelectQuery in project druid by druid-io.

From class QueryMaker, method executeSelect.

private Sequence<Object[]> executeSelect(final DruidQueryBuilder queryBuilder, final SelectQuery baseQuery) {
    Preconditions.checkState(queryBuilder.getGrouping() == null, "grouping must be null");
    final List<RelDataTypeField> fieldList = queryBuilder.getRowType().getFieldList();
    final Integer limit = queryBuilder.getLimitSpec() != null ? queryBuilder.getLimitSpec().getLimit() : null;
    // Select is paginated; we need to make multiple queries to read every page.
    final Sequence<Sequence<Object[]>> sequenceOfSequences = Sequences.simple(new Iterable<Sequence<Object[]>>() {

        @Override
        public Iterator<Sequence<Object[]>> iterator() {
            final AtomicBoolean morePages = new AtomicBoolean(true);
            final AtomicReference<Map<String, Integer>> pagingIdentifiers = new AtomicReference<>();
            final AtomicLong rowsRead = new AtomicLong();
            // Each Sequence<Object[]> is one page.
            return new Iterator<Sequence<Object[]>>() {

                @Override
                public boolean hasNext() {
                    return morePages.get();
                }

                @Override
                public Sequence<Object[]> next() {
                    final SelectQuery queryWithPagination = baseQuery.withPagingSpec(new PagingSpec(pagingIdentifiers.get(), plannerContext.getPlannerConfig().getSelectThreshold(), true));
                    Hook.QUERY_PLAN.run(queryWithPagination);
                    morePages.set(false);
                    final AtomicBoolean gotResult = new AtomicBoolean();
                    return Sequences.concat(Sequences.map(queryWithPagination.run(walker, Maps.<String, Object>newHashMap()), new Function<Result<SelectResultValue>, Sequence<Object[]>>() {

                        @Override
                        public Sequence<Object[]> apply(final Result<SelectResultValue> result) {
                            if (!gotResult.compareAndSet(false, true)) {
                                throw new ISE("WTF?! Expected single result from Select query but got multiple!");
                            }
                            pagingIdentifiers.set(result.getValue().getPagingIdentifiers());
                            final List<Object[]> retVals = new ArrayList<>();
                            for (EventHolder holder : result.getValue().getEvents()) {
                                morePages.set(true);
                                final Map<String, Object> map = holder.getEvent();
                                final Object[] retVal = new Object[fieldList.size()];
                                for (RelDataTypeField field : fieldList) {
                                    final String outputName = queryBuilder.getRowOrder().get(field.getIndex());
                                    if (outputName.equals(Column.TIME_COLUMN_NAME)) {
                                        retVal[field.getIndex()] = coerce(holder.getTimestamp().getMillis(), field.getType().getSqlTypeName());
                                    } else {
                                        retVal[field.getIndex()] = coerce(map.get(outputName), field.getType().getSqlTypeName());
                                    }
                                }
                                if (limit == null || rowsRead.incrementAndGet() <= limit) {
                                    retVals.add(retVal);
                                } else {
                                    morePages.set(false);
                                    return Sequences.simple(retVals);
                                }
                            }
                            return Sequences.simple(retVals);
                        }
                    }));
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    });
    return Sequences.concat(sequenceOfSequences);
}
Also used : NlsString(org.apache.calcite.util.NlsString) Result(io.druid.query.Result) Iterator(java.util.Iterator) ISE(io.druid.java.util.common.ISE) ArrayList(java.util.ArrayList) List(java.util.List) SelectResultValue(io.druid.query.select.SelectResultValue) AtomicReference(java.util.concurrent.atomic.AtomicReference) Sequence(io.druid.java.util.common.guava.Sequence) SelectQuery(io.druid.query.select.SelectQuery) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) AtomicLong(java.util.concurrent.atomic.AtomicLong) PagingSpec(io.druid.query.select.PagingSpec) EventHolder(io.druid.query.select.EventHolder) Map(java.util.Map)
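
Stripped of the Sequence plumbing, the pagination handshake this iterator implements is simple: feed each page's pagingIdentifiers back into the next PagingSpec (with fromNext set to true, as above) and stop as soon as a page comes back empty. A sketch of that loop, where runOnePage is a hypothetical helper that executes one query and returns its single Result:

Map<String, Integer> pagingIdentifiers = null; // null requests the first page
while (true) {
    SelectQuery page = baseQuery.withPagingSpec(new PagingSpec(pagingIdentifiers, threshold, true));
    Result<SelectResultValue> result = runOnePage(page); // hypothetical: run the query, take its single result
    List<EventHolder> events = result.getValue().getEvents();
    if (events.isEmpty()) {
        break; // an empty page means the Select query is exhausted
    }
    pagingIdentifiers = result.getValue().getPagingIdentifiers();
    for (EventHolder holder : events) {
        // consume holder.getEvent() ...
    }
}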

Example 8 with SelectQuery

Use of io.druid.query.select.SelectQuery in project druid by druid-io.

From class DruidQueryBuilder, method toSelectQuery.

/**
   * Return this query as a Select query, or null if this query is not compatible with Select.
   *
   * @param dataSource         data source to query
   * @param sourceRowSignature row signature of the dataSource
   * @param context            query context
   *
   * @return query or null
   */
public SelectQuery toSelectQuery(final DataSource dataSource, final RowSignature sourceRowSignature, final Map<String, Object> context) {
    if (grouping != null) {
        return null;
    }
    final Filtration filtration = Filtration.create(filter).optimize(sourceRowSignature);
    final boolean descending;
    if (limitSpec != null) {
        // Safe to assume limitSpec has zero or one entry; DruidSelectSortRule wouldn't push in anything else.
        if (limitSpec.getColumns().size() > 0) {
            final OrderByColumnSpec orderBy = Iterables.getOnlyElement(limitSpec.getColumns());
            if (!orderBy.getDimension().equals(Column.TIME_COLUMN_NAME)) {
                throw new ISE("WTF?! Got select with non-time orderBy[%s]", orderBy);
            }
            descending = orderBy.getDirection() == OrderByColumnSpec.Direction.DESCENDING;
        } else {
            descending = false;
        }
    } else {
        descending = false;
    }
    return new SelectQuery(
        dataSource,
        filtration.getQuerySegmentSpec(),
        descending,
        filtration.getDimFilter(),
        Granularities.ALL,
        selectProjection != null ? selectProjection.getDimensions() : ImmutableList.<DimensionSpec>of(),
        selectProjection != null ? selectProjection.getMetrics() : ImmutableList.<String>of(),
        null,
        new PagingSpec(null, 0), /* dummy -- will be replaced */
        context
    );
}
Also used : OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) SelectQuery(io.druid.query.select.SelectQuery) DimensionSpec(io.druid.query.dimension.DimensionSpec) Filtration(io.druid.sql.calcite.filtration.Filtration) PagingSpec(io.druid.query.select.PagingSpec) ISE(io.druid.java.util.common.ISE)
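
A caller is expected to swap out the dummy PagingSpec before the query runs; that is exactly what executeSelect in Example 7 does. A sketch of the handoff, where rowSignature, selectThreshold, and walker stand in for values the planner context would supply:

SelectQuery base = queryBuilder.toSelectQuery(dataSource, rowSignature, context);
if (base != null) {
    // Replace the dummy PagingSpec with a real one before execution.
    SelectQuery firstPage = base.withPagingSpec(new PagingSpec(null, selectThreshold, true));
    Sequence<Result<SelectResultValue>> results = firstPage.run(walker, Maps.<String, Object>newHashMap());
}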

Example 9 with SelectQuery

Use of io.druid.query.select.SelectQuery in project druid by druid-io.

From class SelectBenchmark, method queryIncrementalIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryIncrementalIndex(Blackhole blackhole) throws Exception {
    SelectQuery queryCopy = query.withPagingSpec(PagingSpec.newSpec(pagingThreshold));
    String segmentId = "incIndex";
    QueryRunner<Row> runner = QueryBenchmarkUtil.makeQueryRunner(factory, segmentId, new IncrementalIndexSegment(incIndexes.get(0), segmentId));
    boolean done = false;
    while (!done) {
        List<Result<SelectResultValue>> results = SelectBenchmark.runQuery(factory, runner, queryCopy);
        SelectResultValue result = results.get(0).getValue();
        if (result.getEvents().size() == 0) {
            done = true;
        } else {
            for (EventHolder eh : result.getEvents()) {
                blackhole.consume(eh);
            }
            queryCopy = incrementQueryPagination(queryCopy, result);
        }
    }
}
Also used : SelectQuery(io.druid.query.select.SelectQuery) SelectResultValue(io.druid.query.select.SelectResultValue) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) InputRow(io.druid.data.input.InputRow) Row(io.druid.data.input.Row) EventHolder(io.druid.query.select.EventHolder) Result(io.druid.query.Result) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
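
The incrementQueryPagination helper is not reproduced in this example. A plausible sketch, assuming the benchmark advances each per-segment paging offset by one so the next page starts after the rows already consumed (a real implementation could instead rely on PagingSpec's fromNext flag, as Example 7 does):

private SelectQuery incrementQueryPagination(SelectQuery query, SelectResultValue prevResult) {
    // Carry the previous page's paging identifiers forward, bumping each offset past the last row read.
    Map<String, Integer> nextIdentifiers = new HashMap<>();
    for (Map.Entry<String, Integer> entry : prevResult.getPagingIdentifiers().entrySet()) {
        nextIdentifiers.put(entry.getKey(), entry.getValue() + 1);
    }
    return query.withPagingSpec(new PagingSpec(nextIdentifiers, pagingThreshold));
}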

Example 10 with SelectQuery

Use of io.druid.query.select.SelectQuery in project druid by druid-io.

From class SelectBenchmark, method queryQueryableIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryQueryableIndex(Blackhole blackhole) throws Exception {
    SelectQuery queryCopy = query.withPagingSpec(PagingSpec.newSpec(pagingThreshold));
    String segmentId = "qIndex";
    QueryRunner<Result<SelectResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(factory, segmentId, new QueryableIndexSegment(segmentId, qIndexes.get(0)));
    boolean done = false;
    while (!done) {
        List<Result<SelectResultValue>> results = SelectBenchmark.runQuery(factory, runner, queryCopy);
        SelectResultValue result = results.get(0).getValue();
        if (result.getEvents().size() == 0) {
            done = true;
        } else {
            for (EventHolder eh : result.getEvents()) {
                blackhole.consume(eh);
            }
            queryCopy = incrementQueryPagination(queryCopy, result);
        }
    }
}
Also used : SelectQuery(io.druid.query.select.SelectQuery) QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) SelectResultValue(io.druid.query.select.SelectResultValue) EventHolder(io.druid.query.select.EventHolder) Result(io.druid.query.Result) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
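
Both benchmarks delegate to a runQuery helper that is not shown here. A sketch of what it likely does, following the common Druid test pattern of decorating the runner with the factory's toolchest and then forcing the lazy Sequence; the exact decoration in the benchmark source may differ:

private static <T> List<T> runQuery(QueryRunnerFactory factory, QueryRunner runner, Query<T> query) {
    QueryToolChest toolChest = factory.getToolchest();
    QueryRunner<T> theRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(toolChest.preMergeQueryDecoration(runner)),
        toolChest);
    // Sequences are lazy; toList forces execution and collects every row of the result.
    Sequence<T> queryResult = theRunner.run(query, Maps.<String, Object>newHashMap());
    return Sequences.toList(queryResult, Lists.<T>newArrayList());
}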

Aggregations

SelectQuery (io.druid.query.select.SelectQuery): 12
Result (io.druid.query.Result): 6
EventHolder (io.druid.query.select.EventHolder): 4
SelectResultValue (io.druid.query.select.SelectResultValue): 4
IOException (java.io.IOException): 4
ArrayList (java.util.ArrayList): 4
ISE (io.druid.java.util.common.ISE): 3
GroupByQuery (io.druid.query.groupby.GroupByQuery): 3
TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery): 3
TopNQuery (io.druid.query.topn.TopNQuery): 3
List (java.util.List): 3
Map (java.util.Map): 3
Period (org.joda.time.Period): 3
JsonParseException (com.fasterxml.jackson.core.JsonParseException): 2
TypeReference (com.fasterxml.jackson.core.type.TypeReference): 2
JsonMappingException (com.fasterxml.jackson.databind.JsonMappingException): 2
Function (com.google.common.base.Function): 2
ImmutableMap (com.google.common.collect.ImmutableMap): 2
Lifecycle (com.metamx.common.lifecycle.Lifecycle): 2
HttpClient (com.metamx.http.client.HttpClient): 2