
Example 6 with ScanQuery

Use of org.apache.druid.query.scan.ScanQuery in project hive by apache.

The class DruidStorageHandlerUtils, method createScanAllQuery.

public static String createScanAllQuery(String dataSourceName, List<String> columns) throws JsonProcessingException {
    final Druids.ScanQueryBuilder scanQueryBuilder = Druids.newScanQueryBuilder();
    // Scan the entire datasource: a single interval covering all time.
    final List<Interval> intervals = Collections.singletonList(DEFAULT_INTERVAL);
    ScanQuery scanQuery = scanQueryBuilder.dataSource(dataSourceName)
        .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
        .intervals(new MultipleIntervalSegmentSpec(intervals))
        .columns(columns)
        .build();
    // Serialize the query so it can be handed to the Druid-based input format as JSON.
    return JSON_MAPPER.writeValueAsString(scanQuery);
}
Also used : Druids(org.apache.druid.query.Druids) ScanQuery(org.apache.druid.query.scan.ScanQuery) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Interval(org.joda.time.Interval)
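For orientation, here is a minimal stand-alone sketch of the same pattern: build a scan-all query with the builder and serialize it to JSON. The mapper, datasource name, interval and columns below are illustrative assumptions; the Hive utility above uses its own JSON_MAPPER and DEFAULT_INTERVAL.

import java.util.Arrays;
import java.util.Collections;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.query.Druids;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.joda.time.Interval;

public class ScanAllQueryExample {
    public static void main(String[] args) throws Exception {
        // Build a Scan query over one "all time" interval, mirroring createScanAllQuery.
        ScanQuery scanAll = Druids.newScanQueryBuilder()
            // illustrative datasource and columns
            .dataSource("wikipedia")
            .intervals(new MultipleIntervalSegmentSpec(
                Collections.singletonList(Interval.parse("1900-01-01/3000-01-01"))))
            .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
            .columns(Arrays.asList("__time", "page", "added"))
            .build();
        // Serialize to the JSON string that would be sent to the Druid broker.
        System.out.println(new DefaultObjectMapper().writeValueAsString(scanAll));
    }
}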

Example 7 with ScanQuery

Use of org.apache.druid.query.scan.ScanQuery in project hive by apache.

The class DruidQueryBasedInputFormat, method distributeScanQuery.

/* New method that distributes the Scan query by creating splits containing
 * information about different Druid nodes that have the data for the given
 * query. */
private static HiveDruidSplit[] distributeScanQuery(String address, ScanQuery query, Path dummyPath) throws IOException {
    // If it has a limit, we use it and we do not distribute the query
    final boolean isFetch = query.getScanRowsLimit() < Long.MAX_VALUE;
    if (isFetch) {
        return new HiveDruidSplit[] {
            new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query),
                dummyPath, new String[] { address }) };
    }
    final List<LocatedSegmentDescriptor> segmentDescriptors = fetchLocatedSegmentDescriptors(address, query);
    // Create one input split for each segment
    final int numSplits = segmentDescriptors.size();
    final HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
    for (int i = 0; i < numSplits; i++) {
        final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
        final String[] hosts = new String[locatedSD.getLocations().size() + 1];
        for (int j = 0; j < locatedSD.getLocations().size(); j++) {
            hosts[j] = locatedSD.getLocations().get(j).getHost();
        }
        // Default to broker if all other hosts fail.
        hosts[locatedSD.getLocations().size()] = address;
        // Create a partial Scan query covering only this segment
        final SegmentDescriptor newSD = new SegmentDescriptor(
            locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
        final Query partialQuery = query.withQuerySegmentSpec(
            new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
        splits[i] = new HiveDruidSplit(
            DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery), dummyPath, hosts);
    }
    return splits;
}
Also used : MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) LocatedSegmentDescriptor(org.apache.druid.query.LocatedSegmentDescriptor) BaseQuery(org.apache.druid.query.BaseQuery) ScanQuery(org.apache.druid.query.scan.ScanQuery) Query(org.apache.druid.query.Query) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor)
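The heart of the distribution step is rewriting the query's segment spec so that each split targets exactly one segment. A minimal sketch of that rewrite is below; the interval, version and partition number are illustrative parameters, not values taken from the Hive code.

import com.google.common.collect.Lists;
import org.apache.druid.query.Query;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.MultipleSpecificSegmentSpec;
import org.joda.time.Interval;

public class PerSegmentQueryExample {
    /** Narrow an existing Scan query so that it reads a single segment. */
    static Query<?> narrowToSegment(ScanQuery query, Interval interval, String version, int partitionNumber) {
        final SegmentDescriptor descriptor = new SegmentDescriptor(interval, version, partitionNumber);
        // withQuerySegmentSpec returns a copy of the query whose segment spec names
        // exactly this segment; each Hive split then serializes one such copy.
        return query.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(Lists.newArrayList(descriptor)));
    }
}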

Example 8 with ScanQuery

Use of org.apache.druid.query.scan.ScanQuery in project hive by apache.

The class DruidQueryBasedInputFormat, method getInputSplits.

protected HiveDruidSplit[] getInputSplits(Configuration conf) throws IOException {
    String address = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
    String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
    if (StringUtils.isEmpty(address)) {
        throw new IOException("Druid broker address not specified in configuration");
    }
    String druidQuery = StringEscapeUtils.unescapeJava(conf.get(Constants.DRUID_QUERY_JSON));
    String druidQueryType;
    if (StringUtils.isEmpty(druidQuery)) {
        // Empty, maybe because CBO did not run; we fall back to a
        // full Scan query over the datasource
        LOG.warn("Druid query is empty; creating Scan query");
        String dataSource = conf.get(Constants.DRUID_DATA_SOURCE);
        if (dataSource == null || dataSource.isEmpty()) {
            throw new IOException("Druid data source cannot be empty or null");
        }
        druidQuery = DruidStorageHandlerUtils.createScanAllQuery(dataSource, Utilities.getColumnNames(conf));
        druidQueryType = Query.SCAN;
        conf.set(Constants.DRUID_QUERY_TYPE, druidQueryType);
    } else {
        druidQueryType = conf.get(Constants.DRUID_QUERY_TYPE);
        if (druidQueryType == null) {
            throw new IOException("Druid query type not recognized");
        }
    }
    // Add Hive Query ID to Druid Query
    if (queryId != null) {
        druidQuery = withQueryId(druidQuery, queryId);
    }
    // Hive depends on FileSplits, so obtain the input paths from the job context
    Job job = Job.getInstance(conf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] paths = FileInputFormat.getInputPaths(jobContext);
    // Then, create splits with the Druid queries.
    switch(druidQueryType) {
        case Query.TIMESERIES:
        case Query.TOPN:
        case Query.GROUP_BY:
            return new HiveDruidSplit[] { new HiveDruidSplit(druidQuery, paths[0], new String[] { address }) };
        case Query.SCAN:
            ScanQuery scanQuery = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery, ScanQuery.class);
            return distributeScanQuery(address, scanQuery, paths[0]);
        default:
            throw new IOException("Druid query type not recognized");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ScanQuery(org.apache.druid.query.scan.ScanQuery) IOException(java.io.IOException) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job)
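The withQueryId helper called above is not shown in this example. One plausible shape, sketched here rather than taken from the Hive source, round-trips the JSON through Druid's polymorphic Query model and tags it with the Hive query id; DefaultObjectMapper stands in for DruidStorageHandlerUtils.JSON_MAPPER.

import java.io.IOException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.query.Query;

public class QueryIdExample {
    private static final ObjectMapper MAPPER = new DefaultObjectMapper();

    // Hypothetical helper; the real DruidQueryBasedInputFormat.withQueryId may differ.
    static String withQueryId(String druidQueryJson, String hiveQueryId) throws IOException {
        // Query is polymorphic on "queryType", so Jackson deserializes the concrete
        // subtype (ScanQuery, TimeseriesQuery, ...) behind the Query interface.
        final Query<?> query = MAPPER.readValue(druidQueryJson, Query.class);
        // withId returns a copy of the query carrying the given query id.
        return MAPPER.writeValueAsString(query.withId(hiveQueryId));
    }
}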

Example 9 with ScanQuery

Use of org.apache.druid.query.scan.ScanQuery in project druid by druid-io.

The class AbstractMultiPhaseParallelIndexingTest, method querySegment.

List<ScanResultValue> querySegment(DataSegment dataSegment, List<String> columns, File tempSegmentDir) {
    Segment segment = loadSegment(dataSegment, tempSegmentDir);
    final QueryRunner<ScanResultValue> runner = SCAN_QUERY_RUNNER_FACTORY.createRunner(segment);
    // Scan the single loaded segment for the requested columns and collect every result row.
    final ScanQuery scanQuery = new ScanQuery(
        new TableDataSource("dataSource"),
        new SpecificSegmentSpec(new SegmentDescriptor(dataSegment.getInterval(), dataSegment.getVersion(), dataSegment.getShardSpec().getPartitionNum())),
        null, null, 0, 0, 0, null, null, null, columns, false, null);
    return runner.run(QueryPlus.wrap(scanQuery)).toList();
}
Also used : TableDataSource(org.apache.druid.query.TableDataSource) SpecificSegmentSpec(org.apache.druid.query.spec.SpecificSegmentSpec) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ScanQuery(org.apache.druid.query.scan.ScanQuery) DataSegment(org.apache.druid.timeline.DataSegment) Segment(org.apache.druid.segment.Segment)
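For comparison, the same single-segment scan can be written with the Druids builder used in the Hive examples, avoiding the positional nulls of the raw constructor. A minimal sketch; the column names are illustrative, and the builder's defaults (limit, batch size, ordering) may differ slightly from the explicit zeros and nulls above.

import java.util.Arrays;
import org.apache.druid.query.Druids;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.SpecificSegmentSpec;
import org.joda.time.Interval;

public class BuilderScanQueryExample {
    /** Build a Scan query pinned to a single segment of the given datasource. */
    static ScanQuery scanSingleSegment(String dataSource, Interval interval, String version, int partitionNum) {
        return Druids.newScanQueryBuilder()
            .dataSource(dataSource)
            .intervals(new SpecificSegmentSpec(new SegmentDescriptor(interval, version, partitionNum)))
            // illustrative column list
            .columns(Arrays.asList("__time", "dim1", "met1"))
            .build();
    }
}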

Example 10 with ScanQuery

Use of org.apache.druid.query.scan.ScanQuery in project druid by druid-io.

The class CalciteInsertDmlTest, method testExplainInsertFromExternal.

@Test
public void testExplainInsertFromExternal() throws Exception {
    // Skip vectorization since otherwise the "context" will change for each subtest.
    skipVectorize();
    final ScanQuery expectedQuery = newScanQueryBuilder()
        .dataSource(externalDataSource)
        .intervals(querySegmentSpec(Filtration.eternity()))
        .columns("x", "y", "z")
        .context(queryJsonMapper.readValue("{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlInsertSegmentGranularity\":\"{\\\"type\\\":\\\"all\\\"}\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}", JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT))
        .build();
    final String expectedExplanation = "DruidQueryRel(query=[" + queryJsonMapper.writeValueAsString(expectedQuery) + "], signature=[{x:STRING, y:STRING, z:LONG}])\n";
    // Use testQuery for EXPLAIN (not testInsertQuery).
    testQuery(
        new PlannerConfig(),
        StringUtils.format("EXPLAIN PLAN FOR INSERT INTO dst SELECT * FROM %s PARTITIONED BY ALL TIME", externSql(externalDataSource)),
        CalciteTests.SUPER_USER_AUTH_RESULT,
        ImmutableList.of(),
        ImmutableList.of(new Object[] { expectedExplanation, "[{\"name\":\"EXTERNAL\",\"type\":\"EXTERNAL\"},{\"name\":\"dst\",\"type\":\"DATASOURCE\"}]" }));
    // Not using testInsertQuery, so must set didTest manually to satisfy the check in tearDown.
    didTest = true;
}
Also used : PlannerConfig(org.apache.druid.sql.calcite.planner.PlannerConfig) ScanQuery(org.apache.druid.query.scan.ScanQuery) Test(org.junit.Test)
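A side note on the context deserialization above: JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT is, as far as I can tell, a shared TypeReference<Map<String, Object>>. A stand-alone equivalent with plain Jackson and an illustrative context JSON:

import java.util.Map;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

public class ContextJsonExample {
    public static void main(String[] args) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        // Read a query context JSON into a Map keyed by context parameter name.
        final Map<String, Object> context = mapper.readValue(
            "{\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\"}",
            new TypeReference<Map<String, Object>>() {});
        System.out.println(context.get("sqlQueryId"));  // prints: dummy
    }
}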

Aggregations

ScanQuery (org.apache.druid.query.scan.ScanQuery) 13
Test (org.junit.Test) 7
QueryDataSource (org.apache.druid.query.QueryDataSource) 5
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory) 3
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec) 3
ImmutableList (com.google.common.collect.ImmutableList) 2
List (java.util.List) 2
SegmentDescriptor (org.apache.druid.query.SegmentDescriptor) 2
TableDataSource (org.apache.druid.query.TableDataSource) 2
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery) 2
ScanResultValue (org.apache.druid.query.scan.ScanResultValue) 2
MultipleSpecificSegmentSpec (org.apache.druid.query.spec.MultipleSpecificSegmentSpec) 2
TimeseriesQuery (org.apache.druid.query.timeseries.TimeseriesQuery) 2
TopNQuery (org.apache.druid.query.topn.TopNQuery) 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 1
Preconditions (com.google.common.base.Preconditions) 1
ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap) 1
Iterables (com.google.common.collect.Iterables) 1
Iterators (com.google.common.collect.Iterators) 1
Ints (com.google.common.primitives.Ints) 1