Use of io.druid.query.groupby.GroupByQuery in project hive by apache.
The class DruidSerDe, method initialize.
@Override
public void initialize(Configuration configuration, Properties properties) throws SerDeException {
  // Init connection properties
  numConnection = HiveConf.getIntVar(configuration, HiveConf.ConfVars.HIVE_DRUID_NUM_HTTP_CONNECTION);
  readTimeout = new Period(HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_HTTP_READ_TIMEOUT));
  final List<String> columnNames = new ArrayList<>();
  final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
  List<ObjectInspector> inspectors = new ArrayList<>();
  // Druid query
  String druidQuery = properties.getProperty(Constants.DRUID_QUERY_JSON);
  if (druidQuery == null) {
    // No query given. Take the column names/types from the table properties if present;
    // otherwise run a segment metadata query to retrieve all columns present in
    // the data source (dimensions and metrics).
    if (!org.apache.commons.lang3.StringUtils.isEmpty(properties.getProperty(serdeConstants.LIST_COLUMNS))
        && !org.apache.commons.lang3.StringUtils.isEmpty(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES))) {
      columnNames.addAll(Utilities.getColumnNames(properties));
      if (!columnNames.contains(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
        throw new SerDeException("Timestamp column ('" + DruidTable.DEFAULT_TIMESTAMP_COLUMN
            + "') not specified in create table; list of columns is : "
            + properties.getProperty(serdeConstants.LIST_COLUMNS));
      }
      columnTypes.addAll(Lists.transform(Utilities.getColumnTypes(properties), new Function<String, PrimitiveTypeInfo>() {
        @Override
        public PrimitiveTypeInfo apply(String type) {
          return TypeInfoFactory.getPrimitiveTypeInfo(type);
        }
      }));
      inspectors.addAll(Lists.transform(columnTypes, new Function<PrimitiveTypeInfo, ObjectInspector>() {
        @Override
        public ObjectInspector apply(PrimitiveTypeInfo type) {
          return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type);
        }
      }));
      columns = columnNames.toArray(new String[columnNames.size()]);
      types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]);
      inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
    } else {
      String dataSource = properties.getProperty(Constants.DRUID_DATA_SOURCE);
      if (dataSource == null) {
        throw new SerDeException("Druid data source not specified; use " + Constants.DRUID_DATA_SOURCE + " in table properties");
      }
      SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder();
      builder.dataSource(dataSource);
      builder.merge(true);
      builder.analysisTypes();
      SegmentMetadataQuery query = builder.build();
      // Execute query in Druid
      String address = HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
      if (org.apache.commons.lang3.StringUtils.isEmpty(address)) {
        throw new SerDeException("Druid broker address not specified in configuration");
      }
      // Infer schema
      SegmentAnalysis schemaInfo;
      try {
        schemaInfo = submitMetadataRequest(address, query);
      } catch (IOException e) {
        throw new SerDeException(e);
      }
      for (Entry<String, ColumnAnalysis> columnInfo : schemaInfo.getColumns().entrySet()) {
        if (columnInfo.getKey().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
          // Special handling for timestamp column
          // field name
          columnNames.add(columnInfo.getKey());
          // field type
          PrimitiveTypeInfo type = TypeInfoFactory.timestampTypeInfo;
          columnTypes.add(type);
          inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
          continue;
        }
        // field name
        columnNames.add(columnInfo.getKey());
        // field type
        PrimitiveTypeInfo type = DruidSerDeUtils.convertDruidToHiveType(columnInfo.getValue().getType());
        columnTypes.add(type);
        inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
      }
      columns = columnNames.toArray(new String[columnNames.size()]);
      types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]);
      inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
    }
  } else {
    // Query is specified, we can extract the results schema from the query
    Query<?> query;
    try {
      query = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery, Query.class);
      switch (query.getType()) {
        case Query.TIMESERIES:
          inferSchema((TimeseriesQuery) query, columnNames, columnTypes);
          break;
        case Query.TOPN:
          inferSchema((TopNQuery) query, columnNames, columnTypes);
          break;
        case Query.SELECT:
          String address = HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
          if (org.apache.commons.lang3.StringUtils.isEmpty(address)) {
            throw new SerDeException("Druid broker address not specified in configuration");
          }
          inferSchema((SelectQuery) query, columnNames, columnTypes, address);
          break;
        case Query.GROUP_BY:
          inferSchema((GroupByQuery) query, columnNames, columnTypes);
          break;
        default:
          throw new SerDeException("Not supported Druid query");
      }
    } catch (Exception e) {
      throw new SerDeException(e);
    }
    columns = new String[columnNames.size()];
    types = new PrimitiveTypeInfo[columnNames.size()];
    for (int i = 0; i < columnTypes.size(); ++i) {
      columns[i] = columnNames.get(i);
      types[i] = columnTypes.get(i);
      inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(types[i]));
    }
    inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("DruidSerDe initialized with\n" + "\t columns: " + columnNames + "\n\t types: " + columnTypes);
  }
}
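For reference, a minimal sketch of the query-JSON path above, assuming the same imports as the snippet: a Druid groupBy query serialized as JSON (the shape follows Druid's documented groupBy query format; the data source, dimension, and interval values are made up) is read back through DruidStorageHandlerUtils.JSON_MAPPER and reports Query.GROUP_BY as its type, which is what routes it to the groupBy branch of the switch.
// Hedged sketch (hypothetical helper, not part of DruidSerDe); assumes the same
// imports as the snippet above. Shows the kind of value Constants.DRUID_QUERY_JSON
// is expected to carry and how it comes back as a GroupByQuery.
static void groupByJsonExample() throws IOException {
  String groupByJson =
      "{\"queryType\":\"groupBy\",\"dataSource\":\"wikipedia\","      // made-up data source
      + "\"granularity\":\"all\",\"dimensions\":[\"page\"],"
      + "\"aggregations\":[{\"type\":\"count\",\"name\":\"rows\"}],"
      + "\"intervals\":[\"2011-04-01/2011-04-03\"]}";
  Query<?> parsed = DruidStorageHandlerUtils.JSON_MAPPER.readValue(groupByJson, Query.class);
  // parsed.getType() is Query.GROUP_BY here, so initialize() would take the
  // inferSchema((GroupByQuery) parsed, columnNames, columnTypes) branch.
}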
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
The class RealtimeManagerTest, method testQueryWithSegmentSpec.
@Test(timeout = 10_000L)
public void testQueryWithSegmentSpec() throws IOException, InterruptedException {
  List<Row> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L));
  chiefStartedLatch.await();
  for (QueryRunner runner : QueryRunnerTestHelper.makeQueryRunners((GroupByQueryRunnerFactory) factory)) {
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
        .setGranularity(QueryRunnerTestHelper.dayGran)
        .build();
    plumber.setRunners(ImmutableMap.of(query.getIntervals().get(0), runner));
    plumber2.setRunners(ImmutableMap.of(query.getIntervals().get(0), runner));
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(
        factory,
        realtimeManager3.getQueryRunnerForSegments(
            query,
            ImmutableList.<SegmentDescriptor>of(
                new SegmentDescriptor(new Interval("2011-04-01T00:00:00.000Z/2011-04-03T00:00:00.000Z"), "ver", 0))),
        query);
    TestHelper.assertExpectedObjects(expectedResults, results, "");
    results = GroupByQueryRunnerTestHelper.runQuery(
        factory,
        realtimeManager3.getQueryRunnerForSegments(
            query,
            ImmutableList.<SegmentDescriptor>of(
                new SegmentDescriptor(new Interval("2011-04-01T00:00:00.000Z/2011-04-03T00:00:00.000Z"), "ver", 1))),
        query);
    TestHelper.assertExpectedObjects(expectedResults, results, "");
  }
}
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
The class RealtimeManagerTest, method testQueryWithInterval.
@Test(timeout = 10_000L)
public void testQueryWithInterval() throws IOException, InterruptedException {
  List<Row> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 2L, "idx", 270L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 2L, "idx", 236L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 2L, "idx", 316L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 2L, "idx", 240L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 5740L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 2L, "idx", 242L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 5800L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 2L, "idx", 156L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 2L, "idx", 238L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 2L, "idx", 294L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 2L, "idx", 224L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 2L, "idx", 332L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 2L, "idx", 226L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 6L, "idx", 4894L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 2L, "idx", 228L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 6L, "idx", 5010L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 2L, "idx", 194L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 2L, "idx", 252L));
  chiefStartedLatch.await();
  for (QueryRunner runner : QueryRunnerTestHelper.makeQueryRunners((GroupByQueryRunnerFactory) factory)) {
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
        .setGranularity(QueryRunnerTestHelper.dayGran)
        .build();
    plumber.setRunners(ImmutableMap.of(query.getIntervals().get(0), runner));
    plumber2.setRunners(ImmutableMap.of(query.getIntervals().get(0), runner));
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(
        factory,
        realtimeManager3.getQueryRunnerForIntervals(query, QueryRunnerTestHelper.firstToThird.getIntervals()),
        query);
    TestHelper.assertExpectedObjects(expectedResults, results, "");
  }
}
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
The class QueryMaker, method runQuery.
public Sequence<Object[]> runQuery(final DataSource dataSource, final RowSignature sourceRowSignature, final DruidQueryBuilder queryBuilder) {
  if (dataSource instanceof QueryDataSource) {
    final GroupByQuery outerQuery = queryBuilder.toGroupByQuery(dataSource, sourceRowSignature, plannerContext.getQueryContext());
    if (outerQuery == null) {
      // Bug in the planner rules. They shouldn't allow this to happen.
      throw new IllegalStateException("Can't use QueryDataSource without an outer groupBy query!");
    }
    return executeGroupBy(queryBuilder, outerQuery);
  }
  final TimeseriesQuery timeseriesQuery = queryBuilder.toTimeseriesQuery(dataSource, sourceRowSignature, plannerContext.getQueryContext());
  if (timeseriesQuery != null) {
    return executeTimeseries(queryBuilder, timeseriesQuery);
  }
  final TopNQuery topNQuery = queryBuilder.toTopNQuery(
      dataSource,
      sourceRowSignature,
      plannerContext.getQueryContext(),
      plannerContext.getPlannerConfig().getMaxTopNLimit(),
      plannerContext.getPlannerConfig().isUseApproximateTopN());
  if (topNQuery != null) {
    return executeTopN(queryBuilder, topNQuery);
  }
  final GroupByQuery groupByQuery = queryBuilder.toGroupByQuery(dataSource, sourceRowSignature, plannerContext.getQueryContext());
  if (groupByQuery != null) {
    return executeGroupBy(queryBuilder, groupByQuery);
  }
  final SelectQuery selectQuery = queryBuilder.toSelectQuery(dataSource, sourceRowSignature, plannerContext.getQueryContext());
  if (selectQuery != null) {
    return executeSelect(queryBuilder, selectQuery);
  }
  throw new IllegalStateException("WTF?! Cannot execute query even though we planned it?");
}
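The QueryDataSource branch above is the nested-groupBy case: an inner query is only allowed as a data source when the outer query is a groupBy. As a rough sketch of that shape, using the same builder API seen in the benchmark snippet below (the data source, dimension, metric names, and interval are made up):
// Hedged sketch: an inner groupBy used as the data source of an outer groupBy,
// i.e. the QueryDataSource shape that runQuery() routes to executeGroupBy().
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(
    Arrays.asList(new Interval("2011-04-01/2011-04-03")));
GroupByQuery inner = GroupByQuery.builder()
    .setDataSource("wikipedia")                       // hypothetical data source
    .setQuerySegmentSpec(intervalSpec)
    .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("page", null)))
    .setAggregatorSpecs(Arrays.asList(new LongSumAggregatorFactory("added", "added")))
    .setGranularity(Granularities.DAY)
    .build();
GroupByQuery outer = GroupByQuery.builder()
    .setDataSource(inner)                             // builder wraps the inner query in a QueryDataSource
    .setQuerySegmentSpec(intervalSpec)
    .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("page", null)))
    .setAggregatorSpecs(Arrays.asList(new LongSumAggregatorFactory("added", "added")))
    .setGranularity(Granularities.ALL)
    .build();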
Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.
The class GroupByTypeInterfaceBenchmark, method setupQueries.
private void setupQueries() {
  // queries for the basic schema
  Map<String, GroupByQuery> basicQueries = new LinkedHashMap<>();
  BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
  {
    // basic.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery queryString = GroupByQuery.builder()
        .setDataSource("blah").setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("dimSequential", null)))
        .setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).build();
    GroupByQuery queryLongFloat = GroupByQuery.builder()
        .setDataSource("blah").setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("metLongUniform", null), new DefaultDimensionSpec("metFloatNormal", null)))
        .setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).build();
    GroupByQuery queryLong = GroupByQuery.builder()
        .setDataSource("blah").setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("metLongUniform", null)))
        .setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).build();
    GroupByQuery queryFloat = GroupByQuery.builder()
        .setDataSource("blah").setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("metFloatNormal", null)))
        .setAggregatorSpecs(queryAggs).setGranularity(Granularity.fromString(queryGranularity)).build();
    basicQueries.put("string", queryString);
    basicQueries.put("longFloat", queryLongFloat);
    basicQueries.put("long", queryLong);
    basicQueries.put("float", queryFloat);
  }
  {
    // basic.nested
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    GroupByQuery subqueryA = GroupByQuery.builder()
        .setDataSource("blah").setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null)))
        .setAggregatorSpecs(queryAggs).setGranularity(Granularities.DAY).build();
    GroupByQuery queryA = GroupByQuery.builder()
        .setDataSource(subqueryA).setQuerySegmentSpec(intervalSpec)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("dimSequential", null)))
        .setAggregatorSpecs(queryAggs).setGranularity(Granularities.WEEK).build();
    basicQueries.put("nested", queryA);
  }
  SCHEMA_QUERY_MAP.put("basic", basicQueries);
}