
Example 11 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project druid by druid-io.

the class DoubleStorageTest method dataFeeder.

@Parameterized.Parameters
public static Collection<?> dataFeeder() {
    SegmentAnalysis expectedSegmentAnalysisDouble = new SegmentAnalysis(
        SEGMENT_ID.toString(),
        ImmutableList.of(INTERVAL),
        ImmutableMap.of(
            TIME_COLUMN, new ColumnAnalysis(ColumnType.LONG, ValueType.LONG.name(), false, false, 100, null, null, null, null),
            DIM_NAME, new ColumnAnalysis(ColumnType.STRING, ValueType.STRING.name(), false, false, 120, 1, DIM_VALUE, DIM_VALUE, null),
            DIM_FLOAT_NAME, new ColumnAnalysis(ColumnType.DOUBLE, ValueType.DOUBLE.name(), false, false, 80, null, null, null, null)),
        330,
        MAX_ROWS,
        null, null, null, null);
    SegmentAnalysis expectedSegmentAnalysisFloat = new SegmentAnalysis(
        SEGMENT_ID.toString(),
        ImmutableList.of(INTERVAL),
        ImmutableMap.of(
            TIME_COLUMN, new ColumnAnalysis(ColumnType.LONG, ValueType.LONG.name(), false, false, 100, null, null, null, null),
            DIM_NAME, new ColumnAnalysis(ColumnType.STRING, ValueType.STRING.name(), false, false, 120, 1, DIM_VALUE, DIM_VALUE, null),
            DIM_FLOAT_NAME, new ColumnAnalysis(ColumnType.FLOAT, ValueType.FLOAT.name(), false, false, 80, null, null, null, null)),
        330,
        MAX_ROWS,
        null, null, null, null);
    return ImmutableList.of(new Object[] { "double", expectedSegmentAnalysisDouble }, new Object[] { "float", expectedSegmentAnalysisFloat });
}
Also used : ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis)
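
For context, a minimal sketch of how JUnit 4 consumes these pairs: @Parameterized.Parameters feeds each Object[] positionally to the test constructor. The constructor below is an assumption for illustration; it does not appear in the snippet above.

@RunWith(Parameterized.class)
public class DoubleStorageTest {

    // Hypothetical fields matching the Object[] shape returned by dataFeeder().
    private final String storeDoubleAs;                    // "double" or "float"
    private final SegmentAnalysis expectedSegmentAnalysis; // per-mode expectation

    public DoubleStorageTest(String storeDoubleAs, SegmentAnalysis expectedSegmentAnalysis) {
        this.storeDoubleAs = storeDoubleAs;
        this.expectedSegmentAnalysis = expectedSegmentAnalysis;
    }
}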

Example 12 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project druid by druid-io.

the class DoubleStorageTest method testMetaDataAnalysis.

@Test
public void testMetaDataAnalysis() {
    QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner(METADATA_QR_FACTORY, SEGMENT_ID, new QueryableIndexSegment(index, SEGMENT_ID), null);
    SegmentMetadataQuery segmentMetadataQuery = Druids.newSegmentMetadataQueryBuilder()
        .dataSource("testing")
        .intervals(ImmutableList.of(INTERVAL))
        .toInclude(new ListColumnIncluderator(Arrays.asList(TIME_COLUMN, DIM_NAME, DIM_FLOAT_NAME)))
        .analysisTypes(
            SegmentMetadataQuery.AnalysisType.CARDINALITY,
            SegmentMetadataQuery.AnalysisType.SIZE,
            SegmentMetadataQuery.AnalysisType.INTERVAL,
            SegmentMetadataQuery.AnalysisType.MINMAX)
        .merge(true)
        .build();
    List<SegmentAnalysis> results = runner.run(QueryPlus.wrap(segmentMetadataQuery)).toList();
    Assert.assertEquals(Collections.singletonList(expectedSegmentAnalysis), results);
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) SegmentMetadataQuery(org.apache.druid.query.metadata.metadata.SegmentMetadataQuery) ListColumnIncluderator(org.apache.druid.query.metadata.metadata.ListColumnIncluderator) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) ScanQueryRunnerTest(org.apache.druid.query.scan.ScanQueryRunnerTest) Test(org.junit.Test)
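
If finer-grained assertions were wanted, the merged analysis could be unpacked per column instead of comparing whole objects. A hedged sketch, not part of the original test; getColumns() and getType() also appear in Example 14 below, while getMinValue()/getMaxValue() are assumed ColumnAnalysis accessors:

    SegmentAnalysis analysis = results.get(0);
    ColumnAnalysis dimAnalysis = analysis.getColumns().get(DIM_NAME);
    // MINMAX analysis populates min/max for the string dimension; CARDINALITY fills cardinality.
    Assert.assertEquals(ValueType.STRING.name(), dimAnalysis.getType());
    Assert.assertEquals(DIM_VALUE, dimAnalysis.getMinValue());
    Assert.assertEquals(DIM_VALUE, dimAnalysis.getMaxValue());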

Example 13 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project druid by druid-io.

the class SegmentMetadataQueryQueryToolChest method mergeResults.

@Override
public QueryRunner<SegmentAnalysis> mergeResults(final QueryRunner<SegmentAnalysis> runner) {
    return new BySegmentSkippingQueryRunner<SegmentAnalysis>(runner) {

        @Override
        public Sequence<SegmentAnalysis> doRun(QueryRunner<SegmentAnalysis> baseRunner, QueryPlus<SegmentAnalysis> queryPlus, ResponseContext context) {
            SegmentMetadataQuery updatedQuery = ((SegmentMetadataQuery) queryPlus.getQuery()).withFinalizedAnalysisTypes(config);
            QueryPlus<SegmentAnalysis> updatedQueryPlus = queryPlus.withQuery(updatedQuery);
            return new MappedSequence<>(
                CombiningSequence.create(
                    baseRunner.run(updatedQueryPlus, context),
                    makeOrdering(updatedQuery),
                    createMergeFn(updatedQuery)),
                MERGE_TRANSFORM_FN::apply);
        }

        private Ordering<SegmentAnalysis> makeOrdering(SegmentMetadataQuery query) {
            return (Ordering<SegmentAnalysis>) SegmentMetadataQueryQueryToolChest.this.createResultComparator(query);
        }

        private BinaryOperator<SegmentAnalysis> createMergeFn(final SegmentMetadataQuery inQ) {
            return SegmentMetadataQueryQueryToolChest.this.createMergeFn(inQ);
        }
    };
}
Also used : MappedSequence(org.apache.druid.java.util.common.guava.MappedSequence) SegmentMetadataQuery(org.apache.druid.query.metadata.metadata.SegmentMetadataQuery) ResponseContext(org.apache.druid.query.context.ResponseContext) Ordering(com.google.common.collect.Ordering) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) BySegmentSkippingQueryRunner(org.apache.druid.query.BySegmentSkippingQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) QueryPlus(org.apache.druid.query.QueryPlus)
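
The core of this override is a combine-then-map pipeline: CombiningSequence folds together adjacent results that compare equal under the ordering, using the merge function, and MappedSequence applies a final transform to each merged element. A plain-Java sketch of that pattern over a list (a hypothetical helper, not Druid's Sequence API):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.function.BinaryOperator;
import java.util.function.Function;

final class CombineThenMap {
    // Folds adjacent items that compare equal under the ordering, then
    // transforms each surviving item, mirroring CombiningSequence + MappedSequence.
    static <T, R> List<R> apply(List<T> sorted, Comparator<T> ordering,
            BinaryOperator<T> mergeFn, Function<T, R> transform) {
        List<R> out = new ArrayList<>();
        T pending = null;
        for (T item : sorted) {
            if (pending == null) {
                pending = item;
            } else if (ordering.compare(pending, item) == 0) {
                // Same key: fold the two results into one.
                pending = mergeFn.apply(pending, item);
            } else {
                out.add(transform.apply(pending)); // the MappedSequence step
                pending = item;
            }
        }
        if (pending != null) {
            out.add(transform.apply(pending));
        }
        return out;
    }
}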

Example 14 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project hive by apache.

the class DruidSerDe method initFromMetaDataQuery.

private void initFromMetaDataQuery(final Configuration configuration, final Properties properties) throws SerDeException {
    final List<String> columnNames = new ArrayList<>();
    final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
    final List<ObjectInspector> inspectors = new ArrayList<>();
    String dataSource = properties.getProperty(Constants.DRUID_DATA_SOURCE);
    if (dataSource == null) {
        throw new SerDeException("Druid data source not specified; use " + Constants.DRUID_DATA_SOURCE + " in table properties");
    }
    SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder();
    builder.dataSource(dataSource);
    builder.merge(true);
    builder.analysisTypes();
    SegmentMetadataQuery query = builder.build();
    // Execute query in Druid
    String address = HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
    if (org.apache.commons.lang3.StringUtils.isEmpty(address)) {
        throw new SerDeException("Druid broker address not specified in configuration");
    }
    // Infer schema
    SegmentAnalysis schemaInfo;
    try {
        schemaInfo = submitMetadataRequest(address, query);
    } catch (IOException e) {
        throw new SerDeException(e);
    }
    for (Entry<String, ColumnAnalysis> columnInfo : schemaInfo.getColumns().entrySet()) {
        if (columnInfo.getKey().equals(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
            // Special handling for timestamp column
            // field name
            columnNames.add(columnInfo.getKey());
            // field type
            PrimitiveTypeInfo type = tsTZTypeInfo;
            columnTypes.add(type);
            inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
            continue;
        }
        // field name
        columnNames.add(columnInfo.getKey());
        // field type
        PrimitiveTypeInfo type = DruidSerDeUtils.convertDruidToHiveType(columnInfo.getValue().getType());
        columnTypes.add(type instanceof TimestampLocalTZTypeInfo ? tsTZTypeInfo : type);
        inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
    }
    columns = columnNames.toArray(new String[0]);
    types = columnTypes.toArray(new PrimitiveTypeInfo[0]);
    inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
Also used : BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) ArrayList(java.util.ArrayList) TimestampLocalTZTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo) IOException(java.io.IOException) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SegmentMetadataQuery(org.apache.druid.query.metadata.metadata.SegmentMetadataQuery) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) SegmentMetadataQueryBuilder(org.apache.druid.query.Druids.SegmentMetadataQueryBuilder) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
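
DruidSerDeUtils.convertDruidToHiveType is not shown here; below is an illustrative sketch of the kind of mapping it performs. The type names are the standard ones Druid reports in segmentMetadata results, but the verbatim Hive implementation may differ.

import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

final class DruidHiveTypes {
    // Hypothetical stand-in for DruidSerDeUtils.convertDruidToHiveType.
    static PrimitiveTypeInfo toHive(String druidType) {
        switch (druidType.toUpperCase()) {
            case "LONG":   return TypeInfoFactory.longTypeInfo;
            case "FLOAT":  return TypeInfoFactory.floatTypeInfo;
            case "DOUBLE": return TypeInfoFactory.doubleTypeInfo;
            case "STRING": return TypeInfoFactory.stringTypeInfo;
            default:       return TypeInfoFactory.stringTypeInfo; // conservative fallback
        }
    }
}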

Example 15 with SegmentAnalysis

use of org.apache.druid.query.metadata.metadata.SegmentAnalysis in project hive by apache.

the class DruidSerDe method submitMetadataRequest.

/* Submits the metadata request to the given broker address and returns the single merged result */
protected SegmentAnalysis submitMetadataRequest(String address, SegmentMetadataQuery query) throws SerDeException, IOException {
    InputStream response;
    try {
        response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(), DruidStorageHandlerUtils.createSmileRequest(address, query));
    } catch (Exception e) {
        throw new SerDeException(StringUtils.stringifyException(e));
    }
    // Retrieve results
    List<SegmentAnalysis> resultsList;
    try {
        // readValue will throw an exception if the response from Druid is not an array;
        // this occurs, for instance, when Druid returns an error instead of an array of results.
        resultsList = DruidStorageHandlerUtils.SMILE_MAPPER.readValue(response, new TypeReference<List<SegmentAnalysis>>() {
        });
    } catch (Exception e) {
        response.close();
        throw new SerDeException(StringUtils.stringifyException(e));
    }
    if (resultsList == null || resultsList.isEmpty()) {
        throw new SerDeException("Connected to Druid but could not retrieve datasource information");
    }
    if (resultsList.size() != 1) {
        throw new SerDeException("Information about segments should have been merged");
    }
    return resultsList.get(0);
}
Also used : InputStream(java.io.InputStream) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) TypeReference(com.fasterxml.jackson.core.type.TypeReference) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
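
The SMILE_MAPPER above pairs with createSmileRequest: the Druid broker can exchange queries and results in Jackson's Smile binary format. A minimal sketch of such a mapper, an assumption about DruidStorageHandlerUtils' setup but standard Jackson API:

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.smile.SmileFactory;

final class SmileMappers {
    // An ObjectMapper backed by the Smile binary codec; readValue can then decode
    // the broker's binary response into List<SegmentAnalysis> as in the method above.
    static final ObjectMapper SMILE_MAPPER = new ObjectMapper(new SmileFactory());
}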

Aggregations

SegmentAnalysis (org.apache.druid.query.metadata.metadata.SegmentAnalysis) 30
ColumnAnalysis (org.apache.druid.query.metadata.metadata.ColumnAnalysis) 20
Test (org.junit.Test) 18
SegmentMetadataQuery (org.apache.druid.query.metadata.metadata.SegmentMetadataQuery) 16
QueryRunner (org.apache.druid.query.QueryRunner) 11
ListColumnIncluderator (org.apache.druid.query.metadata.metadata.ListColumnIncluderator) 11
ExecutorService (java.util.concurrent.ExecutorService) 9
FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner) 8
QueryToolChest (org.apache.druid.query.QueryToolChest) 8
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory) 5
IOException (java.io.IOException) 4
Map (java.util.Map) 4
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) 4
HashMap (java.util.HashMap) 3
TableDataSource (org.apache.druid.query.TableDataSource) 3
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) 3
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory) 3
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 2
List (java.util.List) 2