
Example 1 with DruidSelectQueryRecordReader

Use of org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader in project hive by apache.

From class DruidQueryBasedInputFormat, method getRecordReader:

@Override
public org.apache.hadoop.mapred.RecordReader<NullWritable, DruidWritable> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) throws IOException {
    // We need to provide a different record reader for each type of Druid query,
    // because the Druid result format differs for each query type.
    final DruidQueryRecordReader<?, ?> reader;
    final String druidQueryType = job.get(Constants.DRUID_QUERY_TYPE);
    if (druidQueryType == null) {
        // By default
        reader = new DruidSelectQueryRecordReader();
        reader.initialize((HiveDruidSplit) split, job);
        return reader;
    }
    switch (druidQueryType) {
        case Query.TIMESERIES:
            reader = new DruidTimeseriesQueryRecordReader();
            break;
        case Query.TOPN:
            reader = new DruidTopNQueryRecordReader();
            break;
        case Query.GROUP_BY:
            reader = new DruidGroupByQueryRecordReader();
            break;
        case Query.SELECT:
            reader = new DruidSelectQueryRecordReader();
            break;
        default:
            throw new IOException("Druid query type not recognized");
    }
    reader.initialize((HiveDruidSplit) split, job);
    return reader;
}
Also used: DruidTopNQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader), DruidTimeseriesQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader), DruidSelectQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader), DruidGroupByQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader), IOException (java.io.IOException)
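
For orientation, here is a minimal caller-side sketch for the mapred path above. The wrapper method is hypothetical, and the import path for Constants is assumed from the Hive codebase; Query.TIMESERIES is the String constant the switch matches on.

import java.io.IOException;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.druid.serde.DruidWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import io.druid.query.Query;

public class MapredReaderSketch {
    // Hypothetical helper: set the query type in the JobConf so that
    // getRecordReader above instantiates the matching Druid reader.
    static RecordReader<NullWritable, DruidWritable> openTimeseriesReader(
            InputFormat<NullWritable, DruidWritable> format,
            InputSplit split, JobConf job) throws IOException {
        job.set(Constants.DRUID_QUERY_TYPE, Query.TIMESERIES);
        // In the old mapred API the reader comes back already initialized.
        return format.getRecordReader(split, job, Reporter.NULL);
    }
}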

Example 2 with DruidSelectQueryRecordReader

Use of org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader in project hive by apache.

From class DruidQueryBasedInputFormat, method createRecordReader:

@Override
public RecordReader<NullWritable, DruidWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // We need to provide a different record reader for each type of Druid query,
    // because the Druid result format differs for each query type.
    final String druidQueryType = context.getConfiguration().get(Constants.DRUID_QUERY_TYPE);
    if (druidQueryType == null) {
        // By default
        return new DruidSelectQueryRecordReader();
    }
    final DruidQueryRecordReader<?, ?> reader;
    switch (druidQueryType) {
        case Query.TIMESERIES:
            reader = new DruidTimeseriesQueryRecordReader();
            break;
        case Query.TOPN:
            reader = new DruidTopNQueryRecordReader();
            break;
        case Query.GROUP_BY:
            reader = new DruidGroupByQueryRecordReader();
            break;
        case Query.SELECT:
            reader = new DruidSelectQueryRecordReader();
            break;
        default:
            throw new IOException("Druid query type not recognized");
    }
    return reader;
}
Also used: DruidTopNQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader), DruidTimeseriesQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader), DruidSelectQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader), DruidGroupByQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader), IOException (java.io.IOException)
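
Unlike the mapred variant in Example 1, this method returns the reader uninitialized: in the new org.apache.hadoop.mapreduce API the framework calls initialize(split, context) itself before the first nextKeyValue(). A minimal sketch of that driver-side sequence (the counting helper is illustrative, not Hive code):

import java.io.IOException;
import org.apache.hadoop.hive.druid.serde.DruidWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class MapreduceDriverSketch {
    // Illustrative driver loop; in a real job the framework performs
    // these steps around the user's Mapper.
    static long countRows(InputFormat<NullWritable, DruidWritable> format,
            InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        RecordReader<NullWritable, DruidWritable> reader =
                format.createRecordReader(split, context);
        // The framework, not createRecordReader, initializes the reader.
        reader.initialize(split, context);
        long rows = 0;
        while (reader.nextKeyValue()) {
            rows++;
        }
        reader.close();
        return rows;
    }
}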

Example 3 with DruidSelectQueryRecordReader

Use of org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader in project hive by apache.

From class TestDruidSerDe, method deserializeQueryResults:

private static void deserializeQueryResults(DruidSerDe serDe, String queryType, String jsonQuery, String resultString, Object[][] records) throws SerDeException, JsonParseException, JsonMappingException, IOException, NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException, InterruptedException, NoSuchMethodException, InvocationTargetException {
    // Initialize
    Query<?> query = null;
    DruidQueryRecordReader<?, ?> reader = null;
    List<?> resultsList = null;
    ObjectMapper mapper = new DefaultObjectMapper();
    switch (queryType) {
        case Query.TIMESERIES:
            query = mapper.readValue(jsonQuery, TimeseriesQuery.class);
            reader = new DruidTimeseriesQueryRecordReader();
            resultsList = mapper.readValue(resultString, new TypeReference<List<Result<TimeseriesResultValue>>>() {
            });
            break;
        case Query.TOPN:
            query = mapper.readValue(jsonQuery, TopNQuery.class);
            reader = new DruidTopNQueryRecordReader();
            resultsList = mapper.readValue(resultString, new TypeReference<List<Result<TopNResultValue>>>() {
            });
            break;
        case Query.GROUP_BY:
            query = mapper.readValue(jsonQuery, GroupByQuery.class);
            reader = new DruidGroupByQueryRecordReader();
            resultsList = mapper.readValue(resultString, new TypeReference<List<Row>>() {
            });
            break;
        case Query.SELECT:
            query = mapper.readValue(jsonQuery, SelectQuery.class);
            reader = new DruidSelectQueryRecordReader();
            resultsList = mapper.readValue(resultString, new TypeReference<List<Result<SelectResultValue>>>() {
            });
            break;
    }
    // Inject the parsed query into the reader's private 'query' field
    Field field1 = DruidQueryRecordReader.class.getDeclaredField("query");
    field1.setAccessible(true);
    field1.set(reader, query);
    if (reader instanceof DruidGroupByQueryRecordReader) {
        Method method1 = DruidGroupByQueryRecordReader.class.getDeclaredMethod("initExtractors");
        method1.setAccessible(true);
        method1.invoke(reader);
    }
    // Expose the reader's private 'results' iterator so the test can feed it directly
    Field field2 = DruidQueryRecordReader.class.getDeclaredField("results");
    field2.setAccessible(true);
    // Get the row structure
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    // Check mapred
    Iterator<?> results = resultsList.iterator();
    field2.set(reader, results);
    DruidWritable writable = new DruidWritable();
    int pos = 0;
    while (reader.next(NullWritable.get(), writable)) {
        Object row = serDe.deserialize(writable);
        Object[] expectedFieldsData = records[pos];
        assertEquals(expectedFieldsData.length, fieldRefs.size());
        for (int i = 0; i < fieldRefs.size(); i++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
            assertEquals("Field " + i, expectedFieldsData[i], fieldData);
        }
        pos++;
    }
    assertEquals(pos, records.length);
    // Check mapreduce
    results = resultsList.iterator();
    field2.set(reader, results);
    pos = 0;
    while (reader.nextKeyValue()) {
        Object row = serDe.deserialize(reader.getCurrentValue());
        Object[] expectedFieldsData = records[pos];
        assertEquals(expectedFieldsData.length, fieldRefs.size());
        for (int i = 0; i < fieldRefs.size(); i++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
            assertEquals("Field " + i, expectedFieldsData[i], fieldData);
        }
        pos++;
    }
    assertEquals(pos, records.length);
}
Also used: Result (io.druid.query.Result), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), Field (java.lang.reflect.Field), GroupByQuery (io.druid.query.groupby.GroupByQuery), TopNQuery (io.druid.query.topn.TopNQuery), TypeReference (com.fasterxml.jackson.core.type.TypeReference), DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), DruidTopNQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader), TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery), DruidSelectQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader), Method (java.lang.reflect.Method), SelectQuery (io.druid.query.select.SelectQuery), DruidWritable (org.apache.hadoop.hive.druid.serde.DruidWritable), DruidTimeseriesQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader), DruidGroupByQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader), Row (io.druid.data.input.Row), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
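
The test never calls initialize(); it plants the parsed query and a results iterator into the reader's private fields through reflection, then drives both the mapred and mapreduce iteration paths. A standalone sketch of that injection pattern (Holder is a stand-in for DruidQueryRecordReader, not Hive code):

import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.Iterator;

public class ReflectionInjectionSketch {
    static class Holder {
        // Stand-ins for DruidQueryRecordReader's private 'query' and 'results' fields.
        private String query;
        private Iterator<String> results;
    }

    public static void main(String[] args) throws Exception {
        Holder reader = new Holder();
        // Same pattern as the test: open up a private field and set it directly.
        Field queryField = Holder.class.getDeclaredField("query");
        queryField.setAccessible(true);
        queryField.set(reader, "{\"queryType\":\"timeseries\"}");
        Field resultsField = Holder.class.getDeclaredField("results");
        resultsField.setAccessible(true);
        resultsField.set(reader, Arrays.asList("row1", "row2").iterator());
        // Both private fields now hold the injected values.
        System.out.println(reader.query + " -> " + reader.results.next());
    }
}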

Aggregations

DruidGroupByQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader): 3 uses
DruidSelectQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader): 3 uses
DruidTimeseriesQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader): 3 uses
DruidTopNQueryRecordReader (org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader): 3 uses
IOException (java.io.IOException): 2 uses
TypeReference (com.fasterxml.jackson.core.type.TypeReference): 1 use
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1 use
Row (io.druid.data.input.Row): 1 use
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 1 use
Result (io.druid.query.Result): 1 use
GroupByQuery (io.druid.query.groupby.GroupByQuery): 1 use
SelectQuery (io.druid.query.select.SelectQuery): 1 use
TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery): 1 use
TopNQuery (io.druid.query.topn.TopNQuery): 1 use
Field (java.lang.reflect.Field): 1 use
Method (java.lang.reflect.Method): 1 use
DruidWritable (org.apache.hadoop.hive.druid.serde.DruidWritable): 1 use
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 1 use
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 1 use