Use of org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader in project hive by apache.
In class DruidQueryBasedInputFormat, method getRecordReader:
@Override
public org.apache.hadoop.mapred.RecordReader<NullWritable, DruidWritable> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter) throws IOException {
  // We need to provide a different record reader for every type of Druid query.
  // The reason is that Druid results format is different for each type.
  final DruidQueryRecordReader<?, ?> reader;
  final String druidQueryType = job.get(Constants.DRUID_QUERY_TYPE);
  if (druidQueryType == null) {
    // By default
    reader = new DruidSelectQueryRecordReader();
    reader.initialize((HiveDruidSplit) split, job);
    return reader;
  }
  switch (druidQueryType) {
    case Query.TIMESERIES:
      reader = new DruidTimeseriesQueryRecordReader();
      break;
    case Query.TOPN:
      reader = new DruidTopNQueryRecordReader();
      break;
    case Query.GROUP_BY:
      reader = new DruidGroupByQueryRecordReader();
      break;
    case Query.SELECT:
      reader = new DruidSelectQueryRecordReader();
      break;
    default:
      throw new IOException("Druid query type not recognized");
  }
  reader.initialize((HiveDruidSplit) split, job);
  return reader;
}
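A minimal sketch of how the mapred path above might be driven. The helper method, its name, and the assumption that the JobConf already carries the Druid datasource, broker address and serialized query are illustrative and not part of the Hive source; setting Constants.DRUID_QUERY_TYPE to Query.GROUP_BY makes the input format return an already-initialized DruidGroupByQueryRecordReader.

// Hypothetical driver code for the mapred API; 'split' is assumed to be one of the
// HiveDruidSplits produced by the input format's getSplits().
private static void readGroupByRows(HiveDruidSplit split, JobConf job) throws IOException {
  // Only the query type selection is shown; the rest of the Druid configuration is
  // assumed to be present on the JobConf already.
  job.set(Constants.DRUID_QUERY_TYPE, Query.GROUP_BY);
  DruidQueryBasedInputFormat inputFormat = new DruidQueryBasedInputFormat();
  org.apache.hadoop.mapred.RecordReader<NullWritable, DruidWritable> reader =
      inputFormat.getRecordReader(split, job, Reporter.NULL);
  DruidWritable value = reader.createValue();
  while (reader.next(NullWritable.get(), value)) {
    // each DruidWritable carries one deserialized groupBy row, keyed by column name
  }
  reader.close();
}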
Use of org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader in project hive by apache.
In class DruidQueryBasedInputFormat, method createRecordReader:
@Override
public RecordReader<NullWritable, DruidWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  // We need to provide a different record reader for every type of Druid query.
  // The reason is that Druid results format is different for each type.
  final String druidQueryType = context.getConfiguration().get(Constants.DRUID_QUERY_TYPE);
  if (druidQueryType == null) {
    // By default
    return new DruidSelectQueryRecordReader();
  }
  final DruidQueryRecordReader<?, ?> reader;
  switch (druidQueryType) {
    case Query.TIMESERIES:
      reader = new DruidTimeseriesQueryRecordReader();
      break;
    case Query.TOPN:
      reader = new DruidTopNQueryRecordReader();
      break;
    case Query.GROUP_BY:
      reader = new DruidGroupByQueryRecordReader();
      break;
    case Query.SELECT:
      reader = new DruidSelectQueryRecordReader();
      break;
    default:
      throw new IOException("Druid query type not recognized");
  }
  return reader;
}
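Unlike the mapred variant, createRecordReader hands the reader back without calling initialize; in the mapreduce lifecycle the framework invokes initialize(split, context) before the first nextKeyValue(). A hypothetical sketch under that assumption follows; the helper name and the pre-configured context are illustrative only.

// Hypothetical driver code for the mapreduce API.
private static void readGroupByRows(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  // The configuration is assumed to already carry the Druid connection details and the
  // serialized groupBy query; only the query type selection is shown here.
  context.getConfiguration().set(Constants.DRUID_QUERY_TYPE, Query.GROUP_BY);
  DruidQueryBasedInputFormat inputFormat = new DruidQueryBasedInputFormat();
  RecordReader<NullWritable, DruidWritable> reader = inputFormat.createRecordReader(split, context);
  // In a running job the framework makes this call; it is shown explicitly because
  // createRecordReader returns the reader uninitialized.
  reader.initialize(split, context);
  while (reader.nextKeyValue()) {
    DruidWritable value = reader.getCurrentValue();
    // process the deserialized groupBy row
  }
  reader.close();
}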
Use of org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader in project hive by apache.
In class TestDruidSerDe, method deserializeQueryResults:
private static void deserializeQueryResults(DruidSerDe serDe, String queryType, String jsonQuery, String resultString, Object[][] records) throws SerDeException, JsonParseException, JsonMappingException, IOException, NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException, InterruptedException, NoSuchMethodException, InvocationTargetException {
  // Initialize
  Query<?> query = null;
  DruidQueryRecordReader<?, ?> reader = null;
  List<?> resultsList = null;
  ObjectMapper mapper = new DefaultObjectMapper();
  switch (queryType) {
    case Query.TIMESERIES:
      query = mapper.readValue(jsonQuery, TimeseriesQuery.class);
      reader = new DruidTimeseriesQueryRecordReader();
      resultsList = mapper.readValue(resultString, new TypeReference<List<Result<TimeseriesResultValue>>>() {
      });
      break;
    case Query.TOPN:
      query = mapper.readValue(jsonQuery, TopNQuery.class);
      reader = new DruidTopNQueryRecordReader();
      resultsList = mapper.readValue(resultString, new TypeReference<List<Result<TopNResultValue>>>() {
      });
      break;
    case Query.GROUP_BY:
      query = mapper.readValue(jsonQuery, GroupByQuery.class);
      reader = new DruidGroupByQueryRecordReader();
      resultsList = mapper.readValue(resultString, new TypeReference<List<Row>>() {
      });
      break;
    case Query.SELECT:
      query = mapper.readValue(jsonQuery, SelectQuery.class);
      reader = new DruidSelectQueryRecordReader();
      resultsList = mapper.readValue(resultString, new TypeReference<List<Result<SelectResultValue>>>() {
      });
      break;
  }
  // Set query and fields access
  Field field1 = DruidQueryRecordReader.class.getDeclaredField("query");
  field1.setAccessible(true);
  field1.set(reader, query);
  if (reader instanceof DruidGroupByQueryRecordReader) {
    Method method1 = DruidGroupByQueryRecordReader.class.getDeclaredMethod("initExtractors");
    method1.setAccessible(true);
    method1.invoke(reader);
  }
  Field field2 = DruidQueryRecordReader.class.getDeclaredField("results");
  field2.setAccessible(true);
  // Get the row structure
  StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
  // Check mapred
  Iterator<?> results = resultsList.iterator();
  field2.set(reader, results);
  DruidWritable writable = new DruidWritable();
  int pos = 0;
  while (reader.next(NullWritable.get(), writable)) {
    Object row = serDe.deserialize(writable);
    Object[] expectedFieldsData = records[pos];
    assertEquals(expectedFieldsData.length, fieldRefs.size());
    for (int i = 0; i < fieldRefs.size(); i++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
      assertEquals("Field " + i, expectedFieldsData[i], fieldData);
    }
    pos++;
  }
  assertEquals(pos, records.length);
  // Check mapreduce
  results = resultsList.iterator();
  field2.set(reader, results);
  pos = 0;
  while (reader.nextKeyValue()) {
    Object row = serDe.deserialize(reader.getCurrentValue());
    Object[] expectedFieldsData = records[pos];
    assertEquals(expectedFieldsData.length, fieldRefs.size());
    for (int i = 0; i < fieldRefs.size(); i++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
      assertEquals("Field " + i, expectedFieldsData[i], fieldData);
    }
    pos++;
  }
  assertEquals(pos, records.length);
}
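A hypothetical invocation of the helper above for the groupBy case. The fixture names GROUP_BY_QUERY, GROUP_BY_QUERY_RESULTS and GROUP_BY_QUERY_RESULTS_RECORDS stand for the JSON query, the broker's JSON response and the expected rows defined elsewhere in the test class, and the SerDe is assumed to have been initialized with matching column names and types:

DruidSerDe serDe = new DruidSerDe();
// serDe.initialize(configuration, tableProperties) is assumed to have been called with
// columns matching the groupBy query's dimensions and aggregators
deserializeQueryResults(serDe, Query.GROUP_BY, GROUP_BY_QUERY, GROUP_BY_QUERY_RESULTS, GROUP_BY_QUERY_RESULTS_RECORDS);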