Use of org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable in project hive by apache.
The class TestJdbcWithMiniLlapVectorArrowBatch, method runQueryUsingLlapArrowBatchReader.
private MultiSet<List<Object>> runQueryUsingLlapArrowBatchReader(String query, Map<String, String> extraHiveConfs)
    throws Exception {
  String url = miniHS2.getJdbcURL();
  // Append any extra Hive configs as parameters on the JDBC URL.
  if (extraHiveConfs != null) {
    url = url + "?" + extraHiveConfs.entrySet().stream()
        .map(e -> e.getKey() + "=" + e.getValue())
        .collect(Collectors.joining(";"));
  }
  String user = System.getProperty("user.name");
  String pwd = user;
  String handleId = UUID.randomUUID().toString();

  // Get splits
  JobConf job = new JobConf(conf);
  job.set(LlapBaseInputFormat.URL_KEY, url);
  job.set(LlapBaseInputFormat.USER_KEY, user);
  job.set(LlapBaseInputFormat.PWD_KEY, pwd);
  job.set(LlapBaseInputFormat.QUERY_KEY, query);
  job.set(LlapBaseInputFormat.HANDLE_ID, handleId);
  job.set(LlapBaseInputFormat.USE_NEW_SPLIT_FORMAT, "false");

  // Child allocator that backs the Arrow buffers returned by the LLAP record reader.
  BufferAllocator allocator = RootAllocatorFactory.INSTANCE
      .getOrCreateRootAllocator(Long.MAX_VALUE)
      .newChildAllocator(UUID.randomUUID().toString(), 0, Long.MAX_VALUE);

  LlapBaseInputFormat llapBaseInputFormat = new LlapBaseInputFormat(true, allocator);
  InputSplit[] splits = llapBaseInputFormat.getSplits(job, 1);
  assertTrue(splits.length > 0);

  // Read every Arrow batch from every split and collect the rows.
  MultiSet<List<Object>> queryResult = new HashMultiSet<>();
  for (InputSplit split : splits) {
    System.out.println("Processing split " + Arrays.toString(split.getLocations()));
    RecordReader<NullWritable, ArrowWrapperWritable> reader =
        llapBaseInputFormat.getRecordReader(split, job, null);
    ArrowWrapperWritable wrapperWritable = new ArrowWrapperWritable();
    while (reader.next(NullWritable.get(), wrapperWritable)) {
      queryResult.addAll(collectResultFromArrowVector(wrapperWritable));
    }
    reader.close();
  }

  // Release the server-side resources associated with this query handle.
  LlapBaseInputFormat.close(handleId);
  return queryResult;
}
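
The helper collectResultFromArrowVector is not shown on this page. Below is a minimal sketch of what such a helper could look like, assuming it walks the VectorSchemaRoot carried by each ArrowWrapperWritable and reads cells through Arrow's per-vector getObject accessor; the body is an illustration, not the actual Hive test code.

private MultiSet<List<Object>> collectResultFromArrowVector(ArrowWrapperWritable wrapperWritable) {
  // The writable wraps one Arrow batch; read it row by row.
  VectorSchemaRoot root = wrapperWritable.getVectorSchemaRoot();
  MultiSet<List<Object>> rows = new HashMultiSet<>();
  for (int row = 0; row < root.getRowCount(); row++) {
    List<Object> values = new ArrayList<>();
    for (FieldVector vector : root.getFieldVectors()) {
      // getObject returns the Java representation of the cell, or null for a null entry.
      values.add(vector.getObject(row));
    }
    rows.add(values);
  }
  return rows;
}

Collecting rows into a MultiSet keeps the comparison with expected results order-insensitive while still accounting for duplicate rows.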