Search in sources :

Example 6 with ArrowWrapperWritable

Use of org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable in the Apache Hive project.

The class TestJdbcWithMiniLlapVectorArrowBatch defines the method runQueryUsingLlapArrowBatchReader.

/**
 * Runs {@code query} through the LLAP external client using the Arrow batch
 * record reader and collects every row into a multiset (order-insensitive,
 * duplicate-preserving) for comparison in tests.
 *
 * @param query          the SQL query to execute via LLAP splits
 * @param extraHiveConfs optional extra Hive config key/value pairs appended to
 *                       the JDBC URL as {@code ?k1=v1;k2=v2}; may be null
 * @return all result rows, each row as a {@code List<Object>} of column values
 * @throws Exception on any JDBC, split-generation, or reader failure
 */
private MultiSet<List<Object>> runQueryUsingLlapArrowBatchReader(String query, Map<String, String> extraHiveConfs) throws Exception {
    String url = miniHS2.getJdbcURL();
    if (extraHiveConfs != null) {
        url = url + "?" + extraHiveConfs.entrySet().stream()
                .map(e -> e.getKey() + "=" + e.getValue())
                .collect(Collectors.joining(";"));
    }
    String user = System.getProperty("user.name");
    String pwd = user;
    String handleId = UUID.randomUUID().toString();
    // Configure the LLAP external-client split request. Use the old split
    // format so the Arrow batch reader path below is exercised.
    JobConf job = new JobConf(conf);
    job.set(LlapBaseInputFormat.URL_KEY, url);
    job.set(LlapBaseInputFormat.USER_KEY, user);
    job.set(LlapBaseInputFormat.PWD_KEY, pwd);
    job.set(LlapBaseInputFormat.QUERY_KEY, query);
    job.set(LlapBaseInputFormat.HANDLE_ID, handleId);
    job.set(LlapBaseInputFormat.USE_NEW_SPLIT_FORMAT, "false");
    // Child allocator with an unbounded limit; a unique name avoids clashes
    // when this helper runs multiple times in one JVM.
    BufferAllocator allocator = RootAllocatorFactory.INSTANCE.getOrCreateRootAllocator(Long.MAX_VALUE).newChildAllocator(UUID.randomUUID().toString(), 0, Long.MAX_VALUE);
    LlapBaseInputFormat llapBaseInputFormat = new LlapBaseInputFormat(true, allocator);
    InputSplit[] splits = llapBaseInputFormat.getSplits(job, 1);
    assertTrue(splits.length > 0);
    MultiSet<List<Object>> queryResult = new HashMultiSet<>();
    try {
        for (InputSplit split : splits) {
            System.out.println("Processing split " + Arrays.toString(split.getLocations()));
            RecordReader<NullWritable, ArrowWrapperWritable> reader = llapBaseInputFormat.getRecordReader(split, job, null);
            // try/finally so a failure mid-read does not leak the reader
            // (mapred RecordReader does not implement AutoCloseable).
            try {
                ArrowWrapperWritable wrapperWritable = new ArrowWrapperWritable();
                while (reader.next(NullWritable.get(), wrapperWritable)) {
                    queryResult.addAll(collectResultFromArrowVector(wrapperWritable));
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        // Always release the server-side LLAP handle, even if a reader threw;
        // otherwise the handle (and its resources) leaks for the test run.
        LlapBaseInputFormat.close(handleId);
    }
    return queryResult;
}
Also used : Arrays(java.util.Arrays) NullWritable(org.apache.hadoop.io.NullWritable) BeforeClass(org.junit.BeforeClass) ArrowWrapperWritable(org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LLAP_EXTERNAL_CLIENT_USE_HYBRID_CALENDAR(org.apache.hadoop.hive.conf.HiveConf.ConfVars.LLAP_EXTERNAL_CLIENT_USE_HYBRID_CALENDAR) RootAllocatorFactory(org.apache.hadoop.hive.ql.io.arrow.RootAllocatorFactory) ArrayList(java.util.ArrayList) Row(org.apache.hadoop.hive.llap.Row) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) Map(java.util.Map) InputFormat(org.apache.hadoop.mapred.InputFormat) BufferAllocator(org.apache.arrow.memory.BufferAllocator) FieldVector(org.apache.arrow.vector.FieldVector) CalendarUtils(org.apache.hadoop.hive.common.type.CalendarUtils) ImmutableMap(com.google.common.collect.ImmutableMap) HashMultiSet(org.apache.commons.collections4.multiset.HashMultiSet) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) UUID(java.util.UUID) LlapArrowRowInputFormat(org.apache.hadoop.hive.llap.LlapArrowRowInputFormat) HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT) Collectors(java.util.stream.Collectors) JobConf(org.apache.hadoop.mapred.JobConf) List(java.util.List) Ignore(org.junit.Ignore) InputSplit(org.apache.hadoop.mapred.InputSplit) Statement(java.sql.Statement) RecordReader(org.apache.hadoop.mapred.RecordReader) HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT) MultiSet(org.apache.commons.collections4.MultiSet) Assert.assertEquals(org.junit.Assert.assertEquals) LlapBaseInputFormat(org.apache.hadoop.hive.llap.LlapBaseInputFormat) ArrowWrapperWritable(org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable) 
HashMultiSet(org.apache.commons.collections4.multiset.HashMultiSet) NullWritable(org.apache.hadoop.io.NullWritable) BufferAllocator(org.apache.arrow.memory.BufferAllocator) LlapBaseInputFormat(org.apache.hadoop.hive.llap.LlapBaseInputFormat) ArrayList(java.util.ArrayList) List(java.util.List) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit)

Aggregations

ArrowWrapperWritable (org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable)6 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 FieldVector (org.apache.arrow.vector.FieldVector)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 Lists (com.google.common.collect.Lists)1 SQLException (java.sql.SQLException)1 Statement (java.sql.Statement)1 Arrays (java.util.Arrays)1 List (java.util.List)1 Map (java.util.Map)1 UUID (java.util.UUID)1 Collectors (java.util.stream.Collectors)1 BufferAllocator (org.apache.arrow.memory.BufferAllocator)1 ArrowStreamWriter (org.apache.arrow.vector.ipc.ArrowStreamWriter)1 MultiSet (org.apache.commons.collections4.MultiSet)1 HashMultiSet (org.apache.commons.collections4.multiset.HashMultiSet)1 Path (org.apache.hadoop.fs.Path)1 CalendarUtils (org.apache.hadoop.hive.common.type.CalendarUtils)1