Search in sources :

Example 1 with HashMultiSet

Example of using org.apache.commons.collections4.multiset.HashMultiSet in the project hive by apache.

From the class TestJdbcWithMiniLlapVectorArrowBatch, the method runQueryUsingLlapArrowBatchReader:

/**
 * Runs {@code query} through the LLAP external client using the Arrow batch record reader
 * and collects every row of every split into a {@link MultiSet} (order-insensitive,
 * duplicate-preserving), suitable for comparing result sets.
 *
 * @param query          the HiveQL query to execute
 * @param extraHiveConfs optional extra Hive configuration entries appended to the JDBC URL
 *                       as {@code ?k=v;k=v}; may be {@code null}
 * @return a multiset of rows, each row a list of column values read from the Arrow vectors
 * @throws Exception if split generation or reading fails
 */
private MultiSet<List<Object>> runQueryUsingLlapArrowBatchReader(String query, Map<String, String> extraHiveConfs) throws Exception {
    String url = miniHS2.getJdbcURL();
    if (extraHiveConfs != null) {
        // Append extra configs as URL session variables: ?key1=val1;key2=val2
        url = url + "?" + extraHiveConfs.entrySet().stream().map(e -> e.getKey() + "=" + e.getValue()).collect(Collectors.joining(";"));
    }
    String user = System.getProperty("user.name");
    String pwd = user;
    String handleId = UUID.randomUUID().toString();
    // Get splits
    JobConf job = new JobConf(conf);
    job.set(LlapBaseInputFormat.URL_KEY, url);
    job.set(LlapBaseInputFormat.USER_KEY, user);
    job.set(LlapBaseInputFormat.PWD_KEY, pwd);
    job.set(LlapBaseInputFormat.QUERY_KEY, query);
    job.set(LlapBaseInputFormat.HANDLE_ID, handleId);
    job.set(LlapBaseInputFormat.USE_NEW_SPLIT_FORMAT, "false");
    BufferAllocator allocator = RootAllocatorFactory.INSTANCE.getOrCreateRootAllocator(Long.MAX_VALUE).newChildAllocator(UUID.randomUUID().toString(), 0, Long.MAX_VALUE);
    LlapBaseInputFormat llapBaseInputFormat = new LlapBaseInputFormat(true, allocator);
    InputSplit[] splits = llapBaseInputFormat.getSplits(job, 1);
    assertTrue(splits.length > 0);
    MultiSet<List<Object>> queryResult = new HashMultiSet<>();
    // try/finally guarantees the LLAP handle and each reader are released even when
    // getRecordReader/next throws mid-iteration (previously these leaked on failure).
    try {
        for (InputSplit split : splits) {
            System.out.println("Processing split " + Arrays.toString(split.getLocations()));
            RecordReader<NullWritable, ArrowWrapperWritable> reader = llapBaseInputFormat.getRecordReader(split, job, null);
            try {
                ArrowWrapperWritable wrapperWritable = new ArrowWrapperWritable();
                while (reader.next(NullWritable.get(), wrapperWritable)) {
                    queryResult.addAll(collectResultFromArrowVector(wrapperWritable));
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        LlapBaseInputFormat.close(handleId);
    }
    return queryResult;
}
Also used : Arrays(java.util.Arrays) NullWritable(org.apache.hadoop.io.NullWritable) BeforeClass(org.junit.BeforeClass) ArrowWrapperWritable(org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LLAP_EXTERNAL_CLIENT_USE_HYBRID_CALENDAR(org.apache.hadoop.hive.conf.HiveConf.ConfVars.LLAP_EXTERNAL_CLIENT_USE_HYBRID_CALENDAR) RootAllocatorFactory(org.apache.hadoop.hive.ql.io.arrow.RootAllocatorFactory) ArrayList(java.util.ArrayList) Row(org.apache.hadoop.hive.llap.Row) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) Map(java.util.Map) InputFormat(org.apache.hadoop.mapred.InputFormat) BufferAllocator(org.apache.arrow.memory.BufferAllocator) FieldVector(org.apache.arrow.vector.FieldVector) CalendarUtils(org.apache.hadoop.hive.common.type.CalendarUtils) ImmutableMap(com.google.common.collect.ImmutableMap) HashMultiSet(org.apache.commons.collections4.multiset.HashMultiSet) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) UUID(java.util.UUID) LlapArrowRowInputFormat(org.apache.hadoop.hive.llap.LlapArrowRowInputFormat) HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT) Collectors(java.util.stream.Collectors) JobConf(org.apache.hadoop.mapred.JobConf) List(java.util.List) Ignore(org.junit.Ignore) InputSplit(org.apache.hadoop.mapred.InputSplit) Statement(java.sql.Statement) RecordReader(org.apache.hadoop.mapred.RecordReader) HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT) MultiSet(org.apache.commons.collections4.MultiSet) Assert.assertEquals(org.junit.Assert.assertEquals) LlapBaseInputFormat(org.apache.hadoop.hive.llap.LlapBaseInputFormat) ArrowWrapperWritable(org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable) 
HashMultiSet(org.apache.commons.collections4.multiset.HashMultiSet) NullWritable(org.apache.hadoop.io.NullWritable) BufferAllocator(org.apache.arrow.memory.BufferAllocator) LlapBaseInputFormat(org.apache.hadoop.hive.llap.LlapBaseInputFormat) ArrayList(java.util.ArrayList) List(java.util.List) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 2 with HashMultiSet

Example of using org.apache.commons.collections4.multiset.HashMultiSet in the project hive by apache.

From the class TestJdbcWithMiniLlapVectorArrowBatch, the method collectResultFromArrowVector:

/**
 * Transposes one Arrow batch (column-oriented {@code FieldVector}s) into a multiset of
 * row-oriented value lists. Each row's values are obtained via
 * {@link FieldVector#getObject(int)} in column order.
 *
 * @param wrapperWritable the writable wrapping a populated {@code VectorSchemaRoot}
 * @return a multiset of rows; empty if the batch carries no column vectors
 */
private MultiSet<List<Object>> collectResultFromArrowVector(ArrowWrapperWritable wrapperWritable) {
    List<FieldVector> fieldVectors = wrapperWritable.getVectorSchemaRoot().getFieldVectors();
    MultiSet<List<Object>> result = new HashMultiSet<>();
    // Guard: a batch with no columns has no rows; avoids IndexOutOfBoundsException on get(0).
    if (fieldVectors.isEmpty()) {
        return result;
    }
    // All vectors in a batch share the same value count; read it from the first column.
    int valueCount = fieldVectors.get(0).getValueCount();
    for (int recordIndex = 0; recordIndex < valueCount; recordIndex++) {
        List<Object> row = new ArrayList<>();
        for (FieldVector fieldVector : fieldVectors) {
            row.add(fieldVector.getObject(recordIndex));
        }
        result.add(row);
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) FieldVector(org.apache.arrow.vector.FieldVector) ArrayList(java.util.ArrayList) List(java.util.List) HashMultiSet(org.apache.commons.collections4.multiset.HashMultiSet)

Aggregations

ArrayList (java.util.ArrayList)2 List (java.util.List)2 FieldVector (org.apache.arrow.vector.FieldVector)2 HashMultiSet (org.apache.commons.collections4.multiset.HashMultiSet)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 Lists (com.google.common.collect.Lists)1 SQLException (java.sql.SQLException)1 Statement (java.sql.Statement)1 Arrays (java.util.Arrays)1 Map (java.util.Map)1 UUID (java.util.UUID)1 Collectors (java.util.stream.Collectors)1 BufferAllocator (org.apache.arrow.memory.BufferAllocator)1 MultiSet (org.apache.commons.collections4.MultiSet)1 CalendarUtils (org.apache.hadoop.hive.common.type.CalendarUtils)1 HiveConf (org.apache.hadoop.hive.conf.HiveConf)1 ConfVars (org.apache.hadoop.hive.conf.HiveConf.ConfVars)1 HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT (org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT)1 HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT (org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT)1 LLAP_EXTERNAL_CLIENT_USE_HYBRID_CALENDAR (org.apache.hadoop.hive.conf.HiveConf.ConfVars.LLAP_EXTERNAL_CLIENT_USE_HYBRID_CALENDAR)1