Search in sources:

Example 1 with LlapBaseInputFormat

Use of org.apache.hadoop.hive.llap.LlapBaseInputFormat in the Apache Hive project.

From the class TestJdbcWithMiniLlapVectorArrowBatch, the method runQueryUsingLlapArrowBatchReader:

private MultiSet<List<Object>> runQueryUsingLlapArrowBatchReader(String query, Map<String, String> extraHiveConfs) throws Exception {
    String url = miniHS2.getJdbcURL();
    if (extraHiveConfs != null) {
        url = url + "?" + extraHiveConfs.entrySet().stream().map(e -> e.getKey() + "=" + e.getValue()).collect(Collectors.joining(";"));
    }
    String user = System.getProperty("user.name");
    String pwd = user;
    String handleId = UUID.randomUUID().toString();
    // Get splits
    JobConf job = new JobConf(conf);
    job.set(LlapBaseInputFormat.URL_KEY, url);
    job.set(LlapBaseInputFormat.USER_KEY, user);
    job.set(LlapBaseInputFormat.PWD_KEY, pwd);
    job.set(LlapBaseInputFormat.QUERY_KEY, query);
    job.set(LlapBaseInputFormat.HANDLE_ID, handleId);
    job.set(LlapBaseInputFormat.USE_NEW_SPLIT_FORMAT, "false");
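    // Arrow mode: the boolean constructor flag below requests Arrow-formatted batches,
    // and the reader allocates its buffers from this child allocator (effectively unbounded here).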
    BufferAllocator allocator = RootAllocatorFactory.INSTANCE.getOrCreateRootAllocator(Long.MAX_VALUE).newChildAllocator(UUID.randomUUID().toString(), 0, Long.MAX_VALUE);
    LlapBaseInputFormat llapBaseInputFormat = new LlapBaseInputFormat(true, allocator);
    InputSplit[] splits = llapBaseInputFormat.getSplits(job, 1);
    assertTrue(splits.length > 0);
    MultiSet<List<Object>> queryResult = new HashMultiSet<>();
    for (InputSplit split : splits) {
        System.out.println("Processing split " + Arrays.toString(split.getLocations()));
        RecordReader<NullWritable, ArrowWrapperWritable> reader = llapBaseInputFormat.getRecordReader(split, job, null);
        ArrowWrapperWritable wrapperWritable = new ArrowWrapperWritable();
        while (reader.next(NullWritable.get(), wrapperWritable)) {
            queryResult.addAll(collectResultFromArrowVector(wrapperWritable));
        }
        reader.close();
    }
    LlapBaseInputFormat.close(handleId);
    return queryResult;
}
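The helper collectResultFromArrowVector is called above but not shown. Below is a minimal sketch of what such a helper could look like, assuming the batch is exposed through ArrowWrapperWritable#getVectorSchemaRoot and that VectorSchemaRoot is org.apache.arrow.vector.VectorSchemaRoot; only the method name comes from the call site, so the body is an illustration, not the Hive test's actual implementation:

private List<List<Object>> collectResultFromArrowVector(ArrowWrapperWritable wrapperWritable) {
    // One ArrowWrapperWritable wraps one Arrow batch (a VectorSchemaRoot);
    // each FieldVector holds one column of that batch.
    VectorSchemaRoot root = wrapperWritable.getVectorSchemaRoot();
    List<FieldVector> columns = root.getFieldVectors();
    List<List<Object>> rows = new ArrayList<>();
    for (int rowIndex = 0; rowIndex < root.getRowCount(); rowIndex++) {
        List<Object> row = new ArrayList<>(columns.size());
        for (FieldVector column : columns) {
            // getObject performs per-type decoding and returns null for null slots.
            row.add(column.getObject(rowIndex));
        }
        rows.add(row);
    }
    return rows;
}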
Also used:

Arrays (java.util.Arrays)
NullWritable (org.apache.hadoop.io.NullWritable)
BeforeClass (org.junit.BeforeClass)
ArrowWrapperWritable (org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable)
ConfVars (org.apache.hadoop.hive.conf.HiveConf.ConfVars)
LLAP_EXTERNAL_CLIENT_USE_HYBRID_CALENDAR (org.apache.hadoop.hive.conf.HiveConf.ConfVars.LLAP_EXTERNAL_CLIENT_USE_HYBRID_CALENDAR)
RootAllocatorFactory (org.apache.hadoop.hive.ql.io.arrow.RootAllocatorFactory)
ArrayList (java.util.ArrayList)
Row (org.apache.hadoop.hive.llap.Row)
SQLException (java.sql.SQLException)
Lists (com.google.common.collect.Lists)
Map (java.util.Map)
InputFormat (org.apache.hadoop.mapred.InputFormat)
BufferAllocator (org.apache.arrow.memory.BufferAllocator)
FieldVector (org.apache.arrow.vector.FieldVector)
CalendarUtils (org.apache.hadoop.hive.common.type.CalendarUtils)
ImmutableMap (com.google.common.collect.ImmutableMap)
HashMultiSet (org.apache.commons.collections4.multiset.HashMultiSet)
HiveConf (org.apache.hadoop.hive.conf.HiveConf)
Assert.assertTrue (org.junit.Assert.assertTrue)
Test (org.junit.Test)
UUID (java.util.UUID)
LlapArrowRowInputFormat (org.apache.hadoop.hive.llap.LlapArrowRowInputFormat)
HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT (org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_PARQUET_DATE_PROLEPTIC_GREGORIAN_DEFAULT)
Collectors (java.util.stream.Collectors)
JobConf (org.apache.hadoop.mapred.JobConf)
List (java.util.List)
Ignore (org.junit.Ignore)
InputSplit (org.apache.hadoop.mapred.InputSplit)
Statement (java.sql.Statement)
RecordReader (org.apache.hadoop.mapred.RecordReader)
HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT (org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT)
MultiSet (org.apache.commons.collections4.MultiSet)
Assert.assertEquals (org.junit.Assert.assertEquals)
LlapBaseInputFormat (org.apache.hadoop.hive.llap.LlapBaseInputFormat)

Example 2 with LlapBaseInputFormat

Use of org.apache.hadoop.hive.llap.LlapBaseInputFormat in the Apache Hive project.

From the class TestJdbcGenericUDTFGetSplits, the method testDecimalPrecisionAndScale:

@Test
public void testDecimalPrecisionAndScale() throws Exception {
    try (Statement stmt = hs2Conn.createStatement()) {
        stmt.execute("CREATE TABLE decimal_test_table(decimal_col DECIMAL(6,2))");
        stmt.execute("INSERT INTO decimal_test_table VALUES(2507.92)");
        ResultSet rs = stmt.executeQuery("SELECT * FROM decimal_test_table");
        assertTrue(rs.next());
        rs.close();
        String url = miniHS2.getJdbcURL();
        String user = System.getProperty("user.name");
        String pwd = user;
        String handleId = UUID.randomUUID().toString();
        String sql = "SELECT avg(decimal_col)/3 FROM decimal_test_table";
        // make request through llap-ext-client
        JobConf job = new JobConf(conf);
        job.set(LlapBaseInputFormat.URL_KEY, url);
        job.set(LlapBaseInputFormat.USER_KEY, user);
        job.set(LlapBaseInputFormat.PWD_KEY, pwd);
        job.set(LlapBaseInputFormat.QUERY_KEY, sql);
        job.set(LlapBaseInputFormat.HANDLE_ID, handleId);
        LlapBaseInputFormat llapBaseInputFormat = new LlapBaseInputFormat();
        // Schema split: the first split returned describes the result schema.
        LlapInputSplit schemaSplit = (LlapInputSplit) llapBaseInputFormat.getSplits(job, 0)[0];
        assertNotNull(schemaSplit);
        FieldDesc fieldDesc = schemaSplit.getSchema().getColumns().get(0);
        DecimalTypeInfo type = (DecimalTypeInfo) fieldDesc.getTypeInfo();
        assertEquals(12, type.getPrecision());
        assertEquals(8, type.scale());
        LlapBaseInputFormat.close(handleId);
    }
}
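The asserted DECIMAL(12,8) follows from Hive's decimal type-promotion rules; a sketch of the arithmetic, assuming the standard rules in Hive's decimal precision/scale specification: avg over a DECIMAL(6,2) column widens the result by four digits of precision and scale to DECIMAL(10,6), and dividing that by the literal 3, typed as DECIMAL(1,0), gives scale = max(6, s1 + p2 + 1) = max(6, 6 + 1 + 1) = 8 and precision = p1 - s1 + s2 + scale = 10 - 6 + 0 + 8 = 12.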
Also used:

DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)
LlapBaseInputFormat (org.apache.hadoop.hive.llap.LlapBaseInputFormat)
LlapInputSplit (org.apache.hadoop.hive.llap.LlapInputSplit)
Statement (java.sql.Statement)
ResultSet (java.sql.ResultSet)
JobConf (org.apache.hadoop.mapred.JobConf)
FieldDesc (org.apache.hadoop.hive.llap.FieldDesc)
Test (org.junit.Test)

Aggregations

Statement (java.sql.Statement): 2
LlapBaseInputFormat (org.apache.hadoop.hive.llap.LlapBaseInputFormat): 2
JobConf (org.apache.hadoop.mapred.JobConf): 2
Test (org.junit.Test): 2
ImmutableMap (com.google.common.collect.ImmutableMap): 1
Lists (com.google.common.collect.Lists): 1
ResultSet (java.sql.ResultSet): 1
SQLException (java.sql.SQLException): 1
ArrayList (java.util.ArrayList): 1
Arrays (java.util.Arrays): 1
List (java.util.List): 1
Map (java.util.Map): 1
UUID (java.util.UUID): 1
Collectors (java.util.stream.Collectors): 1
BufferAllocator (org.apache.arrow.memory.BufferAllocator): 1
FieldVector (org.apache.arrow.vector.FieldVector): 1
MultiSet (org.apache.commons.collections4.MultiSet): 1
HashMultiSet (org.apache.commons.collections4.multiset.HashMultiSet): 1
CalendarUtils (org.apache.hadoop.hive.common.type.CalendarUtils): 1
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 1
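Both examples share the same lifecycle: populate a JobConf with the JDBC URL, credentials, query, and a unique handle; fetch splits; read each split with a record reader; then close the handle to release server-side resources. Below is a minimal row-oriented sketch of that pattern using LlapRowInputFormat, Hive's row-based counterpart to the Arrow readers above; the connection values and table name are placeholders:

import java.util.UUID;

import org.apache.hadoop.hive.llap.LlapBaseInputFormat;
import org.apache.hadoop.hive.llap.LlapRowInputFormat;
import org.apache.hadoop.hive.llap.Row;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;

public class LlapExternalClientSketch {
    public static void main(String[] args) throws Exception {
        String handleId = UUID.randomUUID().toString();
        JobConf job = new JobConf();
        // Placeholder connection values; point these at a real HiveServer2.
        job.set(LlapBaseInputFormat.URL_KEY, "jdbc:hive2://localhost:10000/default");
        job.set(LlapBaseInputFormat.USER_KEY, "hive");
        job.set(LlapBaseInputFormat.PWD_KEY, "hive");
        job.set(LlapBaseInputFormat.QUERY_KEY, "SELECT * FROM some_table");
        job.set(LlapBaseInputFormat.HANDLE_ID, handleId);

        LlapRowInputFormat inputFormat = new LlapRowInputFormat();
        try {
            // numSplits is a hint; the server decides the actual split count.
            InputSplit[] splits = inputFormat.getSplits(job, 1);
            for (InputSplit split : splits) {
                RecordReader<NullWritable, Row> reader = inputFormat.getRecordReader(split, job, null);
                Row row = reader.createValue();
                while (reader.next(NullWritable.get(), row)) {
                    // Columns are addressed by index per the split's schema.
                    System.out.println(row.getValue(0));
                }
                reader.close();
            }
        } finally {
            // Releases the server-side resources tied to this query handle.
            LlapBaseInputFormat.close(handleId);
        }
    }
}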