Usage examples of KuduInputFormat.KuduInputSplit in project hive by apache

Example 1 with KuduInputSplit

Use of org.apache.hadoop.hive.kudu.KuduInputFormat.KuduInputSplit in project hive by apache.

From the class TestKuduInputFormat, method testAllColumns. The test projects every column of the test table through the input format and verifies the single expected row; a sketch of the base configuration the test assumes follows the example.

@Test
public void testAllColumns() throws Exception {
    KuduInputFormat input = new KuduInputFormat();
    JobConf jobConf = new JobConf(BASE_CONF);
    // Project every column of the test table.
    String columnsStr = SCHEMA.getColumns().stream()
        .map(ColumnSchema::getName)
        .collect(Collectors.joining(","));
    jobConf.set(serdeConstants.LIST_COLUMNS, columnsStr);
    InputSplit[] splits = input.getSplits(jobConf, 1);
    assertEquals(1, splits.length);
    KuduInputSplit split = (KuduInputSplit) splits[0];
    KuduRecordReader reader = (KuduRecordReader) input.getRecordReader(split, jobConf, null);
    // Exactly one row is expected; the verfiyRow helper checks every column value.
    assertTrue(reader.nextKeyValue());
    RowResult value = reader.getCurrentValue().getRowResult();
    verfiyRow(value);
    assertFalse(reader.nextKeyValue());
}
Also used: RowResult (org.apache.kudu.client.RowResult), KuduRecordReader (org.apache.hadoop.hive.kudu.KuduInputFormat.KuduRecordReader), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), KuduInputSplit (org.apache.hadoop.hive.kudu.KuduInputFormat.KuduInputSplit), Test (org.junit.Test)
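The BASE_CONF referenced above is built elsewhere in TestKuduInputFormat and is not part of the snippet. A minimal sketch of what such a base configuration could look like, assuming the property names kudu.master_addresses and kudu.table_name (these key names and the helper method are assumptions, not taken from the snippet):

// Hypothetical sketch: assembling a base JobConf for KuduInputFormat.
// The property keys below are assumed names; the real BASE_CONF is defined in the test class.
import org.apache.hadoop.mapred.JobConf;

public static JobConf buildBaseConf(String masterAddresses, String tableName) {
    JobConf conf = new JobConf();
    conf.set("kudu.master_addresses", masterAddresses); // assumed key name
    conf.set("kudu.table_name", tableName);             // assumed key name
    return conf;
}

The tests would then clone this per case, as the examples here do with new JobConf(BASE_CONF).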

Example 2 with KuduInputSplit

Use of org.apache.hadoop.hive.kudu.KuduInputFormat.KuduInputSplit in project hive by apache.

From the class TestKuduInputFormat, method testProjection. The test requests only the "bool" and "key" columns and verifies that the returned row contains exactly those two columns; a sketch of the equivalent projection on the raw Kudu client follows the example.

@Test
public void testProjection() throws Exception {
    KuduInputFormat input = new KuduInputFormat();
    JobConf jobConf = new JobConf(BASE_CONF);
    // Project only the "bool" and "key" columns.
    jobConf.set(serdeConstants.LIST_COLUMNS, "bool,key");
    InputSplit[] splits = input.getSplits(jobConf, 1);
    assertEquals(1, splits.length);
    KuduInputSplit split = (KuduInputSplit) splits[0];
    KuduRecordReader reader = (KuduRecordReader) input.getRecordReader(split, jobConf, null);
    assertTrue(reader.nextKeyValue());
    RowResult value = reader.getCurrentValue().getRowResult();
    // The returned row should contain exactly the two projected columns, in projection order.
    assertEquals(2, value.getSchema().getColumnCount());
    assertTrue(value.getBoolean(0));
    assertEquals((byte) 1, value.getByte(1));
    assertFalse(reader.nextKeyValue());
}
Also used: RowResult (org.apache.kudu.client.RowResult), KuduRecordReader (org.apache.hadoop.hive.kudu.KuduInputFormat.KuduRecordReader), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), KuduInputSplit (org.apache.hadoop.hive.kudu.KuduInputFormat.KuduInputSplit), Test (org.junit.Test)
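For comparison, the same two-column projection expressed directly against the Kudu client API looks roughly like the sketch below. This is an illustration with the raw client, not code from the Hive test; the client and table variables are assumed to come from the same test harness:

// Raw Kudu client sketch: scan only the "bool" and "key" columns.
// client is an org.apache.kudu.client.KuduClient, table an org.apache.kudu.client.KuduTable.
KuduScanner scanner = client.newScannerBuilder(table)
    .setProjectedColumnNames(Arrays.asList("bool", "key"))
    .build();
while (scanner.hasMoreRows()) {
    for (RowResult result : scanner.nextRows()) {
        // Each RowResult contains only the projected columns.
        System.out.println(result.getBoolean("bool") + ", " + result.getByte("key"));
    }
}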

Example 3 with KuduInputSplit

Use of org.apache.hadoop.hive.kudu.KuduInputFormat.KuduInputSplit in project hive by apache.

From the class TestKuduInputFormat, method testPredicate. The test inserts a second row, then pushes an equality predicate for each supported column down to the input format and verifies that only the original row is returned; a sketch of the Kudu-native predicate this is expected to translate to follows the example.

@Test
public void testPredicate() throws Exception {
    // Insert a second test row that will be filtered out.
    KuduTable table = harness.getClient().openTable(TABLE_NAME);
    KuduSession session = harness.getClient().newSession();
    Insert insert = table.newInsert();
    PartialRow row = insert.getRow();
    row.addByte("key", (byte) 2);
    row.addShort("int16", (short) 2);
    row.addInt("int32", 2);
    row.addLong("int64", 2L);
    row.addBoolean("bool", false);
    row.addFloat("float", 2.2f);
    row.addDouble("double", 2.2d);
    row.addString("string", "two");
    row.addBinary("binary", "two".getBytes(UTF_8));
    row.addTimestamp("timestamp", new Timestamp(NOW_MS + 1));
    row.addDecimal("decimal", new BigDecimal("2.222"));
    row.setNull("null");
    // Not setting the "default" column.
    session.apply(insert);
    session.close();
    KuduInputFormat input = new KuduInputFormat();
    // Test an equality predicate for each column.
    for (ColumnSchema col : SCHEMA.getColumns()) {
        // Skip the "null", "default", and "binary" columns; binary predicates are not supported (HIVE-11370).
        if (col.getName().equals("null") || col.getName().equals("default") || col.getName().equals("binary")) {
            continue;
        }
        JobConf jobConf = new JobConf(BASE_CONF);
        String columnsStr = SCHEMA.getColumns().stream()
            .map(ColumnSchema::getName)
            .collect(Collectors.joining(","));
        jobConf.set(serdeConstants.LIST_COLUMNS, columnsStr);
        // Build a Hive expression tree for "col = <expected value>" and serialize it into
        // the job configuration so the input format can push it down to Kudu.
        PrimitiveTypeInfo typeInfo = toHiveType(col.getType(), col.getTypeAttributes());
        ExprNodeDesc colExpr = new ExprNodeColumnDesc(typeInfo, col.getName(), null, false);
        ExprNodeDesc constExpr = new ExprNodeConstantDesc(typeInfo, ROW.getObject(col.getName()));
        List<ExprNodeDesc> children = Lists.newArrayList();
        children.add(colExpr);
        children.add(constExpr);
        ExprNodeGenericFuncDesc predicateExpr = new ExprNodeGenericFuncDesc(typeInfo, new GenericUDFOPEqual(), children);
        String filterExpr = SerializationUtilities.serializeExpression(predicateExpr);
        jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
        InputSplit[] splits = input.getSplits(jobConf, 1);
        assertEquals(1, splits.length);
        KuduInputSplit split = (KuduInputSplit) splits[0];
        KuduRecordReader reader = (KuduRecordReader) input.getRecordReader(split, jobConf, null);
        // Only the original row should match; the second row must be filtered out.
        assertTrue(reader.nextKeyValue());
        RowResult value = reader.getCurrentValue().getRowResult();
        verfiyRow(value);
        assertFalse("Extra row on column: " + col.getName(), reader.nextKeyValue());
    }
}
Also used: ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), KuduSession (org.apache.kudu.client.KuduSession), PartialRow (org.apache.kudu.client.PartialRow), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), KuduTable (org.apache.kudu.client.KuduTable), ColumnSchema (org.apache.kudu.ColumnSchema), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), Insert (org.apache.kudu.client.Insert), Timestamp (java.sql.Timestamp), BigDecimal (java.math.BigDecimal), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), RowResult (org.apache.kudu.client.RowResult), KuduRecordReader (org.apache.hadoop.hive.kudu.KuduInputFormat.KuduRecordReader), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), GenericUDFOPEqual (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), KuduInputSplit (org.apache.hadoop.hive.kudu.KuduInputFormat.KuduInputSplit), Test (org.junit.Test)
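The serialized filter expression set on TableScanDesc.FILTER_EXPR_CONF_STR above is what the input format is expected to translate into a Kudu-side scan predicate. For illustration, an equivalent equality predicate written directly with the Kudu client API looks roughly like this sketch (standard client calls, not code from the Hive test; the column name and value are examples):

// Sketch: Kudu-native equality predicate on the "int32" column.
// table is an open org.apache.kudu.client.KuduTable, client the matching KuduClient.
ColumnSchema int32Col = table.getSchema().getColumn("int32");
KuduPredicate equalsTwo =
    KuduPredicate.newComparisonPredicate(int32Col, KuduPredicate.ComparisonOp.EQUAL, 2);
KuduScanner scanner = client.newScannerBuilder(table)
    .addPredicate(equalsTwo)
    .build();
// Only rows whose int32 column equals 2 are returned by the scan.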

Aggregations

KuduInputSplit (org.apache.hadoop.hive.kudu.KuduInputFormat.KuduInputSplit): 3 uses
KuduRecordReader (org.apache.hadoop.hive.kudu.KuduInputFormat.KuduRecordReader): 3 uses
InputSplit (org.apache.hadoop.mapred.InputSplit): 3 uses
JobConf (org.apache.hadoop.mapred.JobConf): 3 uses
RowResult (org.apache.kudu.client.RowResult): 3 uses
Test (org.junit.Test): 3 uses
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString): 2 uses
BigDecimal (java.math.BigDecimal): 1 use
Timestamp (java.sql.Timestamp): 1 use
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 1 use
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 1 use
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 1 use
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 1 use
GenericUDFOPEqual (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual): 1 use
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 1 use
ColumnSchema (org.apache.kudu.ColumnSchema): 1 use
Insert (org.apache.kudu.client.Insert): 1 use
KuduSession (org.apache.kudu.client.KuduSession): 1 use
KuduTable (org.apache.kudu.client.KuduTable): 1 use
PartialRow (org.apache.kudu.client.PartialRow): 1 use