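TypeInfoFactory.getCharTypeInfo(int length) returns the TypeInfo describing the Hive type char(length); the factory caches type infos, so repeated calls with the same length return the same instance. A minimal sketch of the call itself (variable names are illustrative):

CharTypeInfo charType = TypeInfoFactory.getCharTypeInfo(10);
charType.getTypeName();  // "char(10)"
charType.getLength();    // 10

The examples below show how the method is used in the Apache Hive code base.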

Example 1 with TypeInfoFactory.getCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo in project hive by apache.

From class GenericUDFLower, method initialize:

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
        throw new UDFArgumentLengthException("LOWER requires 1 argument, got " + arguments.length);
    }
    if (arguments[0].getCategory() != Category.PRIMITIVE) {
        throw new UDFArgumentException("LOWER only takes primitive types, got " + arguments[0].getTypeName());
    }
    argumentOI = (PrimitiveObjectInspector) arguments[0];
    stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
    PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
    ObjectInspector outputOI = null;
    BaseCharTypeInfo typeInfo;
    switch (inputType) {
        case CHAR:
            // Lowercasing never changes the length, so the return type keeps
            // the same char length as the input.
            returnType = inputType;
            typeInfo = TypeInfoFactory.getCharTypeInfo(
                GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
            outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            break;
        case VARCHAR:
            // Likewise, the return type keeps the same varchar length as the input.
            returnType = inputType;
            typeInfo = TypeInfoFactory.getVarcharTypeInfo(
                GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
            outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            break;
        default:
            // Every other primitive type is handled as a plain string.
            returnType = PrimitiveCategory.STRING;
            outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
            break;
    }
    returnHelper = new GenericUDFUtils.StringHelper(returnType);
    return outputOI;
}
Also used:
UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException)
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)
BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)
UDFArgumentLengthException (org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException)
PrimitiveObjectInspectorConverter (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter)
StringConverter (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter)
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)
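For context, a minimal sketch of how this initialize method might be driven (an assumed test harness, not part of the Hive source above; exception handling elided):

// Build a char(10) writable inspector via getCharTypeInfo and hand it to LOWER.
GenericUDFLower udf = new GenericUDFLower();
ObjectInspector charOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
    TypeInfoFactory.getCharTypeInfo(10));
ObjectInspector outputOI = udf.initialize(new ObjectInspector[] { charOI });
// outputOI should describe char(10): the CHAR branch reuses the input's fixed size.
Object result = udf.evaluate(new GenericUDF.DeferredObject[] {
    new GenericUDF.DeferredJavaObject(new HiveCharWritable(new HiveChar("ABC", 10))) });
// result is expected to be a HiveCharWritable holding "abc".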

Example 2 with TypeInfoFactory.getCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo in project hive by apache.

From class TestParquetFilterPredicate, method testFilterComplexTypes:

/**
 * Check that the converted filter predicate is null when unsupported types are included.
 */
@Test
public void testFilterComplexTypes() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd()
        .lessThan("x", PredicateLeaf.Type.DATE, Date.valueOf("1970-1-11"))
        .lessThanEquals("y", PredicateLeaf.Type.STRING, new HiveChar("hi", 10).toString())
        .equals("z", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("1.0"))
        .end().build();
    MessageType schema = MessageTypeParser.parseMessageType(
        "message test { required int32 x; required binary y; required binary z;}");
    Map<String, TypeInfo> columnTypes = new HashMap<>();
    columnTypes.put("x", TypeInfoFactory.getPrimitiveTypeInfo("date"));
    columnTypes.put("y", TypeInfoFactory.getCharTypeInfo(10));
    columnTypes.put("z", TypeInfoFactory.getDecimalTypeInfo(4, 2));
    // DATE and DECIMAL leaves are not convertible, so the whole predicate is null.
    assertEquals(null, ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes));
    sarg = SearchArgumentFactory.newBuilder().startNot().startOr()
        .isNull("x", PredicateLeaf.Type.LONG)
        .between("y", PredicateLeaf.Type.DECIMAL,
            new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0"))
        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
        .nullSafeEquals("a", PredicateLeaf.Type.STRING, new HiveVarchar("stinger", 100).toString())
        .end().end().build();
    schema = MessageTypeParser.parseMessageType(
        "message test { optional int32 x; required binary y; required int32 z; optional binary a;}");
    columnTypes = new HashMap<>();
    columnTypes.put("x", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    columnTypes.put("y", TypeInfoFactory.getDecimalTypeInfo(4, 2));
    columnTypes.put("z", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    columnTypes.put("a", TypeInfoFactory.getCharTypeInfo(100));
    assertEquals(null, ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes));
}
Also used:
HashMap (java.util.HashMap)
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)
HiveChar (org.apache.hadoop.hive.common.type.HiveChar)
SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
MessageType (org.apache.parquet.schema.MessageType)
Test (org.junit.Test)
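The null result comes from the unsupported leaf types (DATE and DECIMAL here): if any leaf cannot be translated, ParquetFilterPredicateConverter rejects the whole predicate. A minimal sketch of the same behavior with a single unsupported leaf (a hypothetical test, same imports as above):

SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .equals("z", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("1.0"))
    .build();
MessageType schema = MessageTypeParser.parseMessageType("message test {required binary z;}");
Map<String, TypeInfo> columnTypes = new HashMap<>();
columnTypes.put("z", TypeInfoFactory.getDecimalTypeInfo(4, 2));
// A lone DECIMAL leaf should be enough to make the converted predicate null.
assertEquals(null, ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes));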

Example 3 with TypeInfoFactory.getCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo in project hive by apache.

From class TestParquetFilterPredicate, method testFilterCharColumnIn:

@Test
public void testFilterCharColumnIn() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .in("a", PredicateLeaf.Type.STRING,
            new HiveChar("cherry", 10).toString(),
            new HiveChar("orange", 10).toString())
        .build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {required binary a;}");
    Map<String, TypeInfo> columnTypes = new HashMap<>();
    columnTypes.put("a", TypeInfoFactory.getCharTypeInfo(10));
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
    String expected = "or(eq(a, Binary{\"cherry\"}), eq(a, Binary{\"orange\"}))";
    assertEquals(expected, p.toString());
}
Also used:
HashMap (java.util.HashMap)
HiveChar (org.apache.hadoop.hive.common.type.HiveChar)
SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)
FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
MessageType (org.apache.parquet.schema.MessageType)
Test (org.junit.Test)
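Note that the expected binaries are the unpadded "cherry" and "orange" even though the literals were built as char(10), which pads with trailing spaces. Registering column "a" as char(10) via getCharTypeInfo is what lets the converter strip that pad; a short illustration of the assumption at play:

// HiveChar pads to the declared length (assumed behavior, consistent with the
// expected strings above), so the literal handed to the sarg is "cherry    ".
String lit = new HiveChar("cherry", 10).toString();
// With "a" registered as char(10), the converter trims the trailing pad and
// compares against Binary{"cherry"}.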

Example 4 with TypeInfoFactory.getCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo in project hive by apache.

From class TestParquetFilterPredicate, method testFilterCharColumnWhiteSpacePrefix:

@Test
public void testFilterCharColumnWhiteSpacePrefix() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd()
        .lessThan("a", PredicateLeaf.Type.STRING, new HiveChar("  apple", 10).toString())
        .lessThanEquals("b", PredicateLeaf.Type.STRING, new HiveChar("  pear", 10).toString())
        .equals("c", PredicateLeaf.Type.STRING, new HiveChar("  orange", 10).toString())
        .nullSafeEquals("d", PredicateLeaf.Type.STRING, new HiveChar(" pineapple", 10).toString())
        .end().build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {" + " required binary a; required binary b;" + " required binary c; required binary d;}");
    Map<String, TypeInfo> columnTypes = new HashMap<>();
    columnTypes.put("a", TypeInfoFactory.getCharTypeInfo(10));
    columnTypes.put("b", TypeInfoFactory.getCharTypeInfo(10));
    columnTypes.put("c", TypeInfoFactory.getCharTypeInfo(10));
    columnTypes.put("d", TypeInfoFactory.getCharTypeInfo(10));
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
    String expected = "and(and(and(" + "lt(a, Binary{\"  apple\"}), " + "lteq(b, Binary{\"  pear\"})), " + "eq(c, Binary{\"  orange\"})), " + "eq(d, Binary{\" pineapple\"}))";
    assertEquals(expected, p.toString());
}
Also used:
HashMap (java.util.HashMap)
HiveChar (org.apache.hadoop.hive.common.type.HiveChar)
SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)
FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
MessageType (org.apache.parquet.schema.MessageType)
Test (org.junit.Test)
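Only the trailing pad added by char(n) is stripped; leading spaces stay significant, which is exactly what the expected string checks. A sketch of the literal involved (assumed padding behavior, consistent with the expected string above):

String lit = new HiveChar("  apple", 10).toString();
// lit is "  apple   ": left untouched, right-padded to 10 characters.
// After conversion the prefix survives: lt(a, Binary{"  apple"}).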

Example 5 with TypeInfoFactory.getCharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo in project hive by apache.

From class TestParquetFilterPredicate, method testFilterCharColumnIsNull:

@Test
public void testFilterCharColumnIsNull() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder().isNull("a", PredicateLeaf.Type.STRING).build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {required binary a;}");
    Map<String, TypeInfo> columnTypes = new HashMap<>();
    columnTypes.put("a", TypeInfoFactory.getCharTypeInfo(10));
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
    String expected = "eq(a, null)";
    assertEquals(expected, p.toString());
}
Also used:
HashMap (java.util.HashMap)
SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)
FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
MessageType (org.apache.parquet.schema.MessageType)
Test (org.junit.Test)
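Since isNull carries no literal, the char(10) registration does not influence the conversion here; the leaf maps straight to eq(a, null). For completeness, a sketch of the negated form (a hypothetical continuation of the test above, assuming the converter wraps the child with Parquet's not):

sarg = SearchArgumentFactory.newBuilder().startNot()
    .isNull("a", PredicateLeaf.Type.STRING)
    .end().build();
p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
// p.toString() is expected to be "not(eq(a, null))".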

Aggregations

Test (org.junit.Test): 21
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 17
HashMap (java.util.HashMap): 14
SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument): 14
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 14
MessageType (org.apache.parquet.schema.MessageType): 14
FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 12
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 10
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 9
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 7
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 7
HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable): 7
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 4
UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException): 3
HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable): 3
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 3
BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo): 3
Text (org.apache.hadoop.io.Text): 3
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 2
UDFArgumentLengthException (org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException): 2