Search in sources :

Example 6 with MessageType

use of org.apache.parquet.schema.MessageType in project hive by apache.

the class TestConvertAstToSearchArg method testExpression8.

/**
 * Builds a search argument from an XML-serialized expression that compares two
 * columns with each other ({@code first_name = last_name}) and checks that the
 * result has no predicate leaves, that no Parquet filter predicate is produced
 * for it, and that the search argument's expression prints as
 * {@code YES_NO_NULL}.
 */
@Test
public void testExpression8() throws Exception {
    // XMLDecoder form of: first_name = last_name
    final String serializedExpr =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?> \n"
        + "<java version=\"1.6.0_31\" class=\"java.beans.XMLDecoder\"> \n"
        + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n"
        + "  <void property=\"children\"> \n"
        + "   <object class=\"java.util.ArrayList\"> \n"
        + "    <void method=\"add\"> \n"
        + "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n"
        + "      <void property=\"column\"> \n"
        + "       <string>first_name</string> \n"
        + "      </void> \n"
        + "      <void property=\"tabAlias\"> \n"
        + "       <string>orc_people</string> \n"
        + "      </void> \n"
        + "      <void property=\"typeInfo\"> \n"
        + "       <object id=\"PrimitiveTypeInfo0\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n"
        + "        <void property=\"typeName\"> \n"
        + "         <string>string</string> \n"
        + "        </void> \n"
        + "       </object> \n"
        + "      </void> \n"
        + "     </object> \n"
        + "    </void> \n"
        + "    <void method=\"add\"> \n"
        + "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n"
        + "      <void property=\"column\"> \n"
        + "       <string>last_name</string> \n"
        + "      </void> \n"
        + "      <void property=\"tabAlias\"> \n"
        + "       <string>orc_people</string> \n"
        + "      </void> \n"
        + "      <void property=\"typeInfo\"> \n"
        + "       <object idref=\"PrimitiveTypeInfo0\"/> \n"
        + "      </void> \n"
        + "     </object> \n"
        + "    </void> \n"
        + "   </object> \n"
        + "  </void> \n"
        + "  <void property=\"genericUDF\"> \n"
        + "   <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual\"/> \n"
        + "  </void> \n"
        + "  <void property=\"typeInfo\"> \n"
        + "   <object class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n"
        + "    <void property=\"typeName\"> \n"
        + "     <string>boolean</string> \n"
        + "    </void> \n"
        + "   </object> \n"
        + "  </void> \n"
        + " </object> \n"
        + "</java> ";

    final SearchArgumentImpl searchArg =
        (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(serializedExpr));

    // The column-to-column comparison is expected to yield no predicate leaves.
    final List<PredicateLeaf> predicateLeaves = searchArg.getLeaves();
    assertEquals(0, predicateLeaves.size());

    // Converting against a small two-column schema is expected to give no filter.
    final MessageType parquetSchema = MessageTypeParser.parseMessageType(
        "message test { required int32 id;" + " required binary first_name; }");
    final FilterPredicate parquetFilter =
        ParquetFilterPredicateConverter.toFilterPredicate(searchArg, parquetSchema);
    assertNull(parquetFilter);

    assertEquals("YES_NO_NULL", searchArg.getExpression().toString());
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 7 with MessageType

use of org.apache.parquet.schema.MessageType in project hive by apache.

the class TestDataWritableReadSupport method testGetProjectedSchema2.

/**
 * Projects {@code structCol} with both of its nested fields
 * ({@code structCol.a} and {@code structCol.b}) requested, then checks the
 * projected schema against the conversion of {@code struct<a:int,b:double>}.
 */
@Test
public void testGetProjectedSchema2() throws Exception {
    final String fileSchemaText =
        "message hive_schema {\n"
        + "  optional group structCol {\n"
        + "    optional int32 a;\n"
        + "    optional double b;\n"
        + "  }\n"
        + "}\n";
    final MessageType fileSchema = MessageTypeParser.parseMessageType(fileSchemaText);

    // Select column index 0 ("structCol") and keep both nested paths.
    final String projectedSchema = DataWritableReadSupport.getProjectedSchema(
        fileSchema,
        Arrays.asList("structCol"),
        Arrays.asList(0),
        Sets.newHashSet("structCol.a", "structCol.b")).toString();

    testConversion("structCol", "struct<a:int,b:double>", projectedSchema);
}
Also used : MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 8 with MessageType

use of org.apache.parquet.schema.MessageType in project hive by apache.

the class TestDataWritableReadSupport method testGetProjectedSchema4.

/**
 * Projects {@code structCol} with only the doubly nested path
 * {@code structCol.subStructCol.b} requested, then checks the projected schema
 * against the conversion of {@code struct<subStructCol:struct<b:bigint>>}.
 */
@Test
public void testGetProjectedSchema4() throws Exception {
    final String fileSchemaText =
        "message hive_schema {\n"
        + "  optional group structCol {\n"
        + "    optional int32 a;\n"
        + "    optional group subStructCol {\n"
        + "      optional int64 b;\n"
        + "      optional boolean c;\n"
        + "    }\n"
        + "  }\n"
        + "  optional boolean d;\n"
        + "}\n";
    final MessageType fileSchema = MessageTypeParser.parseMessageType(fileSchemaText);

    // Select column index 0 ("structCol") and keep a single nested leaf.
    final String projectedSchema = DataWritableReadSupport.getProjectedSchema(
        fileSchema,
        Arrays.asList("structCol"),
        Arrays.asList(0),
        Sets.newHashSet("structCol.subStructCol.b")).toString();

    testConversion("structCol", "struct<subStructCol:struct<b:bigint>>", projectedSchema);
}
Also used : MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 9 with MessageType

use of org.apache.parquet.schema.MessageType in project hive by apache.

the class TestDataWritableReadSupport method testGetProjectedSchema1.

/**
 * Projects {@code structCol} with only the nested path {@code structCol.a}
 * requested out of four declared fields, then checks the projected schema
 * against the conversion of {@code struct<a:int>}.
 */
@Test
public void testGetProjectedSchema1() throws Exception {
    final String fileSchemaText =
        "message hive_schema {\n"
        + "  optional group structCol {\n"
        + "    optional int32 a;\n"
        + "    optional double b;\n"
        + "    optional boolean c;\n"
        + "    optional fixed_len_byte_array(3) d (DECIMAL(5,2));\n"
        + "  }\n"
        + "}\n";
    final MessageType fileSchema = MessageTypeParser.parseMessageType(fileSchemaText);

    // Select column index 0 ("structCol") and keep only field "a".
    final String projectedSchema = DataWritableReadSupport.getProjectedSchema(
        fileSchema,
        Arrays.asList("structCol"),
        Arrays.asList(0),
        Sets.newHashSet("structCol.a")).toString();

    testConversion("structCol", "struct<a:int>", projectedSchema);
}
Also used : MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 10 with MessageType

use of org.apache.parquet.schema.MessageType in project hive by apache.

the class HiveParquetSchemaTestUtils method testConversion.

/**
 * Converts the given Hive columns to a Parquet schema with
 * {@code HiveSchemaConverter.convert} and asserts that it equals the schema
 * parsed from {@code actualSchema}.
 *
 * <p>Despite its name, {@code actualSchema} is the textual schema used as the
 * expected side of the comparison.
 *
 * @param columnNamesStr Hive column names, as accepted by {@code createHiveColumnsFrom}
 * @param columnsTypeStr Hive column types, as accepted by {@code createHiveTypeInfoFrom}
 * @param actualSchema   Parquet message type text to compare the converted schema against
 * @throws Exception if any of the helper calls fails
 */
public static void testConversion(final String columnNamesStr, final String columnsTypeStr, final String actualSchema) throws Exception {
    final MessageType convertedSchema = HiveSchemaConverter.convert(
        createHiveColumnsFrom(columnNamesStr),
        createHiveTypeInfoFrom(columnsTypeStr));
    final MessageType parsedSchema = MessageTypeParser.parseMessageType(actualSchema);

    final String failureMessage =
        "converting " + columnNamesStr + ": " + columnsTypeStr + " to " + actualSchema;
    assertEquals(failureMessage, parsedSchema, convertedSchema);

    // The schema equality above does not cover original types (PrimitiveType.equals
    // ignores them), so compare them per top-level field here.
    final List<Type> parsedFields = parsedSchema.getFields();
    final List<Type> convertedFields = convertedSchema.getFields();
    final int fieldCount = parsedFields.size();
    for (int idx = 0; idx < fieldCount; idx++) {
        final OriginalType wanted = parsedFields.get(idx).getOriginalType();
        final OriginalType found = convertedFields.get(idx).getOriginalType();
        assertEquals("Original types of the field do not match", wanted, found);
    }
}
Also used : OriginalType(org.apache.parquet.schema.OriginalType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Aggregations

MessageType (org.apache.parquet.schema.MessageType)40 Test (org.junit.Test)23 FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)13 SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)7 Type (org.apache.parquet.schema.Type)7 Path (org.apache.hadoop.fs.Path)6 GroupType (org.apache.parquet.schema.GroupType)6 Configuration (org.apache.hadoop.conf.Configuration)5 ArrayList (java.util.ArrayList)4 BlockMetaData (org.apache.parquet.hadoop.metadata.BlockMetaData)4 OriginalType (org.apache.parquet.schema.OriginalType)4 HashMap (java.util.HashMap)3 SchemaPath (org.apache.drill.common.expression.SchemaPath)3 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)3 PrimitiveType (org.apache.parquet.schema.PrimitiveType)3 DimensionSchema (io.druid.data.input.impl.DimensionSchema)2 File (java.io.File)2 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)2 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)2 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)2