Search in sources :

Example 36 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class PigHCatUtil method getBagSubSchema.

protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
    // there are two cases - array<Type> and array<struct<...>>
    // in either case the element type of the array is represented in a
    // tuple field schema in the bag's field schema - the second case (struct)
    // more naturally translates to the tuple - in the first case (array<Type>)
    // we simulate the tuple by putting the single field in a tuple
    Properties props = UDFContext.getUDFContext().getClientSystemProps();
    String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT;
    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) {
        innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME).replaceAll("FIELDNAME", hfs.getName());
    }
    String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT;
    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) {
        innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME).replaceAll("FIELDNAME", hfs.getName());
    }
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName).setDescription("The tuple in the bag").setType(DataType.TUPLE);
    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if (arrayElementFieldSchema.getType() == Type.STRUCT) {
        bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
        ResourceSchema s = new ResourceSchema();
        List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
        s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()]));
        bagSubFieldSchemas[0].setSchema(s);
    } else {
        ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
        innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName).setDescription("The inner field in the tuple in the bag").setType(getPigType(arrayElementFieldSchema)).setSchema(// the element type is not a tuple - so no subschema
        null);
        bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    }
    return new ResourceSchema().setFields(bagSubFieldSchemas);
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) Properties(java.util.Properties) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)

Example 37 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class PigHCatUtil method transformToTuple.

private static Tuple transformToTuple(List<?> objList, HCatSchema hs) throws Exception {
    if (objList == null) {
        return null;
    }
    Tuple t = tupFac.newTuple(objList.size());
    List<HCatFieldSchema> subFields = hs.getFields();
    for (int i = 0; i < subFields.size(); i++) {
        t.set(i, extractPigObject(objList.get(i), subFields.get(i)));
    }
    return t;
}
Also used : Tuple(org.apache.pig.data.Tuple) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)

Example 38 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class PigHCatUtil method getHCatSchema.

HCatSchema getHCatSchema(List<RequiredField> fields, String signature, Class<?> classForUDFCLookup) throws IOException {
    if (fields == null) {
        return null;
    }
    Properties props = UDFContext.getUDFContext().getUDFProperties(classForUDFCLookup, new String[] { signature });
    HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA);
    ArrayList<HCatFieldSchema> fcols = new ArrayList<HCatFieldSchema>();
    for (RequiredField rf : fields) {
        fcols.add(hcatTableSchema.getFields().get(rf.getIndex()));
    }
    return new HCatSchema(fcols);
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) RequiredField(org.apache.pig.LoadPushDown.RequiredField) ArrayList(java.util.ArrayList) Properties(java.util.Properties) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)

Example 39 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class TestPigHCatUtil method testGetBagSubSchemaConfigured.

@Test
public void testGetBagSubSchemaConfigured() throws Exception {
    // NOTE: pig-0.8 sets client system properties by actually getting the client
    // system properties. Starting in pig-0.9 you must pass the properties in.
    // When updating our pig dependency this will need updated.
    System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t");
    System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
    UDFContext.getUDFContext().setClientSystemProps(System.getProperties());
    // Define the expected schema.
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t").setDescription("The tuple in the bag").setType(DataType.TUPLE);
    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
    innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);
    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
    // Get the actual converted schema.
    HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList(new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema actualHCatFieldSchema = new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null);
    ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema);
    Assert.assertEquals(expected.toString(), actual.toString());
    // Clean up System properties that were set by this test
    System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME);
    System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME);
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)

Example 40 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class HCatClientHMSImpl method getPartition.

@Override
public HCatPartition getPartition(String dbName, String tableName, Map<String, String> partitionSpec) throws HCatException {
    HCatPartition partition = null;
    try {
        HCatTable hcatTable = getTable(dbName, tableName);
        List<HCatFieldSchema> partitionColumns = hcatTable.getPartCols();
        if (partitionColumns.size() != partitionSpec.size()) {
            throw new HCatException("Partition-spec doesn't have the right number of partition keys.");
        }
        ArrayList<String> ptnValues = new ArrayList<String>();
        for (HCatFieldSchema partitionColumn : partitionColumns) {
            String partKey = partitionColumn.getName();
            if (partitionSpec.containsKey(partKey)) {
                // Partition-keys added in order.
                ptnValues.add(partitionSpec.get(partKey));
            } else {
                throw new HCatException("Invalid partition-key specified: " + partKey);
            }
        }
        Partition hivePartition = hmsClient.getPartition(checkDB(dbName), tableName, ptnValues);
        if (hivePartition != null) {
            partition = new HCatPartition(hcatTable, hivePartition);
        }
    } catch (MetaException e) {
        throw new HCatException("MetaException while retrieving partition.", e);
    } catch (NoSuchObjectException e) {
        throw new ObjectNotFoundException("NoSuchObjectException while retrieving partition.", e);
    } catch (TException e) {
        throw new ConnectionFailureException("TException while retrieving partition.", e);
    }
    return partition;
}
Also used : TException(org.apache.thrift.TException) Partition(org.apache.hadoop.hive.metastore.api.Partition) HCatException(org.apache.hive.hcatalog.common.HCatException) ArrayList(java.util.ArrayList) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)

Aggregations

HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)61 ArrayList (java.util.ArrayList)34 Test (org.junit.Test)30 HCatException (org.apache.hive.hcatalog.common.HCatException)22 IOException (java.io.IOException)21 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)21 HashMap (java.util.HashMap)19 Configuration (org.apache.hadoop.conf.Configuration)15 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)15 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)7 ResourceSchema (org.apache.pig.ResourceSchema)6 HCatTable (org.apache.hive.hcatalog.api.HCatTable)5 ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema)5 List (java.util.List)4 Map (java.util.Map)4 Properties (java.util.Properties)4 Path (org.apache.hadoop.fs.Path)4 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)3 FrontendException (org.apache.pig.impl.logicalLayer.FrontendException)3 FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)3