Search in sources :

Example 21 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class PigHCatUtil method getTupleSubSchema.

private static ResourceSchema getTupleSubSchema(HCatFieldSchema hfs) throws IOException {
    // for each struct subfield, create equivalent ResourceFieldSchema
    ResourceSchema s = new ResourceSchema();
    List<ResourceFieldSchema> lrfs = new ArrayList<ResourceFieldSchema>();
    for (HCatFieldSchema subField : hfs.getStructSubSchema().getFields()) {
        lrfs.add(getResourceSchemaFromFieldSchema(subField));
    }
    s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()]));
    return s;
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ArrayList(java.util.ArrayList) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)

Example 22 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class TestPassProperties method getSchema.

private HCatSchema getSchema() throws HCatException {
    HCatSchema schema = new HCatSchema(new ArrayList<HCatFieldSchema>());
    schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, ""));
    schema.append(new HCatFieldSchema("a1", HCatFieldSchema.Type.STRING, ""));
    schema.append(new HCatFieldSchema("a2", HCatFieldSchema.Type.STRING, ""));
    return schema;
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)

Example 23 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class HCatBaseStorer method getJavaObj.

/**
   * Convert from Pig value object to Hive value object
   * This method assumes that {@link #validateSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema, org.apache.hive.hcatalog.data.schema.HCatFieldSchema, org.apache.pig.impl.logicalLayer.schema.Schema, org.apache.hive.hcatalog.data.schema.HCatSchema, int)}
   * which checks the types in Pig schema are compatible with target Hive table, has been called.
   */
private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException {
    try {
        if (pigObj == null)
            return null;
        // The real work-horse. Spend time and energy in this method if there is
        // need to keep HCatStorer lean and go fast.
        Type type = hcatFS.getType();
        switch(type) {
            case BINARY:
                return ((DataByteArray) pigObj).get();
            case STRUCT:
                HCatSchema structSubSchema = hcatFS.getStructSubSchema();
                // Unwrap the tuple.
                List<Object> all = ((Tuple) pigObj).getAll();
                ArrayList<Object> converted = new ArrayList<Object>(all.size());
                for (int i = 0; i < all.size(); i++) {
                    converted.add(getJavaObj(all.get(i), structSubSchema.get(i)));
                }
                return converted;
            case ARRAY:
                // Unwrap the bag.
                DataBag pigBag = (DataBag) pigObj;
                HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
                boolean needTuple = tupFS.getType() == Type.STRUCT;
                List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
                Iterator<Tuple> bagItr = pigBag.iterator();
                while (bagItr.hasNext()) {
                    // If there is only one element in tuple contained in bag, we throw away the tuple.
                    bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));
                }
                return bagContents;
            case MAP:
                Map<?, ?> pigMap = (Map<?, ?>) pigObj;
                Map<Object, Object> typeMap = new HashMap<Object, Object>();
                for (Entry<?, ?> entry : pigMap.entrySet()) {
                    // the value has a schema and not a FieldSchema
                    typeMap.put(// Schema validation enforces that the Key is a String
                    (String) entry.getKey(), getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0)));
                }
                return typeMap;
            case STRING:
            case INT:
            case BIGINT:
            case FLOAT:
            case DOUBLE:
                return pigObj;
            case SMALLINT:
                if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return ((Integer) pigObj).shortValue();
            case TINYINT:
                if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return ((Integer) pigObj).byteValue();
            case BOOLEAN:
                if (pigObj instanceof String) {
                    if (((String) pigObj).trim().compareTo("0") == 0) {
                        return Boolean.FALSE;
                    }
                    if (((String) pigObj).trim().compareTo("1") == 0) {
                        return Boolean.TRUE;
                    }
                    throw new BackendException("Unexpected type " + type + " for value " + pigObj + " of class " + pigObj.getClass().getName(), PigHCatUtil.PIG_EXCEPTION_CODE);
                }
                return Boolean.parseBoolean(pigObj.toString());
            case DECIMAL:
                BigDecimal bd = (BigDecimal) pigObj;
                DecimalTypeInfo dti = (DecimalTypeInfo) hcatFS.getTypeInfo();
                if (bd.precision() > dti.precision() || bd.scale() > dti.scale()) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return HiveDecimal.create(bd);
            case CHAR:
                String charVal = (String) pigObj;
                CharTypeInfo cti = (CharTypeInfo) hcatFS.getTypeInfo();
                if (charVal.length() > cti.getLength()) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return new HiveChar(charVal, cti.getLength());
            case VARCHAR:
                String varcharVal = (String) pigObj;
                VarcharTypeInfo vti = (VarcharTypeInfo) hcatFS.getTypeInfo();
                if (varcharVal.length() > vti.getLength()) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return new HiveVarchar(varcharVal, vti.getLength());
            case TIMESTAMP:
                DateTime dt = (DateTime) pigObj;
                //getMillis() returns UTC time regardless of TZ
                return new Timestamp(dt.getMillis());
            case DATE:
                /**
         * We ignore any TZ setting on Pig value since java.sql.Date doesn't have it (in any
         * meaningful way).  So the assumption is that if Pig value has 0 time component (midnight)
         * we assume it reasonably 'fits' into a Hive DATE.  If time part is not 0, it's considered
         * out of range for target type.
         */
                DateTime dateTime = ((DateTime) pigObj);
                if (dateTime.getMillisOfDay() != 0) {
                    handleOutOfRangeValue(pigObj, hcatFS, "Time component must be 0 (midnight) in local timezone; Local TZ val='" + pigObj + "'");
                    return null;
                }
                /*java.sql.Date is a poorly defined API.  Some (all?) SerDes call toString() on it
        [e.g. LazySimpleSerDe, uses LazyUtils.writePrimitiveUTF8()],  which automatically adjusts
          for local timezone.  Date.valueOf() also uses local timezone (as does Date(int,int,int).
          Also see PigHCatUtil#extractPigObject() for corresponding read op.  This way a DATETIME from Pig,
          when stored into Hive and read back comes back with the same value.*/
                return new Date(dateTime.getYear() - 1900, dateTime.getMonthOfYear() - 1, dateTime.getDayOfMonth());
            default:
                throw new BackendException("Unexpected HCat type " + type + " for value " + pigObj + " of class " + pigObj.getClass().getName(), PigHCatUtil.PIG_EXCEPTION_CODE);
        }
    } catch (BackendException e) {
        // provide the path to the field in the error message
        throw new BackendException((hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), e);
    }
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) Timestamp(java.sql.Timestamp) DateTime(org.joda.time.DateTime) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) DataByteArray(org.apache.pig.data.DataByteArray) DataBag(org.apache.pig.data.DataBag) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) BigDecimal(java.math.BigDecimal) Date(java.sql.Date) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) BackendException(org.apache.pig.backend.BackendException) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DataType(org.apache.pig.data.DataType) Type(org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type) Map(java.util.Map) HashMap(java.util.HashMap) Tuple(org.apache.pig.data.Tuple)

Example 24 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class HCatBaseStorer method getHCatFSFromPigFS.

/**
   * Here we are processing HCat table schema as derived from metastore, 
   * thus it should have information about all fields/sub-fields, but not for partition columns
   */
private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema, Schema pigSchema, HCatSchema tableSchema) throws FrontendException, HCatException {
    if (hcatFieldSchema == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("hcatFieldSchema is null for fSchema '" + fSchema.alias + "'");
        //throw new IllegalArgumentException("hcatFiledSchema is null; fSchema=" + fSchema + " " +
        //      "(pigSchema, tableSchema)=(" + pigSchema + "," + tableSchema + ")");
        }
    }
    byte type = fSchema.type;
    switch(type) {
        case DataType.CHARARRAY:
        case DataType.BIGCHARARRAY:
            if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, null);
        case DataType.INTEGER:
            if (hcatFieldSchema != null) {
                if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) {
                    throw new FrontendException("Unsupported type: " + type + "  in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
                }
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.intTypeInfo, null);
        case DataType.LONG:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.longTypeInfo, null);
        case DataType.FLOAT:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.floatTypeInfo, null);
        case DataType.DOUBLE:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.doubleTypeInfo, null);
        case DataType.BYTEARRAY:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.binaryTypeInfo, null);
        case DataType.BOOLEAN:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.booleanTypeInfo, null);
        case DataType.DATETIME:
            //is controlled by Hive target table information
            if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.timestampTypeInfo, null);
        case DataType.BIGDECIMAL:
            if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.decimalTypeInfo, null);
        case DataType.BAG:
            Schema bagSchema = fSchema.schema;
            List<HCatFieldSchema> arrFields = new ArrayList<HCatFieldSchema>(1);
            FieldSchema field;
            // Find out if we need to throw away the tuple or not.
            if (removeTupleFromBag(hcatFieldSchema, fSchema)) {
                field = bagSchema.getField(0).schema.getField(0);
            } else {
                field = bagSchema.getField(0);
            }
            arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0), pigSchema, tableSchema));
            return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), "");
        case DataType.TUPLE:
            List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>();
            HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema();
            List<FieldSchema> fields = fSchema.schema.getFields();
            for (int i = 0; i < fields.size(); i++) {
                FieldSchema fieldSchema = fields.get(i);
                hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i), pigSchema, tableSchema));
            }
            return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), "");
        case DataType.MAP:
            {
                // Pig's schema contain no type information about map's keys and
                // values. So, if its a new column assume <string,string> if its existing
                // return whatever is contained in the existing column.
                HCatFieldSchema valFS;
                List<HCatFieldSchema> valFSList = new ArrayList<HCatFieldSchema>(1);
                if (hcatFieldSchema != null) {
                    return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, hcatFieldSchema.getMapKeyTypeInfo(), hcatFieldSchema.getMapValueSchema(), "");
                }
                // Column not found in target table. Its a new column. Its schema is map<string,string>
                valFS = new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, "");
                valFSList.add(valFS);
                return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, new HCatSchema(valFSList), "");
            }
        case DataType.BIGINTEGER:
        //fall through; doesn't map to Hive/Hcat type; here for completeness
        default:
            throw new FrontendException("Unsupported type: " + type + "  in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
    }
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) ResourceSchema(org.apache.pig.ResourceSchema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)

Example 25 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class HCatBaseStorer method doSchemaValidations.

protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException {
    // Iterate through all the elements in Pig Schema and do validations as
    // dictated by semantics, consult HCatSchema of table when need be.
    //helps with debug messages
    int columnPos = 0;
    for (FieldSchema pigField : pigSchema.getFields()) {
        HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema);
        validateSchema(pigField, hcatField, pigSchema, tblSchema, columnPos++);
    }
    try {
        PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema);
    } catch (IOException e) {
        throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e);
    }
}
Also used : HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) IOException(java.io.IOException) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)

Aggregations

HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)61 ArrayList (java.util.ArrayList)34 Test (org.junit.Test)30 HCatException (org.apache.hive.hcatalog.common.HCatException)22 IOException (java.io.IOException)21 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)21 HashMap (java.util.HashMap)19 Configuration (org.apache.hadoop.conf.Configuration)18 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)15 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)7 ResourceSchema (org.apache.pig.ResourceSchema)6 HCatTable (org.apache.hive.hcatalog.api.HCatTable)5 ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema)5 Map (java.util.Map)4 Properties (java.util.Properties)4 Path (org.apache.hadoop.fs.Path)4 List (java.util.List)3 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)3 CommandNeedRetryException (org.apache.hadoop.hive.ql.CommandNeedRetryException)3 FrontendException (org.apache.pig.impl.logicalLayer.FrontendException)3