Example 26 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class HCatSchemaUtils method getHCatSchema.

public static HCatSchema getHCatSchema(TypeInfo typeInfo) throws HCatException {
    Category typeCategory = typeInfo.getCategory();
    HCatSchema hCatSchema;
    if (Category.PRIMITIVE == typeCategory) {
        // Wrap the lone primitive in a single-field struct schema.
        hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, (PrimitiveTypeInfo) typeInfo, null)).build();
    } else if (Category.STRUCT == typeCategory) {
        // Recursively convert the struct's fields, then nest them as one STRUCT field.
        HCatSchema subSchema = constructHCatSchema((StructTypeInfo) typeInfo);
        hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, Type.STRUCT, subSchema, null)).build();
    } else if (Category.LIST == typeCategory) {
        // Convert the element type, then expose it as a single ARRAY field.
        CollectionBuilder builder = getListSchemaBuilder();
        builder.addField(getHCatFieldSchema(null, ((ListTypeInfo) typeInfo).getListElementTypeInfo(), null));
        hCatSchema = new HCatSchema(Arrays.asList(new HCatFieldSchema("", Type.ARRAY, builder.build(), "")));
    } else if (Category.MAP == typeCategory) {
        // Map keys must be primitive; the value schema is converted recursively.
        HCatSchema subSchema = getHCatSchema(((MapTypeInfo) typeInfo).getMapValueTypeInfo());
        MapBuilder builder = getMapSchemaBuilder();
        hCatSchema = builder.withKeyType((PrimitiveTypeInfo) ((MapTypeInfo) typeInfo).getMapKeyTypeInfo()).withValueSchema(subSchema).build();
    } else {
        // Remaining categories (e.g. UNION) are not representable as an HCatSchema.
        throw new TypeNotPresentException(typeInfo.getTypeName(), null);
    }
    return hCatSchema;
}
Also used : Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
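A minimal usage sketch (my own, not from the Hive sources above), assuming the standard SerDe type-info factory and the HCatalog schema packages: parse a Hive type string into a TypeInfo and hand it to getHCatSchema, which dispatches on its Category as shown.

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class HCatSchemaFromTypeString {
    public static void main(String[] args) throws HCatException {
        // "map<string,int>" parses to a TypeInfo whose category is MAP,
        // so getHCatSchema takes the MapBuilder branch above.
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("map<string,int>");
        HCatSchema schema = HCatSchemaUtils.getHCatSchema(typeInfo);
        System.out.println(schema);
    }
}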

Example 27 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class MultiDelimitSerDe method serializeNoEncode.

// This is basically the same as LazySimpleSerDe.serialize, except that binary data
// is not Base64-encoded, because the delimiter here is itself a printable string.
// Consider the row "strAQ==\1": "str" is a string field, "AQ==" is the delimiter,
// and "\1" is the raw binary data. Base64-encoding the byte \1 would also yield
// "AQ==", making the data indistinguishable from the delimiter.
private static void serializeNoEncode(ByteStream.Output out, Object obj, ObjectInspector objInspector, byte[] separators, int level, Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException, SerDeException {
    if (obj == null) {
        out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
        return;
    }
    char separator;
    List<?> list;
    switch(objInspector.getCategory()) {
        case PRIMITIVE:
            PrimitiveObjectInspector oi = (PrimitiveObjectInspector) objInspector;
            if (oi.getPrimitiveCategory() == PrimitiveCategory.BINARY) {
                BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(obj);
                byte[] toWrite = new byte[bw.getLength()];
                System.arraycopy(bw.getBytes(), 0, toWrite, 0, bw.getLength());
                out.write(toWrite, 0, toWrite.length);
            } else {
                LazyUtils.writePrimitiveUTF8(out, obj, oi, escaped, escapeChar, needsEscape);
            }
            return;
        case LIST:
            separator = (char) separators[level];
            ListObjectInspector loi = (ListObjectInspector) objInspector;
            list = loi.getList(obj);
            ObjectInspector eoi = loi.getListElementObjectInspector();
            if (list == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        out.write(separator);
                    }
                    serializeNoEncode(out, list.get(i), eoi, separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
        case MAP:
            separator = (char) separators[level];
            char keyValueSeparator = (char) separators[level + 1];
            MapObjectInspector moi = (MapObjectInspector) objInspector;
            ObjectInspector koi = moi.getMapKeyObjectInspector();
            ObjectInspector voi = moi.getMapValueObjectInspector();
            Map<?, ?> map = moi.getMap(obj);
            if (map == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                boolean first = true;
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    if (first) {
                        first = false;
                    } else {
                        out.write(separator);
                    }
                    serializeNoEncode(out, entry.getKey(), koi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
                    out.write(keyValueSeparator);
                    serializeNoEncode(out, entry.getValue(), voi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
        case STRUCT:
            separator = (char) separators[level];
            StructObjectInspector soi = (StructObjectInspector) objInspector;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();
            list = soi.getStructFieldsDataAsList(obj);
            if (list == null) {
                out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        out.write(separator);
                    }
                    serializeNoEncode(out, list.get(i), fields.get(i).getFieldObjectInspector(), separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
                }
            }
            return;
    }
    throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) Map(java.util.Map)
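The collision the header comment warns about is easy to reproduce; a standalone sketch (not part of the SerDe itself): Base64-encoding the single byte \1 yields exactly the printable string "AQ==" from the example row.

import java.util.Base64;

public class DelimiterCollisionDemo {
    public static void main(String[] args) {
        byte[] binary = { 0x01 };  // the raw "\1" binary field
        // Prints "AQ==", identical to the delimiter in the example row, which is
        // why serializeNoEncode writes binary bytes through unencoded.
        System.out.println(Base64.getEncoder().encodeToString(binary));
    }
}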

Example 28 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class HCatTypeCheckHive method evaluate.

@Override
public Object evaluate(DeferredObject[] args) throws HiveException {
    List<Object> row = new ArrayList<Object>();
    String typesStr = (String) getJavaObject(args[0].get(), argOIs[0], new ArrayList<Category>());
    String[] types = typesStr.split("\\+");
    for (int i = 0; i < types.length; i++) {
        types[i] = types[i].toLowerCase();
    }
    for (int i = 1; i < args.length; i++) {
        ObjectInspector oi = argOIs[i];
        List<ObjectInspector.Category> categories = new ArrayList<ObjectInspector.Category>();
        Object o = getJavaObject(args[i].get(), oi, categories);
        try {
            if (o != null) {
                Util.check(types[i - 1], o);
            }
        } catch (IOException e) {
            throw new HiveException(e);
        }
        row.add(o == null ? "null" : o);
        row.add(":" + (o == null ? "null" : o.getClass()) + ":" + categories);
    }
    return row.toString();
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) IOException(java.io.IOException)
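For context, a hypothetical illustration (names and values are mine) of the first-argument convention: argument 0 carries a '+'-delimited list of expected type names, one per remaining argument, which evaluate() splits and lower-cases before checking each value.

public class TypeStringDemo {
    public static void main(String[] args) {
        String typesStr = "STRING+INT+MAP";  // would pair with three value arguments
        for (String t : typesStr.split("\\+")) {
            System.out.println(t.toLowerCase());  // string, int, map
        }
    }
}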

Example 29 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project hive by apache.

the class VectorDeserializeOrcWriter method create.

// TODO: if more writers are added, separate out an EncodingWriterFactory
public static EncodingWriter create(InputFormat<?, ?> sourceIf, Deserializer serDe, Map<Path, PartitionDesc> parts, Configuration daemonConf, Configuration jobConf, Path splitPath, StructObjectInspector sourceOi, List<Integer> sourceIncludes, boolean[] cacheIncludes, int allocSize) throws IOException {
    // Vector SerDe can be disabled both on client and server side.
    if (!HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED) || !HiveConf.getBoolVar(jobConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED) || !(sourceIf instanceof TextInputFormat) || !(serDe instanceof LazySimpleSerDe)) {
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
    PartitionDesc partDesc = HiveFileFormatUtils.getPartitionDescFromPathRecursively(parts, path, null);
    if (partDesc == null) {
        LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter: no partition desc for " + path);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Properties tblProps = partDesc.getTableDesc().getProperties();
    if ("true".equalsIgnoreCase(tblProps.getProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST))) {
        LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter due to " + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    for (StructField sf : sourceOi.getAllStructFieldRefs()) {
        Category c = sf.getFieldObjectInspector().getCategory();
        if (c != Category.PRIMITIVE) {
            LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter: " + c + " is not supported");
            return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
        }
    }
    LlapIoImpl.LOG.info("Creating VertorDeserializeOrcWriter for " + path);
    return new VectorDeserializeOrcWriter(daemonConf, tblProps, sourceOi, sourceIncludes, cacheIncludes, allocSize);
}
Also used : DeserializerOrcWriter(org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter) Path(org.apache.hadoop.fs.Path) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) Properties(java.util.Properties)
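A small sketch (under assumed defaults, not taken from the source) of the dual-configuration veto: the vectorized path requires LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED to be true in both the daemon and the job configuration, so flipping either one forces the DeserializerOrcWriter fallback.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class VectorSerDeGateDemo {
    public static void main(String[] args) {
        Configuration daemonConf = new Configuration();
        // Veto the vectorized SerDe path daemon-wide; create(...) then returns
        // a DeserializerOrcWriter no matter what the input format is.
        HiveConf.setBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED, false);
        System.out.println(HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED));
    }
}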

Example 30 with Category

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category in project presto by prestodb.

the class HiveBucketing method getJavaDeferredObject.

private static DeferredObject getJavaDeferredObject(Object object, ObjectInspector objectInspector) {
    checkArgument(objectInspector.getCategory() == Category.PRIMITIVE, "Unsupported object inspector category %s", objectInspector.getCategory());
    PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) objectInspector);
    switch(poi.getPrimitiveCategory()) {
        case BOOLEAN:
            return new DeferredJavaObject(object);
        case BYTE:
            return new DeferredJavaObject(((Long) object).byteValue());
        case SHORT:
            return new DeferredJavaObject(((Long) object).shortValue());
        case INT:
            return new DeferredJavaObject(((Long) object).intValue());
        case LONG:
            return new DeferredJavaObject(object);
        case STRING:
            return new DeferredJavaObject(((Slice) object).toStringUtf8());
    }
    throw new RuntimeException("Unsupported type: " + poi.getPrimitiveCategory());
}
Also used : DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)
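The narrowing casts above reflect Presto handing every integer type across the engine boundary as a Java Long; a minimal sketch of the same conversion (class and variable names are mine):

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;

public class DeferredWideningDemo {
    public static void main(String[] args) throws Exception {
        Object engineValue = 42L;  // Presto-side integers arrive as Long
        // For a Hive INT column the Long is narrowed before wrapping.
        DeferredObject deferred = new DeferredJavaObject(((Long) engineValue).intValue());
        System.out.println(deferred.get());  // 42, now an Integer
    }
}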

Aggregations

Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) 25
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 25
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) 23
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) 16
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 12
BytesWritable (org.apache.hadoop.io.BytesWritable) 12
UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) 11
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 11
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) 11
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 11
Text (org.apache.hadoop.io.Text) 11
HiveChar (org.apache.hadoop.hive.common.type.HiveChar) 10
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal) 10
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar) 10
IntWritable (org.apache.hadoop.io.IntWritable) 10
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable) 9
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable) 9
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable) 9
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable) 9
BooleanWritable (org.apache.hadoop.io.BooleanWritable) 9