
Example 16 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project phoenix by apache.

The class PhoenixSerDe, method createLazyPhoenixInspector.

private ObjectInspector createLazyPhoenixInspector(Configuration conf, Properties tbl) throws SerDeException {
    List<String> columnNameList = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(PhoenixStorageHandlerConstants.COMMA));
    List<TypeInfo> columnTypeList = TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    List<ObjectInspector> columnObjectInspectors = Lists.newArrayListWithExpectedSize(columnTypeList.size());
    for (TypeInfo typeInfo : columnTypeList) {
        columnObjectInspectors.add(PhoenixObjectInspectorFactory.createObjectInspector(typeInfo, serdeParams));
    }
    return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columnNameList, columnObjectInspectors, null, serdeParams.getSeparators()[0], serdeParams, ObjectInspectorOptions.JAVA);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
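
The following is a minimal, self-contained sketch (not part of the Phoenix source) of how the two table properties read above are parsed: the column names are split on commas, and TypeInfoUtils turns the colon-separated type string into a list of TypeInfo objects. The column names and type string are invented for illustration.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoParseSketch {
    public static void main(String[] args) {
        // Hypothetical table properties, in the same form the SerDe receives them.
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,scores");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string:array<double>");

        List<String> columnNames = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(","));
        List<TypeInfo> columnTypes =
                TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));

        for (int i = 0; i < columnNames.size(); i++) {
            // Prints e.g. "scores -> array<double> (LIST)"
            System.out.println(columnNames.get(i) + " -> "
                    + columnTypes.get(i).getTypeName() + " (" + columnTypes.get(i).getCategory() + ")");
        }
    }
}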

Example 17 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project drill by apache.

The class HiveAbstractReader, method init.

private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);
    // Get the configured default partition value
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);
    Properties tableProperties;
    try {
        tableProperties = HiveUtilities.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);
        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);
        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
            partitionOI = getStructOI(partitionSerDe);
            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create a converter since no schema changes are expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }
        if (logger.isTraceEnabled()) {
            for (StructField field : finalOI.getAllStructFieldRefs()) {
                logger.trace("field in finalOI: {}", field.getClass().getName());
            }
            logger.trace("partitionSerDe class is {} {}", partitionSerDe.getClass().getName());
        }
        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }
        // Always take the column names from the ObjectInspector. For some tables (e.g. Avro) the metastore
        // may not contain the schema; instead it is derived from other sources such as table properties or an external file.
        // The SerDe knows how to derive the schema from the config and table properties passed at initialization,
        // and the ObjectInspector created from the SerDe carries that schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();
        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);
        for (String columnName : selectedColumnNames) {
            StructField fieldRef = finalOI.getStructFieldRef(columnName);
            selectedStructFieldRefs.add(fieldRef);
            ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());
            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }
        for (int i = 0; i < selectedColumnNames.size(); ++i) {
            logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}", selectedColumnObjInspectors.get(i).getTypeName(), selectedColumnObjInspectors.get(i).getClass().getName(), selectedColumnTypes.get(i).toString(), selectedColumnFieldConverters.get(i).getClass().getName());
        }
        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);
                if (partition != null) {
                    selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(), e);
    }
    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat().getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        internalInit(tableProperties, reader);
    }
}
Also used : SerDe(org.apache.hadoop.hive.serde2.SerDe) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ExecutionException(java.util.concurrent.ExecutionException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) SchemaPath(org.apache.drill.common.expression.SchemaPath) JobConf(org.apache.hadoop.mapred.JobConf)
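
The partition-versus-table branch above hinges on two calls: ObjectInspectorConverters.getConvertedOI derives the inspector the reader will expose, and getConverter produces a converter that rewrites each partition-shaped row into that shape. The sketch below is not Drill code; field names and types are hypothetical, but it exercises the same two calls with hand-built struct inspectors.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class PartitionToTableConversionSketch {
    public static void main(String[] args) {
        List<String> fieldNames = Arrays.asList("id", "amount");

        // "Partition" schema: (id int, amount string), e.g. an older file schema.
        List<ObjectInspector> partitionFieldOIs = Arrays.asList(
                PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StructObjectInspector partitionOI =
                ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, partitionFieldOIs);

        // "Table" schema: (id int, amount double), the schema the reader must expose.
        List<ObjectInspector> tableFieldOIs = Arrays.asList(
                PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
        StructObjectInspector tableOI =
                ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, tableFieldOIs);

        // The same two calls made in init(): derive the final inspector, then a row converter.
        StructObjectInspector finalOI =
                (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
        Converter converter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);

        // A partition-shaped row is rewritten into the table shape: [7, 19.99]
        Object convertedRow = converter.convert(Arrays.asList(7, "19.99"));
        System.out.println(convertedRow);
    }
}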

Example 18 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project presto by prestodb.

The class OrcTester, method preprocessWriteValueOld.

private static Object preprocessWriteValueOld(TypeInfo typeInfo, Object value) {
    if (value == null) {
        return null;
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
            switch(primitiveCategory) {
                case BOOLEAN:
                    return value;
                case BYTE:
                    return ((Number) value).byteValue();
                case SHORT:
                    return ((Number) value).shortValue();
                case INT:
                    return ((Number) value).intValue();
                case LONG:
                    return ((Number) value).longValue();
                case FLOAT:
                    return ((Number) value).floatValue();
                case DOUBLE:
                    return ((Number) value).doubleValue();
                case DECIMAL:
                    return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
                case STRING:
                    return value;
                case CHAR:
                    return new HiveChar(value.toString(), ((CharTypeInfo) typeInfo).getLength());
                case DATE:
                    int days = ((SqlDate) value).getDays();
                    LocalDate localDate = LocalDate.ofEpochDay(days);
                    ZonedDateTime zonedDateTime = localDate.atStartOfDay(ZoneId.systemDefault());
                    long millis = zonedDateTime.toEpochSecond() * 1000;
                    Date date = new Date(0);
                    // millis must be set separately to avoid masking
                    date.setTime(millis);
                    return date;
                case TIMESTAMP:
                    long millisUtc = ((SqlTimestamp) value).getMillisUtc();
                    return new Timestamp(millisUtc);
                case BINARY:
                    return ((SqlVarbinary) value).getBytes();
            }
            break;
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
            TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
            Map<Object, Object> newMap = new HashMap<>();
            for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
                newMap.put(preprocessWriteValueOld(keyTypeInfo, entry.getKey()), preprocessWriteValueOld(valueTypeInfo, entry.getValue()));
            }
            return newMap;
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
            List<Object> newList = new ArrayList<>(((Collection<?>) value).size());
            for (Object element : (Iterable<?>) value) {
                newList.add(preprocessWriteValueOld(elementTypeInfo, element));
            }
            return newList;
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<?> fieldValues = (List<?>) value;
            List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            List<Object> newStruct = new ArrayList<>();
            for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
                newStruct.add(preprocessWriteValueOld(fieldTypeInfos.get(fieldId), fieldValues.get(fieldId)));
            }
            return newStruct;
    }
    throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s", typeInfo));
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SqlVarbinary(com.facebook.presto.spi.type.SqlVarbinary) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrestoException(com.facebook.presto.spi.PrestoException) LocalDate(java.time.LocalDate) SqlTimestamp(com.facebook.presto.spi.type.SqlTimestamp) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ZonedDateTime(java.time.ZonedDateTime) Arrays.asList(java.util.Arrays.asList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) SqlDate(com.facebook.presto.spi.type.SqlDate) Date(java.sql.Date) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)
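
preprocessWriteValueOld is a plain recursion over TypeInfo.getCategory(). The sketch below is illustration only (not part of OrcTester, and the type string is made up); it applies the same dispatch to print a type tree, using the same getMapKeyTypeInfo/getListElementTypeInfo/getAllStructFieldTypeInfos accessors.

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoWalkSketch {
    static void walk(TypeInfo typeInfo, String indent) {
        switch (typeInfo.getCategory()) {
            case PRIMITIVE:
                System.out.println(indent + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
                break;
            case LIST:
                System.out.println(indent + "LIST");
                walk(((ListTypeInfo) typeInfo).getListElementTypeInfo(), indent + "  ");
                break;
            case MAP:
                MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
                System.out.println(indent + "MAP");
                walk(mapTypeInfo.getMapKeyTypeInfo(), indent + "  ");
                walk(mapTypeInfo.getMapValueTypeInfo(), indent + "  ");
                break;
            case STRUCT:
                StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
                System.out.println(indent + "STRUCT " + structTypeInfo.getAllStructFieldNames());
                for (TypeInfo field : structTypeInfo.getAllStructFieldTypeInfos()) {
                    walk(field, indent + "  ");
                }
                break;
            default:
                // UNION and anything else: just print the type name.
                System.out.println(indent + typeInfo.getTypeName());
        }
    }

    public static void main(String[] args) {
        walk(TypeInfoUtils.getTypeInfoFromTypeString(
                "struct<id:int,tags:array<string>,attrs:map<string,decimal(10,2)>>"), "");
    }
}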

Example 19 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project SQLWindowing by hbutani.

The class GenericUDAFNTile, method getEvaluator.

@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    if (parameters.length != 1) {
        throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
    }
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
    boolean c = ObjectInspectorUtils.compareTypes(oi, PrimitiveObjectInspectorFactory.writableIntObjectInspector);
    if (!c) {
        throw new UDFArgumentTypeException(0, "Number of tiles must be an int expression");
    }
    return new GenericUDAFNTileEvaluator();
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException)
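
The check above converts the declared parameter TypeInfo to a standard Java ObjectInspector and compares it against the writable int inspector; compareTypes matches on type name, so the java-versus-writable distinction does not matter. Below is a small standalone sketch of the same check, with the argument lists invented for illustration.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class NTileArgCheckSketch {
    public static void main(String[] args) {
        // Hypothetical argument lists, as the UDAF resolver would pass them to getEvaluator().
        TypeInfo[] ok = { TypeInfoFactory.intTypeInfo };
        TypeInfo[] bad = { TypeInfoFactory.stringTypeInfo };

        for (TypeInfo[] parameters : new TypeInfo[][] { ok, bad }) {
            ObjectInspector oi =
                    TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
            boolean isInt = ObjectInspectorUtils.compareTypes(
                    oi, PrimitiveObjectInspectorFactory.writableIntObjectInspector);
            // Prints "int accepted: true" and "string accepted: false"
            System.out.println(parameters[0].getTypeName() + " accepted: " + isInt);
        }
    }
}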

Example 20 with TypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project SQLWindowing by hbutani.

The class TranslateUtils, method createLazyBinarySerDe.

@SuppressWarnings("unchecked")
public static SerDe createLazyBinarySerDe(Configuration cfg, StructObjectInspector oi) throws WindowingException {
    ArrayList<? extends Object>[] tInfo = getTypeMap(oi);
    ArrayList<String> columnNames = (ArrayList<String>) tInfo[0];
    ArrayList<TypeInfo> fields = (ArrayList<TypeInfo>) tInfo[1];
    StringBuilder cNames = new StringBuilder();
    StringBuilder cTypes = new StringBuilder();
    for (int i = 0; i < fields.size(); i++) {
        cNames.append(i > 0 ? "," : "");
        cTypes.append(i > 0 ? "," : "");
        cNames.append(columnNames.get(i));
        cTypes.append(fields.get(i).getTypeName());
    }
    try {
        SerDe serDe = new LazyBinarySerDe();
        Properties p = new Properties();
        p.setProperty(org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, cNames.toString());
        p.setProperty(org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, cTypes.toString());
        serDe.initialize(cfg, p);
        return serDe;
    } catch (SerDeException se) {
        throw new WindowingException(se);
    }
}
Also used : SerDe(org.apache.hadoop.hive.serde2.SerDe) LazyBinarySerDe(org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe) ArrayList(java.util.ArrayList) Properties(java.util.Properties) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) WindowingException(com.sap.hadoop.windowing.WindowingException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
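
A minimal sketch of the same idea outside SQLWindowing: initialize a LazyBinarySerDe purely from comma-joined column names and types, then read the schema back from its ObjectInspector. It uses serdeConstants (the current name for the Constants class referenced above); the column names and types are invented, and the two-argument SerDe.initialize(Configuration, Properties) is the older API matching the code above.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;

public class LazyBinarySerDeSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical schema: two columns joined the same way createLazyBinarySerDe joins them.
        Properties props = new Properties();
        props.setProperty(serdeConstants.LIST_COLUMNS, "a,b");
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");

        LazyBinarySerDe serDe = new LazyBinarySerDe();
        serDe.initialize(new Configuration(), props);

        // The SerDe derives its row schema from the two properties, e.g. struct<a:int,b:string>
        System.out.println(serDe.getObjectInspector().getTypeName());
    }
}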

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 516 usages
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 287 usages
ArrayList (java.util.ArrayList): 202 usages
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 193 usages
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 167 usages
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 151 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 148 usages
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 138 usages
Test (org.junit.Test): 135 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 107 usages
UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo): 78 usages
HashMap (java.util.HashMap): 74 usages
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 71 usages
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 69 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 67 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 63 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 61 usages
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 59 usages
List (java.util.List): 54 usages
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 53 usages