
Example 11 with TypeInfoFactory.getStructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.

the class ThriftJDBCBinarySerDe method initialize.

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // Read the serde configuration: fetch size, then column names and types
    MAX_BUFFERED_ROWS = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE);
    LOG.info("ThriftJDBCBinarySerDe max number of buffered rows: " + MAX_BUFFERED_ROWS);
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    rowObjectInspector = (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
    initializeRowAndColumns();
    try {
        thriftFormatter.initialize(conf, tbl);
    } catch (Exception e) {
        throw new SerDeException(e); // rethrow instead of silently swallowing the failure
    }
}
Also used : TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TException(org.apache.thrift.TException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
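
For reference, here is a minimal standalone sketch (not part of the Hive source) of the parsing that initialize performs above: split the column-name property, parse the colon-separated type string, and build the row type with TypeInfoFactory.getStructTypeInfo. The column names and type string are made-up example values.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructTypeInfoSketch {
    public static void main(String[] args) {
        // Stand-ins for the serdeConstants.LIST_COLUMNS / LIST_COLUMN_TYPES values.
        List<String> columnNames = Arrays.asList("id", "name");
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString("int:string");
        // getStructTypeInfo returns a cached TypeInfo describing the whole row.
        StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        // Prints: struct<id:int,name:string>
        System.out.println(rowTypeInfo.getTypeName());
    }
}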

Example 12 with TypeInfoFactory.getStructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.

the class TypeInfoUtils method getExtendedTypeInfoFromJavaType.

/**
   * Return the extended TypeInfo from a Java type. By extended TypeInfo, we
   * allow unknownType for java.lang.Object.
   *
   * @param t
   *          The Java type.
   * @param m
   *          The method, only used for generating error messages.
   */
private static TypeInfo getExtendedTypeInfoFromJavaType(Type t, Method m) {
    if (t == Object.class) {
        return TypeInfoFactory.unknownTypeInfo;
    }
    if (t instanceof ParameterizedType) {
        ParameterizedType pt = (ParameterizedType) t;
        // List?
        if (List.class == (Class<?>) pt.getRawType() || ArrayList.class == (Class<?>) pt.getRawType()) {
            return TypeInfoFactory.getListTypeInfo(getExtendedTypeInfoFromJavaType(pt.getActualTypeArguments()[0], m));
        }
        // Map?
        if (Map.class == (Class<?>) pt.getRawType() || HashMap.class == (Class<?>) pt.getRawType()) {
            return TypeInfoFactory.getMapTypeInfo(getExtendedTypeInfoFromJavaType(pt.getActualTypeArguments()[0], m), getExtendedTypeInfoFromJavaType(pt.getActualTypeArguments()[1], m));
        }
        // Otherwise convert t to RawType so we will fall into the following if
        // block.
        t = pt.getRawType();
    }
    // Must be a class.
    if (!(t instanceof Class)) {
        throw new RuntimeException("Hive does not understand type " + t + " from " + m);
    }
    Class<?> c = (Class<?>) t;
    // Java Primitive Type?
    if (PrimitiveObjectInspectorUtils.isPrimitiveJavaType(c)) {
        return TypeInfoUtils.getTypeInfoFromObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaType(c).primitiveCategory));
    }
    // Java Primitive Class?
    if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(c)) {
        return TypeInfoUtils.getTypeInfoFromObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(c).primitiveCategory));
    }
    // Primitive Writable class?
    if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(c)) {
        return TypeInfoUtils.getTypeInfoFromObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveWritableClass(c).primitiveCategory));
    }
    // Must be a struct
    Field[] fields = ObjectInspectorUtils.getDeclaredNonStaticFields(c);
    ArrayList<String> fieldNames = new ArrayList<String>(fields.length);
    ArrayList<TypeInfo> fieldTypeInfos = new ArrayList<TypeInfo>(fields.length);
    for (Field field : fields) {
        fieldNames.add(field.getName());
        fieldTypeInfos.add(getExtendedTypeInfoFromJavaType(field.getGenericType(), m));
    }
    return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) ParameterizedType(java.lang.reflect.ParameterizedType) Field(java.lang.reflect.Field) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) LinkedList(java.util.LinkedList) List(java.util.List) Map(java.util.Map) EnumMap(java.util.EnumMap)
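
getExtendedTypeInfoFromJavaType itself is private, but its struct fallback can be exercised through the public TypeInfoUtils.getParameterTypeInfos(Method, int) entry point, which delegates to it. A hedged sketch follows; the Point bean and the evaluate method are hypothetical, used only to trigger the reflection path.

import java.lang.reflect.Method;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ReflectionTypeInfoSketch {
    // A plain Java bean: its declared non-static fields become struct fields.
    public static class Point {
        public int x;
        public int y;
    }

    // A method whose parameter type we want mapped to a TypeInfo.
    public void evaluate(Point p) {
    }

    public static void main(String[] args) throws Exception {
        Method m = ReflectionTypeInfoSketch.class.getMethod("evaluate", Point.class);
        List<TypeInfo> params = TypeInfoUtils.getParameterTypeInfos(m, 1);
        // Prints: struct<x:int,y:int>
        System.out.println(params.get(0).getTypeName());
    }
}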

Example 13 with TypeInfoFactory.getStructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.

the class DataWritableReadSupport method init.

/**
   * It creates the readContext for Parquet side with the requested schema during the init phase.
   *
   * @param context
   * @return the parquet ReadContext
   */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
    Configuration configuration = context.getConfiguration();
    MessageType fileSchema = context.getFileSchema();
    String columnNames = configuration.get(IOConstants.COLUMNS);
    Map<String, String> contextMetadata = new HashMap<String, String>();
    boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
    // Adds the PARQUET_INT96_WRITE_ZONE_PROPERTY value to the metadata object so that it passes the timezone
    // to the Parquet readers. PARQUET_INT96_WRITE_ZONE_PROPERTY is set on ParquetRecordReaderWrapper.
    contextMetadata.put(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY, configuration.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY));
    if (columnNames != null) {
        List<String> columnNamesList = getColumnNames(columnNames);
        String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES);
        List<TypeInfo> columnTypesList = getColumnTypes(columnTypes);
        MessageType tableSchema;
        if (indexAccess) {
            List<Integer> indexSequence = new ArrayList<Integer>();
            // Generates a sequence list of indexes
            for (int i = 0; i < columnNamesList.size(); i++) {
                indexSequence.add(i);
            }
            tableSchema = getSchemaByIndex(fileSchema, columnNamesList, indexSequence);
        } else {
            tableSchema = getSchemaByName(fileSchema, columnNamesList, columnTypesList);
        }
        contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, tableSchema.toString());
        contextMetadata.put(PARQUET_COLUMN_INDEX_ACCESS, String.valueOf(indexAccess));
        this.hiveTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList);
        Set<String> groupPaths = ColumnProjectionUtils.getNestedColumnPaths(configuration);
        List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);
        if (!ColumnProjectionUtils.isReadAllColumns(configuration) && !indexColumnsWanted.isEmpty()) {
            MessageType requestedSchemaByUser = getProjectedSchema(tableSchema, columnNamesList, indexColumnsWanted, groupPaths);
            return new ReadContext(requestedSchemaByUser, contextMetadata);
        } else {
            return new ReadContext(tableSchema, contextMetadata);
        }
    } else {
        contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, fileSchema.toString());
        return new ReadContext(fileSchema, contextMetadata);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType)
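
A small setup sketch (assumed, not taken from the Hive source) of the Configuration keys init() consults above; the column list and the projected index are hypothetical. With a projection registered, init() takes the pruned requestedSchemaByUser branch rather than returning the full table schema.

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public class ReadSupportConfSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // The same keys init() reads; values are invented for illustration.
        conf.set(IOConstants.COLUMNS, "id,name");
        conf.set(IOConstants.COLUMNS_TYPES, "int:string");
        // Request only column 0 ("id"); this also marks the job as not
        // reading all columns, which is the condition init() checks.
        ColumnProjectionUtils.appendReadColumns(conf, Arrays.asList(0));
    }
}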

Example 14 with TypeInfoFactory.getStructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.

the class ParquetHiveSerDe method initialize.

@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;
    // Get column names and sort order
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + "names and column types differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }
    // Create row related objects
    StructTypeInfo completeTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    StructTypeInfo prunedTypeInfo = null;
    if (conf != null) {
        String rawPrunedColumnPaths = conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
        if (rawPrunedColumnPaths != null) {
            List<String> prunedColumnPaths = processRawPrunedPaths(rawPrunedColumnPaths);
            prunedTypeInfo = pruneFromPaths(completeTypeInfo, prunedColumnPaths);
        }
    }
    this.objInspector = new ArrayWritableObjectInspector(completeTypeInfo, prunedTypeInfo);
    // Stats part
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
}
Also used : StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
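
A hedged usage sketch of driving this initialize method by hand: only the two column properties it reads are set, and the property values are invented. Assumes the Hive Parquet serde classes are on the classpath.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.serde.serdeConstants;

public class ParquetSerDeInitSketch {
    public static void main(String[] args) throws Exception {
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string");
        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(new Configuration(), tbl);
        // Prints the struct type backing the inspector, e.g. struct<id:int,name:string>
        System.out.println(serde.getObjectInspector().getTypeName());
    }
}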

Example 15 with TypeInfoFactory.getStructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.

the class ColumnarStorageBench method createStructObjectInspector.

private StructObjectInspector createStructObjectInspector(Configuration conf) {
    // Create row related objects
    String columnNames = conf.get(IOConstants.COLUMNS);
    List<String> columnNamesList = DataWritableReadSupport.getColumnNames(columnNames);
    String columnTypes = conf.get(IOConstants.COLUMNS_TYPES);
    List<TypeInfo> columnTypesList = DataWritableReadSupport.getColumnTypes(columnTypes);
    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList);
    return new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);
}
Also used : ArrayWritableObjectInspector(org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
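
A standalone variant of the benchmark helper above, with hypothetical column names and types in place of values pulled from a Configuration, showing the fields the resulting inspector exposes.

import java.util.Arrays;
import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class InspectorFieldsSketch {
    public static void main(String[] args) {
        StructTypeInfo rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
            Arrays.asList("id", "name"),
            TypeInfoUtils.getTypeInfosFromTypeString("int:string"));
        StructObjectInspector oi = new ArrayWritableObjectInspector(rowTypeInfo);
        // Prints one line per column: "id : int" and "name : string"
        for (StructField f : oi.getAllStructFieldRefs()) {
            System.out.println(f.getFieldName() + " : " + f.getFieldObjectInspector().getTypeName());
        }
    }
}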

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 16
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) 11
ArrayList (java.util.ArrayList) 7
ArrayWritableObjectInspector (org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector) 5
HashMap (java.util.HashMap) 4
SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 3
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 3
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 3
Map (java.util.Map) 2
Configuration (org.apache.hadoop.conf.Configuration) 2
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) 2
BSONWritable (com.mongodb.hadoop.io.BSONWritable) 1
IOException (java.io.IOException) 1
Field (java.lang.reflect.Field) 1
ParameterizedType (java.lang.reflect.ParameterizedType) 1
EnumMap (java.util.EnumMap) 1
LinkedList (java.util.LinkedList) 1
List (java.util.List) 1
Properties (java.util.Properties) 1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 1