Example 16 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class OrcInputFormat, method typeDescriptionsFromHiveTypeProperty.

/**
 * Convert a Hive type property string that contains a list of type names (as in the
 * columns.types table property) into a list of TypeDescription objects.
 * @param hiveTypeProperty the desired types from hive
 * @param maxColumns the maximum number of desired columns
 * @return the list of TypeDescription objects.
 */
public static ArrayList<TypeDescription> typeDescriptionsFromHiveTypeProperty(String hiveTypeProperty, int maxColumns) {
    // CONSIDER: We need a type name parser for TypeDescription.
    ArrayList<TypeInfo> typeInfoList = TypeInfoUtils.getTypeInfosFromTypeString(hiveTypeProperty);
    ArrayList<TypeDescription> typeDescrList = new ArrayList<TypeDescription>(typeInfoList.size());
    for (TypeInfo typeInfo : typeInfoList) {
        typeDescrList.add(convertTypeInfo(typeInfo));
        if (typeDescrList.size() >= maxColumns) {
            break;
        }
    }
    return typeDescrList;
}
Also used: ArrayList(java.util.ArrayList), TypeDescription(org.apache.orc.TypeDescription), StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo), BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo), MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo), PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)
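
To make the parse step concrete, here is a minimal, self-contained sketch of what getTypeInfosFromTypeString consumes and produces; the class name and sample type string are illustrative, not taken from Hive:

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringParseSketch {
    public static void main(String[] args) {
        // A columns.types-style value: top-level type names separated by ':' (hypothetical sample).
        String hiveTypeProperty = "int:string:map<string,bigint>:struct<x:double,y:double>";
        List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(hiveTypeProperty);
        for (TypeInfo typeInfo : typeInfos) {
            // Prints each parsed type name, e.g. "int", then "string", and so on.
            System.out.println(typeInfo.getTypeName());
        }
    }
}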

Example 17 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class OrcOutputFormat, method getOptions.

private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
    OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
    if (props != null) {
        final String columnNameProperty = props.getProperty(IOConstants.COLUMNS);
        final String columnTypeProperty = props.getProperty(IOConstants.COLUMNS_TYPES);
        if (columnNameProperty != null && !columnNameProperty.isEmpty() && columnTypeProperty != null && !columnTypeProperty.isEmpty()) {
            List<String> columnNames;
            List<TypeInfo> columnTypes;
            final String columnNameDelimiter = props.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? props.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
            if (columnNameProperty.length() == 0) {
                columnNames = new ArrayList<String>();
            } else {
                columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
            }
            if (columnTypeProperty.length() == 0) {
                columnTypes = new ArrayList<TypeInfo>();
            } else {
                columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
            }
            TypeDescription schema = TypeDescription.createStruct();
            for (int i = 0; i < columnNames.size(); ++i) {
                schema.addField(columnNames.get(i), OrcInputFormat.convertTypeInfo(columnTypes.get(i)));
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("ORC schema = " + schema);
            }
            result.setSchema(schema);
        }
    }
    return result;
}
Also used: TypeDescription(org.apache.orc.TypeDescription), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
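
The loop above pairs each column name with a converted type via OrcInputFormat.convertTypeInfo. As a standalone sketch of the same createStruct/addField pattern from the ORC library (column names are hypothetical):

import org.apache.orc.TypeDescription;

public class OrcSchemaSketch {
    public static void main(String[] args) {
        // Build the struct schema field by field, as getOptions does above.
        TypeDescription schema = TypeDescription.createStruct()
                .addField("id", TypeDescription.createInt())
                .addField("name", TypeDescription.createString());
        // Prints struct<id:int,name:string>.
        System.out.println(schema);
    }
}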

Example 18 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class Utilities, method getColumnTypes.

public static List<String> getColumnTypes(Properties props) {
    List<String> names = new ArrayList<String>();
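    // Despite the variable name, colNames holds the serialized column types string (columns.types).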
    String colNames = props.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    ArrayList<TypeInfo> cols = TypeInfoUtils.getTypeInfosFromTypeString(colNames);
    for (TypeInfo col : cols) {
        names.add(col.getTypeName());
    }
    return names;
}
Also used: ArrayList(java.util.ArrayList), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
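
Note that the method assumes the property is present; a missing columns.types would pass null to the parser. A minimal sketch of driving the same lookup-and-parse flow with an explicit Properties object (the property value is a made-up example; the string literal is what serdeConstants.LIST_COLUMN_TYPES resolves to):

import java.util.List;
import java.util.Properties;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ColumnTypesSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        // "columns.types" is the value of serdeConstants.LIST_COLUMN_TYPES.
        props.setProperty("columns.types", "bigint:string:array<int>");
        List<TypeInfo> cols =
                TypeInfoUtils.getTypeInfosFromTypeString(props.getProperty("columns.types"));
        for (TypeInfo col : cols) {
            // Prints bigint, string, array<int>.
            System.out.println(col.getTypeName());
        }
    }
}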

Example 19 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class OrcSerde, method initialize.

@Override
public void initialize(Configuration conf, Properties table) {
    // Read the configuration parameters
    String columnNameProperty = table.getProperty(serdeConstants.LIST_COLUMNS);
    // NOTE: if "columns.types" is missing, all columns will be of String type
    String columnTypeProperty = table.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = table.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? table.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    String compressType = OrcConf.COMPRESS.getString(table, conf);
    // Parse the configuration parameters
    ArrayList<String> columnNames = new ArrayList<String>();
    if (columnNameProperty != null && columnNameProperty.length() > 0) {
        for (String name : columnNameProperty.split(columnNameDelimiter)) {
            columnNames.add(name);
        }
    }
    if (columnTypeProperty == null) {
        // Default type: all string
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < columnNames.size(); i++) {
            if (i > 0) {
                sb.append(":");
            }
            sb.append("string");
        }
        columnTypeProperty = sb.toString();
    }
    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    StructTypeInfo rootType = new StructTypeInfo();
    // The source column names for ORC serde that will be used in the schema.
    rootType.setAllStructFieldNames(columnNames);
    rootType.setAllStructFieldTypeInfos(fieldTypes);
    inspector = OrcStruct.createObjectInspector(rootType);
}
Also used: ArrayList(java.util.ArrayList), StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
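
The fallback above synthesizes a type string of one "string" per column, joined by ':', before handing it to the same parser. A condensed sketch of that path (column names are hypothetical):

import java.util.ArrayList;

import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class DefaultStringTypesSketch {
    public static void main(String[] args) {
        ArrayList<String> columnNames = new ArrayList<String>();
        columnNames.add("k");
        columnNames.add("v");
        // Same default as initialize(): one "string" per column, ':'-joined.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < columnNames.size(); i++) {
            if (i > 0) {
                sb.append(":");
            }
            sb.append("string");
        }
        ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(sb.toString());
        StructTypeInfo rootType = new StructTypeInfo();
        rootType.setAllStructFieldNames(columnNames);
        rootType.setAllStructFieldTypeInfos(fieldTypes);
        // Prints struct<k:string,v:string>.
        System.out.println(rootType.getTypeName());
    }
}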

Example 20 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class ParquetHiveSerDe, method initialize.

@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;
    // Get column names and sort order
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }
    // Create row related objects
    StructTypeInfo completeTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    StructTypeInfo prunedTypeInfo = null;
    if (conf != null) {
        String rawPrunedColumnPaths = conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
        if (rawPrunedColumnPaths != null) {
            List<String> prunedColumnPaths = processRawPrunedPaths(rawPrunedColumnPaths);
            prunedTypeInfo = pruneFromPaths(completeTypeInfo, prunedColumnPaths);
        }
    }
    this.objInspector = new ArrayWritableObjectInspector(completeTypeInfo, prunedTypeInfo);
    // Stats part
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
}
Also used: StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
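
A standalone sketch of the parse-then-pair flow above, including the count-mismatch guard (column names and type string are made-up samples):

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ParquetInitSketch {
    public static void main(String[] args) {
        List<String> columnNames = Arrays.asList("id", "payload");
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString("bigint:string");
        if (columnNames.size() != columnTypes.size()) {
            throw new IllegalArgumentException("Number of column names and column types differs.");
        }
        StructTypeInfo completeTypeInfo =
                (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        // Prints struct<id:bigint,payload:string>.
        System.out.println(completeTypeInfo.getTypeName());
    }
}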

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 23 usages
ArrayList (java.util.ArrayList): 18 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 17 usages
Text (org.apache.hadoop.io.Text): 14 usages
LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector): 11 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 9 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 9 usages
Cell (org.apache.hadoop.hbase.Cell): 7 usages
KeyValue (org.apache.hadoop.hbase.KeyValue): 7 usages
Result (org.apache.hadoop.hbase.client.Result): 7 usages
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 7 usages
IntWritable (org.apache.hadoop.io.IntWritable): 7 usages
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 6 usages
LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector): 5 usages
Test (org.junit.Test): 5 usages
HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping): 4 usages
IOException (java.io.IOException): 3 usages
ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping): 2 usages
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2 usages
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 2 usages