Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
Example from the class OrcInputFormat, method typeDescriptionsFromHiveTypeProperty.
/**
 * Parse a Hive "columns.types" style property string into ORC
 * {@link TypeDescription} objects, one per top-level column.
 * @param hiveTypeProperty the Hive type string to parse
 * @param maxColumns stop converting once this many columns have been produced
 * @return the converted type descriptions, at most {@code maxColumns} entries
 */
public static ArrayList<TypeDescription> typeDescriptionsFromHiveTypeProperty(String hiveTypeProperty, int maxColumns) {
// CONSIDER: We need a type name parser for TypeDescription.
ArrayList<TypeInfo> hiveTypes = TypeInfoUtils.getTypeInfosFromTypeString(hiveTypeProperty);
ArrayList<TypeDescription> result = new ArrayList<TypeDescription>(hiveTypes.size());
for (TypeInfo hiveType : hiveTypes) {
result.add(convertTypeInfo(hiveType));
// Cap the output once the requested column budget is reached.
if (result.size() >= maxColumns) {
break;
}
}
return result;
}
Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
Example from the class OrcOutputFormat, method getOptions.
/**
 * Build ORC writer options from the job configuration and table properties.
 * When both the column-name and column-type properties are present and
 * non-empty, the ORC struct schema is derived from them and set on the
 * options; otherwise the options are returned without an explicit schema.
 *
 * @param conf the job configuration
 * @param props table properties carrying column names/types; may be null
 * @return writer options, with the schema set when it could be derived
 */
private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
if (props != null) {
final String columnNameProperty = props.getProperty(IOConstants.COLUMNS);
final String columnTypeProperty = props.getProperty(IOConstants.COLUMNS_TYPES);
if (columnNameProperty != null && !columnNameProperty.isEmpty() && columnTypeProperty != null && !columnTypeProperty.isEmpty()) {
final String columnNameDelimiter = props.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? props.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
// The guard above already ensures both properties are non-empty, so the
// original empty-string fallback branches were unreachable and have been removed.
List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
TypeDescription schema = TypeDescription.createStruct();
for (int i = 0; i < columnNames.size(); ++i) {
schema.addField(columnNames.get(i), OrcInputFormat.convertTypeInfo(columnTypes.get(i)));
}
if (LOG.isDebugEnabled()) {
LOG.debug("ORC schema = " + schema);
}
result.setSchema(schema);
}
}
return result;
}
Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
Example from the class Utilities, method getColumnTypes.
/**
 * Returns the canonical type name of every column declared in the
 * {@code columns.types} table property.
 *
 * @param props table/serde properties
 * @return the per-column type names; empty when the property is absent or blank
 */
public static List<String> getColumnTypes(Properties props) {
List<String> typeNames = new ArrayList<String>();
String colTypes = props.getProperty(serdeConstants.LIST_COLUMN_TYPES);
// Guard against a missing/blank property: the original passed null straight
// into the parser, which would throw a NullPointerException.
if (colTypes == null || colTypes.isEmpty()) {
return typeNames;
}
for (TypeInfo col : TypeInfoUtils.getTypeInfosFromTypeString(colTypes)) {
typeNames.add(col.getTypeName());
}
return typeNames;
}
Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
Example from the class OrcSerde, method initialize.
/**
 * Initializes this serde from the table properties: parses the column names
 * and types and builds the ORC struct object inspector.
 * NOTE: if "columns.types" is missing, every column defaults to string.
 *
 * @param conf the configuration (unused here beyond property lookups)
 * @param table table properties holding the column names, types and delimiter
 */
@Override
public void initialize(Configuration conf, Properties table) {
// Read the configuration parameters
String columnNameProperty = table.getProperty(serdeConstants.LIST_COLUMNS);
String columnTypeProperty = table.getProperty(serdeConstants.LIST_COLUMN_TYPES);
final String columnNameDelimiter = table.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? table.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
// NOTE(review): the original also read OrcConf.COMPRESS into a local that was
// never used in this method; the unused lookup has been removed.
// Parse the configuration parameters
ArrayList<String> columnNames = new ArrayList<String>();
if (columnNameProperty != null && columnNameProperty.length() > 0) {
for (String name : columnNameProperty.split(columnNameDelimiter)) {
columnNames.add(name);
}
}
if (columnTypeProperty == null) {
// Default type: all string, joined with ':' as the type-list separator.
StringBuilder sb = new StringBuilder();
for (int i = 0; i < columnNames.size(); i++) {
if (i > 0) {
sb.append(":");
}
sb.append("string");
}
columnTypeProperty = sb.toString();
}
ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
StructTypeInfo rootType = new StructTypeInfo();
// The source column names for ORC serde that will be used in the schema.
rootType.setAllStructFieldNames(columnNames);
rootType.setAllStructFieldTypeInfos(fieldTypes);
inspector = OrcStruct.createObjectInspector(rootType);
}
Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
Example from the class ParquetHiveSerDe, method initialize.
/**
 * Initializes this serde from the table properties: parses the column names
 * and types, validates that they line up, optionally applies nested-column
 * pruning from the configuration, and builds the row object inspector.
 *
 * @param conf job configuration; may carry nested-column pruning paths
 * @param tbl table properties holding column names, types and the delimiter
 * @throws SerDeException declared by the SerDe contract
 * @throws IllegalArgumentException when the name and type counts differ
 */
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
final List<String> columnNames;
final List<TypeInfo> columnTypes;
// Get column names and sort order
final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
// Treat a missing property like an empty one: the original dereferenced the
// raw property value and threw NullPointerException when it was absent.
if (columnNameProperty == null || columnNameProperty.isEmpty()) {
columnNames = new ArrayList<String>();
} else {
columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
}
if (columnTypeProperty == null || columnTypeProperty.isEmpty()) {
columnTypes = new ArrayList<TypeInfo>();
} else {
columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
}
if (columnNames.size() != columnTypes.size()) {
throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
}
// Create row related objects (the original also declared an unused
// `rowTypeInfo` local, which has been removed).
StructTypeInfo completeTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
StructTypeInfo prunedTypeInfo = null;
if (conf != null) {
String rawPrunedColumnPaths = conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
if (rawPrunedColumnPaths != null) {
// Only materialize a pruned schema when nested-column pruning is requested.
List<String> prunedColumnPaths = processRawPrunedPaths(rawPrunedColumnPaths);
prunedTypeInfo = pruneFromPaths(completeTypeInfo, prunedColumnPaths);
}
}
this.objInspector = new ArrayWritableObjectInspector(completeTypeInfo, prunedTypeInfo);
// Stats part
serializedSize = 0;
deserializedSize = 0;
status = LAST_OPERATION.UNKNOWN;
}
Aggregations