Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.
The class ThriftJDBCBinarySerDe, method initialize.
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  // Get the maximum number of rows to buffer
  MAX_BUFFERED_ROWS = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE);
  LOG.info("ThriftJDBCBinarySerDe max number of buffered rows: " + MAX_BUFFERED_ROWS);
  // Get column names and types
  String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
      ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
      : String.valueOf(SerDeUtils.COMMA);
  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  rowObjectInspector =
      (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
  initializeRowAndColumns();
  try {
    thriftFormatter.initialize(conf, tbl);
  } catch (Exception e) {
    throw new SerDeException(e);
  }
}
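For reference, a minimal standalone sketch of the same pattern. The class name and the columns (id, name, score) are hypothetical stand-ins for what the SerDe would receive via table properties; the library calls are the same ones the method above makes.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class RowTypeInfoSketch {
  public static void main(String[] args) {
    // Hypothetical column metadata, in the same form the "columns" and
    // "columns.types" table properties carry it (names comma-separated,
    // types colon-separated).
    List<String> columnNames = Arrays.asList("id", "name", "score");
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString("int:string:double");

    // The same two calls the SerDe makes: a canonical struct TypeInfo for the
    // row, then a standard writable ObjectInspector derived from it.
    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    StructObjectInspector rowObjectInspector = (StructObjectInspector)
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);

    // Prints: struct<id:int,name:string,score:double>
    System.out.println(rowObjectInspector.getTypeName());
  }
}

The factory call returns a cached, canonical StructTypeInfo for a given name/type combination, so repeated initializations of the same schema share one instance.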
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.
The class TypeInfoUtils, method getExtendedTypeInfoFromJavaType.
/**
* Return the extended TypeInfo from a Java type. By extended TypeInfo, we
* allow unknownType for java.lang.Object.
*
* @param t
* The Java type.
* @param m
* The method, only used for generating error messages.
*/
private static TypeInfo getExtendedTypeInfoFromJavaType(Type t, Method m) {
  if (t == Object.class) {
    return TypeInfoFactory.unknownTypeInfo;
  }
  if (t instanceof ParameterizedType) {
    ParameterizedType pt = (ParameterizedType) t;
    // List?
    if (List.class == (Class<?>) pt.getRawType() || ArrayList.class == (Class<?>) pt.getRawType()) {
      return TypeInfoFactory.getListTypeInfo(
          getExtendedTypeInfoFromJavaType(pt.getActualTypeArguments()[0], m));
    }
    // Map?
    if (Map.class == (Class<?>) pt.getRawType() || HashMap.class == (Class<?>) pt.getRawType()) {
      return TypeInfoFactory.getMapTypeInfo(
          getExtendedTypeInfoFromJavaType(pt.getActualTypeArguments()[0], m),
          getExtendedTypeInfoFromJavaType(pt.getActualTypeArguments()[1], m));
    }
    // Otherwise convert t to its raw type so we fall into the following if block.
    t = pt.getRawType();
  }
  // Must be a class.
  if (!(t instanceof Class)) {
    throw new RuntimeException("Hive does not understand type " + t + " from " + m);
  }
  Class<?> c = (Class<?>) t;
  // Java primitive type?
  if (PrimitiveObjectInspectorUtils.isPrimitiveJavaType(c)) {
    return TypeInfoUtils.getTypeInfoFromObjectInspector(
        PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
            PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaType(c).primitiveCategory));
  }
  // Java primitive class?
  if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(c)) {
    return TypeInfoUtils.getTypeInfoFromObjectInspector(
        PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
            PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(c).primitiveCategory));
  }
  // Primitive Writable class?
  if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(c)) {
    return TypeInfoUtils.getTypeInfoFromObjectInspector(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveWritableClass(c).primitiveCategory));
  }
  // Must be a struct: each declared non-static field becomes a struct field.
  Field[] fields = ObjectInspectorUtils.getDeclaredNonStaticFields(c);
  ArrayList<String> fieldNames = new ArrayList<String>(fields.length);
  ArrayList<TypeInfo> fieldTypeInfos = new ArrayList<TypeInfo>(fields.length);
  for (Field field : fields) {
    fieldNames.add(field.getName());
    fieldTypeInfos.add(getExtendedTypeInfoFromJavaType(field.getGenericType(), m));
  }
  return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
}
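To illustrate the struct branch at the end, here is a sketch with a hypothetical POJO: the reflection walk above would turn each of its non-static declared fields into a struct field. The same result is built by hand below with the factory call the method ends with.

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReflectionStructSketch {
  // Hypothetical POJO; each non-static declared field maps to a struct field.
  static class Point {
    int x;
    int y;
    String label;
  }

  public static void main(String[] args) {
    // The struct TypeInfo the reflection walk would derive from Point.
    TypeInfo pointType = TypeInfoFactory.getStructTypeInfo(
        Arrays.asList("x", "y", "label"),
        Arrays.<TypeInfo>asList(TypeInfoFactory.intTypeInfo,
            TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo));

    // Prints: struct<x:int,y:int,label:string>
    System.out.println(pointType.getTypeName());
  }
}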
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.
The class DataWritableReadSupport, method init.
/**
 * Creates the read context for the Parquet side with the requested schema during the init phase.
 *
 * @param context the InitContext for the read
 * @return the Parquet ReadContext
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
  Configuration configuration = context.getConfiguration();
  MessageType fileSchema = context.getFileSchema();
  String columnNames = configuration.get(IOConstants.COLUMNS);
  Map<String, String> contextMetadata = new HashMap<String, String>();
  boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
  // Adds the PARQUET_INT96_WRITE_ZONE_PROPERTY value to the metadata object so that it
  // passes the timezone to the Parquet readers. PARQUET_INT96_WRITE_ZONE_PROPERTY is set
  // on ParquetRecordReaderWrapper.
  contextMetadata.put(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY,
      configuration.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY));
  if (columnNames != null) {
    List<String> columnNamesList = getColumnNames(columnNames);
    String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES);
    List<TypeInfo> columnTypesList = getColumnTypes(columnTypes);
    MessageType tableSchema;
    if (indexAccess) {
      // Generates a sequential list of column indexes
      List<Integer> indexSequence = new ArrayList<Integer>();
      for (int i = 0; i < columnNamesList.size(); i++) {
        indexSequence.add(i);
      }
      tableSchema = getSchemaByIndex(fileSchema, columnNamesList, indexSequence);
    } else {
      tableSchema = getSchemaByName(fileSchema, columnNamesList, columnTypesList);
    }
    contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, tableSchema.toString());
    contextMetadata.put(PARQUET_COLUMN_INDEX_ACCESS, String.valueOf(indexAccess));
    this.hiveTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList);
    Set<String> groupPaths = ColumnProjectionUtils.getNestedColumnPaths(configuration);
    List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);
    if (!ColumnProjectionUtils.isReadAllColumns(configuration) && !indexColumnsWanted.isEmpty()) {
      MessageType requestedSchemaByUser =
          getProjectedSchema(tableSchema, columnNamesList, indexColumnsWanted, groupPaths);
      return new ReadContext(requestedSchemaByUser, contextMetadata);
    } else {
      return new ReadContext(tableSchema, contextMetadata);
    }
  } else {
    contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, fileSchema.toString());
    return new ReadContext(fileSchema, contextMetadata);
  }
}
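The column metadata consumed here arrives through the job Configuration. A hedged sketch of that input side, with hypothetical columns; it assumes the two static helpers called above (getColumnNames, getColumnTypes) are the public ones on DataWritableReadSupport, as the benchmark further down uses them.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReadSupportConfigSketch {
  public static void main(String[] args) {
    // Hypothetical job configuration carrying the table's column metadata
    // under the same keys init() reads above.
    Configuration conf = new Configuration();
    conf.set(IOConstants.COLUMNS, "id,name");
    conf.set(IOConstants.COLUMNS_TYPES, "int:string");

    List<String> names = DataWritableReadSupport.getColumnNames(conf.get(IOConstants.COLUMNS));
    List<TypeInfo> types = DataWritableReadSupport.getColumnTypes(conf.get(IOConstants.COLUMNS_TYPES));

    // The same struct TypeInfo init() stores in hiveTypeInfo for later reads.
    TypeInfo hiveTypeInfo = TypeInfoFactory.getStructTypeInfo(names, types);
    System.out.println(hiveTypeInfo.getTypeName()); // struct<id:int,name:string>
  }
}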
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.
The class ParquetHiveSerDe, method initialize.
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
  final List<String> columnNames;
  final List<TypeInfo> columnTypes;
  // Get column names and types
  final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
      ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
      : String.valueOf(SerDeUtils.COMMA);
  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  if (columnNames.size() != columnTypes.size()) {
    throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column "
        + "names and column types differs. columnNames = " + columnNames
        + ", columnTypes = " + columnTypes);
  }
  // Create row related objects
  StructTypeInfo completeTypeInfo =
      (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  StructTypeInfo prunedTypeInfo = null;
  if (conf != null) {
    String rawPrunedColumnPaths = conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
    if (rawPrunedColumnPaths != null) {
      List<String> prunedColumnPaths = processRawPrunedPaths(rawPrunedColumnPaths);
      prunedTypeInfo = pruneFromPaths(completeTypeInfo, prunedColumnPaths);
    }
  }
  this.objInspector = new ArrayWritableObjectInspector(completeTypeInfo, prunedTypeInfo);
  // Stats part
  serializedSize = 0;
  deserializedSize = 0;
  status = LAST_OPERATION.UNKNOWN;
}
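A hedged usage sketch of this SerDe, assuming the two-argument initialize shown above and a hypothetical two-column table; the class and property values are illustrative only.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class ParquetSerDeUsageSketch {
  public static void main(String[] args) throws SerDeException {
    // Hypothetical two-column table definition.
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string");

    ParquetHiveSerDe serde = new ParquetHiveSerDe();
    serde.initialize(new Configuration(), tbl);

    // The inspector wraps the struct TypeInfo built in initialize().
    StructObjectInspector oi = (StructObjectInspector) serde.getObjectInspector();
    System.out.println(oi.getTypeName()); // struct<id:int,name:string>
  }
}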
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.
The class ColumnarStorageBench, method createStructObjectInspector.
private StructObjectInspector createStructObjectInspector(Configuration conf) {
  // Create row related objects
  String columnNames = conf.get(IOConstants.COLUMNS);
  List<String> columnNamesList = DataWritableReadSupport.getColumnNames(columnNames);
  String columnTypes = conf.get(IOConstants.COLUMNS_TYPES);
  List<TypeInfo> columnTypesList = DataWritableReadSupport.getColumnTypes(columnTypes);
  TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList);
  return new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);
}
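To show what a caller does with the returned inspector, here is a hedged sketch that enumerates the struct fields. It uses a hypothetical two-column row type and the generic standard writable inspector in place of the Parquet-specific ArrayWritableObjectInspector; the field-enumeration calls are the same either way.

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class InspectFieldsSketch {
  public static void main(String[] args) {
    // Hypothetical two-column row type standing in for the benchmark's schema.
    StructObjectInspector oi = (StructObjectInspector)
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
            TypeInfoFactory.getStructTypeInfo(
                Arrays.asList("id", "name"),
                Arrays.<TypeInfo>asList(TypeInfoFactory.intTypeInfo,
                    TypeInfoFactory.stringTypeInfo)));

    // Enumerate the struct fields the inspector exposes.
    for (StructField field : oi.getAllStructFieldRefs()) {
      System.out.println(field.getFieldName() + " : "
          + field.getFieldObjectInspector().getTypeName());
    }
  }
}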