Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project parquet-mr by apache.
The class ParquetHiveSerDe, method initialize.
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;
    // Get column names and column types from the table properties
    final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column "
            + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);
    // Stats part
    stats = new SerDeStats();
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
}
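As a quick illustration of what this initialization builds, here is a minimal, self-contained sketch. The column names and the type string are made-up inputs, not anything from parquet-mr; only Hive's serde2 type utilities are assumed on the classpath:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructTypeInfoSketch {
    public static void main(String[] args) {
        // The same two inputs the SerDe reads from the table properties (hypothetical values)
        List<String> columnNames = Arrays.asList("id", "name", "scores");
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString("int,string,array<double>");
        // getStructTypeInfo pairs the parallel name/type lists into a single struct type
        StructTypeInfo rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        // Expected to print Hive's struct syntax: struct<id:int,name:string,scores:array<double>>
        System.out.println(rowTypeInfo.getTypeName());
    }
}

The resulting StructTypeInfo is exactly what the ArrayWritableObjectInspector above is constructed from.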
Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project mongo-hadoop by mongodb.
The class BSONSerDe, method initialize.
/**
 * Reads the table's metadata, including the column names and types.
 */
@SuppressWarnings("unchecked")
@Override
public void initialize(final Configuration conf, final Properties tblProps) throws SerDeException {
    // Regex used to split the comma-separated column name list, trimming surrounding whitespace
    String splitCols = "\\s*,\\s*";
    // Get the table column names
    String colNamesStr = tblProps.getProperty(serdeConstants.LIST_COLUMNS);
    columnNames = Arrays.asList(colNamesStr.split(splitCols));
    // Get the Hive-to-MongoDB field mappings specified by the user
    if (tblProps.containsKey(MONGO_COLS)) {
        String mongoFieldsStr = tblProps.getProperty(MONGO_COLS);
        Map<String, String> rules = ((BasicBSONObject) JSON.parse(mongoFieldsStr)).toMap();
        // Register the Hive field to MongoDB field mappings
        hiveToMongo = new HashMap<String, String>();
        registerMappings(rules);
    }
    // Get the table column types
    String colTypesStr = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException("Column Names and Types don't match in size");
    }
    // Build the struct TypeInfo and its object inspector for a document row
    docTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    docOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(docTypeInfo);
    // Create the BSONWritable instance for future use.
    bsonWritable = new BSONWritable();
}
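To see what the docTypeInfo/docOI pair produced at the end gives you, here is a hedged sketch that builds the same kind of struct inspector for two hypothetical columns (_id and title are illustrative names, not taken from the mongo-hadoop code):

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class DocInspectorSketch {
    public static void main(String[] args) {
        StructTypeInfo docTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
                Arrays.asList("_id", "title"),
                TypeInfoUtils.getTypeInfosFromTypeString("string,string"));
        StructObjectInspector docOI = (StructObjectInspector)
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(docTypeInfo);
        // Enumerate the fields the inspector exposes for each BSON document row
        for (StructField field : docOI.getAllStructFieldRefs()) {
            System.out.println(field.getFieldName() + " -> " + field.getFieldObjectInspector().getTypeName());
        }
    }
}

The cast to StructObjectInspector is safe here because the TypeInfo passed in is a struct.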
Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project presto by prestodb.
The class HiveType, method getTypeSignature.
private static TypeSignature getTypeSignature(TypeInfo typeInfo) {
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            Type primitiveType = getPrimitiveType((PrimitiveTypeInfo) typeInfo);
            if (primitiveType == null) {
                break;
            }
            return primitiveType.getTypeSignature();
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            TypeSignature keyType = getTypeSignature(mapTypeInfo.getMapKeyTypeInfo());
            TypeSignature valueType = getTypeSignature(mapTypeInfo.getMapValueTypeInfo());
            return new TypeSignature(StandardTypes.MAP, ImmutableList.of(TypeSignatureParameter.of(keyType), TypeSignatureParameter.of(valueType)));
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            TypeSignature elementType = getTypeSignature(listTypeInfo.getListElementTypeInfo());
            return new TypeSignature(StandardTypes.ARRAY, ImmutableList.of(TypeSignatureParameter.of(elementType)));
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<TypeInfo> structFieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            List<String> structFieldNames = structTypeInfo.getAllStructFieldNames();
            if (structFieldTypeInfos.size() != structFieldNames.size()) {
                throw new PrestoException(HiveErrorCode.HIVE_INVALID_METADATA, format("Invalid Hive struct type: %s", typeInfo));
            }
            ImmutableList.Builder<TypeSignatureParameter> typeSignatureBuilder = ImmutableList.builder();
            for (int i = 0; i < structFieldTypeInfos.size(); i++) {
                TypeSignature typeSignature = getTypeSignature(structFieldTypeInfos.get(i));
                // Lower-case the struct field names. Otherwise, Presto will refuse to write to columns
                // whose struct type has field names containing upper-case characters. Users can't work
                // around this by casting in their queries because the Presto parser always lower-cases types.
                // TODO: This is a hack. The Presto engine should be able to handle identifiers in a
                // case-insensitive way where necessary.
                String rowFieldName = structFieldNames.get(i).toLowerCase(Locale.US);
                typeSignatureBuilder.add(TypeSignatureParameter.of(new NamedTypeSignature(Optional.of(new RowFieldName(rowFieldName, false)), typeSignature)));
            }
            return new TypeSignature(StandardTypes.ROW, typeSignatureBuilder.build());
    }
    throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s", typeInfo));
}
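The method above is a standard recursive descent over TypeInfo categories. Here is a stripped-down sketch of the same traversal pattern without the Presto-specific TypeSignature machinery; the type string is a made-up example:

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeWalkSketch {
    public static void main(String[] args) {
        TypeInfo t = TypeInfoUtils.getTypeInfoFromTypeString("struct<a:int,b:map<string,array<double>>>");
        walk(t, "");
    }

    static void walk(TypeInfo t, String pad) {
        switch (t.getCategory()) {
            case PRIMITIVE:
                System.out.println(pad + t.getTypeName());
                break;
            case LIST:
                System.out.println(pad + "array of:");
                walk(((ListTypeInfo) t).getListElementTypeInfo(), pad + "  ");
                break;
            case MAP:
                System.out.println(pad + "map key, then value:");
                walk(((MapTypeInfo) t).getMapKeyTypeInfo(), pad + "  ");
                walk(((MapTypeInfo) t).getMapValueTypeInfo(), pad + "  ");
                break;
            case STRUCT:
                StructTypeInfo s = (StructTypeInfo) t;
                System.out.println(pad + "struct:");
                for (int i = 0; i < s.getAllStructFieldNames().size(); i++) {
                    System.out.println(pad + "  " + s.getAllStructFieldNames().get(i));
                    walk(s.getAllStructFieldTypeInfos().get(i), pad + "    ");
                }
                break;
            default:
                throw new IllegalArgumentException("Unsupported category: " + t.getCategory());
        }
    }
}

Like the Presto version, the struct branch relies on getAllStructFieldNames() and getAllStructFieldTypeInfos() being parallel lists, which is why the snippet above validates their sizes before iterating.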
Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project flink by apache.
The class HiveParserUtils, method toRelDataType.
// Converts a Hive TypeInfo to a Calcite RelDataType
public static RelDataType toRelDataType(TypeInfo typeInfo, RelDataTypeFactory relTypeFactory) throws SemanticException {
    RelDataType res;
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            // Hive sets NULLABLE for all primitive types; revert that
            res = HiveParserTypeConverter.convert(typeInfo, relTypeFactory);
            return relTypeFactory.createTypeWithNullability(res, false);
        case LIST:
            RelDataType elementType = toRelDataType(((ListTypeInfo) typeInfo).getListElementTypeInfo(), relTypeFactory);
            // -1 means the array has no declared maximum cardinality
            return relTypeFactory.createArrayType(elementType, -1);
        case MAP:
            RelDataType keyType = toRelDataType(((MapTypeInfo) typeInfo).getMapKeyTypeInfo(), relTypeFactory);
            RelDataType valType = toRelDataType(((MapTypeInfo) typeInfo).getMapValueTypeInfo(), relTypeFactory);
            return relTypeFactory.createMapType(keyType, valType);
        case STRUCT:
            List<TypeInfo> types = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
            List<RelDataType> convertedTypes = new ArrayList<>(types.size());
            for (TypeInfo type : types) {
                convertedTypes.add(toRelDataType(type, relTypeFactory));
            }
            return relTypeFactory.createStructType(convertedTypes, ((StructTypeInfo) typeInfo).getAllStructFieldNames());
        case UNION:
        default:
            throw new SemanticException(String.format("%s type is not supported yet", typeInfo.getCategory().name()));
    }
}
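A hedged usage sketch for the converter follows. The type string is made up, and it assumes a plain calcite-core JavaTypeFactoryImpl on the classpath; inside Flink the planner bundles its own copy of Calcite, so the real wiring differs:

import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ToRelDataTypeSketch {
    public static void main(String[] args) throws Exception {
        RelDataTypeFactory factory = new JavaTypeFactoryImpl();
        TypeInfo hiveType = TypeInfoUtils.getTypeInfoFromTypeString("struct<id:bigint,tags:array<string>>");
        // Recursively maps the Hive struct to a Calcite row type
        RelDataType relType = HiveParserUtils.toRelDataType(hiveType, factory);
        System.out.println(relType.getFullTypeString());
    }
}

Note how primitive fields come back non-nullable because of the createTypeWithNullability call in the PRIMITIVE branch.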
Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project haivvreo by jghoman.
The class AvroObjectInspectorGenerator, method createObjectInspectorWorker.
private ObjectInspector createObjectInspectorWorker(TypeInfo ti) throws SerDeException {
    // Reject unsupported categories up front. Nullable Avro unions ([null, T]) are already
    // resolved to their underlying type at deserialization, so the object inspector will
    // never see the actual union.
    if (!supportedCategories(ti)) {
        throw new HaivvreoException("Don't yet support this type: " + ti);
    }
    ObjectInspector result;
    switch (ti.getCategory()) {
        case PRIMITIVE:
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) ti;
            result = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti.getPrimitiveCategory());
            break;
        case STRUCT:
            StructTypeInfo sti = (StructTypeInfo) ti;
            ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>(sti.getAllStructFieldTypeInfos().size());
            for (TypeInfo typeInfo : sti.getAllStructFieldTypeInfos()) {
                ois.add(createObjectInspectorWorker(typeInfo));
            }
            result = ObjectInspectorFactory.getStandardStructObjectInspector(sti.getAllStructFieldNames(), ois);
            break;
        case MAP:
            // Avro map keys are always strings, so the key inspector is fixed to STRING
            MapTypeInfo mti = (MapTypeInfo) ti;
            result = ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING),
                createObjectInspectorWorker(mti.getMapValueTypeInfo()));
            break;
        case LIST:
            ListTypeInfo ati = (ListTypeInfo) ti;
            result = ObjectInspectorFactory.getStandardListObjectInspector(createObjectInspectorWorker(ati.getListElementTypeInfo()));
            break;
        case UNION:
            UnionTypeInfo uti = (UnionTypeInfo) ti;
            List<TypeInfo> allUnionObjectTypeInfos = uti.getAllUnionObjectTypeInfos();
            List<ObjectInspector> unionObjectInspectors = new ArrayList<ObjectInspector>(allUnionObjectTypeInfos.size());
            for (TypeInfo typeInfo : allUnionObjectTypeInfos) {
                unionObjectInspectors.add(createObjectInspectorWorker(typeInfo));
            }
            result = ObjectInspectorFactory.getStandardUnionObjectInspector(unionObjectInspectors);
            break;
        default:
            throw new HaivvreoException("No Hive categories matched: " + ti);
    }
    return result;
}
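For comparison with the STRUCT branch, here is a minimal sketch that builds the same kind of standard struct inspector directly from primitive inspectors; the field names are illustrative, not from haivvreo:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StandardStructSketch {
    public static void main(String[] args) {
        // One primitive Java object inspector per field
        ObjectInspector idOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.LONG);
        ObjectInspector nameOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.STRING);
        // Pair the field names with their inspectors, as the STRUCT branch does
        ObjectInspector structOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("id", "name"), Arrays.asList(idOI, nameOI));
        // Expected to print struct<id:bigint,name:string>
        System.out.println(structOI.getTypeName());
    }
}

The MAP branch above always uses a STRING key inspector because Avro map keys are strings by definition.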