use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class JsonSerDe method initialize.
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
List<TypeInfo> columnTypes;
StructTypeInfo rowTypeInfo;
LOG.debug("Initializing JsonSerDe: {}", tbl.entrySet());
// Get column names and types
String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
// all table column names
if (columnNameProperty.isEmpty()) {
columnNames = Collections.emptyList();
} else {
columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
}
// all column types
if (columnTypeProperty.isEmpty()) {
columnTypes = Collections.emptyList();
} else {
columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
}
LOG.debug("columns: {}, {}", columnNameProperty, columnNames);
LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes);
assert (columnNames.size() == columnTypes.size());
rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
try {
schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
LOG.debug("schema : {}", schema);
LOG.debug("fields : {}", schema.getFieldNames());
} catch (HCatException e) {
throw new SerDeException(e);
}
jsonFactory = new JsonFactory();
tsParser = new TimestampParser(HiveStringUtils.splitAndUnEscape(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS)));
}
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class VectorSerializeRow method serializeStructWrite.
private void serializeStructWrite(StructColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
StructTypeInfo typeInfo = (StructTypeInfo) field.typeInfo;
StructObjectInspector objectInspector = (StructObjectInspector) field.objectInspector;
final ColumnVector[] fieldColumnVectors = colVector.fields;
final Field[] children = field.children;
final List<? extends StructField> structFields = objectInspector.getAllStructFieldRefs();
final int size = field.count;
final List list = (List) vectorExtractRow.extractRowColumn(colVector, typeInfo, objectInspector, adjustedBatchIndex);
serializeWrite.beginStruct(list);
for (int i = 0; i < size; i++) {
if (i > 0) {
serializeWrite.separateStruct();
}
serializeWrite(fieldColumnVectors[i], children[i], adjustedBatchIndex);
}
serializeWrite.finishStruct();
}
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class VectorSerializeRow method createField.
private Field createField(TypeInfo typeInfo) {
final Field field = new Field();
final Category category = typeInfo.getCategory();
field.category = category;
field.typeInfo = typeInfo;
if (category == Category.PRIMITIVE) {
field.isPrimitive = true;
field.primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
} else {
field.isPrimitive = false;
field.objectInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
switch(category) {
case LIST:
field.children = new Field[1];
field.children[0] = createField(((ListTypeInfo) typeInfo).getListElementTypeInfo());
break;
case MAP:
field.children = new Field[2];
field.children[0] = createField(((MapTypeInfo) typeInfo).getMapKeyTypeInfo());
field.children[1] = createField(((MapTypeInfo) typeInfo).getMapValueTypeInfo());
break;
case STRUCT:
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
field.children = createFields(fieldTypeInfos.toArray(new TypeInfo[fieldTypeInfos.size()]));
break;
case UNION:
UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
field.children = createFields(objectTypeInfos.toArray(new TypeInfo[objectTypeInfos.size()]));
break;
default:
throw new RuntimeException();
}
field.count = field.children.length;
}
return field;
}
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class VectorizationContext method getStructFieldIndex.
/**
* The field of Struct is stored in StructColumnVector.fields[index].
* Check the StructTypeInfo.getAllStructFieldNames() and compare to the field name, get the index.
*/
private int getStructFieldIndex(ExprNodeFieldDesc exprNodeFieldDesc) throws HiveException {
ExprNodeDesc structNodeDesc = exprNodeFieldDesc.getDesc();
String fieldName = exprNodeFieldDesc.getFieldName();
StructTypeInfo structTypeInfo = (StructTypeInfo) structNodeDesc.getTypeInfo();
int index = 0;
boolean isFieldExist = false;
for (String fn : structTypeInfo.getAllStructFieldNames()) {
if (fieldName.equals(fn)) {
isFieldExist = true;
break;
}
index++;
}
if (isFieldExist) {
return index;
} else {
throw new HiveException("Could not vectorize expression:" + exprNodeFieldDesc.toString() + ", the field " + fieldName + " doesn't exist.");
}
}
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class VectorVerifyFast method serializeWrite.
public static void serializeWrite(SerializeWrite serializeWrite, TypeInfo typeInfo, Object object) throws IOException {
if (object == null) {
serializeWrite.writeNull();
return;
}
switch(typeInfo.getCategory()) {
case PRIMITIVE:
{
PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
switch(primitiveTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
{
boolean value = ((BooleanWritable) object).get();
serializeWrite.writeBoolean(value);
}
break;
case BYTE:
{
byte value = ((ByteWritable) object).get();
serializeWrite.writeByte(value);
}
break;
case SHORT:
{
short value = ((ShortWritable) object).get();
serializeWrite.writeShort(value);
}
break;
case INT:
{
int value = ((IntWritable) object).get();
serializeWrite.writeInt(value);
}
break;
case LONG:
{
long value = ((LongWritable) object).get();
serializeWrite.writeLong(value);
}
break;
case FLOAT:
{
float value = ((FloatWritable) object).get();
serializeWrite.writeFloat(value);
}
break;
case DOUBLE:
{
double value = ((DoubleWritable) object).get();
serializeWrite.writeDouble(value);
}
break;
case STRING:
{
Text value = (Text) object;
byte[] stringBytes = value.getBytes();
int stringLength = stringBytes.length;
serializeWrite.writeString(stringBytes, 0, stringLength);
}
break;
case CHAR:
{
HiveChar value = ((HiveCharWritable) object).getHiveChar();
serializeWrite.writeHiveChar(value);
}
break;
case VARCHAR:
{
HiveVarchar value = ((HiveVarcharWritable) object).getHiveVarchar();
serializeWrite.writeHiveVarchar(value);
}
break;
case DECIMAL:
{
HiveDecimal value = ((HiveDecimalWritable) object).getHiveDecimal();
DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
}
break;
case DATE:
{
Date value = ((DateWritable) object).get();
serializeWrite.writeDate(value);
}
break;
case TIMESTAMP:
{
Timestamp value = ((TimestampWritable) object).getTimestamp();
serializeWrite.writeTimestamp(value);
}
break;
case INTERVAL_YEAR_MONTH:
{
HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
serializeWrite.writeHiveIntervalYearMonth(value);
}
break;
case INTERVAL_DAY_TIME:
{
HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
serializeWrite.writeHiveIntervalDayTime(value);
}
break;
case BINARY:
{
BytesWritable byteWritable = (BytesWritable) object;
byte[] binaryBytes = byteWritable.getBytes();
int length = byteWritable.getLength();
serializeWrite.writeBinary(binaryBytes, 0, length);
}
break;
default:
throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
}
}
break;
case LIST:
{
ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
ArrayList<Object> elements = (ArrayList<Object>) object;
serializeWrite.beginList(elements);
boolean isFirst = true;
for (Object elementObject : elements) {
if (isFirst) {
isFirst = false;
} else {
serializeWrite.separateList();
}
if (elementObject == null) {
serializeWrite.writeNull();
} else {
serializeWrite(serializeWrite, elementTypeInfo, elementObject);
}
}
serializeWrite.finishList();
}
break;
case MAP:
{
MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
HashMap<Object, Object> hashMap = (HashMap<Object, Object>) object;
serializeWrite.beginMap(hashMap);
boolean isFirst = true;
for (Map.Entry<Object, Object> entry : hashMap.entrySet()) {
if (isFirst) {
isFirst = false;
} else {
serializeWrite.separateKeyValuePair();
}
if (entry.getKey() == null) {
serializeWrite.writeNull();
} else {
serializeWrite(serializeWrite, keyTypeInfo, entry.getKey());
}
serializeWrite.separateKey();
if (entry.getValue() == null) {
serializeWrite.writeNull();
} else {
serializeWrite(serializeWrite, valueTypeInfo, entry.getValue());
}
}
serializeWrite.finishMap();
}
break;
case STRUCT:
{
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
ArrayList<Object> fieldValues = (ArrayList<Object>) object;
final int size = fieldValues.size();
serializeWrite.beginStruct(fieldValues);
boolean isFirst = true;
for (int i = 0; i < size; i++) {
if (isFirst) {
isFirst = false;
} else {
serializeWrite.separateStruct();
}
serializeWrite(serializeWrite, fieldTypeInfos.get(i), fieldValues.get(i));
}
serializeWrite.finishStruct();
}
break;
case UNION:
{
UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
final int size = fieldTypeInfos.size();
StandardUnionObjectInspector.StandardUnion standardUnion = (StandardUnionObjectInspector.StandardUnion) object;
byte tag = standardUnion.getTag();
serializeWrite.beginUnion(tag);
serializeWrite(serializeWrite, fieldTypeInfos.get(tag), standardUnion.getObject());
serializeWrite.finishUnion();
}
break;
default:
throw new Error("Unknown category " + typeInfo.getCategory().name());
}
}
Aggregations