Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
The class AvroSerializer, method serializeStruct.
private Object serializeStruct(StructTypeInfo typeInfo, StructObjectInspector ssoi, Object o, Schema schema) throws AvroSerdeException {
  int size = schema.getFields().size();
  List<? extends StructField> allStructFieldRefs = ssoi.getAllStructFieldRefs();
  List<Object> structFieldsDataAsList = ssoi.getStructFieldsDataAsList(o);
  GenericData.Record record = new GenericData.Record(schema);
  ArrayList<TypeInfo> allStructFieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  // Hive struct fields and Avro schema fields are matched strictly by position.
  for (int i = 0; i < size; i++) {
    Field field = schema.getFields().get(i);
    TypeInfo colTypeInfo = allStructFieldTypeInfos.get(i);
    StructField structFieldRef = allStructFieldRefs.get(i);
    Object structFieldData = structFieldsDataAsList.get(i);
    ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
    // Recursively serialize each field value against its Avro field schema.
    Object val = serialize(colTypeInfo, fieldOI, structFieldData, field.schema());
    record.put(field.name(), val);
  }
  return record;
}
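To see the positional pairing above in isolation, here is a minimal hypothetical sketch (not from the Hive source): it builds a standard struct ObjectInspector for an assumed id:int, name:string struct and walks its field refs the same way serializeStruct does. The field names, types, and row values are illustrative assumptions.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructFieldRefSketch {
  public static void main(String[] args) {
    // Illustrative struct with two fields: id:int, name:string.
    List<String> names = Arrays.asList("id", "name");
    List<ObjectInspector> ois = Arrays.asList(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    StructObjectInspector soi =
        ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);

    // For the standard struct OI, a row is simply a List of field values.
    Object row = Arrays.asList(7, "hive");

    // Field refs and data line up by index, exactly as serializeStruct assumes.
    for (StructField ref : soi.getAllStructFieldRefs()) {
      Object data = soi.getStructFieldData(row, ref);
      System.out.println(ref.getFieldName() + " -> " + data
          + " (" + ref.getFieldObjectInspector().getTypeName() + ")");
    }
  }
}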
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
The class AvroSerializer, method serialize.
// Hive is pretty simple (read: stupid) in writing out values via the serializer.
// We're just going to go through, matching indices. Hive formats normally
// handle mismatches with null. We don't have that option, so instead we'll
// end up throwing an exception for invalid records.
public Writable serialize(Object o, ObjectInspector objectInspector, List<String> columnNames, List<TypeInfo> columnTypes, Schema schema) throws AvroSerdeException {
  StructObjectInspector soi = (StructObjectInspector) objectInspector;
  GenericData.Record record = new GenericData.Record(schema);
  List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
  if (outputFieldRefs.size() != columnNames.size()) {
    throw new AvroSerdeException("Number of input columns was different than output columns (in = " + columnNames.size() + " vs out = " + outputFieldRefs.size() + ")");
  }
  int size = schema.getFields().size();
  if (outputFieldRefs.size() != size) {
    throw new AvroSerdeException("Hive passed in a different number of fields than the schema expected: (Hive wanted " + outputFieldRefs.size() + ", Avro expected " + schema.getFields().size() + ")");
  }
  List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
  List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);
  for (int i = 0; i < size; i++) {
    Field field = schema.getFields().get(i);
    TypeInfo typeInfo = columnTypes.get(i);
    StructField structFieldRef = allStructFieldRefs.get(i);
    Object structFieldData = structFieldsDataAsList.get(i);
    ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
    Object val = serialize(typeInfo, fieldOI, structFieldData, field.schema());
    record.put(field.name(), val);
  }
  if (!GenericData.get().validate(schema, record)) {
    throw new SerializeToAvroException(schema, record);
  }
  // cache is the serializer's reusable AvroGenericRecordWritable instance.
  cache.setRecord(record);
  return cache;
}
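The validate-then-wrap step at the end is the notable design choice: the assembled record is checked against the schema before the reusable Writable is returned, so invalid rows fail loudly instead of producing nulls. Below is a small hedged sketch of that validation check in isolation, using an illustrative two-field schema (the schema shape and field names are assumptions, not from the Hive source).

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;

public class ValidateSketch {
  public static void main(String[] args) {
    // Illustrative schema: two required fields.
    Schema schema = SchemaBuilder.record("r").fields()
        .requiredInt("id")
        .requiredString("name")
        .endRecord();
    GenericData.Record record = new GenericData.Record(schema);
    record.put("id", 7);
    // "name" is left unset (null). validate() returns false here, which is
    // exactly the condition that makes serialize() throw SerializeToAvroException.
    System.out.println(GenericData.get().validate(schema, record)); // false
  }
}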
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
The class ObjectInspectorUtils, method compareSupported.
/**
 * Whether comparison is supported for this type.
 * Currently, any type that references a map is not comparable.
 */
public static boolean compareSupported(ObjectInspector oi) {
  switch (oi.getCategory()) {
    case PRIMITIVE:
      return true;
    case LIST:
      ListObjectInspector loi = (ListObjectInspector) oi;
      return compareSupported(loi.getListElementObjectInspector());
    case STRUCT:
      StructObjectInspector soi = (StructObjectInspector) oi;
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      for (int f = 0; f < fields.size(); f++) {
        if (!compareSupported(fields.get(f).getFieldObjectInspector())) {
          return false;
        }
      }
      return true;
    case MAP:
      return false;
    case UNION:
      UnionObjectInspector uoi = (UnionObjectInspector) oi;
      for (ObjectInspector eoi : uoi.getObjectInspectors()) {
        if (!compareSupported(eoi)) {
          return false;
        }
      }
      return true;
    default:
      return false;
  }
}
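As a quick sanity check of the recursion, the following hedged sketch (the constructed inspectors are illustrative, not from the Hive tests) builds a few inspector shapes and calls compareSupported on each. A map anywhere in the type tree makes the whole type non-comparable.

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class CompareSupportedSketch {
  public static void main(String[] args) {
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    ObjectInspector listOfInt =
        ObjectInspectorFactory.getStandardListObjectInspector(intOI);
    ObjectInspector mapOI =
        ObjectInspectorFactory.getStandardMapObjectInspector(intOI, intOI);
    // A struct containing a map anywhere in its tree is not comparable.
    ObjectInspector structWithMap =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("m"), Arrays.asList(mapOI));

    System.out.println(ObjectInspectorUtils.compareSupported(intOI));         // true
    System.out.println(ObjectInspectorUtils.compareSupported(listOfInt));     // true
    System.out.println(ObjectInspectorUtils.compareSupported(mapOI));         // false
    System.out.println(ObjectInspectorUtils.compareSupported(structWithMap)); // false
  }
}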
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
The class ObjectInspectorUtils, method hashCode.
public static int hashCode(Object o, ObjectInspector objIns) {
  if (o == null) {
    return 0;
  }
  switch (objIns.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) objIns);
      switch (poi.getPrimitiveCategory()) {
        case VOID:
          return 0;
        case BOOLEAN:
          return ((BooleanObjectInspector) poi).get(o) ? 1 : 0;
        case BYTE:
          return ((ByteObjectInspector) poi).get(o);
        case SHORT:
          return ((ShortObjectInspector) poi).get(o);
        case INT:
          return ((IntObjectInspector) poi).get(o);
        case LONG: {
          long a = ((LongObjectInspector) poi).get(o);
          return (int) ((a >>> 32) ^ a);
        }
        case FLOAT:
          return Float.floatToIntBits(((FloatObjectInspector) poi).get(o));
        case DOUBLE: {
          // This hash function returns the same result as Double.hashCode(),
          // while DoubleWritable.hashCode returns a different result.
          long a = Double.doubleToLongBits(((DoubleObjectInspector) poi).get(o));
          return (int) ((a >>> 32) ^ a);
        }
        case STRING: {
          // This hash function returns the same result as String.hashCode() when
          // all characters are ASCII, while Text.hashCode() always returns a
          // different result.
          Text t = ((StringObjectInspector) poi).getPrimitiveWritableObject(o);
          int r = 0;
          for (int i = 0; i < t.getLength(); i++) {
            r = r * 31 + t.getBytes()[i];
          }
          return r;
        }
        case CHAR:
          return ((HiveCharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        case VARCHAR:
          return ((HiveVarcharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        case BINARY:
          return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        case DATE:
          return ((DateObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        case TIMESTAMP:
          TimestampWritable t = ((TimestampObjectInspector) poi).getPrimitiveWritableObject(o);
          return t.hashCode();
        case INTERVAL_YEAR_MONTH:
          HiveIntervalYearMonthWritable intervalYearMonth = ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(o);
          return intervalYearMonth.hashCode();
        case INTERVAL_DAY_TIME:
          HiveIntervalDayTimeWritable intervalDayTime = ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(o);
          return intervalDayTime.hashCode();
        case DECIMAL:
          // compatible hash code.
          return ((HiveDecimalObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
        default:
          throw new RuntimeException("Unknown type: " + poi.getPrimitiveCategory());
      }
    }
    case LIST: {
      int r = 0;
      ListObjectInspector listOI = (ListObjectInspector) objIns;
      ObjectInspector elemOI = listOI.getListElementObjectInspector();
      for (int ii = 0; ii < listOI.getListLength(o); ++ii) {
        r = 31 * r + hashCode(listOI.getListElement(o, ii), elemOI);
      }
      return r;
    }
    case MAP: {
      int r = 0;
      MapObjectInspector mapOI = (MapObjectInspector) objIns;
      ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
      ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
      Map<?, ?> map = mapOI.getMap(o);
      // Summing entry hashes keeps the result independent of iteration order.
      for (Map.Entry<?, ?> entry : map.entrySet()) {
        r += hashCode(entry.getKey(), keyOI) ^ hashCode(entry.getValue(), valueOI);
      }
      return r;
    }
    case STRUCT: {
      int r = 0;
      StructObjectInspector structOI = (StructObjectInspector) objIns;
      List<? extends StructField> fields = structOI.getAllStructFieldRefs();
      for (StructField field : fields) {
        r = 31 * r + hashCode(structOI.getStructFieldData(o, field), field.getFieldObjectInspector());
      }
      return r;
    }
    case UNION: {
      UnionObjectInspector uOI = (UnionObjectInspector) objIns;
      byte tag = uOI.getTag(o);
      return hashCode(uOI.getField(o), uOI.getObjectInspectors().get(tag));
    }
    default:
      throw new RuntimeException("Unknown type: " + objIns.getTypeName());
  }
}
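The comments in the STRING case promise String.hashCode() compatibility for pure-ASCII input. A short hedged sketch that exercises that claim (the input value is illustrative):

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class HashCodeSketch {
  public static void main(String[] args) {
    String s = "hive"; // ASCII only, so the byte-wise loop matches char-wise hashing
    int oiHash = ObjectInspectorUtils.hashCode(
        s, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    System.out.println(oiHash == s.hashCode()); // true for ASCII input
  }
}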
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
The class TestAvroDeserializer, method canDeserializeRecordsInternal.
public void canDeserializeRecordsInternal(Schema s, Schema fileSchema) throws SerDeException, IOException {
  GenericData.Record record = new GenericData.Record(s);
  GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
  innerRecord.put("int1", 42);
  innerRecord.put("boolean1", true);
  innerRecord.put("long1", 42432234234L);
  record.put("aRecord", innerRecord);
  assertTrue(GENERIC_DATA.validate(s, record));
  AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record, fileSchema);
  AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
  AvroDeserializer de = new AvroDeserializer();
  ArrayList<Object> row = (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
  assertEquals(1, row.size());
  Object theRecordObject = row.get(0);
  System.out.println("theRecordObject = " + theRecordObject.getClass().getCanonicalName());
  // The original record type is lost during deserialization, so access the
  // data the supported way: through ObjectInspectors.
  StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
  List<? extends StructField> allStructFieldRefs = oi.getAllStructFieldRefs();
  assertEquals(1, allStructFieldRefs.size());
  StructField fieldRefForaRecord = allStructFieldRefs.get(0);
  assertEquals("arecord", fieldRefForaRecord.getFieldName());
  Object innerRecord2 = oi.getStructFieldData(row, fieldRefForaRecord);
  // Extract the inner record's field refs.
  StandardStructObjectInspector innerRecord2OI = (StandardStructObjectInspector) fieldRefForaRecord.getFieldObjectInspector();
  List<? extends StructField> allStructFieldRefs1 = innerRecord2OI.getAllStructFieldRefs();
  assertEquals(3, allStructFieldRefs1.size());
  assertEquals("int1", allStructFieldRefs1.get(0).getFieldName());
  assertEquals("boolean1", allStructFieldRefs1.get(1).getFieldName());
  assertEquals("long1", allStructFieldRefs1.get(2).getFieldName());
  innerRecord2OI.getStructFieldsDataAsList(innerRecord2);
  assertEquals(42, innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(0)));
  assertEquals(true, innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(1)));
  assertEquals(42432234234L, innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(2)));
}
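The test receives its schemas from the caller, so the exact schema text is not shown here. The following hedged sketch builds one schema shape that would satisfy the assertions above: a top-level record with a single aRecord field wrapping int1, boolean1, and long1. The record names and the builder usage are assumptions, not the test's actual schema source.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

public class TestSchemaSketch {
  public static void main(String[] args) {
    // Inner record: the three fields the test populates and asserts on.
    Schema inner = SchemaBuilder.record("inner").fields()
        .requiredInt("int1")
        .requiredBoolean("boolean1")
        .requiredLong("long1")
        .endRecord();
    // Outer record: one field named "aRecord" holding the inner record.
    Schema outer = SchemaBuilder.record("outer").fields()
        .name("aRecord").type(inner).noDefault()
        .endRecord();
    System.out.println(outer.toString(true));
  }
}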