Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
In class TestHBaseSerDe, method deserializeAndSerialize.
private void deserializeAndSerialize(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData) throws SerDeException {
  // Get the row structure
  StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
  assertEquals(9, fieldRefs.size());
  // Deserialize
  Object row = serDe.deserialize(new ResultWritable(r));
  for (int i = 0; i < fieldRefs.size(); i++) {
    Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
    if (fieldData != null) {
      fieldData = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
    }
    assertEquals("Field " + i, expectedFieldsData[i], fieldData);
  }
  // Serialize
  assertEquals(PutWritable.class, serDe.getSerializedClass());
  PutWritable serializedPut = (PutWritable) serDe.serialize(row, oi);
  assertEquals("Serialized data", p.toString(), String.valueOf(serializedPut.getPut()));
}
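The test above follows the generic StructObjectInspector access pattern: fetch the StructField references once, then resolve each field's data from the deserialized row. A minimal sketch of that pattern outside the test; the class name StructFieldDump and the printing are illustrative, not part of the Hive code base.

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class StructFieldDump {
  // Prints each field's name, type, and data for a single deserialized row.
  public static void dump(Object row, StructObjectInspector oi) {
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    for (StructField fieldRef : fieldRefs) {
      ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
      Object fieldData = oi.getStructFieldData(row, fieldRef);
      System.out.println(fieldRef.getFieldName() + " (" + fieldOI.getTypeName() + "): " + fieldData);
    }
  }
}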
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
In class TestHBaseSerDe, method deserializeAndSerializeHiveStructColumnFamily.
private void deserializeAndSerializeHiveStructColumnFamily(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData, int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey) throws SerDeException, IOException {
  StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
  List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
  Object row = serDe.deserialize(new ResultWritable(r));
  int k = 0;
  for (int i = 0; i < fieldRefs.size(); i++) {
    Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
    assertNotNull(fieldData);
    if (fieldData instanceof LazyPrimitive<?, ?>) {
      assertEquals(expectedFieldsData[i], ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
    } else if (fieldData instanceof LazyHBaseCellMap) {
      for (int j = 0; j < ((LazyHBaseCellMap) fieldData).getMapSize(); j++) {
        assertEquals(expectedFieldsData[k + 1], ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedQualifiers.get(k)).toString().trim());
        k++;
      }
      assertEquals(expectedMapSize[i - 1], ((LazyHBaseCellMap) fieldData).getMapSize());
      // Make sure that the unwanted key is not present in the map
      assertNull(((LazyHBaseCellMap) fieldData).getMapValueElement(notPresentKey));
    } else {
      fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
    }
  }
  SerDeUtils.getJSONString(row, soi);
  // Now serialize
  Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
  assertNotNull(put);
}
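This test distinguishes map-typed column families (LazyHBaseCellMap) from primitive fields by checking the concrete lazy classes. A hedged alternative sketch of the same distinction that relies only on the ObjectInspector categories; the class name FieldKindCheck is illustrative and not part of the test.

import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;

public class FieldKindCheck {
  // Describes a struct field as primitive, map (column family), or other, based on its inspector category.
  public static String describe(StructField fieldRef) {
    ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
    switch (fieldOI.getCategory()) {
      case PRIMITIVE:
        return fieldRef.getFieldName() + " is primitive: " + ((PrimitiveObjectInspector) fieldOI).getPrimitiveCategory();
      case MAP:
        return fieldRef.getFieldName() + " is a map with value type " + ((MapObjectInspector) fieldOI).getMapValueObjectInspector().getTypeName();
      default:
        return fieldRef.getFieldName() + " is " + fieldOI.getCategory();
    }
  }
}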
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
In class TestStreaming, method deserializeDeltaFileRow.
// Assumes stored data schema = [acid fields],string,int,string
// return array of 6 fields, where the last field has the actual data
private static Object[] deserializeDeltaFileRow(Object row, StructObjectInspector inspector) {
  List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  WritableIntObjectInspector f0ins = (WritableIntObjectInspector) fields.get(0).getFieldObjectInspector();
  WritableLongObjectInspector f1ins = (WritableLongObjectInspector) fields.get(1).getFieldObjectInspector();
  WritableIntObjectInspector f2ins = (WritableIntObjectInspector) fields.get(2).getFieldObjectInspector();
  WritableLongObjectInspector f3ins = (WritableLongObjectInspector) fields.get(3).getFieldObjectInspector();
  WritableLongObjectInspector f4ins = (WritableLongObjectInspector) fields.get(4).getFieldObjectInspector();
  StructObjectInspector f5ins = (StructObjectInspector) fields.get(5).getFieldObjectInspector();
  int f0 = f0ins.get(inspector.getStructFieldData(row, fields.get(0)));
  long f1 = f1ins.get(inspector.getStructFieldData(row, fields.get(1)));
  int f2 = f2ins.get(inspector.getStructFieldData(row, fields.get(2)));
  long f3 = f3ins.get(inspector.getStructFieldData(row, fields.get(3)));
  long f4 = f4ins.get(inspector.getStructFieldData(row, fields.get(4)));
  SampleRec f5 = deserializeInner(inspector.getStructFieldData(row, fields.get(5)), f5ins);
  return new Object[] { f0, f1, f2, f3, f4, f5 };
}
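The helper above casts each field to a specific Writable*ObjectInspector because the ACID column types are known in advance. Where the types are not fixed, the same data can be read generically through PrimitiveObjectInspector; the sketch below is illustrative and not part of TestStreaming.

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class GenericRowReader {
  // Returns each primitive field of the row as a plain Java object; non-primitive fields are returned as-is.
  public static Object[] read(Object row, StructObjectInspector inspector) {
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    Object[] values = new Object[fields.size()];
    for (int i = 0; i < fields.size(); i++) {
      StructField field = fields.get(i);
      Object data = inspector.getStructFieldData(row, field);
      if (field.getFieldObjectInspector() instanceof PrimitiveObjectInspector) {
        values[i] = ((PrimitiveObjectInspector) field.getFieldObjectInspector()).getPrimitiveJavaObject(data);
      } else {
        values[i] = data;
      }
    }
    return values;
  }
}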
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
In class FetchOperator, method createPartValue.
private Object[] createPartValue(PartitionDesc partDesc, StructObjectInspector partOI) {
  Map<String, String> partSpec = partDesc.getPartSpec();
  List<? extends StructField> fields = partOI.getAllStructFieldRefs();
  Object[] partValues = new Object[fields.size()];
  for (int i = 0; i < partValues.length; i++) {
    StructField field = fields.get(i);
    String value = partSpec.get(field.getFieldName());
    ObjectInspector oi = field.getFieldObjectInspector();
    partValues[i] = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(value);
  }
  return partValues;
}
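createPartValue converts the textual partition-spec values into the typed objects expected by each partition column's ObjectInspector. A small, hypothetical usage sketch of the same conversion idiom, with an int target inspector chosen purely for illustration:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class PartValueConvertDemo {
  public static void main(String[] args) {
    // Converts the partition string "2024" into an IntWritable via the standard converter.
    Object converted = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.writableIntObjectInspector).convert("2024");
    System.out.println(converted);
  }
}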
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project drill by apache.
In class HiveAbstractReader, method init.
private void init() throws ExecutionSetupException {
  final JobConf job = new JobConf(hiveConf);
  // Get the configured default val
  defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);
  Properties tableProperties;
  try {
    tableProperties = HiveUtilities.getTableMetadata(table);
    final Properties partitionProperties = (partition == null) ? tableProperties : HiveUtilities.getPartitionMetadata(partition, table);
    HiveUtilities.addConfToJob(job, partitionProperties);
    final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
    final StructObjectInspector tableOI = getStructOI(tableSerDe);
    if (partition != null) {
      partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
      partitionOI = getStructOI(partitionSerDe);
      finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
      partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
      job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
    } else {
      // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
      partitionSerDe = tableSerDe;
      partitionOI = tableOI;
      partTblObjectInspectorConverter = null;
      finalOI = tableOI;
      job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
    }
    if (logger.isTraceEnabled()) {
      for (StructField field : finalOI.getAllStructFieldRefs()) {
        logger.trace("field in finalOI: {}", field.getClass().getName());
      }
logger.trace("partitionSerDe class is {} {}", partitionSerDe.getClass().getName());
    }
    // Get list of partition column names
    final List<String> partitionNames = Lists.newArrayList();
    for (FieldSchema field : table.getPartitionKeys()) {
      partitionNames.add(field.getName());
    }
    // We should always get the column names from the ObjectInspector. For some tables (e.g. Avro) the metastore
    // may not contain the schema, instead it is derived from other sources such as table properties or external file.
    // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
    // ObjectInspector created from the SerDe object has the schema.
    final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
    final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();
    // Select list of columns for project pushdown into Hive SerDe readers.
    final List<Integer> columnIds = Lists.newArrayList();
    if (isStarQuery()) {
      selectedColumnNames = tableColumnNames;
      for (int i = 0; i < selectedColumnNames.size(); i++) {
        columnIds.add(i);
      }
      selectedPartitionNames = partitionNames;
    } else {
      selectedColumnNames = Lists.newArrayList();
      for (SchemaPath field : getColumns()) {
        String columnName = field.getRootSegment().getPath();
        if (partitionNames.contains(columnName)) {
          selectedPartitionNames.add(columnName);
        } else {
          columnIds.add(tableColumnNames.indexOf(columnName));
          selectedColumnNames.add(columnName);
        }
      }
    }
    ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);
    for (String columnName : selectedColumnNames) {
      StructField fieldRef = finalOI.getStructFieldRef(columnName);
      selectedStructFieldRefs.add(fieldRef);
      ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());
      selectedColumnObjInspectors.add(fieldOI);
      selectedColumnTypes.add(typeInfo);
      selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
    }
    for (int i = 0; i < selectedColumnNames.size(); ++i) {
      logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}", selectedColumnObjInspectors.get(i).getTypeName(), selectedColumnObjInspectors.get(i).getClass().getName(), selectedColumnTypes.get(i).toString(), selectedColumnFieldConverters.get(i).getClass().getName());
    }
    for (int i = 0; i < table.getPartitionKeys().size(); i++) {
      FieldSchema field = table.getPartitionKeys().get(i);
      if (selectedPartitionNames.contains(field.getName())) {
        TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
        selectedPartitionTypes.add(pType);
        if (partition != null) {
          selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
        }
      }
    }
  } catch (Exception e) {
    throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(), e);
  }
  if (!empty) {
    try {
      reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat().getRecordReader(inputSplit, job, Reporter.NULL);
      logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
    } catch (Exception e) {
      throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
    }
    internalInit(tableProperties, reader);
  }
}
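A central step in init() is adapting rows deserialized with the partition's SerDe to the table's (possibly evolved) schema via getConvertedOI and getConverter. A hedged sketch of that idiom in isolation; PartitionSchemaAdapter is an illustrative name, not a Drill class.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class PartitionSchemaAdapter {
  private final StructObjectInspector finalOI;
  private final Converter converter;

  public PartitionSchemaAdapter(StructObjectInspector partitionOI, StructObjectInspector tableOI) {
    // Inspector describing partition rows in terms of the table's column types.
    this.finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
    this.converter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
  }

  // Converts one deserialized partition row to the table schema.
  public Object adapt(Object partitionRow) {
    return converter.convert(partitionRow);
  }

  public StructObjectInspector getFinalOI() {
    return finalOI;
  }
}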