Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
In class JoinOperator, method process:
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    reportProgress();
    lastAlias = alias;
    alias = (byte) tag;
    if (!alias.equals(lastAlias)) {
      nextSz = joinEmitInterval;
    }
    List<Object> nr = getFilteredValue(alias, row);
    if (handleSkewJoin) {
      skewJoinKeyContext.handleSkew(tag);
    }
    // Number of rows for the key in the given table.
    long sz = storage[alias].rowCount();
    StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
    StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
    List keyObject = (List) soi.getStructFieldData(row, sf);
    // Are we consuming too much memory?
    if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)
        && !hasLeftSemiJoin) {
      if (sz == joinEmitInterval && !hasFilter(condn[alias - 1].getLeft())
          && !hasFilter(condn[alias - 1].getRight())) {
        // The input is sorted by alias, so if we are already in the last join
        // operand, we can emit some results now. Note this has to be done
        // before adding the current row to the storage, to preserve the
        // correctness of outer joins.
        checkAndGenObject();
        storage[alias].clearRows();
      }
    } else {
      if (LOG.isInfoEnabled() && (sz == nextSz)) {
        // Print a message if we reached at least 1000 rows for a join operand.
        // We won't print a message for the last join operand since its size
        // never reaches joinEmitInterval.
        LOG.info("table " + alias + " has " + sz + " rows for join key " + keyObject);
        nextSz = getNextSize(nextSz);
      }
    }
    // Add the value to the vector.
    // If the join key is null, process each row in a different group.
    StructObjectInspector inspector = (StructObjectInspector) sf.getFieldObjectInspector();
    if (SerDeUtils.hasAnyNullObject(keyObject, inspector, nullsafes)) {
      endGroup();
      startGroup();
    }
    storage[alias].addRow(nr);
  } catch (Exception e) {
    e.printStackTrace();
    throw new HiveException(e);
  }
}
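For context, a minimal self-contained sketch of the same field-lookup pattern the method above relies on (getStructFieldRef plus getStructFieldData); the struct layout and values here are invented for illustration:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructFieldLookupSketch {
  public static void main(String[] args) {
    // Build a struct object inspector for { key:int, value:string }.
    StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value"),
        Arrays.asList(
            (ObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    // For the standard struct inspector, a row is simply a List of field values.
    List<Object> row = Arrays.<Object>asList(42, "hello");
    // Resolve the field reference once, then use it to read the row,
    // just as process() resolves the KEY field of inputObjInspectors[tag].
    StructField keyField = soi.getStructFieldRef("key");
    System.out.println(soi.getStructFieldData(row, keyField)); // prints 42
  }
}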
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
In class JoinUtil, method unflattenObjInspector:
/**
 * Checks the input object inspector to see if it is in the form of a flattened
 * struct like the ones generated by a vectorized reduce sink input:
 *   { 'key.reducesinkkey0':int, 'value._col0':int, 'value._col1':int, .. }
 * If so, then it creates an "unflattened" struct that contains nested key/value
 * structs:
 *   { key: { reducesinkkey0:int }, value: { _col0:int, _col1:int, .. } }
 *
 * @param oi the object inspector to check
 * @return an unflattened object inspector if unflattening is needed,
 *         otherwise the original object inspector
 */
private static ObjectInspector unflattenObjInspector(ObjectInspector oi) {
  if (oi instanceof StructObjectInspector) {
    // Check if all fields start with "key." or "value.".
    // If so, unflatten by adding an additional level of nested key and value structs.
    // Example: { "key.reducesinkkey0":int, "key.reducesinkkey1":int, "value._col6":int }
    // becomes
    //          { "key": { "reducesinkkey0":int, "reducesinkkey1":int }, "value": { "_col6":int } }
    ArrayList<StructField> keyFields = new ArrayList<StructField>();
    ArrayList<StructField> valueFields = new ArrayList<StructField>();
    for (StructField field : ((StructObjectInspector) oi).getAllStructFieldRefs()) {
      String fieldNameLower = field.getFieldName().toLowerCase();
      if (fieldNameLower.startsWith(KEY_FIELD_PREFIX)) {
        keyFields.add(field);
      } else if (fieldNameLower.startsWith(VALUE_FIELD_PREFIX)) {
        valueFields.add(field);
      } else {
        // Not a flattened struct, no need to unflatten.
        return oi;
      }
    }
    // All field names start with "key." or "value.".
    // Create key/value structs and add the respective fields to each one.
    ArrayList<ObjectInspector> reduceFieldOIs = new ArrayList<ObjectInspector>();
    reduceFieldOIs.add(createStructFromFields(keyFields, Utilities.ReduceField.KEY.toString()));
    reduceFieldOIs.add(createStructFromFields(valueFields, Utilities.ReduceField.VALUE.toString()));
    // Finally create the outer struct to contain the key and value structs.
    return ObjectInspectorFactory.getStandardStructObjectInspector(
        Utilities.reduceFieldNameList, reduceFieldOIs);
  }
  return oi;
}
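The helper createStructFromFields is called above but not shown. A plausible sketch, assuming it strips the "key."/"value." prefix from each flattened name and wraps the remaining fields in a standard struct inspector (the actual Hive implementation may differ):

private static ObjectInspector createStructFromFields(List<StructField> fields, String structName) {
  // structName ("key" or "value") is accepted to match the call site;
  // this sketch does not need it.
  ArrayList<String> fieldNames = new ArrayList<String>();
  ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
  for (StructField field : fields) {
    // Assumption: "key.reducesinkkey0" becomes "reducesinkkey0".
    String name = field.getFieldName();
    int dot = name.indexOf('.');
    fieldNames.add(dot >= 0 ? name.substring(dot + 1) : name);
    fieldOIs.add(field.getFieldObjectInspector());
  }
  return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}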
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
In class VectorDeserializeOrcWriter, method create:
// TODO: if more writers are added, separate out an EncodingWriterFactory
public static EncodingWriter create(InputFormat<?, ?> sourceIf, Deserializer serDe,
    Map<Path, PartitionDesc> parts, Configuration daemonConf, Configuration jobConf,
    Path splitPath, StructObjectInspector sourceOi, List<Integer> sourceIncludes,
    boolean[] cacheIncludes, int allocSize) throws IOException {
  // The vector SerDe can be disabled on both the client and the server side.
  if (!HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
      || !HiveConf.getBoolVar(jobConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
      || !(sourceIf instanceof TextInputFormat) || !(serDe instanceof LazySimpleSerDe)) {
    return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
  }
  Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
  PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null);
  if (partDesc == null) {
    LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: no partition desc for " + path);
    return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
  }
  Properties tblProps = partDesc.getTableDesc().getProperties();
  if ("true".equalsIgnoreCase(tblProps.getProperty(
      serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST))) {
    LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter due to "
        + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST);
    return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
  }
  for (StructField sf : sourceOi.getAllStructFieldRefs()) {
    Category c = sf.getFieldObjectInspector().getCategory();
    if (c != Category.PRIMITIVE) {
      LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: " + c + " is not supported");
      return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
  }
  LlapIoImpl.LOG.info("Creating VectorDeserializeOrcWriter for " + path);
  return new VectorDeserializeOrcWriter(daemonConf, tblProps, sourceOi, sourceIncludes,
      cacheIncludes, allocSize);
}
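The loop over getAllStructFieldRefs above is the standard way to gate on column types. As a standalone sketch, the same check can be written as a reusable predicate (the method name is ours, not from Hive):

// Returns true only if every top-level column of the row inspector is a
// primitive type, mirroring the check performed in create() above.
static boolean allColumnsPrimitive(StructObjectInspector sourceOi) {
  for (StructField sf : sourceOi.getAllStructFieldRefs()) {
    if (sf.getFieldObjectInspector().getCategory() != ObjectInspector.Category.PRIMITIVE) {
      return false;
    }
  }
  return true;
}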
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
In class HiveMetaStoreUtils, method getFieldsFromDeserializer:
/**
* @param tableName name of the table
* @param deserializer deserializer to use
* @return the list of fields
* @throws SerDeException if the serde throws an exception
* @throws MetaException if one of the fields or types in the table is invalid
*/
public static List<FieldSchema> getFieldsFromDeserializer(String tableName,
    Deserializer deserializer) throws SerDeException, MetaException {
  ObjectInspector oi = deserializer.getObjectInspector();
  String[] names = tableName.split("\\.");
  String last_name = names[names.length - 1];
  for (int i = 1; i < names.length; i++) {
    if (oi instanceof StructObjectInspector) {
      StructObjectInspector soi = (StructObjectInspector) oi;
      StructField sf = soi.getStructFieldRef(names[i]);
      if (sf == null) {
        throw new MetaException("Invalid Field " + names[i]);
      } else {
        oi = sf.getFieldObjectInspector();
      }
    } else if (oi instanceof ListObjectInspector && names[i].equalsIgnoreCase("$elem$")) {
      ListObjectInspector loi = (ListObjectInspector) oi;
      oi = loi.getListElementObjectInspector();
    } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$key$")) {
      MapObjectInspector moi = (MapObjectInspector) oi;
      oi = moi.getMapKeyObjectInspector();
    } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$value$")) {
      MapObjectInspector moi = (MapObjectInspector) oi;
      oi = moi.getMapValueObjectInspector();
    } else {
      throw new MetaException("Unknown type for " + names[i]);
    }
  }
  ArrayList<FieldSchema> str_fields = new ArrayList<>();
  // Rules on how to recurse the ObjectInspector based on its type.
  if (oi.getCategory() != Category.STRUCT) {
    str_fields.add(new FieldSchema(last_name, oi.getTypeName(), FROM_SERIALIZER));
  } else {
    List<? extends StructField> fields = ((StructObjectInspector) oi).getAllStructFieldRefs();
    for (int i = 0; i < fields.size(); i++) {
      StructField structField = fields.get(i);
      String fieldName = structField.getFieldName();
      String fieldTypeName = structField.getFieldObjectInspector().getTypeName();
      String fieldComment = determineFieldComment(structField.getFieldComment());
      str_fields.add(new FieldSchema(fieldName, fieldTypeName, fieldComment));
    }
  }
  return str_fields;
}
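To make the dotted-name traversal concrete, a small sketch with hypothetical types: for a table t with a column arr of type array<struct<a:int>>, the name "t.arr.$elem$" descends from the row struct through the list element, so the struct<a:int> type is what gets reported. The sketch assumes the standard objectinspector imports:

// Hand-built inspector chain for t(arr array<struct<a:int>>); names are invented.
static ObjectInspector resolveElem() {
  ObjectInspector elemOi = ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList("a"),
      Arrays.asList((ObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector));
  ObjectInspector listOi = ObjectInspectorFactory.getStandardListObjectInspector(elemOi);
  StructObjectInspector rowOi = ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList("arr"), Arrays.asList(listOi));
  // "t.arr.$elem$" splits into ["t", "arr", "$elem$"]; the loop above walks:
  ObjectInspector oi = rowOi.getStructFieldRef("arr").getFieldObjectInspector(); // the list
  oi = ((ListObjectInspector) oi).getListElementObjectInspector(); // struct<a:int>
  return oi; // getTypeName() yields "struct<a:int>"
}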
Use of org.apache.hadoop.hive.serde2.objectinspector.StructField in project hive by apache.
In class LlapRowRecordReader, method convertValue:
static Object convertValue(Object val, ObjectInspector oi) {
  if (val == null) {
    return null;
  }
  Object convertedVal = null;
  ObjectInspector.Category oiCategory = oi.getCategory();
  switch (oiCategory) {
    case PRIMITIVE:
      convertedVal = convertPrimitive(val, (PrimitiveObjectInspector) oi);
      break;
    case LIST:
      ListObjectInspector loi = (ListObjectInspector) oi;
      int listSize = loi.getListLength(val);
      // Per ListObjectInspector.getListLength(), a length of -1 means a null list.
      if (listSize < 0) {
        return null;
      }
      List<Object> convertedList = new ArrayList<Object>(listSize);
      ObjectInspector listElementOI = loi.getListElementObjectInspector();
      for (int idx = 0; idx < listSize; ++idx) {
        convertedList.add(convertValue(loi.getListElement(val, idx), listElementOI));
      }
      convertedVal = convertedList;
      break;
    case MAP:
      MapObjectInspector moi = (MapObjectInspector) oi;
      int mapSize = moi.getMapSize(val);
      // Per MapObjectInspector.getMapSize(), a size of -1 means a null map.
      if (mapSize < 0) {
        return null;
      }
      Map<Object, Object> convertedMap = new LinkedHashMap<Object, Object>(mapSize);
      ObjectInspector mapKeyOI = moi.getMapKeyObjectInspector();
      ObjectInspector mapValOI = moi.getMapValueObjectInspector();
      Map<?, ?> mapCol = moi.getMap(val);
      for (Object mapKey : mapCol.keySet()) {
        Object convertedMapKey = convertValue(mapKey, mapKeyOI);
        Object convertedMapVal = convertValue(mapCol.get(mapKey), mapValOI);
        convertedMap.put(convertedMapKey, convertedMapVal);
      }
      convertedVal = convertedMap;
      break;
    case STRUCT:
      StructObjectInspector soi = (StructObjectInspector) oi;
      List<Object> convertedRow = new ArrayList<Object>();
      for (StructField structField : soi.getAllStructFieldRefs()) {
        Object convertedFieldValue = convertValue(
            soi.getStructFieldData(val, structField), structField.getFieldObjectInspector());
        convertedRow.add(convertedFieldValue);
      }
      convertedVal = convertedRow;
      break;
    default:
      throw new IllegalArgumentException("Cannot convert type " + oiCategory);
  }
  return convertedVal;
}
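A usage sketch for the struct case, hedged: convertValue is package-private, so this assumes calling code in the same package (e.g. a test), and it assumes convertPrimitive passes plain Java values through unchanged:

// Hypothetical call site; a struct row converts to a plain List of field values.
StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("id", "name"),
    Arrays.asList(
        (ObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector));
Object row = Arrays.<Object>asList(7, "x");
List<?> converted = (List<?>) LlapRowRecordReader.convertValue(row, soi);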