use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class Utilities method constructVectorizedReduceRowOI.
/**
 * Creates the row object inspector for reduce-side vectorization by
 * concatenating the key and value fields. The row object inspector used by
 * ReduceWork must be a standard struct object inspector
 * (StandardStructObjectInspector), not just any struct object inspector.
 * @param keyInspector struct object inspector for the reduce key
 * @param valueInspector struct object inspector for the reduce value
 * @return the combined standard struct object inspector
 * @throws HiveException
 */
public static StandardStructObjectInspector constructVectorizedReduceRowOI(
    StructObjectInspector keyInspector, StructObjectInspector valueInspector) throws HiveException {
  ArrayList<String> colNames = new ArrayList<String>();
  ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
  List<? extends StructField> fields = keyInspector.getAllStructFieldRefs();
  for (StructField field : fields) {
    colNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
    ois.add(field.getFieldObjectInspector());
  }
  fields = valueInspector.getAllStructFieldRefs();
  for (StructField field : fields) {
    colNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName());
    ois.add(field.getFieldObjectInspector());
  }
  StandardStructObjectInspector rowObjectInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois);
  return rowObjectInspector;
}
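As a quick illustration of the naming convention, here is a minimal, self-contained sketch (not from the Hive sources) that feeds two hypothetical single-column inspectors through constructVectorizedReduceRowOI; the column names reducesinkkey0 and value0 are made up for the example:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ReduceRowOIDemo {
  public static void main(String[] args) throws Exception {
    StructObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("reducesinkkey0"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableLongObjectInspector));
    StructObjectInspector valueOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("value0"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableStringObjectInspector));
    StandardStructObjectInspector rowOI =
        Utilities.constructVectorizedReduceRowOI(keyOI, valueOI);
    // Prints KEY.reducesinkkey0 and VALUE.value0: key fields first, value fields after.
    for (StructField f : rowOI.getAllStructFieldRefs()) {
      System.out.println(f.getFieldName());
    }
  }
}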
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class SkewJoinHandler method initiliaze.
public void initiliaze(Configuration hconf) {
  this.hconf = hconf;
  JoinDesc desc = joinOp.getConf();
  skewKeyDefinition = desc.getSkewKeyDefinition();
  skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(numAliases);
  tblDesc = desc.getSkewKeysValuesTables();
  tblSerializers = new HashMap<Byte, AbstractSerDe>(numAliases);
  bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
  taskId = Utilities.getTaskId(hconf);
  int[][] filterMap = desc.getFilterMap();
  for (int i = 0; i < numAliases; i++) {
    Byte alias = conf.getTagOrder()[i];
    List<ObjectInspector> skewTableKeyInspectors = new ArrayList<ObjectInspector>();
    StructObjectInspector soi = (StructObjectInspector) joinOp.inputObjInspectors[alias];
    StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
    List<? extends StructField> keyFields =
        ((StructObjectInspector) sf.getFieldObjectInspector()).getAllStructFieldRefs();
    int keyFieldSize = keyFields.size();
    for (int k = 0; k < keyFieldSize; k++) {
      skewTableKeyInspectors.add(keyFields.get(k).getFieldObjectInspector());
    }
    TableDesc joinKeyDesc = desc.getKeyTableDesc();
    List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc.getProperties());
    StructObjectInspector structTblKeyInpector =
        ObjectInspectorFactory.getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors);
    try {
      AbstractSerDe serializer = (AbstractSerDe) ReflectionUtils.newInstance(
          tblDesc.get(alias).getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(serializer, null, tblDesc.get(alias).getProperties(), null);
      tblSerializers.put((byte) i, serializer);
    } catch (SerDeException e) {
      LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e);
      joinOp.handleSkewJoin = false;
      break;
    }
    boolean hasFilter = filterMap != null && filterMap[i] != null;
    TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias, joinOp.spillTableDesc, conf, !hasFilter);
    List<String> valColNames = new ArrayList<String>();
    if (valTblDesc != null) {
      valColNames = Utilities.getColumnNames(valTblDesc.getProperties());
    }
    StructObjectInspector structTblValInpector = ObjectInspectorFactory.getStandardStructObjectInspector(
        valColNames, joinOp.joinValuesStandardObjectInspectors[i]);
    StructObjectInspector structTblInpector = ObjectInspectorFactory.getUnionStructObjectInspector(
        Arrays.asList(structTblValInpector, structTblKeyInpector));
    skewKeysTableObjectInspector.put((byte) i, structTblInpector);
  }
  // Reset each row container's serde, object inspector, and table desc.
  for (int i = 0; i < numAliases; i++) {
    Byte alias = conf.getTagOrder()[i];
    RowContainer<ArrayList<Object>> rc = (RowContainer) joinOp.storage[i];
    if (rc != null) {
      rc.setSerDe(tblSerializers.get((byte) i), skewKeysTableObjectInspector.get((byte) i));
      rc.setTableDesc(tblDesc.get(alias));
    }
  }
}
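The composition step worth noticing is getUnionStructObjectInspector, which splices the fields of several struct inspectors into one logical row, in the order passed (values first, then keys above). A standalone sketch, again with made-up single-column structs:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class UnionStructOIDemo {
  public static void main(String[] args) {
    StructObjectInspector valOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("val0"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    StructObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key0"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaIntObjectInspector));
    // Same value-then-key order as in initiliaze() above.
    StructObjectInspector rowOI = ObjectInspectorFactory.getUnionStructObjectInspector(
        Arrays.asList(valOI, keyOI));
    for (StructField f : rowOI.getAllStructFieldRefs()) {
      System.out.println(f.getFieldName()); // val0, then key0
    }
  }
}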
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class MapOperator method setChildren.
public void setChildren(Configuration hconf) throws Exception {
  List<Operator<? extends OperatorDesc>> children =
      new ArrayList<Operator<? extends OperatorDesc>>();
  Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
  Map<TableDesc, StructObjectInspector> convertedOI = getConvertedOI(tableNameToConf);
  for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
    Path onefile = entry.getKey();
    List<String> aliases = entry.getValue();
    PartitionDesc partDesc = conf.getPathToPartitionInfo().get(onefile);
    TableDesc tableDesc = partDesc.getTableDesc();
    Configuration newConf = tableNameToConf.get(tableDesc.getTableName());
    for (String alias : aliases) {
      Operator<? extends OperatorDesc> op = conf.getAliasToWork().get(alias);
      if (isLogDebugEnabled) {
        LOG.debug("Adding alias " + alias + " to work list for file " + onefile);
      }
      Map<Operator<?>, MapOpCtx> contexts = opCtxMap.get(onefile.toString());
      if (contexts == null) {
        opCtxMap.put(onefile.toString(), contexts = new LinkedHashMap<Operator<?>, MapOpCtx>());
      }
      if (contexts.containsKey(op)) {
        continue;
      }
      MapOpCtx context = new MapOpCtx(alias, op, partDesc);
      StructObjectInspector tableRowOI = convertedOI.get(partDesc.getTableDesc());
      contexts.put(op, initObjectInspector(newConf, context, tableRowOI));
      if (!children.contains(op)) {
        op.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(1));
        op.getParentOperators().add(this);
        children.add(op);
      }
    }
  }
  initOperatorContext(children);
  // We have found all the operators that we are supposed to process.
  setChildOperators(children);
}
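One small aside: the null-check-then-put that populates opCtxMap is the classic get-or-create idiom; on Java 8+ it could be collapsed into a single call with identical behavior:

Map<Operator<?>, MapOpCtx> contexts =
    opCtxMap.computeIfAbsent(onefile.toString(), k -> new LinkedHashMap<Operator<?>, MapOpCtx>());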
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class VectorExpressionWriterFactory method genVectorStructExpressionWritables.
/**
* Compiles the appropriate vector expression writers based on a struct object
* inspector.
*/
public static VectorExpressionWriter[] genVectorStructExpressionWritables(StructObjectInspector oi)
    throws HiveException {
  final List<? extends StructField> fields = oi.getAllStructFieldRefs();
  VectorExpressionWriter[] writers = new VectorExpressionWriter[fields.size()];
  int i = 0;
  for (StructField field : fields) {
    writers[i++] = genVectorExpressionWritable(field.getFieldObjectInspector());
  }
  return writers;
}
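A usage sketch (not from the Hive sources), assuming a hypothetical two-column struct of writable primitive inspectors; it only demonstrates that one writer is produced per field:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructWritersDemo {
  public static void main(String[] args) throws Exception {
    StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("a", "b"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.writableLongObjectInspector,
            PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
    VectorExpressionWriter[] writers =
        VectorExpressionWriterFactory.genVectorStructExpressionWritables(rowOI);
    System.out.println(writers.length); // 2: one writer per struct field, in field order
  }
}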
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class VectorExpressionWriterFactory method processVectorInspector.
/**
 * Creates the value writers for a struct object inspector, along with
 * a matching standard output object inspector, and hands both back
 * through the supplied closure.
 */
public static void processVectorInspector(StructObjectInspector structObjInspector,
    SingleOIDClosure closure) throws HiveException {
  List<? extends StructField> fields = structObjInspector.getAllStructFieldRefs();
  VectorExpressionWriter[] writers = new VectorExpressionWriter[fields.size()];
  List<ObjectInspector> oids = new ArrayList<ObjectInspector>(writers.length);
  ArrayList<String> columnNames = new ArrayList<String>();
  int i = 0;
  for (StructField field : fields) {
    ObjectInspector fieldObjInsp = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
        TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()));
    writers[i] = VectorExpressionWriterFactory.genVectorExpressionWritable(fieldObjInsp);
    columnNames.add(field.getFieldName());
    oids.add(writers[i].getObjectInspector());
    i++;
  }
  ObjectInspector objectInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, oids);
  closure.assign(writers, objectInspector);
}
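Since the closure exposes a single assign(writers, objectInspector) callback, a caller typically just captures both results. A minimal sketch, assuming SingleOIDClosure is the nested VectorExpressionWriterFactory interface and reusing a rowOI like the one built in the previous sketch; the capture variable names are illustrative:

// Capture the writers and the output OI handed back through the closure.
final VectorExpressionWriter[][] capturedWriters = new VectorExpressionWriter[1][];
final ObjectInspector[] capturedOI = new ObjectInspector[1];
VectorExpressionWriterFactory.processVectorInspector(rowOI,
    new VectorExpressionWriterFactory.SingleOIDClosure() {
      @Override
      public void assign(VectorExpressionWriter[] writers, ObjectInspector objectInspector) {
        capturedWriters[0] = writers;
        capturedOI[0] = objectInspector;
      }
    });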