Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
The class ReduceRecordSource, method init.
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc,
    TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag,
    VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) throws Exception {
  this.vectorizedVertexNum = vectorizedVertexNum;
  ObjectInspector keyObjectInspector;
  this.reducer = reducer;
  this.vectorized = vectorized;
  this.keyTableDesc = keyTableDesc;
  if (reader instanceof KeyValueReader) {
    this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
  } else {
    this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
  }
  this.handleGroupKey = handleGroupKey;
  this.tag = tag;
  try {
    inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
    keyObjectInspector = inputKeyDeserializer.getObjectInspector();
    if (vectorized) {
      keyStructInspector = (StructObjectInspector) keyObjectInspector;
      firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
    }
    // We should initialize the SerDe with the TypeInfo when available.
    this.valueTableDesc = valueTableDesc;
    inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(
        valueTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
    valueObjectInspector = inputValueDeserializer.getObjectInspector();
    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    if (vectorized) {
      /* vectorization only works with struct object inspectors */
      valueStructInspectors = (StructObjectInspector) valueObjectInspector;
      final int totalColumns = firstValueColumnOffset
          + valueStructInspectors.getAllStructFieldRefs().size();
      valueStringWriters = new ArrayList<VectorExpressionWriter>(totalColumns);
      valueStringWriters.addAll(Arrays.asList(
          VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
      valueStringWriters.addAll(Arrays.asList(
          VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors)));
      rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
      batch = batchContext.createVectorizedRowBatch();
      // Setup vectorized deserialization for the key and value.
      BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
      keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(
          new BinarySortableDeserializeRead(
              VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
              /* useExternalBuffer */ true,
              binarySortableSerDe.getSortOrders()));
      keyBinarySortableDeserializeToRow.init(0);
      final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
      if (valuesSize > 0) {
        valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(
            new LazyBinaryDeserializeRead(
                VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                /* useExternalBuffer */ true));
        valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
        // Create data buffers for value bytes column vectors.
        for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
          ColumnVector colVector = batch.cols[i];
          if (colVector instanceof BytesColumnVector) {
            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
            bytesColumnVector.initBuffer();
          }
        }
      }
    } else {
      ois.add(keyObjectInspector);
      ois.add(valueObjectInspector);
      rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
          Utilities.reduceFieldNameList, ois);
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
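For reference, the non-vectorized branch reduces to a single getStandardStructObjectInspector call over the key and value inspectors. The following standalone sketch (not Hive source) illustrates the shape of that call; the string and long inspectors and the KEY/VALUE field names are stand-ins for what the key/value deserializers and Utilities.reduceFieldNameList would actually supply.

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ReduceRowOISketch {
  public static void main(String[] args) {
    // Stand-ins for the inspectors the key/value deserializers would report.
    ObjectInspector keyOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.javaLongObjectInspector;

    // Mirrors the non-vectorized branch: one struct with a key field and a value field.
    StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("KEY", "VALUE"), Arrays.asList(keyOI, valueOI));

    // The resulting inspector describes positional rows: key first, then value.
    System.out.println(rowOI.getTypeName());   // e.g. struct<key:string,value:bigint>
  }
}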
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
The class MapOperator, method initObjectInspector.
private MapOpCtx initObjectInspector(Configuration hconf, MapOpCtx opCtx,
    StructObjectInspector tableRowOI) throws Exception {
  PartitionDesc pd = opCtx.partDesc;
  TableDesc td = pd.getTableDesc();
  // Use table properties in case of unpartitioned tables,
  // and the union of table properties and partition properties, with partition
  // taking precedence, in the case of partitioned tables
  Properties overlayedProps = SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties());
  Map<String, String> partSpec = pd.getPartSpec();
  opCtx.tableName = String.valueOf(overlayedProps.getProperty("name"));
  opCtx.partName = String.valueOf(partSpec);
  opCtx.deserializer = pd.getDeserializer(hconf);
  StructObjectInspector partRawRowObjectInspector;
  boolean isAcid = AcidUtils.isTablePropertyTransactional(td.getProperties());
  if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid) && Utilities.isInputFileFormatSelfDescribing(pd)) {
    partRawRowObjectInspector = tableRowOI;
  } else {
    partRawRowObjectInspector = (StructObjectInspector) opCtx.deserializer.getObjectInspector();
  }
  opCtx.partTblObjectInspectorConverter =
      ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);
  // Next check if this table has partitions and if so
  // get the list of partition names as well as allocate
  // the serdes for the partition columns
  String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
  if (pcols != null && pcols.length() > 0) {
    String[] partKeys = pcols.trim().split("/");
    String pcolTypes = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
    String[] partKeyTypes = pcolTypes.trim().split(":");
    if (partKeys.length > partKeyTypes.length) {
      throw new HiveException("Internal error : partKeys length, " + partKeys.length
          + " greater than partKeyTypes length, " + partKeyTypes.length);
    }
    List<String> partNames = new ArrayList<String>(partKeys.length);
    Object[] partValues = new Object[partKeys.length];
    List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);
    for (int i = 0; i < partKeys.length; i++) {
      String key = partKeys[i];
      partNames.add(key);
      ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
      // Partitions do not exist for this table
      if (partSpec == null) {
        // for partitionless table, initialize partValue to null
        partValues[i] = null;
      } else {
        partValues[i] = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(partSpec.get(key));
      }
      partObjectInspectors.add(oi);
    }
    opCtx.rowWithPart = new Object[] { null, partValues };
    opCtx.partObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
  }
  // In that case, it will be a Select, but the rowOI need not be amended
  if (opCtx.op instanceof TableScanOperator) {
    TableScanOperator tsOp = (TableScanOperator) opCtx.op;
    TableScanDesc tsDesc = tsOp.getConf();
    if (tsDesc != null && tsDesc.hasVirtualCols()) {
      opCtx.vcs = tsDesc.getVirtualCols();
      opCtx.vcValues = new Object[opCtx.vcs.size()];
      opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
      if (opCtx.isPartitioned()) {
        opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3);
      } else {
        opCtx.rowWithPartAndVC = new Object[2];
      }
    }
  }
  if (!opCtx.hasVC() && !opCtx.isPartitioned()) {
    opCtx.rowObjectInspector = tableRowOI;
    return opCtx;
  }
  List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>();
  inspectors.add(tableRowOI);
  if (opCtx.isPartitioned()) {
    inspectors.add(opCtx.partObjectInspector);
  }
  if (opCtx.hasVC()) {
    inspectors.add(opCtx.vcsObjectInspector);
  }
  opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);
  return opCtx;
}
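The partitioned-table case layers two factory calls: a standard struct over the partition columns, then a union struct that stitches it onto the table row inspector. Below is a minimal standalone sketch of that layering, assuming no virtual columns; the column names (id, name, ds) are invented for illustration.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class PartitionedRowOISketch {
  public static void main(String[] args) {
    // Stand-in for the table's row inspector (normally produced by the deserializer).
    StructObjectInspector tableRowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "name"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));

    // One writable inspector per partition column, as in the loop over partKeys above.
    StructObjectInspector partOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("ds"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableStringObjectInspector));

    // Combine them the way initObjectInspector does for a partitioned scan without virtual columns.
    List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>(
        Arrays.asList(tableRowOI, partOI));
    StructObjectInspector rowOI = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);

    // Rows for this inspector are laid out like rowWithPart: { tableRow, partValues }.
    System.out.println(rowOI.getTypeName());
  }
}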
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
The class PartExprEvalUtils, method prepareExpr.
public static synchronized ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> prepareExpr(
    ExprNodeGenericFuncDesc expr, List<String> partColumnNames,
    List<PrimitiveTypeInfo> partColumnTypeInfos) throws HiveException {
  // Create the row object
  List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
  for (int i = 0; i < partColumnNames.size(); i++) {
    partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
        partColumnTypeInfos.get(i)));
  }
  StructObjectInspector objectInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(partColumnNames, partObjectInspectors);
  ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(expr);
  ObjectInspector evaluateResultOI = evaluator.initialize(objectInspector);
  return ObjectPair.create((PrimitiveObjectInspector) evaluateResultOI, evaluator);
}
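A hedged sketch of how the returned pair might be used, assuming a single string partition column ds and a hand-built equality predicate. The column name, constant value, and predicate are invented for illustration; Hive's partition pruner builds the expression tree from the query rather than by hand.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartExprEvalUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartFilterSketch {
  public static void main(String[] args) throws Exception {
    // A hypothetical partition schema with a single string column "ds".
    List<String> partNames = Arrays.asList("ds");
    List<PrimitiveTypeInfo> partTypes = Arrays.asList(TypeInfoFactory.stringTypeInfo);

    // Build the predicate ds = '2024-01-01' as an expression tree.
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "ds", null, true);
    ExprNodeDesc constant = new ExprNodeConstantDesc("2024-01-01");
    ExprNodeGenericFuncDesc expr = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPEqual(), Arrays.asList(column, constant));

    // prepareExpr initializes the evaluator against the standard struct OI it builds internally.
    ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> pair =
        PartExprEvalUtils.prepareExpr(expr, partNames, partTypes);

    // Rows are positional: one Java object per partition column, in declaration order.
    Object result = pair.getSecond().evaluate(Arrays.asList((Object) "2024-01-01"));
    System.out.println(pair.getFirst().getPrimitiveJavaObject(result));  // expected: true
  }
}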
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
The class GenericUDTFJSONTuple, method initialize.
@Override
public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
  inputOIs = args;
  numCols = args.length - 1;
  jsonObjectCache = new HashCache<>();
  if (numCols < 1) {
    throw new UDFArgumentException("json_tuple() takes at least two arguments: "
        + "the json string and a path expression");
  }
  for (int i = 0; i < args.length; ++i) {
    if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE
        || !args[i].getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
      throw new UDFArgumentException("json_tuple()'s arguments have to be string type");
    }
  }
  seenErrors = false;
  pathParsed = false;
  paths = new String[numCols];
  cols = new Text[numCols];
  retCols = new Text[numCols];
  nullCols = new Object[numCols];
  for (int i = 0; i < numCols; ++i) {
    cols[i] = new Text();
    retCols[i] = cols[i];
    nullCols[i] = null;
  }
  // construct output object inspector
  ArrayList<String> fieldNames = new ArrayList<String>(numCols);
  ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
  for (int i = 0; i < numCols; ++i) {
    // column name can be anything since it will be named by UDTF as clause
    fieldNames.add("c" + i);
    // all returned type will be Text
    fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
  }
  return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
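The struct inspector returned here is the UDTF's output schema: each Object[] the UDTF later hands to forward() must line up, field by field, with the inspectors registered above. A small standalone sketch (not Hive source, two output columns assumed) of reading such a row back through the inspector:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class UdtfOutputOISketch {
  public static void main(String[] args) {
    int numCols = 2;  // pretend json_tuple was called with two path expressions
    List<String> fieldNames = new ArrayList<String>(numCols);
    List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
    for (int i = 0; i < numCols; ++i) {
      fieldNames.add("c" + i);
      fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }
    StructObjectInspector outputOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);

    // A row the UDTF would pass to forward(): one Text per declared field.
    Object[] row = new Object[] { new Text("v0"), new Text("v1") };
    for (StructField f : outputOI.getAllStructFieldRefs()) {
      System.out.println(f.getFieldName() + " = " + outputOI.getStructFieldData(row, f));
    }
  }
}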
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
The class GenericUDTFParseUrlTuple, method initialize.
@Override
public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
  inputOIs = args;
  numCols = args.length - 1;
  if (numCols < 1) {
    throw new UDFArgumentException("parse_url_tuple() takes at least two arguments: "
        + "the url string and a part name");
  }
  for (int i = 0; i < args.length; ++i) {
    if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE
        || !args[i].getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
      throw new UDFArgumentException("parse_url_tuple()'s arguments have to be string type");
    }
  }
  seenErrors = false;
  pathParsed = false;
  url = null;
  p = null;
  lastKey = null;
  paths = new String[numCols];
  partnames = new PARTNAME[numCols];
  cols = new Text[numCols];
  retCols = new Text[numCols];
  nullCols = new Object[numCols];
  for (int i = 0; i < numCols; ++i) {
    cols[i] = new Text();
    retCols[i] = cols[i];
    nullCols[i] = null;
  }
  // construct output object inspector
  ArrayList<String> fieldNames = new ArrayList<String>(numCols);
  ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
  for (int i = 0; i < numCols; ++i) {
    // column name can be anything since it will be named by UDTF as clause
    fieldNames.add("c" + i);
    // all returned type will be Text
    fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
  }
  return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}