use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.
the class ColumnStatsTask method constructColumnStatsFromPackedRows.
private List<ColumnStatistics> constructColumnStatsFromPackedRows(Hive db) throws HiveException, MetaException, IOException {
String currentDb = SessionState.get().getCurrentDatabase();
String tableName = work.getColStats().getTableName();
String partName = null;
List<String> colName = work.getColStats().getColName();
List<String> colType = work.getColStats().getColType();
boolean isTblLevel = work.getColStats().isTblLevel();
List<ColumnStatistics> stats = new ArrayList<ColumnStatistics>();
InspectableObject packedRow;
Table tbl = db.getTable(currentDb, tableName);
while ((packedRow = ftOp.getNextRow()) != null) {
if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
throw new HiveException("Unexpected object type encountered while unpacking row");
}
List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
List<? extends StructField> fields = soi.getAllStructFieldRefs();
List<Object> list = soi.getStructFieldsDataAsList(packedRow.o);
List<FieldSchema> partColSchema = tbl.getPartCols();
// Partition columns are appended at the end; we only care about the stats columns
int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size();
for (int i = 0; i < numOfStatCols; i++) {
// Get the field objectInspector, fieldName and the field object.
ObjectInspector foi = fields.get(i).getFieldObjectInspector();
Object f = (list == null ? null : list.get(i));
String fieldName = fields.get(i).getFieldName();
ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
statsObj.setColName(colName.get(i));
statsObj.setColType(colType.get(i));
unpackStructObject(foi, f, fieldName, statsObj);
statsObjs.add(statsObj);
}
if (!isTblLevel) {
List<String> partVals = new ArrayList<String>();
// Iterate over partition columns to figure out partition name
for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(list.get(i));
// could be null for default partition
partVals.add(partVal == null ? this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
}
partName = Warehouse.makePartName(partColSchema, partVals);
}
String[] names = Utilities.getDbTableName(currentDb, tableName);
ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, isTblLevel);
ColumnStatistics colStats = new ColumnStatistics();
colStats.setStatsDesc(statsDesc);
colStats.setStatsObj(statsObjs);
stats.add(colStats);
}
ftOp.clearFetchContext();
return stats;
}
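The loop above shows the standard way to unpack an InspectableObject: check that its ObjectInspector is a STRUCT, cast it to a StructObjectInspector, and walk the field refs alongside the field data. A minimal, self-contained sketch of that pattern, with a hypothetical dumpPackedRow helper introduced only for illustration:
import java.util.List;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
// Hypothetical helper, not part of ColumnStatsTask: prints every field of a packed row,
// e.g. one returned by FetchOperator.getNextRow().
static void dumpPackedRow(InspectableObject packedRow) throws HiveException {
  if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
    throw new HiveException("Expected a STRUCT object inspector");
  }
  StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  List<Object> data = soi.getStructFieldsDataAsList(packedRow.o);
  for (int i = 0; i < fields.size(); i++) {
    Object value = (data == null) ? null : data.get(i);
    System.out.println(fields.get(i).getFieldName() + " ("
        + fields.get(i).getFieldObjectInspector().getTypeName() + ") = " + value);
  }
}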
use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.
the class TestOperators method setUp.
@Override
protected void setUp() {
r = new InspectableObject[5];
ArrayList<String> names = new ArrayList<String>(3);
names.add("col0");
names.add("col1");
names.add("col2");
ArrayList<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>(3);
objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
for (int i = 0; i < 5; i++) {
ArrayList<String> data = new ArrayList<String>();
data.add("" + i);
data.add("" + (i + 1));
data.add("" + (i + 2));
try {
r[i] = new InspectableObject();
r[i].o = data;
r[i].oi = ObjectInspectorFactory.getStandardStructObjectInspector(names, objectInspectors);
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
}
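For context, the rows built above can be read back through their own inspector; a hedged sketch of how a test might consume them (the assertions are illustrative and not taken from TestOperators itself):
// Illustrative only: read "col0" back out of each packed test row.
for (InspectableObject row : r) {
  StructObjectInspector soi = (StructObjectInspector) row.oi;
  StructField col0Ref = soi.getStructFieldRef("col0");
  Object col0 = soi.getStructFieldData(row.o, col0Ref);
  // setUp stored plain Java Strings, so col0 is "0", "1", ... across the five rows
  assertNotNull(col0);
}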
use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.
the class DummyStoreOperator method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
/*
* The conversion to standard object inspector was necessitated by HIVE-5973. The issue
happens when a select operator precedes this operator, as in the case of a subquery. The
* select operator does not allocate a new object to hold the deserialized row. This affects
* the operation of the SMB join which puts the object in a priority queue. Since all elements
* of the priority queue point to the same object, the join was resulting in incorrect
* results.
*
* So the fix is to make a copy of the object as done in the processOp phase below. This
* however necessitates a change in the object inspector that can be used in processing the
* row downstream.
*/
outputObjInspector = ObjectInspectorUtils.getStandardObjectInspector(inputObjInspectors[0]);
result = new InspectableObject(null, outputObjInspector);
}
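The copy the comment refers to happens in the row-processing method. A hedged sketch of what that step looks like, assuming Hive's process(Object, int) signature and a deep copy via ObjectInspectorUtils (the copy option used by the real operator may differ):
// Sketch of the copy step described above: deep-copy the incoming row so the
// SMB join's priority queue holds distinct objects rather than one reused row.
@Override
public void process(Object row, int tag) throws HiveException {
  result.o = ObjectInspectorUtils.copyToStandardObject(row, inputObjInspectors[tag]);
}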
use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.
the class FetchOperator method getNextRow.
/**
* Get the next row. The fetch context is modified appropriately.
*
**/
public InspectableObject getNextRow() throws IOException {
try {
while (true) {
boolean opNotEOF = true;
if (context != null) {
context.resetRow();
}
if (currRecReader == null) {
currRecReader = getRecordReader();
if (currRecReader == null) {
return null;
}
/**
* Start reading a new file.
* If the file contains a header, skip the header lines before reading the records.
* If the file contains a footer, use a FooterBuffer to cache and remove the footer
* records at the end of the file.
*/
headerCount = Utilities.getHeaderCount(currDesc.getTableDesc());
footerCount = Utilities.getFooterCount(currDesc.getTableDesc(), job);
// Skip header lines.
opNotEOF = Utilities.skipHeader(currRecReader, headerCount, key, value);
// Initialize footer buffer.
if (opNotEOF && footerCount > 0) {
footerBuffer = new FooterBuffer();
opNotEOF = footerBuffer.initializeBuffer(job, currRecReader, footerCount, key, value);
}
}
if (opNotEOF && footerBuffer == null) {
/**
* When the file doesn't end after skipping the header lines
* and there are no footer lines, read normally.
*/
opNotEOF = currRecReader.next(key, value);
}
if (opNotEOF && footerBuffer != null) {
opNotEOF = footerBuffer.updateBuffer(job, currRecReader, key, value);
}
if (opNotEOF) {
if (operator != null && context != null && context.inputFileChanged()) {
// The child operators clean up if the input file has changed
operator.cleanUpInputFileChanged();
}
if (hasVC) {
row[isPartitioned ? 2 : 1] = MapOperator.populateVirtualColumnValues(context, vcCols, vcValues, currSerDe);
}
Object deserialized = currSerDe.deserialize(value);
if (ObjectConverter != null) {
deserialized = ObjectConverter.convert(deserialized);
}
if (hasVC || isPartitioned) {
row[0] = deserialized;
inspectable.o = row;
} else {
inspectable.o = deserialized;
}
inspectable.oi = currSerDe.getObjectInspector();
return inspectable;
} else {
currRecReader.close();
currRecReader = null;
}
}
} catch (Exception e) {
throw new IOException(e);
}
}
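getNextRow() returns null at end of input and hands back the same InspectableObject instance on every call (inspectable.o and inspectable.oi are simply reassigned), so callers that buffer rows must copy them. A minimal consumer sketch, with the fetchOp variable assumed for illustration:
InspectableObject io;
while ((io = fetchOp.getNextRow()) != null) {
  // Copy if the row is kept beyond this iteration; the operator reuses 'io'.
  Object copy = ObjectInspectorUtils.copyToStandardObject(io.o, io.oi);
  // ... consume copy ...
}
fetchOp.clearFetchContext();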