Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils in project hive by apache.
The class ReduceSinkOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  try {
    numRows = 0;
    cntr = 1;
    logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
    List<ExprNodeDesc> keys = conf.getKeyCols();
    if (LOG.isDebugEnabled()) {
      LOG.debug("keys size is " + keys.size());
      for (ExprNodeDesc k : keys) {
        LOG.debug("Key exprNodeDesc " + k.getExprString());
      }
    }
    keyEval = new ExprNodeEvaluator[keys.size()];
    int i = 0;
    for (ExprNodeDesc e : keys) {
      if (e instanceof ExprNodeGenericFuncDesc
          && ((ExprNodeGenericFuncDesc) e).getGenericUDF() instanceof GenericUDFBucketNumber) {
        buckColIdxInKeyForSdpo = i;
      }
      keyEval[i++] = ExprNodeEvaluatorFactory.get(e);
    }
    numDistributionKeys = conf.getNumDistributionKeys();
    distinctColIndices = conf.getDistinctColumnIndices();
    numDistinctExprs = distinctColIndices.size();
    valueEval = new ExprNodeEvaluator[conf.getValueCols().size()];
    i = 0;
    for (ExprNodeDesc e : conf.getValueCols()) {
      valueEval[i++] = ExprNodeEvaluatorFactory.get(e);
    }
    partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
    i = 0;
    for (ExprNodeDesc e : conf.getPartitionCols()) {
      int index = ExprNodeDescUtils.indexOf(e, keys);
      partitionEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
    }
    if (conf.getBucketCols() != null && !conf.getBucketCols().isEmpty()) {
      bucketEval = new ExprNodeEvaluator[conf.getBucketCols().size()];
      i = 0;
      for (ExprNodeDesc e : conf.getBucketCols()) {
        int index = ExprNodeDescUtils.indexOf(e, keys);
        bucketEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
      }
      buckColIdxInKey = conf.getPartitionCols().size();
    }
    tag = conf.getTag();
    tagByte[0] = (byte) tag;
    skipTag = conf.getSkipTag();
    LOG.info("Using tag = " + tag);
    TableDesc keyTableDesc = conf.getKeySerializeInfo();
    AbstractSerDe keySerDe = keyTableDesc.getSerDeClass().newInstance();
    keySerDe.initialize(null, keyTableDesc.getProperties(), null);
    keySerializer = keySerDe;
    keyIsText = keySerializer.getSerializedClass().equals(Text.class);
    TableDesc valueTableDesc = conf.getValueSerializeInfo();
    AbstractSerDe valueSerDe = valueTableDesc.getSerDeClass().newInstance();
    valueSerDe.initialize(null, valueTableDesc.getProperties(), null);
    valueSerializer = valueSerDe;
    int limit = conf.getTopN();
    float memUsage = conf.getTopNMemoryUsage();
    if (limit >= 0 && memUsage > 0) {
      reducerHash = conf.isPTFReduceSink() ? new PTFTopNHash() : new TopNHash();
      reducerHash.initialize(limit, memUsage, conf.isMapGroupBy(), this, conf, hconf);
    }
    useUniformHash = conf.getReducerTraits().contains(UNIFORM);
    firstRow = true;
    // The acidOp flag has to be checked in order to use the JAVA hash, which works like
    // an identity function for integers; this is necessary to read the RecordIdentifier
    // in case of ACID updates/deletes.
    boolean acidOp = conf.getWriteType() == AcidUtils.Operation.UPDATE
        || conf.getWriteType() == AcidUtils.Operation.DELETE;
    hashFunc = getConf().getBucketingVersion() == 2 && !acidOp
        ? ObjectInspectorUtils::getBucketHashCode
        : ObjectInspectorUtils::getBucketHashCodeOld;
  } catch (Exception e) {
    String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
    LOG.error(msg, e);
    throw new RuntimeException(e);
  }
}
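Both branches of the final hashFunc assignment above are plain method references into ObjectInspectorUtils, so choosing between the bucketing-version-2 hash and the legacy hash is simply choosing a function value. The standalone sketch below (not Hive operator code) shows how such a function value can be stored and applied; the BiFunction field type, the sample column values, and the inspectors are assumptions for illustration only.

// Minimal sketch, assuming a BiFunction-typed field like the operator's hashFunc.
import java.util.function.BiFunction;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class BucketHashSketch {
  public static void main(String[] args) {
    // Stand-in evaluated bucket/partition column values and their inspectors (assumed for the example).
    Object[] fields = new Object[] { 42, "acme" };
    ObjectInspector[] inspectors = new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector };

    // Bucketing version 2 selects the newer hash; older tables keep the legacy hash.
    BiFunction<Object[], ObjectInspector[], Integer> newHash =
        ObjectInspectorUtils::getBucketHashCode;
    BiFunction<Object[], ObjectInspector[], Integer> oldHash =
        ObjectInspectorUtils::getBucketHashCodeOld;

    System.out.println("v2 hash  = " + newHash.apply(fields, inspectors));
    System.out.println("old hash = " + oldHash.apply(fields, inspectors));
  }
}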
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils in project hive by apache.
The class FileSinkOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  try {
    this.hconf = hconf;
    filesCreated = false;
    isTemporary = conf.isTemporary();
    multiFileSpray = conf.isMultiFileSpray();
    this.isBucketed = hconf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0;
    totalFiles = conf.getTotalFiles();
    numFiles = conf.getNumFiles();
    dpCtx = conf.getDynPartCtx();
    lbCtx = conf.getLbCtx();
    fsp = prevFsp = null;
    valToPaths = new HashMap<String, FSPaths>();
    taskId = originalTaskId = Utilities.getTaskId(hconf);
    initializeSpecPath();
    fs = specPath.getFileSystem(hconf);
    if (hconf instanceof JobConf) {
      jc = (JobConf) hconf;
    } else {
      // test code path
      jc = new JobConf(hconf);
    }
    try {
      createHiveOutputFormat(jc);
    } catch (HiveException ex) {
      logOutputFormatError(hconf, ex);
      throw ex;
    }
    isCompressed = conf.getCompressed();
    if (conf.isLinkedFileSink() && conf.isDirectInsert()) {
      parent = Utilities.toTempPath(conf.getFinalDirName());
    } else {
      parent = Utilities.toTempPath(conf.getDirName());
    }
    statsFromRecordWriter = new boolean[numFiles];
    AbstractSerDe serde = conf.getTableInfo().getSerDeClass().newInstance();
    serde.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties(), null);
    serializer = serde;
    outputClass = serializer.getSerializedClass();
    destTablePath = conf.getDestPath();
    isInsertOverwrite = conf.getInsertOverwrite();
    counterGroup = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVECOUNTERGROUP);
    LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat
        + (isCompressed ? " with compression" : ""));
    // Timeout is chosen to make sure that even if one iteration takes more than
    // half of the script.timeout but less than script.timeout, we will still
    // be able to report progress.
    timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
    if (multiFileSpray) {
      partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
      int i = 0;
      for (ExprNodeDesc e : conf.getPartitionCols()) {
        partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
      }
      partitionObjectInspectors = initEvaluators(partitionEval, outputObjInspector);
      prtner = (HivePartitioner<HiveKey, Object>)
          ReflectionUtils.newInstance(jc.getPartitionerClass(), null);
    }
    if (dpCtx != null && !inspectPartitionValues()) {
      dpSetup();
    }
    if (lbCtx != null) {
      lbSetup();
    }
    if (!bDynParts) {
      fsp = new FSPaths(specPath, conf.isMmTable(), conf.isDirectInsert(), conf.getInsertOverwrite(),
          conf.getAcidOperation());
      fsp.subdirAfterTxn = combinePathFragments(generateListBucketingDirName(null), unionPath);
      if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
        Utilities.FILE_OP_LOGGER.trace("creating new paths " + System.identityHashCode(fsp)
            + " from ctor; childSpec " + unionPath + ": tmpPath " + fsp.buildTmpPath()
            + ", task path " + fsp.buildTaskOutputTempPath());
      }
      // createBucketFiles(fsp);
      if (!this.isSkewedStoredAsSubDirectories) {
        // special entry for non-DP case
        valToPaths.put("", fsp);
      }
    }
    final StoragePolicyValue tmpStorage =
        StoragePolicyValue.lookup(HiveConf.getVar(hconf, HIVE_TEMPORARY_TABLE_STORAGE));
    if (isTemporary && fsp != null && tmpStorage != StoragePolicyValue.DEFAULT) {
      // Not supported for temp tables.
      assert !conf.isMmTable();
      final Path outputPath = fsp.buildTaskOutputTempPath();
      StoragePolicyShim shim = ShimLoader.getHadoopShims().getStoragePolicyShim(fs);
      if (shim != null) {
        // directory creation is otherwise within the writers
        fs.mkdirs(outputPath);
        shim.setStoragePolicy(outputPath, tmpStorage);
      }
    }
    if (conf.getWriteType() == AcidUtils.Operation.UPDATE
        || conf.getWriteType() == AcidUtils.Operation.DELETE) {
      // ROW__ID is always in the first field
      recIdField = ((StructObjectInspector) outputObjInspector).getAllStructFieldRefs().get(0);
      recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
      // bucket is the second field in the record id
      bucketField = recIdInspector.getAllStructFieldRefs().get(1);
      bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
    }
    numRows = 0;
    cntr = 1;
    logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
    statsMap.put(getCounterName(Counter.RECORDS_OUT), row_count);
    // Setup hashcode
    hashFunc = conf.getTableInfo().getBucketingVersion() == 2
        ? ObjectInspectorUtils::getBucketHashCode
        : ObjectInspectorUtils::getBucketHashCodeOld;
    // This count is used to get total number of rows in an insert query.
    if (conf.getTableInfo() != null && conf.getTableInfo().getTableName() != null) {
      statsMap.put(TOTAL_TABLE_ROWS_WRITTEN, row_count);
    }
  } catch (HiveException e) {
    throw e;
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
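When the write type is UPDATE or DELETE, the inspectors captured above (recIdInspector, bucketField, bucketInspector) are later used to read the bucket value out of each row's ROW__ID struct. The standalone sketch below illustrates that access pattern with generic object-inspector APIs; the stand-in field names (writeid, bucketid, rowid) and the sample values are assumptions for illustration, not the exact ACID ROW__ID layout used internally by Hive.

// Minimal sketch, assuming a stand-in struct shaped roughly like a record identifier.
import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class RowIdBucketSketch {
  public static void main(String[] args) {
    // A stand-in struct inspector; real field names and types are defined by Hive's ACID code.
    StructObjectInspector recIdInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("writeid", "bucketid", "rowid"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaLongObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaLongObjectInspector));

    // Bucket is the second field of the record identifier, mirroring initializeOp above.
    StructField bucketField = recIdInspector.getAllStructFieldRefs().get(1);
    IntObjectInspector bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();

    // Standard struct data is a List of field values (sample values only).
    Object recId = Arrays.<Object>asList(1L, 536870912, 0L);
    int bucketValue = bucketInspector.get(recIdInspector.getStructFieldData(recId, bucketField));
    System.out.println("bucket field value = " + bucketValue);
  }
}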
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils in project hive by apache.
The class VectorReduceSinkObjectHashOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  VectorExpression.doTransientInit(reduceSinkBucketExpressions, hconf);
  VectorExpression.doTransientInit(reduceSinkPartitionExpressions, hconf);
  if (!isEmptyKey) {
    // For this variation, we serialize the key without caring whether it is a single Long,
    // a single String, a multi-column key, etc.
    keyOutput = new Output();
    keyBinarySortableSerializeWrite.set(keyOutput);
    keyVectorSerializeRow =
        new VectorSerializeRow<BinarySortableSerializeWrite>(keyBinarySortableSerializeWrite);
    keyVectorSerializeRow.init(reduceSinkKeyTypeInfos, reduceSinkKeyColumnMap);
  }
  if (isEmptyBuckets) {
    numBuckets = 0;
  } else {
    numBuckets = conf.getNumBuckets();
    bucketObjectInspectors = getObjectInspectorArray(reduceSinkBucketTypeInfos);
    bucketVectorExtractRow = new VectorExtractRow();
    bucketVectorExtractRow.init(reduceSinkBucketTypeInfos, reduceSinkBucketColumnMap);
    bucketFieldValues = new Object[reduceSinkBucketTypeInfos.length];
  }
  if (isEmptyPartitions) {
    nonPartitionRandom = new Random(12345);
  } else {
    partitionObjectInspectors = getObjectInspectorArray(reduceSinkPartitionTypeInfos);
    partitionVectorExtractRow = new VectorExtractRow();
    partitionVectorExtractRow.init(reduceSinkPartitionTypeInfos, reduceSinkPartitionColumnMap);
    partitionFieldValues = new Object[reduceSinkPartitionTypeInfos.length];
  }
  // Set hashFunc
  hashFunc = getConf().getBucketingVersion() == 2 && !vectorDesc.getIsAcidChange()
      ? ObjectInspectorUtils::getBucketHashCode
      : ObjectInspectorUtils::getBucketHashCodeOld;
  // Set function to evaluate _bucket_number if needed.
  if (reduceSinkKeyExpressions != null) {
    for (VectorExpression ve : reduceSinkKeyExpressions) {
      if (ve instanceof BucketNumExpression) {
        bucketExpr = (BucketNumExpression) ve;
        break;
      }
    }
  }
}
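In this vectorized variant, the bucket columns extracted into bucketFieldValues are hashed with the selected hashFunc, and the hash is then mapped to a bucket number in the range [0, numBuckets). The sketch below (not the operator's actual code path) illustrates that combination by calling ObjectInspectorUtils directly; the sample column value, inspector, and bucket count are assumptions for illustration.

// Minimal sketch, assuming a single string bucket column and 16 buckets.
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class BucketNumberSketch {
  public static void main(String[] args) {
    Object[] bucketFieldValues = new Object[] { "key-1" };
    ObjectInspector[] bucketObjectInspectors = new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector };
    int numBuckets = 16;

    // Hash the bucket column values, then reduce the hash to a bucket index.
    int hashCode = ObjectInspectorUtils.getBucketHashCode(bucketFieldValues, bucketObjectInspectors);
    int bucketNum = ObjectInspectorUtils.getBucketNumber(hashCode, numBuckets);
    System.out.println("hash=" + hashCode + " -> bucket " + bucketNum);
  }
}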
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils in project hive by apache.
The class TestLazyBinarySerDe, method compareDiffSizedStructs.
/**
 * Compare two structs that have a different number of fields. We just compare
 * the first few common fields, ignoring the fields existing in one struct but
 * not the other.
 *
 * @see ObjectInspectorUtils#compare(Object, ObjectInspector, Object, ObjectInspector)
 */
int compareDiffSizedStructs(Object o1, ObjectInspector oi1, Object o2, ObjectInspector oi2) {
  StructObjectInspector soi1 = (StructObjectInspector) oi1;
  StructObjectInspector soi2 = (StructObjectInspector) oi2;
  List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
  List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
  int minimum = Math.min(fields1.size(), fields2.size());
  for (int i = 0; i < minimum; i++) {
    int result = ObjectInspectorUtils.compare(
        soi1.getStructFieldData(o1, fields1.get(i)), fields1.get(i).getFieldObjectInspector(),
        soi2.getStructFieldData(o2, fields2.get(i)), fields2.get(i).getFieldObjectInspector());
    if (result != 0) {
      return result;
    }
  }
  return 0;
}
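As a usage illustration, a hypothetical companion test method (not part of TestLazyBinarySerDe) could exercise the helper with two standard structs of different sizes; only the common leading fields decide the result. It assumes the usual Arrays, ObjectInspectorFactory, and PrimitiveObjectInspectorFactory imports plus a JUnit assertEquals in scope.

// Hypothetical test method sketch; it must live in the same class to call compareDiffSizedStructs.
public void testCompareDiffSizedStructs() {
  StructObjectInspector twoFieldOI = ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList("a", "b"),
      Arrays.<ObjectInspector>asList(
          PrimitiveObjectInspectorFactory.javaIntObjectInspector,
          PrimitiveObjectInspectorFactory.javaStringObjectInspector));
  StructObjectInspector threeFieldOI = ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList("a", "b", "c"),
      Arrays.<ObjectInspector>asList(
          PrimitiveObjectInspectorFactory.javaIntObjectInspector,
          PrimitiveObjectInspectorFactory.javaStringObjectInspector,
          PrimitiveObjectInspectorFactory.javaBooleanObjectInspector));

  // Standard struct data is a List of field values.
  Object shorter = Arrays.<Object>asList(1, "x");
  Object longer = Arrays.<Object>asList(1, "x", true);

  // The common prefix (1, "x") is equal and the extra boolean field is ignored, so the result is 0.
  assertEquals(0, compareDiffSizedStructs(shorter, twoFieldOI, longer, threeFieldOI));
}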