Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In class Driver, method acquireLocks.
/**
* Acquire read and write locks needed by the statement. The list of objects to be locked are
* obtained from the inputs and outputs populated by the compiler. Locking strategy depends on
* HiveTxnManager and HiveLockManager configured
*
* This method also records the list of valid transactions. This must be done after any
* transactions have been opened.
* @throws CommandProcessorResponse
*/
private void acquireLocks() throws CommandProcessorResponse {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
if (!queryTxnMgr.isTxnOpen() && queryTxnMgr.supportsAcid()) {
/* Non-acid txn managers don't support txns but forward lock requests to lock managers;
the acid txn manager requires all locks to be associated with a txn, so if we
end up here without an open txn it's because we are processing something like "use <database>",
which by definition needs no locks. */
return;
}
try {
String userFromUGI = getUserFromUGI();
if (userFromUGI == null) {
throw createProcessorResponse(10);
}
// Set the table write id in all of the acid file sinks
if (haveAcidWrite()) {
List<FileSinkDesc> acidSinks = new ArrayList<>(plan.getAcidSinks());
// sorting makes tests easier to write since file names and ROW__IDs depend on statementId
// so this makes (file name -> data) mapping stable
acidSinks.sort((FileSinkDesc fsd1, FileSinkDesc fsd2) -> fsd1.getDirName().compareTo(fsd2.getDirName()));
for (FileSinkDesc desc : acidSinks) {
TableDesc tableInfo = desc.getTableInfo();
long writeId = queryTxnMgr.getTableWriteId(Utilities.getDatabaseName(tableInfo.getTableName()), Utilities.getTableName(tableInfo.getTableName()));
desc.setTableWriteId(writeId);
// it's possible to have > 1 FileSink writing to the same table/partition
// e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes
desc.setStatementId(queryTxnMgr.getStmtIdAndIncrement());
}
}
/*It's imperative that {@code acquireLocks()} is called for all commands so that
HiveTxnManager can transition its state machine correctly*/
queryTxnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState);
if (queryTxnMgr.recordSnapshot(plan)) {
recordValidTxns(queryTxnMgr);
}
if (plan.hasAcidResourcesInQuery()) {
recordValidWriteIds(queryTxnMgr);
}
} catch (Exception e) {
errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
downstreamError = e;
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
throw createProcessorResponse(10);
} finally {
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
}
}
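The write-id lookup above splits the qualified name returned by TableDesc.getTableName() into its database and table parts before asking the transaction manager for a write id. Below is a minimal sketch of that split, assuming the usual db.table form; the helper is hypothetical and only stands in for Utilities.getDatabaseName / Utilities.getTableName.
// Hypothetical stand-in for Utilities.getDatabaseName / Utilities.getTableName,
// assuming TableDesc.getTableName() yields a "db.table" qualified name.
static String[] splitQualifiedName(String qualifiedName) {
  int dot = qualifiedName.indexOf('.');
  if (dot < 0) {
    // No explicit database part; callers would fall back to the current database.
    return new String[] { null, qualifiedName };
  }
  return new String[] { qualifiedName.substring(0, dot), qualifiedName.substring(dot + 1) };
}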
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In class Driver, method getSchema.
/**
* Get a Schema with fields represented with native Hive types
*/
private static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) {
Schema schema = null;
if (sem == null) {
// can't get any info without a plan, so give up
} else if (sem.getResultSchema() != null) {
List<FieldSchema> lst = sem.getResultSchema();
schema = new Schema(lst, null);
} else if (sem.getFetchTask() != null) {
FetchTask ft = sem.getFetchTask();
TableDesc td = ft.getTblDesc();
// Partitioned tables don't have a TableDesc set on the FetchTask. Instead they carry a list of
// PartitionDesc objects; try the first partition's table desc and use its deserializer.
if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
if (ft.getWork().getPartDesc().size() > 0) {
td = ft.getWork().getPartDesc().get(0).getTableDesc();
}
}
if (td == null) {
LOG.info("No returning schema.");
} else {
String tableName = "result";
List<FieldSchema> lst = null;
try {
lst = HiveMetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer(conf));
} catch (Exception e) {
LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e));
}
if (lst != null) {
schema = new Schema(lst, null);
}
}
}
if (schema == null) {
schema = new Schema();
}
LOG.info("Returning Hive schema: " + schema);
return schema;
}
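Note how the TableDesc is resolved here: the FetchTask's own table desc is preferred, and only for partitioned fetches does the code fall back to the first PartitionDesc. A minimal sketch that factors those null checks into one helper, using only the FetchTask and TableDesc accessors already visible in getSchema() above:
// Prefer the fetch task's own TableDesc; otherwise use the first partition's.
// Returns null when neither is available, mirroring the checks in getSchema().
private static TableDesc resolveTableDesc(FetchTask ft) {
  TableDesc td = ft.getTblDesc();
  if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null
      && ft.getWork().getPartDesc().size() > 0) {
    td = ft.getWork().getPartDesc().get(0).getTableDesc();
  }
  return td;
}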
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In class SemanticAnalyzer, method genScriptPlan.
@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) throws SemanticException {
// If there is no "AS" clause, the output schema will be "key,value"
ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
int inputSerDeNum = 1, inputRecordWriterNum = 2;
int outputSerDeNum = 4, outputRecordReaderNum = 5;
int outputColsNum = 6;
boolean outputColNames = false, outputColSchemas = false;
int execPos = 3;
boolean defaultOutputCols = false;
// Go over all the children
if (trfm.getChildCount() > outputColsNum) {
ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
outputColNames = true;
} else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
outputColSchemas = true;
}
}
// If column type is not specified, use a string
if (!outputColNames && !outputColSchemas) {
String intName = getColumnInternalName(0);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("key");
outputCols.add(colInfo);
intName = getColumnInternalName(1);
colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("value");
outputCols.add(colInfo);
defaultOutputCols = true;
} else {
ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
int ccount = collist.getChildCount();
Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
if (outputColNames) {
for (int i = 0; i < ccount; ++i) {
String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i)).getText()).toLowerCase();
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
} else {
for (int i = 0; i < ccount; ++i) {
ASTNode child = (ASTNode) collist.getChild(i);
assert child.getType() == HiveParser.TOK_TABCOL;
String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0)).getText()).toLowerCase();
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child.getChild(1))), null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
}
}
RowResolver out_rwsch = new RowResolver();
StringBuilder columns = new StringBuilder();
StringBuilder columnTypes = new StringBuilder();
for (int i = 0; i < outputCols.size(); ++i) {
if (i != 0) {
columns.append(",");
columnTypes.append(",");
}
columns.append(outputCols.get(i).getInternalName());
columnTypes.append(outputCols.get(i).getType().getTypeName());
out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(), outputCols.get(i));
}
StringBuilder inpColumns = new StringBuilder();
StringBuilder inpColumnTypes = new StringBuilder();
ArrayList<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver().getColumnInfos();
for (int i = 0; i < inputSchema.size(); ++i) {
if (i != 0) {
inpColumns.append(",");
inpColumnTypes.append(",");
}
inpColumns.append(inputSchema.get(i).getInternalName());
inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
}
TableDesc outInfo;
TableDesc errInfo;
TableDesc inInfo;
String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
Class<? extends Deserializer> serde;
try {
serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName, true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
int fieldSeparator = Utilities.tabCode;
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
fieldSeparator = Utilities.ctrlaCode;
}
// Input and Output Serdes
if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), inpColumnTypes.toString(), false);
} else {
inInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator), inpColumns.toString(), inpColumnTypes.toString(), false, true);
}
if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(outputSerDeNum))).getChild(0), columns.toString(), columnTypes.toString(), false);
// This is for backward compatibility. If the user did not specify the
// output column list, we assume that there are 2 columns: key and value.
// However, if the script outputs col1, col2, col3 separated by TAB, the
// requirement is: key is col1 and value is (col2 TAB col3)
} else {
outInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator), columns.toString(), columnTypes.toString(), defaultOutputCols);
}
// Error stream always uses the default serde with a single column
errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
// Output record readers
Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm.getChild(outputRecordReaderNum));
Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm.getChild(inputRecordWriterNum));
Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new ScriptDesc(fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
// disable backtracking
output.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
// Add a URI entity for the transform script. The script is assumed to be local unless it is downloadable
if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) {
String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText()));
getInputs().add(new ReadEntity(new Path(scriptCmd), ResourceDownloader.isFileUri(scriptCmd)));
}
return output;
}
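All three TableDesc objects built above (inInfo, outInfo, errInfo) come from PlanUtils.getTableDesc, fed with comma-separated column-name and column-type strings assembled from the row schema. Below is a minimal sketch of that pattern, reusing the same five-argument overload as the else-branch for outInfo; the column names and types are illustrative, and LazySimpleSerDe is assumed to be the configured default script serde.
// Illustrative only: ask PlanUtils for a tab-separated, LazySimpleSerDe-backed
// TableDesc over two made-up columns, as the else-branch for outInfo does above.
Class<? extends Deserializer> serde = LazySimpleSerDe.class;   // assumed default HIVESCRIPTSERDE
String columns = "_col0,_col1";                                // internal column names
String columnTypes = "string,int";                             // matching Hive type names
TableDesc scriptOutInfo = PlanUtils.getTableDesc(serde,
    Integer.toString(Utilities.tabCode), columns, columnTypes, false);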
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In class Vectorizer, method canSpecializeReduceSink.
private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorReduceSinkDesc vectorDesc) throws HiveException {
VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
// Various restrictions.
// Set this if we encounter a condition we were not expecting.
boolean isUnexpectedCondition = false;
boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
int limit = desc.getTopN();
float memUsage = desc.getTopNMemoryUsage();
boolean hasPTFTopN = (limit >= 0 && memUsage > 0 && desc.isPTFReduceSink());
boolean hasDistinctColumns = (desc.getDistinctColumnIndices().size() > 0);
TableDesc keyTableDesc = desc.getKeySerializeInfo();
Class<? extends Deserializer> keySerializerClass = keyTableDesc.getDeserializerClass();
boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
TableDesc valueTableDesc = desc.getValueSerializeInfo();
Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
boolean isValueLazyBinary = (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
// We are doing work here we'd normally do in VectorGroupByCommonOperator's constructor.
// So if we later decide not to specialize, we'll just waste any scratch columns allocated...
List<ExprNodeDesc> keysDescs = desc.getKeyCols();
final boolean isEmptyKey = (keysDescs.size() == 0);
if (!isEmptyKey) {
VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
final int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
final TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
final Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
final VectorExpression[] reduceSinkKeyExpressions;
// Since a key expression can be a calculation and the key will go into a scratch column,
// we need the mapping and type information.
ArrayList<VectorExpression> groupByKeyExpressionsList = new ArrayList<VectorExpression>();
for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
VectorExpression ve = allKeyExpressions[i];
reduceSinkKeyColumnMap[i] = ve.getOutputColumnNum();
reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
reduceSinkKeyColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
if (!IdentityExpression.isColumnOnly(ve)) {
groupByKeyExpressionsList.add(ve);
}
}
if (groupByKeyExpressionsList.size() == 0) {
reduceSinkKeyExpressions = null;
} else {
reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]);
}
vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
}
ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
final boolean isEmptyValue = (valueDescs.size() == 0);
if (!isEmptyValue) {
VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
final int[] reduceSinkValueColumnMap = new int[allValueExpressions.length];
final TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[allValueExpressions.length];
final Type[] reduceSinkValueColumnVectorTypes = new Type[allValueExpressions.length];
VectorExpression[] reduceSinkValueExpressions;
ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
for (int i = 0; i < valueDescs.size(); ++i) {
VectorExpression ve = allValueExpressions[i];
reduceSinkValueColumnMap[i] = ve.getOutputColumnNum();
reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
reduceSinkValueColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
if (!IdentityExpression.isColumnOnly(ve)) {
reduceSinkValueExpressionsList.add(ve);
}
}
if (reduceSinkValueExpressionsList.size() == 0) {
reduceSinkValueExpressions = null;
} else {
reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
}
vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
}
boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
vectorReduceSinkInfo.setUseUniformHash(useUniformHash);
List<ExprNodeDesc> bucketDescs = desc.getBucketCols();
final boolean isEmptyBuckets = (bucketDescs == null || bucketDescs.size() == 0);
List<ExprNodeDesc> partitionDescs = desc.getPartitionCols();
final boolean isEmptyPartitions = (partitionDescs == null || partitionDescs.size() == 0);
if (useUniformHash || (isEmptyKey && isEmptyBuckets && isEmptyPartitions)) {
// NOTE: For Uniform Hash or no buckets/partitions, when the key is empty, we will use the VectorReduceSinkEmptyKeyOperator instead.
} else {
// Collect bucket and/or partition information for object hashing.
int[] reduceSinkBucketColumnMap = null;
TypeInfo[] reduceSinkBucketTypeInfos = null;
Type[] reduceSinkBucketColumnVectorTypes = null;
VectorExpression[] reduceSinkBucketExpressions = null;
if (!isEmptyBuckets) {
VectorExpression[] allBucketExpressions = vContext.getVectorExpressions(bucketDescs);
reduceSinkBucketColumnMap = new int[bucketDescs.size()];
reduceSinkBucketTypeInfos = new TypeInfo[bucketDescs.size()];
reduceSinkBucketColumnVectorTypes = new Type[bucketDescs.size()];
ArrayList<VectorExpression> reduceSinkBucketExpressionsList = new ArrayList<VectorExpression>();
for (int i = 0; i < bucketDescs.size(); ++i) {
VectorExpression ve = allBucketExpressions[i];
reduceSinkBucketColumnMap[i] = ve.getOutputColumnNum();
reduceSinkBucketTypeInfos[i] = bucketDescs.get(i).getTypeInfo();
reduceSinkBucketColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkBucketTypeInfos[i]);
if (!IdentityExpression.isColumnOnly(ve)) {
reduceSinkBucketExpressionsList.add(ve);
}
}
if (reduceSinkBucketExpressionsList.size() == 0) {
reduceSinkBucketExpressions = null;
} else {
reduceSinkBucketExpressions = reduceSinkBucketExpressionsList.toArray(new VectorExpression[0]);
}
}
int[] reduceSinkPartitionColumnMap = null;
TypeInfo[] reduceSinkPartitionTypeInfos = null;
Type[] reduceSinkPartitionColumnVectorTypes = null;
VectorExpression[] reduceSinkPartitionExpressions = null;
if (!isEmptyPartitions) {
VectorExpression[] allPartitionExpressions = vContext.getVectorExpressions(partitionDescs);
reduceSinkPartitionColumnMap = new int[partitionDescs.size()];
reduceSinkPartitionTypeInfos = new TypeInfo[partitionDescs.size()];
reduceSinkPartitionColumnVectorTypes = new Type[partitionDescs.size()];
ArrayList<VectorExpression> reduceSinkPartitionExpressionsList = new ArrayList<VectorExpression>();
for (int i = 0; i < partitionDescs.size(); ++i) {
VectorExpression ve = allPartitionExpressions[i];
reduceSinkPartitionColumnMap[i] = ve.getOutputColumnNum();
reduceSinkPartitionTypeInfos[i] = partitionDescs.get(i).getTypeInfo();
reduceSinkPartitionColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkPartitionTypeInfos[i]);
if (!IdentityExpression.isColumnOnly(ve)) {
reduceSinkPartitionExpressionsList.add(ve);
}
}
if (reduceSinkPartitionExpressionsList.size() == 0) {
reduceSinkPartitionExpressions = null;
} else {
reduceSinkPartitionExpressions = reduceSinkPartitionExpressionsList.toArray(new VectorExpression[0]);
}
}
vectorReduceSinkInfo.setReduceSinkBucketColumnMap(reduceSinkBucketColumnMap);
vectorReduceSinkInfo.setReduceSinkBucketTypeInfos(reduceSinkBucketTypeInfos);
vectorReduceSinkInfo.setReduceSinkBucketColumnVectorTypes(reduceSinkBucketColumnVectorTypes);
vectorReduceSinkInfo.setReduceSinkBucketExpressions(reduceSinkBucketExpressions);
vectorReduceSinkInfo.setReduceSinkPartitionColumnMap(reduceSinkPartitionColumnMap);
vectorReduceSinkInfo.setReduceSinkPartitionTypeInfos(reduceSinkPartitionTypeInfos);
vectorReduceSinkInfo.setReduceSinkPartitionColumnVectorTypes(reduceSinkPartitionColumnVectorTypes);
vectorReduceSinkInfo.setReduceSinkPartitionExpressions(reduceSinkPartitionExpressions);
}
// Remember the condition variables for EXPLAIN regardless.
vectorDesc.setVectorReduceSinkInfo(vectorReduceSinkInfo);
vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
vectorDesc.setEngine(engine);
vectorDesc.setIsEmptyKey(isEmptyKey);
vectorDesc.setIsEmptyValue(isEmptyValue);
vectorDesc.setIsEmptyBuckets(isEmptyBuckets);
vectorDesc.setIsEmptyPartitions(isEmptyPartitions);
vectorDesc.setHasPTFTopN(hasPTFTopN);
vectorDesc.setHasDistinctColumns(hasDistinctColumns);
vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
// This indicates we logged an inconsistency (from our point-of-view) and will not make this
// operator native...
vectorDesc.setIsUnexpectedCondition(isUnexpectedCondition);
// Many restrictions.
if (!isVectorizationReduceSinkNativeEnabled || !isTezOrSpark || hasPTFTopN || hasDistinctColumns || !isKeyBinarySortable || !isValueLazyBinary || isUnexpectedCondition) {
return false;
}
return true;
}
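Among the many gates above, the two TableDesc-related ones are the key and value serialization descriptors: native vectorized ReduceSink is only attempted when the key serde is BinarySortableSerDe and the value serde is LazyBinarySerDe. A minimal sketch that isolates just that check, using the same accessors as canSpecializeReduceSink:
// True only when the ReduceSink's key/value TableDescs use the serdes that the
// native vectorized ReduceSink operators expect (see the checks above).
static boolean hasVectorizableSerDes(ReduceSinkDesc desc) {
  boolean keyBinarySortable = desc.getKeySerializeInfo().getDeserializerClass()
      == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class;
  boolean valueLazyBinary = desc.getValueSerializeInfo().getDeserializerClass()
      == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class;
  return keyBinarySortable && valueLazyBinary;
}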
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In class BaseSemanticAnalyzer, method createFetchTask.
/**
* Create a FetchTask for a given schema.
*
* @param schema the result schema, given as comma-separated column names and column types joined by '#'
*/
protected FetchTask createFetchTask(String schema) {
Properties prop = new Properties();
// Sets delimiter to tab (ascii 9)
prop.setProperty(serdeConstants.SERIALIZATION_FORMAT, Integer.toString(Utilities.tabCode));
prop.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, " ");
String[] colTypes = schema.split("#");
prop.setProperty("columns", colTypes[0]);
prop.setProperty("columns.types", colTypes[1]);
prop.setProperty(serdeConstants.SERIALIZATION_LIB, LazySimpleSerDe.class.getName());
FetchWork fetch = new FetchWork(ctx.getResFile(), new TableDesc(TextInputFormat.class, IgnoreKeyTextOutputFormat.class, prop), -1);
fetch.setSerializationNullFormat(" ");
return (FetchTask) TaskFactory.get(fetch);
}
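createFetchTask is the one place in this listing where a TableDesc is constructed directly rather than obtained from PlanUtils: a Properties object describing tab-separated LazySimpleSerDe text rows is paired with TextInputFormat and IgnoreKeyTextOutputFormat. A minimal standalone sketch of that construction follows; the schema string is illustrative, in the same name1,name2#type1,type2 form the method splits on.
// Illustrative only: a TableDesc for tab-separated text results, mirroring
// createFetchTask() above. "tab_name#string" stands in for a real schema string.
String schema = "tab_name#string";
String[] colTypes = schema.split("#");
Properties prop = new Properties();
prop.setProperty(serdeConstants.SERIALIZATION_FORMAT, Integer.toString(Utilities.tabCode));
prop.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, " ");
prop.setProperty("columns", colTypes[0]);
prop.setProperty("columns.types", colTypes[1]);
prop.setProperty(serdeConstants.SERIALIZATION_LIB, LazySimpleSerDe.class.getName());
TableDesc resultDesc = new TableDesc(TextInputFormat.class, IgnoreKeyTextOutputFormat.class, prop);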