Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
In the class Driver, the method getHivePrivObjects.
private static List<HivePrivilegeObject> getHivePrivObjects(Set<? extends Entity> privObjects, Map<String, List<String>> tableName2Cols) {
List<HivePrivilegeObject> hivePrivobjs = new ArrayList<HivePrivilegeObject>();
if (privObjects == null) {
return hivePrivobjs;
}
for (Entity privObject : privObjects) {
HivePrivilegeObjectType privObjType = AuthorizationUtils.getHivePrivilegeObjectType(privObject.getType());
if (privObject.isDummy()) {
// do not authorize dummy readEntity or writeEntity
continue;
}
if (privObject instanceof ReadEntity && !((ReadEntity) privObject).isDirect()) {
// See the description of isDirect in ReadEntity.
continue;
}
if (privObject instanceof WriteEntity && ((WriteEntity) privObject).isTempURI()) {
// do not authorize temporary uris
continue;
}
// support for authorization on partitions needs to be added
String dbname = null;
String objName = null;
List<String> partKeys = null;
List<String> columns = null;
String className = null;
switch(privObject.getType()) {
case DATABASE:
dbname = privObject.getDatabase().getName();
break;
case TABLE:
dbname = privObject.getTable().getDbName();
objName = privObject.getTable().getTableName();
columns = tableName2Cols == null ? null : tableName2Cols.get(Table.getCompleteName(dbname, objName));
break;
case DFS_DIR:
case LOCAL_DIR:
objName = privObject.getD().toString();
break;
case FUNCTION:
if (privObject.getDatabase() != null) {
dbname = privObject.getDatabase().getName();
}
objName = privObject.getFunctionName();
className = privObject.getClassName();
break;
case DUMMYPARTITION:
case PARTITION:
// not currently handled
continue;
case SERVICE_NAME:
objName = privObject.getServiceName();
break;
default:
throw new AssertionError("Unexpected object type");
}
HivePrivObjectActionType actionType = AuthorizationUtils.getActionType(privObject);
HivePrivilegeObject hPrivObject = new HivePrivilegeObject(privObjType, dbname, objName, partKeys, columns, actionType, null, className);
hivePrivobjs.add(hPrivObject);
}
return hivePrivobjs;
}
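A hedged sketch of the TABLE branch above, not part of Driver: for a table entity the privilege object is keyed by database and table name, and the column list is looked up with the same Table.getCompleteName(db, table) key used in the switch. The table name, the columns, and their use below are assumptions for illustration only.
Map<String, List<String>> tableName2Cols = new HashMap<>();
tableName2Cols.put(Table.getCompleteName("default", "src"), Arrays.asList("key", "value"));
// For a direct, non-dummy ReadEntity over default.src, the loop above would emit roughly:
//   new HivePrivilegeObject(privObjType /* presumably TABLE_OR_VIEW */, "default", "src",
//       null /* partKeys */, Arrays.asList("key", "value"), actionType, null, null /* className */);
// whereas an indirect ReadEntity (isDirect() == false) or a dummy entity is skipped entirely.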
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
In the class QueryResultsCache, the method entryMatches.
private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry) {
QueryInfo queryInfo = entry.getQueryInfo();
for (ReadEntity readEntity : queryInfo.getInputs()) {
// Check that the tables used do not resolve to temp tables.
if (readEntity.getType() == Type.TABLE) {
Table tableUsed = readEntity.getTable();
Map<String, Table> tempTables = SessionHiveMetaStoreClient.getTempTablesForDatabase(tableUsed.getDbName());
if (tempTables != null && tempTables.containsKey(tableUsed.getTableName())) {
LOG.info("{} resolves to a temporary table in the current session. This query cannot use the cache.", tableUsed.getTableName());
return false;
}
}
}
return true;
}
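A hypothetical helper, not part of QueryResultsCache, that isolates the same temp-table check; it assumes the single-argument SessionHiveMetaStoreClient.getTempTablesForDatabase call and the ql.metadata.Table type used in the loop above.
// Returns true when the table resolves to a session-local temporary table,
// which is exactly the case in which entryMatches() rejects the cached entry.
private static boolean resolvesToTempTable(Table table) {
  Map<String, Table> tempTables =
      SessionHiveMetaStoreClient.getTempTablesForDatabase(table.getDbName());
  return tempTables != null && tempTables.containsKey(table.getTableName());
}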
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
In the class SemanticAnalyzer, the method getMetaData.
@SuppressWarnings("nls")
private void getMetaData(QB qb, ReadEntity parentInput) throws HiveException {
LOG.info("Get metadata for source tables");
// Go over the tables and populate the related structures.
// We have to materialize the table alias list since we might
// modify it in the middle for view rewrite.
List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
// Keep track of the mapping from view alias to view name and read entity.
// For example, for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T,
// this keeps track of the full view name and read entity corresponding to the aliases V3, V3:V2, and V3:V2:V1.
// This is needed for tracking the dependencies of the inputs, along with their parents.
Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo = new HashMap<String, ObjectPair<String, ReadEntity>>();
/*
* used to capture view to SQ conversions. This is used to check for
* recursive CTE invocations.
*/
Map<String, String> sqAliasToCTEName = new HashMap<String, String>();
for (String alias : tabAliases) {
String tabName = qb.getTabNameForAlias(alias);
String cteName = tabName.toLowerCase();
// Get table details from tabNameToTabObject cache
Table tab = getTableObjectByName(tabName, false);
if (tab != null) {
// do a deep copy, in case downstream changes it.
tab = new Table(tab.getTTable().deepCopy());
}
if (tab == null || tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
Table materializedTab = ctx.getMaterializedTable(cteName);
if (materializedTab == null) {
// We first look for this alias as a CTE, and then in the catalog.
CTEClause cte = findCTEFromName(qb, cteName);
if (cte != null) {
if (!cte.materialize) {
addCTEAsSubQuery(qb, cteName, alias);
sqAliasToCTEName.put(alias, cteName);
continue;
}
tab = materializeCTE(cteName, cte);
}
} else {
tab = materializedTab;
}
}
if (tab == null) {
if (tabName.equals(DUMMY_DATABASE + "." + DUMMY_TABLE)) {
continue;
}
ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
if (null != src) {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
} else {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
}
}
if (tab.isView()) {
if (qb.getParseInfo().isAnalyzeCommand()) {
throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
}
String fullViewName = tab.getFullyQualifiedName();
// Prevent view cycles
if (viewsExpanded.contains(fullViewName)) {
throw new SemanticException("Recursive view " + fullViewName + " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") + " -> " + fullViewName + ").");
}
replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
// If the view is inside another view, it should have at least one parent.
if (qb.isInsideView() && parentInput == null) {
parentInput = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
}
ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
viewInput = PlanUtils.addInput(inputs, viewInput);
aliasToViewInfo.put(alias, new ObjectPair<String, ReadEntity>(fullViewName, viewInput));
String aliasId = getAliasId(alias, qb);
if (aliasId != null) {
aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "").replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
}
viewAliasToInput.put(aliasId, viewInput);
continue;
}
if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getSrcForAlias(alias), ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
}
qb.getMetaData().setSrcForAlias(alias, tab);
if (qb.getParseInfo().isAnalyzeCommand()) {
// allow partial partition specification for noscan, since noscan is fast.
TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
if (ts.specType == SpecType.DYNAMIC_PARTITION) {
// dynamic partitions
try {
ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
} catch (HiveException e) {
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getSrcForAlias(alias), "Cannot get partitions for " + ts.partSpec), e);
}
}
tab.setTableSpec(ts);
qb.getParseInfo().addTableSpec(alias, ts);
}
ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
// Temporary tables created during execution are not input sources.
if (!PlanUtils.isValuesTempTable(alias)) {
PlanUtils.addInput(inputs, new ReadEntity(tab, parentViewInfo, parentViewInfo == null), mergeIsDirect);
}
}
LOG.info("Get metadata for subqueries");
// Go over the subqueries and getMetaData for these
for (String alias : qb.getSubqAliases()) {
boolean wasView = aliasToViewInfo.containsKey(alias);
boolean wasCTE = sqAliasToCTEName.containsKey(alias);
ReadEntity newParentInput = null;
if (wasView) {
viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
newParentInput = aliasToViewInfo.get(alias).getSecond();
} else if (wasCTE) {
ctesExpanded.add(sqAliasToCTEName.get(alias));
}
QBExpr qbexpr = qb.getSubqForAlias(alias);
getMetaData(qbexpr, newParentInput);
if (wasView) {
viewsExpanded.remove(viewsExpanded.size() - 1);
} else if (wasCTE) {
ctesExpanded.remove(ctesExpanded.size() - 1);
}
}
RowFormatParams rowFormatParams = new RowFormatParams();
StorageFormat storageFormat = new StorageFormat(conf);
LOG.info("Get metadata for destination tables");
// Go over all the destination structures and populate the related
// metadata
QBParseInfo qbp = qb.getParseInfo();
for (String name : qbp.getClauseNamesForDest()) {
ASTNode ast = qbp.getDestForClause(name);
switch(ast.getToken().getType()) {
case HiveParser.TOK_TAB:
{
TableSpec ts = new TableSpec(db, conf, ast);
if (ts.tableHandle.isView() || (!rewrittenRebuild && ts.tableHandle.isMaterializedView())) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
if (!ts.tableHandle.isNonNative() && !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg(ast, "The class is " + outputFormatClass.toString()));
}
boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(ts.tableHandle.getDbName(), ts.tableHandle.getTableName());
isTableWrittenTo |= (qb.getParseInfo().getInsertOverwriteTables().get(getUnescapedName((ASTNode) ast.getChild(0), ts.tableHandle.getDbName()).toLowerCase()) != null);
assert isTableWrittenTo : "Inconsistent data structure detected: we are writing to " + ts.tableHandle + " in " + name + " but it's not in isInsertIntoTable() or getInsertOverwriteTables()";
// Disallow update and delete on non-acid tables
boolean isAcid = AcidUtils.isFullAcidTable(ts.tableHandle);
if ((updating(name) || deleting(name)) && !isAcid) {
// here, it means the table itself doesn't support it.
throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, ts.tableName);
}
// but whether the table itself is partitioned is not known.
if (ts.specType != SpecType.STATIC_PARTITION) {
// This is a table or dynamic partition
qb.getMetaData().setDestForAlias(name, ts.tableHandle);
// has dynamic as well as static partitions
if (ts.partSpec != null && ts.partSpec.size() > 0) {
qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
}
} else {
// This is a partition
qb.getMetaData().setDestForAlias(name, ts.partHandle);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
break;
}
case HiveParser.TOK_DIR:
{
// This is a dfs file
String fname = stripQuotes(ast.getChild(0).getText());
if ((!qb.getParseInfo().getIsSubQ()) && (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
if (qb.isCTAS() || qb.isMaterializedView()) {
qb.setIsQuery(false);
ctx.setResDir(null);
ctx.setResFile(null);
Path location;
// If the CTAS query does specify a location, use the table location, else use the db location
if (qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) {
location = new Path(qb.getTableDesc().getLocation());
} else {
// allocate a temporary output dir on the location of the table
String tableName = getUnescapedName((ASTNode) ast.getChild(0));
String[] names = Utilities.getDbTableName(tableName);
try {
Warehouse wh = new Warehouse(conf);
// Use destination table's db location.
String destTableDb = qb.getTableDesc() != null ? qb.getTableDesc().getDatabaseName() : null;
if (destTableDb == null) {
destTableDb = names[0];
}
location = wh.getDatabasePath(db.getDatabase(destTableDb));
} catch (MetaException e) {
throw new SemanticException(e);
}
}
try {
CreateTableDesc tblDesc = qb.getTableDesc();
if (tblDesc != null && tblDesc.isTemporary() && AcidUtils.isInsertOnlyTable(tblDesc.getTblProps(), true)) {
fname = FileUtils.makeQualified(location, conf).toString();
} else {
fname = ctx.getExtTmpPathRelTo(FileUtils.makeQualified(location, conf)).toString();
}
} catch (Exception e) {
throw new SemanticException(generateErrorMessage(ast, "Error creating temporary folder on: " + location.toString()), e);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
TableSpec ts = new TableSpec(db, conf, this.ast);
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
} else {
// This is the only place where isQuery is set to true; it defaults to false.
qb.setIsQuery(true);
Path stagingPath = getStagingDirectoryPathname(qb);
fname = stagingPath.toString();
ctx.setResDir(stagingPath);
}
}
boolean isDfsFile = true;
if (ast.getChildCount() >= 2 && ast.getChild(1).getText().toLowerCase().equals("local")) {
isDfsFile = false;
}
// Set the destination for the SELECT query inside the CTAS
qb.getMetaData().setDestForAlias(name, fname, isDfsFile);
CreateTableDesc directoryDesc = new CreateTableDesc();
boolean directoryDescIsSet = false;
int numCh = ast.getChildCount();
for (int num = 1; num < numCh; num++) {
ASTNode child = (ASTNode) ast.getChild(num);
if (child != null) {
if (storageFormat.fillStorageFormat(child)) {
directoryDesc.setInputFormat(storageFormat.getInputFormat());
directoryDesc.setOutputFormat(storageFormat.getOutputFormat());
directoryDesc.setSerName(storageFormat.getSerde());
directoryDescIsSet = true;
continue;
}
switch(child.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(child);
directoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
directoryDesc.setLineDelim(rowFormatParams.lineDelim);
directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
directoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
directoryDesc.setNullFormat(rowFormatParams.nullFormat);
directoryDescIsSet = true;
break;
case HiveParser.TOK_TABLESERIALIZER:
ASTNode serdeChild = (ASTNode) child.getChild(0);
storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText()));
directoryDesc.setSerName(storageFormat.getSerde());
if (serdeChild.getChildCount() > 1) {
directoryDesc.setSerdeProps(new HashMap<String, String>());
readProps((ASTNode) serdeChild.getChild(1).getChild(0), directoryDesc.getSerdeProps());
}
directoryDescIsSet = true;
break;
}
}
}
if (directoryDescIsSet) {
qb.setDirectoryDesc(directoryDesc);
}
break;
}
default:
throw new SemanticException(generateErrorMessage(ast, "Unknown Token Type " + ast.getToken().getType()));
}
}
}
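The view branch above builds a chain of read entities, one per level of view expansion. Below is a hypothetical sketch of the resulting shape for a shortened V3 -> V2 -> T variant of the example in the comments; the Table(dbName, tableName) constructor is assumed for brevity, while the three-argument ReadEntity constructor is the one used in the view branch.
Table v3 = new Table("default", "v3");
Table v2 = new Table("default", "v2");
Table t = new Table("default", "t");
ReadEntity v3Input = new ReadEntity(v3, null, true);      // top-level view, direct
ReadEntity v2Input = new ReadEntity(v2, v3Input, false);  // reached via v3
ReadEntity tInput = new ReadEntity(t, v2Input, false);    // reached via v3:v2
// The indirect entries preserve lineage for hooks and dependency tracking, but
// Driver.getHivePrivObjects() shown earlier skips them because isDirect() is false.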
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
In the class SemanticAnalyzer, the method genScriptPlan.
@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) throws SemanticException {
// If there is no "AS" clause, the output schema will be "key,value"
ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
int inputSerDeNum = 1, inputRecordWriterNum = 2;
int outputSerDeNum = 4, outputRecordReaderNum = 5;
int outputColsNum = 6;
boolean outputColNames = false, outputColSchemas = false;
int execPos = 3;
boolean defaultOutputCols = false;
// Go over all the children
if (trfm.getChildCount() > outputColsNum) {
ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
outputColNames = true;
} else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
outputColSchemas = true;
}
}
// If column type is not specified, use a string
if (!outputColNames && !outputColSchemas) {
String intName = getColumnInternalName(0);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("key");
outputCols.add(colInfo);
intName = getColumnInternalName(1);
colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("value");
outputCols.add(colInfo);
defaultOutputCols = true;
} else {
ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
int ccount = collist.getChildCount();
Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
if (outputColNames) {
for (int i = 0; i < ccount; ++i) {
String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i)).getText()).toLowerCase();
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
} else {
for (int i = 0; i < ccount; ++i) {
ASTNode child = (ASTNode) collist.getChild(i);
assert child.getType() == HiveParser.TOK_TABCOL;
String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0)).getText()).toLowerCase();
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child.getChild(1))), null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
}
}
RowResolver out_rwsch = new RowResolver();
StringBuilder columns = new StringBuilder();
StringBuilder columnTypes = new StringBuilder();
for (int i = 0; i < outputCols.size(); ++i) {
if (i != 0) {
columns.append(",");
columnTypes.append(",");
}
columns.append(outputCols.get(i).getInternalName());
columnTypes.append(outputCols.get(i).getType().getTypeName());
out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(), outputCols.get(i));
}
StringBuilder inpColumns = new StringBuilder();
StringBuilder inpColumnTypes = new StringBuilder();
ArrayList<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver().getColumnInfos();
for (int i = 0; i < inputSchema.size(); ++i) {
if (i != 0) {
inpColumns.append(",");
inpColumnTypes.append(",");
}
inpColumns.append(inputSchema.get(i).getInternalName());
inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
}
TableDesc outInfo;
TableDesc errInfo;
TableDesc inInfo;
String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
Class<? extends Deserializer> serde;
try {
serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName, true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
int fieldSeparator = Utilities.tabCode;
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
fieldSeparator = Utilities.ctrlaCode;
}
// Input and Output Serdes
if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), inpColumnTypes.toString(), false);
} else {
inInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator), inpColumns.toString(), inpColumnTypes.toString(), false, true);
}
if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(outputSerDeNum))).getChild(0), columns.toString(), columnTypes.toString(), false);
// This is for backward compatibility. If the user did not specify the
// output column list, we assume that there are 2 columns: key and value.
// However, if the script outputs col1, col2, col3 separated by TAB, the
// requirement is: key is col1 and value is (col2 TAB col3).
} else {
outInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator), columns.toString(), columnTypes.toString(), defaultOutputCols);
}
// Error stream always uses the default serde with a single column
errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
// Output record readers
Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm.getChild(outputRecordReaderNum));
Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm.getChild(inputRecordWriterNum));
Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new ScriptDesc(fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
// disable backtracking
output.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
// Add a URI entity for the transform script. The script is assumed to be local unless it is downloadable.
if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) {
String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText()));
getInputs().add(new ReadEntity(new Path(scriptCmd), ResourceDownloader.isFileUri(scriptCmd)));
}
return output;
}
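When the HIVE_CAPTURE_TRANSFORM_ENTITY flag checked above is enabled, the transform script itself is recorded as a read entity. A hypothetical illustration of that entity follows; the script path is made up, while the constructor and the ResourceDownloader.isFileUri call are the ones used above.
String scriptCmd = "/tmp/my_transform.py"; // hypothetical script path
// The boolean argument marks whether the path is a local file URI, mirroring the call above;
// either way the script ends up in getInputs(), so hooks and authorization can see it.
ReadEntity scriptEntity = new ReadEntity(new Path(scriptCmd), ResourceDownloader.isFileUri(scriptCmd));
getInputs().add(scriptEntity);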
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
In the class DDLSemanticAnalyzer, the method analyzeCacheMetadata.
private void analyzeCacheMetadata(ASTNode ast) throws SemanticException {
Table tbl = AnalyzeCommandUtils.getTable(ast, this);
Map<String, String> partSpec = null;
CacheMetadataDesc desc;
// In 2 cases out of 3, we could pass the path and type directly to metastore...
if (AnalyzeCommandUtils.isPartitionLevelStats(ast)) {
partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf);
Partition part = getPartition(tbl, partSpec, true);
desc = new CacheMetadataDesc(tbl.getDbName(), tbl.getTableName(), part.getName());
inputs.add(new ReadEntity(part));
} else {
// Should we get all partitions for a partitioned table?
desc = new CacheMetadataDesc(tbl.getDbName(), tbl.getTableName(), tbl.isPartitioned());
inputs.add(new ReadEntity(tbl));
}
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
}