Use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.
The class LineageInfo, method getLineageInfo.
/**
 * Parses the given query and walks the resulting AST so that lineage
 * information can be gathered from it.
 *
 * The input and output table lists are emptied before the walk starts, so the
 * lists only describe the query passed to this call.
 *
 * @param query the query text to parse
 * @throws ParseException declared by this method; presumably raised when the
 *         query cannot be parsed
 * @throws SemanticException declared by this method; presumably raised while
 *         the tree is being walked
 */
public void getLineageInfo(String query) throws ParseException, SemanticException {
  // Parse the query, then descend through token-less wrapper nodes until a
  // node carrying a token (or a leaf) is reached.
  ASTNode root = ParseUtils.parse(query, null);
  while (root.getToken() == null && root.getChildCount() > 0) {
    root = (ASTNode) root.getChild(0);
  }

  // Forget the tables recorded for any previously analysed query.
  inputTableList.clear();
  OutputTableList.clear();

  // No rules are registered in the map; this object is handed to the
  // dispatcher as its first argument (presumably the fallback processor that
  // receives every node when no rule matches).
  Map<Rule, NodeProcessor> noRules = new LinkedHashMap<Rule, NodeProcessor>();
  Dispatcher dispatcher = new DefaultRuleDispatcher(this, noRules, null);
  GraphWalker walker = new DefaultGraphWalker(dispatcher);

  // The walk starts from the single root located above.
  ArrayList<Node> startNodes = new ArrayList<Node>();
  startNodes.add(root);
  walker.startWalking(startNodes, null);
}
Use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.
The class SemanticAnalyzer, method parseStreamTables.
/**
 * Gathers the aliases listed in the STREAMTABLE hints of the query block and
 * stores them on the join tree, appended to the stream aliases it already had.
 *
 * The alias list is only created when at least one alias is found, so a join
 * tree without stream aliases keeps a null list if no hint contributes any.
 *
 * @param joinTree join tree whose stream aliases are read and then set again
 * @param qb query block whose parse info supplies the hint nodes; its hints
 *        must not be null
 */
private void parseStreamTables(QBJoinTree joinTree, QB qb) {
  List<String> aliases = joinTree.getStreamAliases();
  for (Node rawHint : qb.getParseInfo().getHints().getChildren()) {
    ASTNode hintAst = (ASTNode) rawHint;
    // Child 0 identifies the hint; anything but STREAMTABLE is irrelevant here.
    if (hintAst.getChild(0).getType() != HintParser.TOK_STREAMTABLE) {
      continue;
    }
    // Child 1 carries one child per alias named in the hint.
    for (int idx = 0; idx < hintAst.getChild(1).getChildCount(); idx++) {
      String alias = hintAst.getChild(1).getChild(idx).getText();
      if (aliases == null) {
        aliases = new ArrayList<String>();
      }
      aliases.add(alias);
    }
  }
  joinTree.setStreamAliases(aliases);
}
Use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.
The class SemanticAnalyzer, method genValuesTempTable.
/**
 * Generate a temp table out of a values clause.
 * See also {@link #preProcessForInsert(ASTNode, QB)}
 *
 * The rows of the values clause are written as text into a file named
 * "data_file" inside a fresh directory, a temporary table is created on top of
 * that directory, and a TOK_TABREF subtree naming the table is returned so it
 * can stand in for the original from clause.
 *
 * @param originalFrom the TOK_VIRTUAL_TABLE node; its first child must be a
 *        TOK_VIRTUAL_TABREF and its second child a TOK_VALUES_TABLE
 * @param qb query block; if it lists encrypted target table paths, the temp
 *        data is placed under a scratch path derived from the first of them
 * @return a TOK_TABREF -> TOK_TABNAME -> Identifier subtree naming the temp table
 * @throws SemanticException if the virtual table is not anonymous, or if
 *         writing the data file or creating the table fails for any reason
 */
private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
  Path dataDir = null;
  if (!qb.getEncryptedTargetTablePaths().isEmpty()) {
    //currently only Insert into T values(...) is supported thus only 1 values clause
    //and only 1 target table are possible.  If/when support for
    //select ... from values(...) is added an insert statement may have multiple
    //encrypted target tables.
    dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
  }
  // Pick a name for the table
  SessionState ss = SessionState.get();
  String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();

  // Step 1, parse the values clause we were handed
  List<? extends Node> fromChildren = originalFrom.getChildren();
  // First child should be the virtual table ref
  ASTNode virtualTableRef = (ASTNode) fromChildren.get(0);
  assert virtualTableRef.getToken().getType() == HiveParser.TOK_VIRTUAL_TABREF : "Expected first child of TOK_VIRTUAL_TABLE to be TOK_VIRTUAL_TABREF but was " + virtualTableRef.getName();
  List<? extends Node> virtualTableRefChildren = virtualTableRef.getChildren();
  // First child of this should be the table name.  If it's anonymous,
  // then we don't have a table name.
  ASTNode tabName = (ASTNode) virtualTableRefChildren.get(0);
  if (tabName.getToken().getType() != HiveParser.TOK_ANONYMOUS) {
    // A named virtual table would require parsing its list of column names and
    // building them into the table, which is not supported.
    throw new SemanticException(ErrorMsg.VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED.getMsg());
  }
  // The second child of the TOK_VIRTUAL_TABLE should be TOK_VALUES_TABLE
  ASTNode valuesTable = (ASTNode) fromChildren.get(1);
  assert valuesTable.getToken().getType() == HiveParser.TOK_VALUES_TABLE : "Expected second child of TOK_VIRTUAL_TABLE to be TOK_VALUE_TABLE but was " + valuesTable.getName();
  // Each of the children of TOK_VALUES_TABLE will be a TOK_VALUE_ROW
  List<? extends Node> valuesTableChildren = valuesTable.getChildren();

  // Now that we're going to start reading through the rows, open a file to write the rows to.
  // If we leave this method before creating the temporary table we need to be sure to clean up
  // this file.
  Path tablePath = null;
  FileSystem fs = null;
  FSDataOutputStream out = null;
  try {
    if (dataDir == null) {
      tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
    } else {
      //if target table of insert is encrypted, make sure temporary table data is stored
      //similarly encrypted
      tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf);
    }
    fs = tablePath.getFileSystem(conf);
    fs.mkdirs(tablePath);
    Path dataFile = new Path(tablePath, "data_file");
    out = fs.create(dataFile);
    List<FieldSchema> fields = new ArrayList<FieldSchema>();

    boolean firstRow = true;
    for (Node n : valuesTableChildren) {
      ASTNode valuesRow = (ASTNode) n;
      assert valuesRow.getToken().getType() == HiveParser.TOK_VALUE_ROW : "Expected child of TOK_VALUE_TABLE to be TOK_VALUE_ROW but was " + valuesRow.getName();
      // Each of the children of this should be a literal
      List<? extends Node> valuesRowChildren = valuesRow.getChildren();
      boolean isFirst = true;
      int nextColNum = 1;
      for (Node n1 : valuesRowChildren) {
        ASTNode value = (ASTNode) n1;
        // The schema is derived from the first row only: one string column per value.
        if (firstRow) {
          fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", ""));
        }
        if (isFirst) {
          isFirst = false;
        } else {
          // Separate columns with Ctrl-A (U+0001). The table below is created
          // without an explicit field delimiter, so the rows have to use the
          // default text delimiter; an empty separator would fuse the values
          // of a row into a single column.
          writeAsText("\u0001", out);
        }
        writeAsText(unparseExprForValuesClause(value), out);
      }
      writeAsText("\n", out);
      firstRow = false;
    }

    // Step 2, create a temp table, using the created file as the data
    StorageFormat format = new StorageFormat(conf);
    format.processStorageFormat("TextFile");
    Table table = db.newTable(tableName);
    table.setSerializationLib(format.getSerde());
    table.setFields(fields);
    table.setDataLocation(tablePath);
    table.getTTable().setTemporary(true);
    table.setStoredAsSubDirectories(false);
    table.setInputFormatClass(format.getInputFormat());
    table.setOutputFormatClass(format.getOutputFormat());
    db.createTable(table, false);
  } catch (Exception e) {
    String errMsg = ErrorMsg.INSERT_CANNOT_CREATE_TEMP_FILE.getMsg() + e.getMessage();
    // Keep the stack trace in the log; the message alone hides where it failed.
    LOG.error(errMsg, e);
    // Best-effort cleanup of the temp directory. The delete has to be
    // recursive: by this point the directory may already contain the data
    // file, and a non-recursive delete of a non-empty directory fails.
    if (fs != null && tablePath != null) {
      try {
        fs.delete(tablePath, true);
      } catch (IOException cleanupFailure) {
        // Deliberately not rethrown so the original failure stays the one reported.
        LOG.warn("Unable to remove temporary directory " + tablePath, cleanupFailure);
      }
    }
    throw new SemanticException(errMsg, e);
  } finally {
    IOUtils.closeStream(out);
  }

  // Step 3, return a new subtree with a from clause built around that temp table
  // The form of the tree is TOK_TABREF->TOK_TABNAME->identifier(tablename)
  Token t = new ClassicToken(HiveParser.TOK_TABREF);
  ASTNode tabRef = new ASTNode(t);
  t = new ClassicToken(HiveParser.TOK_TABNAME);
  ASTNode tabNameNode = new ASTNode(t);
  tabRef.addChild(tabNameNode);
  t = new ClassicToken(HiveParser.Identifier, tableName);
  ASTNode identifier = new ASTNode(t);
  tabNameNode.addChild(identifier);
  return tabRef;
}
Use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.
The class SemanticAnalyzer, method genUniqueJoinTree.
/**
 * Builds the join tree for a UNIQUEJOIN from its parse tree.
 *
 * The children of the parse tree are visited in order. A table reference
 * registers its alias (the first one as the left alias, all later ones as
 * right aliases), an expression list contributes the join keys of the
 * table, and a PRESERVE keyword marks the next table reference as preserved.
 *
 * @param qb query block supplying the id, the alias modification and the hints
 * @param joinParseTree the UNIQUEJOIN node whose children are processed
 * @param aliasToOpInfo operators looked up by table alias
 * @return the populated join tree
 * @throws SemanticException if the key lists differ in size or are empty, if a
 *         subquery is used, or if an unexpected child node is encountered
 */
private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree, Map<String, Operator> aliasToOpInfo) throws SemanticException {
  QBJoinTree result = new QBJoinTree();
  result.setNoOuterJoin(false);
  result.setExpressions(new ArrayList<ArrayList<ASTNode>>());
  result.setFilters(new ArrayList<ArrayList<ASTNode>>());
  result.setFiltersForPushing(new ArrayList<ArrayList<ASTNode>>());

  // Accumulators that are copied onto the join tree once all children are seen.
  ArrayList<String> allSources = new ArrayList<String>();
  ArrayList<String> leftSide = new ArrayList<String>();
  ArrayList<String> rightSide = new ArrayList<String>();
  ArrayList<Boolean> preserveFlags = new ArrayList<Boolean>();

  // True while a PRESERVE keyword is waiting for the table reference it applies to.
  boolean preserveNext = false;
  // Number of join keys per table; -1 until the first non-empty key list is seen.
  int keyCount = -1;

  for (int pos = 0; pos < joinParseTree.getChildCount(); pos++) {
    ASTNode part = (ASTNode) joinParseTree.getChild(pos);
    int kind = part.getToken().getType();

    if (kind == HiveParser.TOK_TABREF) {
      // A lone child means no explicit alias, so the table name doubles as the
      // alias; otherwise the last child carries the alias.
      String tableName = getUnescapedUnqualifiedTableName((ASTNode) part.getChild(0));
      String alias;
      if (part.getChildCount() == 1) {
        alias = tableName;
      } else {
        int lastIdx = part.getChildCount() - 1;
        alias = unescapeIdentifier(part.getChild(lastIdx).getText().toLowerCase());
      }

      if (pos == 0) {
        leftSide.add(alias);
        result.setLeftAlias(alias);
      } else {
        rightSide.add(alias);
      }
      result.getAliasToOpInfo().put(getModifiedAlias(qb, alias), aliasToOpInfo.get(alias));
      result.setId(qb.getId());
      allSources.add(alias);
      preserveFlags.add(preserveNext);
      preserveNext = false;
    } else if (kind == HiveParser.TOK_EXPLIST) {
      // Every table must supply the same, non-zero number of keys.
      int width = part.getChildCount();
      if (keyCount == -1 && width != 0) {
        keyCount = width;
      } else if (width != keyCount) {
        throw new SemanticException("Tables with different or invalid number of keys in UNIQUEJOIN");
      }

      ArrayList<ASTNode> keys = new ArrayList<ASTNode>();
      for (Node key : part.getChildren()) {
        keys.add((ASTNode) key);
      }
      result.getExpressions().add(keys);
      // Each key list gets its own, initially empty, filter lists.
      result.getFilters().add(new ArrayList<ASTNode>());
      result.getFiltersForPushing().add(new ArrayList<ASTNode>());
    } else if (kind == HiveParser.KW_PRESERVE) {
      preserveNext = true;
    } else if (kind == HiveParser.TOK_SUBQUERY) {
      throw new SemanticException("Subqueries are not supported in UNIQUEJOIN");
    } else {
      throw new SemanticException("Unexpected UNIQUEJOIN structure");
    }
  }

  result.setBaseSrc(allSources.toArray(new String[0]));
  result.setLeftAliases(leftSide.toArray(new String[0]));
  result.setRightAliases(rightSide.toArray(new String[0]));

  // One join condition per table reference, carrying its preserve flag.
  JoinCond[] conditions = new JoinCond[preserveFlags.size()];
  int slot = 0;
  for (Boolean flag : preserveFlags) {
    conditions[slot++] = new JoinCond(flag);
  }
  result.setJoinCond(conditions);

  // Stream-table hints are only applied when hints exist and the engine is not tez.
  if (qb.getParseInfo().getHints() != null) {
    String engine = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    if (!engine.equals("tez")) {
      LOG.info("STREAMTABLE hint honored.");
      parseStreamTables(result, qb);
    }
  }
  return result;
}
Use of org.apache.hadoop.hive.ql.lib.Node in project hive by apache.
The class SemanticAnalyzer, method preProcessForInsert.
/**
 * Walks the AST of an INSERT statement and records on the query block the path
 * of every target table that {@code isPathEncrypted} reports as encrypted.
 * This is needed so that the data from the values clause of
 * Insert ... select values(...) is stored securely.
 * See also {@link #genValuesTempTable(ASTNode, QB)}
 *
 * @param node root of the statement; anything other than a TOK_QUERY node
 *        (including null) is ignored
 * @param qb query block that receives the encrypted target table paths
 * @throws SemanticException wrapping any exception raised while inspecting
 *         the tree or looking up a table
 */
private void preProcessForInsert(ASTNode node, QB qb) throws SemanticException {
  // Token types leading from a TOK_INSERT node down to the target table name:
  // (TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T1)))
  final int[] pathToTableName = {HiveParser.TOK_INSERT_INTO, HiveParser.TOK_TAB, HiveParser.TOK_TABNAME};
  try {
    boolean isQuery = node != null
        && node.getToken() != null
        && node.getToken().getType() == HiveParser.TOK_QUERY;
    if (!isQuery) {
      return;
    }
    for (Node branch : node.getChildren()) {
      ASTNode current = (ASTNode) branch;
      if (current.getToken().getType() != HiveParser.TOK_INSERT) {
        continue;
      }
      // Descend one level per token type; give up on this branch as soon as
      // an expected child is missing.
      for (int tokenType : pathToTableName) {
        current = (ASTNode) current.getFirstChildWithType(tokenType);
        if (current == null) {
          break;
        }
      }
      if (current == null) {
        continue;
      }
      String[] qualifiedName = getQualifiedTableName(current);
      Table target = db.getTable(qualifiedName[0], qualifiedName[1]);
      Path location = target.getPath();
      if (isPathEncrypted(location)) {
        qb.addEncryptedTargetTablePath(location);
      }
    }
  } catch (Exception ex) {
    throw new SemanticException(ex);
  }
}
Aggregations