use of org.apache.hadoop.hive.ql.plan.AlterTableDesc in project hive by apache.
the class ReplicationSemanticAnalyzer, method analyzeReplLoad.
/*
* Example dump dirs we need to be able to handle:
*
* for: hive.repl.rootdir = staging/
* Then, repl dumps will be created in staging/<dumpdir>
*
* single-db-dump: staging/blah12345 will contain a db dir for the db specified
*  blah12345/
*   default/
*    _metadata
*    tbl1/
*     _metadata
*     dt=20160907/
*      _files
*    tbl2/
*    tbl3/
*    unptn_tbl/
*     _metadata
*     _files
*
* multi-db-dump: staging/bar12347 will contain dirs for each db covered
* staging/
*  bar12347/
*   default/
*    ...
*   sales/
*    ...
*
* single table-dump: staging/baz123 will contain a table object dump inside
* staging/
*  baz123/
*   _metadata
*   dt=20150931/
*    _files
*
* incremental dump : staging/blue123 will contain dirs for each event inside.
* staging/
*  blue123/
*   34/
*   35/
*   36/
*/
private void analyzeReplLoad(ASTNode ast) throws SemanticException {
LOG.debug("ReplSemanticAnalyzer.analyzeReplLoad: " + String.valueOf(dbNameOrPattern) + "." + String.valueOf(tblNameOrPattern) + " from " + String.valueOf(path));
try {
Path loadPath = new Path(path);
final FileSystem fs = loadPath.getFileSystem(conf);
if (!fs.exists(loadPath)) {
// supposed dump path does not exist.
throw new FileNotFoundException(loadPath.toUri().toString());
}
// Now, the dumped path can be one of three things:
// a) It can be a db dump, in which case we expect a set of dirs, each with a
// db name, and with a _metadata file in each, and table dirs inside that.
// b) It can be a table dump dir, in which case we expect a _metadata dump of
// the table in question in the dir, and an individual ptn dir hierarchy.
// c) A dump can be an incremental dump, which means we have several subdirs
// each of which has the evid as the dir name, and each of which corresponds
// to an event-level dump. Currently, only CREATE_TABLE and ADD_PARTITION are
// handled, so all of these dumps will be at a table/ptn level.
// For incremental repl, we will have individual events which can
// be other things like roles and fns as well.
// At this point, all dump dirs should contain a _dumpmetadata file that
// tells us what is inside that dumpdir.
DumpMetaData dmd = new DumpMetaData(loadPath);
boolean evDump = false;
if (dmd.isIncrementalDump()) {
LOG.debug("{} contains an incremental dump", loadPath);
evDump = true;
} else {
LOG.debug("{} contains a bootstrap dump", loadPath);
}
if ((!evDump) && (tblNameOrPattern != null) && !(tblNameOrPattern.isEmpty())) {
// not an event dump, and a table name pattern was specified, so this has to be a tbl-level dump
rootTasks.addAll(analyzeTableLoad(dbNameOrPattern, tblNameOrPattern, path, null, null, null));
return;
}
FileStatus[] srcs = LoadSemanticAnalyzer.matchFilesOrDir(fs, loadPath);
if (srcs == null || (srcs.length == 0)) {
LOG.warn("Nothing to load at {}", loadPath.toUri().toString());
return;
}
FileStatus[] dirsInLoadPath = fs.listStatus(loadPath, EximUtil.getDirectoryFilter(fs));
if ((dirsInLoadPath == null) || (dirsInLoadPath.length == 0)) {
throw new IllegalArgumentException("No data to load in path " + loadPath.toUri().toString());
}
if (!evDump) {
// not an event dump, not a table dump - thus, a db dump
if ((dbNameOrPattern != null) && (dirsInLoadPath.length > 1)) {
LOG.debug("Found multiple dirs when we expected 1:");
for (FileStatus d : dirsInLoadPath) {
LOG.debug("> " + d.getPath().toUri().toString());
}
throw new IllegalArgumentException("Multiple dirs in " + loadPath.toUri().toString() + " does not correspond to REPL LOAD expecting to load to a singular destination point.");
}
for (FileStatus dir : dirsInLoadPath) {
analyzeDatabaseLoad(dbNameOrPattern, fs, dir);
}
} else {
// event dump, each subdir is an individual event dump.
// we need to guarantee that the directory listing we got is in order of evid.
Arrays.sort(dirsInLoadPath);
Task<? extends Serializable> evTaskRoot = TaskFactory.get(new DependencyCollectionWork(), conf);
Task<? extends Serializable> taskChainTail = evTaskRoot;
int evstage = 0;
Long lastEvid = null;
Map<String, Long> dbsUpdated = new ReplicationSpec.ReplStateMap<String, Long>();
Map<String, Long> tablesUpdated = new ReplicationSpec.ReplStateMap<String, Long>();
for (FileStatus dir : dirsInLoadPath) {
LOG.debug("Loading event from {} to {}.{}", dir.getPath().toUri(), dbNameOrPattern, tblNameOrPattern);
// event loads will behave similarly to table loads, with one crucial difference:
// the precursor order is strict, and each event must be processed after the previous one.
// The way we handle this strict order is as follows:
// First, we start with a taskChainTail which is a dummy noop task (a DependencyCollectionTask)
// at the head of our event chain. For each event we process, we tell analyzeTableLoad to
// create tasks that use the taskChainTail as a dependency. Then, we collect all those tasks
// and introduce a new barrier task (also a DependencyCollectionTask) which depends on all
// these tasks. Then, this barrier task becomes our new taskChainTail. Thus, we get a set of
// tasks as follows:
//
//                 --->ev1.task1--                          --->ev2.task1--
//                /               \                        /               \
//  evTaskRoot-->*---->ev1.task2---*--> ev1.barrierTask-->*---->ev2.task2---*->evTaskChainTail
//                \               /
//                 --->ev1.task3--
//
// Once this entire chain is generated, we add evTaskRoot to rootTasks, so as to execute the
// entire chain
String locn = dir.getPath().toUri().toString();
DumpMetaData eventDmd = new DumpMetaData(new Path(locn));
List<Task<? extends Serializable>> evTasks = analyzeEventLoad(dbNameOrPattern, tblNameOrPattern, locn, taskChainTail, dbsUpdated, tablesUpdated, eventDmd);
LOG.debug("evstage#{} got {} tasks", evstage, evTasks != null ? evTasks.size() : 0);
if ((evTasks != null) && (!evTasks.isEmpty())) {
Task<? extends Serializable> barrierTask = TaskFactory.get(new DependencyCollectionWork(), conf);
for (Task<? extends Serializable> t : evTasks) {
t.addDependentTask(barrierTask);
LOG.debug("Added {}:{} as a precursor of barrier task {}:{}", t.getClass(), t.getId(), barrierTask.getClass(), barrierTask.getId());
}
LOG.debug("Updated taskChainTail from {}{} to {}{}", taskChainTail.getClass(), taskChainTail.getId(), barrierTask.getClass(), barrierTask.getId());
taskChainTail = barrierTask;
evstage++;
lastEvid = dmd.eventTo;
}
}
if (evstage > 0) {
if ((tblNameOrPattern != null) && (!tblNameOrPattern.isEmpty())) {
// if tblNameOrPattern is specified, then dbNameOrPattern will be too, and
// thus, this is a table-level REPL LOAD - only the table needs updating.
// If any of the individual events logged any other dbs as having changed,
// null them out.
dbsUpdated.clear();
tablesUpdated.clear();
tablesUpdated.put(dbNameOrPattern + "." + tblNameOrPattern, lastEvid);
} else if ((dbNameOrPattern != null) && (!dbNameOrPattern.isEmpty())) {
// if dbNameOrPattern is specified and tblNameOrPattern isn't, this is a
// db-level update, and thus, the database-level replication state needs updating.
dbsUpdated.clear();
dbsUpdated.put(dbNameOrPattern, lastEvid);
}
}
for (String tableName : tablesUpdated.keySet()) {
// weird - AlterTableDesc requires a HashMap to update props instead of a Map.
HashMap<String, String> mapProp = new HashMap<String, String>();
mapProp.put(ReplicationSpec.KEY.CURR_STATE_ID.toString(), tablesUpdated.get(tableName).toString());
AlterTableDesc alterTblDesc = new AlterTableDesc(AlterTableDesc.AlterTableTypes.ADDPROPS, null, false);
alterTblDesc.setProps(mapProp);
alterTblDesc.setOldName(tableName);
Task<? extends Serializable> updateReplIdTask = TaskFactory.get(new DDLWork(inputs, outputs, alterTblDesc), conf);
taskChainTail.addDependentTask(updateReplIdTask);
taskChainTail = updateReplIdTask;
}
for (String dbName : dbsUpdated.keySet()) {
Map<String, String> mapProp = new HashMap<String, String>();
mapProp.put(ReplicationSpec.KEY.CURR_STATE_ID.toString(), dbsUpdated.get(dbName).toString());
AlterDatabaseDesc alterDbDesc = new AlterDatabaseDesc(dbName, mapProp);
Task<? extends Serializable> updateReplIdTask = TaskFactory.get(new DDLWork(inputs, outputs, alterDbDesc), conf);
taskChainTail.addDependentTask(updateReplIdTask);
taskChainTail = updateReplIdTask;
}
rootTasks.add(evTaskRoot);
}
} catch (Exception e) {
// TODO : simple wrap & rethrow for now, clean up with error codes
throw new SemanticException(e);
}
}
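The AlterTableDesc usage this method is listed for is the ADDPROPS pattern in the loop over tablesUpdated: record the last replicated event id as a table property and wrap the descriptor in a DDLWork task. The sketch below isolates that pattern. It is illustrative only, not Hive code: the class and helper names are invented, the empty ReadEntity/WriteEntity sets stand in for the analyzer's own inputs/outputs, and the imports assume the package layout of the Hive version shown above.

// Minimal sketch (illustrative, not Hive code): the ADDPROPS pattern used above to
// record the replication state id on a table. Names not shown in the snippet above are assumptions.
import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;

public class ReplStateUpdateSketch {
  // Builds a task that stamps the given table with the last replicated event id.
  static Task<? extends Serializable> updateReplStateTask(String tableName, Long lastEvid, HiveConf conf) {
    HashMap<String, String> props = new HashMap<String, String>();
    props.put(ReplicationSpec.KEY.CURR_STATE_ID.toString(), lastEvid.toString());
    AlterTableDesc alterTblDesc = new AlterTableDesc(AlterTableDesc.AlterTableTypes.ADDPROPS, null, false);
    alterTblDesc.setProps(props);
    alterTblDesc.setOldName(tableName);
    // Empty read/write entity sets here; the real analyzer passes its own inputs/outputs.
    return TaskFactory.get(new DDLWork(new HashSet<ReadEntity>(), new HashSet<WriteEntity>(), alterTblDesc), conf);
  }
}

In analyzeReplLoad the resulting task is chained onto taskChainTail, so it only runs after the event tasks whose state it records.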
use of org.apache.hadoop.hive.ql.plan.AlterTableDesc in project hive by apache.
the class HCatSemanticAnalyzer, method authorizeDDLWork.
@Override
protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work) throws HiveException {
// DB operations; none of them are enforced by Hive right now.
ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc();
if (showDatabases != null) {
authorize(HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(), HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges());
}
DropDatabaseDesc dropDb = work.getDropDatabaseDesc();
if (dropDb != null) {
Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName());
if (db != null) {
// if the above returned null, then the db does not exist - probably a
// "drop database if exists" clause - don't try to authorize then.
authorize(db, Privilege.DROP);
}
}
DescDatabaseDesc descDb = work.getDescDatabaseDesc();
if (descDb != null) {
Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName());
authorize(db, Privilege.SELECT);
}
SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc();
if (switchDb != null) {
Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName());
authorize(db, Privilege.SELECT);
}
ShowTablesDesc showTables = work.getShowTblsDesc();
if (showTables != null) {
String dbName = showTables.getDbName() == null ? SessionState.get().getCurrentDatabase() : showTables.getDbName();
authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT);
}
ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc();
if (showTableStatus != null) {
String dbName = showTableStatus.getDbName() == null ? SessionState.get().getCurrentDatabase() : showTableStatus.getDbName();
authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT);
}
// TODO: add alter database support in HCat
// Table operations.
DropTableDesc dropTable = work.getDropTblDesc();
if (dropTable != null) {
if (dropTable.getPartSpecs() == null) {
// drop table is already enforced by Hive. We only check for table level location even if the
// table is partitioned.
} else {
// this is actually an ALTER TABLE DROP PARTITION statement
for (DropTableDesc.PartSpec partSpec : dropTable.getPartSpecs()) {
// partitions are not added as write entries in drop partitions in Hive
Table table = hive.getTable(SessionState.get().getCurrentDatabase(), dropTable.getTableName());
List<Partition> partitions = null;
try {
partitions = hive.getPartitionsByFilter(table, partSpec.getPartSpec().getExprString());
} catch (Exception e) {
throw new HiveException(e);
}
for (Partition part : partitions) {
authorize(part, Privilege.DROP);
}
}
}
}
AlterTableDesc alterTable = work.getAlterTblDesc();
if (alterTable != null) {
Table table = hive.getTable(SessionState.get().getCurrentDatabase(), Utilities.getDbTableName(alterTable.getOldName())[1], false);
Partition part = null;
if (alterTable.getPartSpec() != null) {
part = hive.getPartition(table, alterTable.getPartSpec(), false);
}
String newLocation = alterTable.getNewLocation();
/* Hcat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements
* for the old table/partition location and the new location.
*/
if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
if (part != null) {
// authorize for the old
authorize(part, Privilege.ALTER_DATA);
// location, and new location
part.setLocation(newLocation);
authorize(part, Privilege.ALTER_DATA);
} else {
// authorize for the old
authorize(table, Privilege.ALTER_DATA);
// location, and new location
table.getTTable().getSd().setLocation(newLocation);
authorize(table, Privilege.ALTER_DATA);
}
}
// other alter operations are already supported by Hive
}
// we should be careful when authorizing a table based on just the
// table name. If columns have a separate authorization domain, it
// must be honored
DescTableDesc descTable = work.getDescTblDesc();
if (descTable != null) {
String tableName = extractTableName(descTable.getTableName());
authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT);
}
ShowPartitionsDesc showParts = work.getShowPartsDesc();
if (showParts != null) {
String tableName = extractTableName(showParts.getTabName());
authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT);
}
}
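For the AlterTableDesc part of this hook, the accessors that matter are getOp(), getNewLocation() and getPartSpec(): for ALTERLOCATION, HCat authorizes ALTER_DATA against the old location, swaps in the new location, and authorizes again. The sketch below restates that branch in isolation. It is a hedged illustration: the Authorizer interface is invented purely to stand in for HCat's protected authorize(...) helpers; only the AlterTableDesc, Table and Partition calls already shown in the snippet above are assumed to exist.

// Minimal sketch (illustrative): the ALTERLOCATION authorization pattern from authorizeDDLWork.
// The Authorizer interface is an assumption, not part of HCat or Hive.
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
import org.apache.hadoop.hive.ql.security.authorization.Privilege;

public class AlterLocationAuthSketch {

  interface Authorizer {
    void authorize(Table table, Privilege priv) throws HiveException;
    void authorize(Partition part, Privilege priv) throws HiveException;
  }

  // Re-authorizes ALTER_DATA for both the current and the proposed location,
  // mirroring the ALTERLOCATION branch above.
  static void authorizeAlterLocation(AlterTableDesc desc, Table table, Partition part, Authorizer auth)
      throws HiveException {
    if (desc.getOp() != AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
      return; // other ALTER TABLE operations are authorized by Hive itself
    }
    String newLocation = desc.getNewLocation();
    if (part != null) {
      auth.authorize(part, Privilege.ALTER_DATA);   // old partition location
      part.setLocation(newLocation);
      auth.authorize(part, Privilege.ALTER_DATA);   // new partition location
    } else {
      auth.authorize(table, Privilege.ALTER_DATA);  // old table location
      table.getTTable().getSd().setLocation(newLocation);
      auth.authorize(table, Privilege.ALTER_DATA);  // new table location
    }
  }
}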
use of org.apache.hadoop.hive.ql.plan.AlterTableDesc in project hive by apache.
the class DDLSemanticAnalyzer, method analyzeAltertableSkewedby.
/**
* Analyzes an ALTER TABLE ... SKEWED BY statement.
*
* @param ast
* node
* @throws SemanticException
*/
private void analyzeAltertableSkewedby(String[] qualified, ASTNode ast) throws SemanticException {
/**
* Throw an error if the user tries to use the DDL with
* hive.internal.ddl.list.bucketing.enable set to false.
*/
HiveConf hiveConf = SessionState.get().getConf();
Table tab = getTable(qualified);
inputs.add(new ReadEntity(tab));
outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_EXCLUSIVE));
validateAlterTableType(tab, AlterTableTypes.ADDSKEWEDBY);
String tableName = getDotName(qualified);
if (ast.getChildCount() == 0) {
/* Convert a skewed table to non-skewed table. */
AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, true, new ArrayList<String>(), new ArrayList<List<String>>());
alterTblDesc.setStoredAsSubDirectories(false);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf));
} else {
switch(((ASTNode) ast.getChild(0)).getToken().getType()) {
case HiveParser.TOK_TABLESKEWED:
handleAlterTableSkewedBy(ast, tableName, tab);
break;
case HiveParser.TOK_STOREDASDIRS:
handleAlterTableDisableStoredAsDirs(tableName, tab);
break;
default:
assert false;
}
}
}
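The childless-AST branch above is the ALTER TABLE ... NOT SKEWED case: an AlterTableDesc built with empty skewed columns and values, with stored-as-subdirectories turned off. A minimal sketch of that construction follows; the helper name and the empty entity sets are assumptions for illustration, while the AlterTableDesc, DDLWork and TaskFactory calls mirror the snippet above.

// Minimal sketch (illustrative): building the "convert to non-skewed" AlterTableDesc
// created for ALTER TABLE <t> NOT SKEWED. Helper name and entity sets are assumptions.
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;

public class NotSkewedSketch {
  static Task<? extends Serializable> notSkewedTask(String dotTableName, HiveConf conf) {
    // Empty skewed column names/values turn the table back into a non-skewed table.
    AlterTableDesc alterTblDesc =
        new AlterTableDesc(dotTableName, true, new ArrayList<String>(), new ArrayList<List<String>>());
    // A non-skewed table cannot be stored as list-bucketing subdirectories.
    alterTblDesc.setStoredAsSubDirectories(false);
    return TaskFactory.get(
        new DDLWork(new HashSet<ReadEntity>(), new HashSet<WriteEntity>(), alterTblDesc), conf);
  }
}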
use of org.apache.hadoop.hive.ql.plan.AlterTableDesc in project hive by apache.
the class DDLSemanticAnalyzer, method analyzeAlterTableSkewedLocation.
/**
* Analyzes an ALTER TABLE ... SET SKEWED LOCATION statement.
*
* @param ast
* @param tableName
* @param partSpec
* @throws SemanticException
*/
private void analyzeAlterTableSkewedLocation(ASTNode ast, String tableName, HashMap<String, String> partSpec) throws SemanticException {
/**
* Throw an error if the user tries to use the DDL with
* hive.internal.ddl.list.bucketing.enable set to false.
*/
HiveConf hiveConf = SessionState.get().getConf();
/**
* Retrieve mappings from parser
*/
Map<List<String>, String> locations = new HashMap<List<String>, String>();
ArrayList<Node> locNodes = ast.getChildren();
if (null == locNodes) {
throw new SemanticException(ErrorMsg.ALTER_TBL_SKEWED_LOC_NO_LOC.getMsg());
} else {
for (Node locNode : locNodes) {
// TOK_SKEWED_LOCATIONS
ASTNode locAstNode = (ASTNode) locNode;
ArrayList<Node> locListNodes = locAstNode.getChildren();
if (null == locListNodes) {
throw new SemanticException(ErrorMsg.ALTER_TBL_SKEWED_LOC_NO_LOC.getMsg());
} else {
for (Node locListNode : locListNodes) {
// TOK_SKEWED_LOCATION_LIST
ASTNode locListAstNode = (ASTNode) locListNode;
ArrayList<Node> locMapNodes = locListAstNode.getChildren();
if (null == locMapNodes) {
throw new SemanticException(ErrorMsg.ALTER_TBL_SKEWED_LOC_NO_LOC.getMsg());
} else {
for (Node locMapNode : locMapNodes) {
// TOK_SKEWED_LOCATION_MAP
ASTNode locMapAstNode = (ASTNode) locMapNode;
ArrayList<Node> locMapAstNodeMaps = locMapAstNode.getChildren();
if ((null == locMapAstNodeMaps) || (locMapAstNodeMaps.size() != 2)) {
throw new SemanticException(ErrorMsg.ALTER_TBL_SKEWED_LOC_NO_MAP.getMsg());
} else {
List<String> keyList = new LinkedList<String>();
ASTNode node = (ASTNode) locMapAstNodeMaps.get(0);
if (node.getToken().getType() == HiveParser.TOK_TABCOLVALUES) {
keyList = getSkewedValuesFromASTNode(node);
} else if (isConstant(node)) {
keyList.add(PlanUtils.stripQuotes(node.getText()));
} else {
throw new SemanticException(ErrorMsg.SKEWED_TABLE_NO_COLUMN_VALUE.getMsg());
}
String newLocation = PlanUtils.stripQuotes(unescapeSQLString(((ASTNode) locMapAstNodeMaps.get(1)).getText()));
validateSkewedLocationString(newLocation);
locations.put(keyList, newLocation);
addLocationToOutputs(newLocation);
}
}
}
}
}
}
}
AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, locations, partSpec);
addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf));
}
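Once the parser mappings are collected, the method boils down to a single constructor call, AlterTableDesc(tableName, locations, partSpec), where locations maps each skewed key value list to the directory that stores its rows. The sketch below shows that construction with a hard-coded mapping; the key value, the HDFS path, the helper name and the empty entity sets are placeholders, not Hive API.

// Minimal sketch (illustrative): the AlterTableDesc built above for
// ALTER TABLE <t> SET SKEWED LOCATION (key='path', ...). Values are placeholders.
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;

public class SkewedLocationSketch {
  static Task<? extends Serializable> skewedLocationTask(String tableName,
      HashMap<String, String> partSpec, HiveConf conf) {
    // Each skewed key value list maps to the directory holding its rows (placeholder path).
    Map<List<String>, String> locations = new HashMap<List<String>, String>();
    locations.put(Arrays.asList("1"), "hdfs:///example/skewed/key_1");
    AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, locations, partSpec);
    return TaskFactory.get(
        new DDLWork(new HashSet<ReadEntity>(), new HashSet<WriteEntity>(), alterTblDesc), conf);
  }
}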
use of org.apache.hadoop.hive.ql.plan.AlterTableDesc in project hive by apache.
the class DDLSemanticAnalyzer, method analyzeAlterTableClusterSort.
private void analyzeAlterTableClusterSort(ASTNode ast, String tableName, HashMap<String, String> partSpec) throws SemanticException {
AlterTableDesc alterTblDesc;
switch(ast.getChild(0).getType()) {
case HiveParser.TOK_NOT_CLUSTERED:
alterTblDesc = new AlterTableDesc(tableName, -1, new ArrayList<String>(), new ArrayList<Order>(), partSpec);
break;
case HiveParser.TOK_NOT_SORTED:
alterTblDesc = new AlterTableDesc(tableName, true, partSpec);
break;
case HiveParser.TOK_ALTERTABLE_BUCKETS:
ASTNode buckets = (ASTNode) ast.getChild(0);
List<String> bucketCols = getColumnNames((ASTNode) buckets.getChild(0));
List<Order> sortCols = new ArrayList<Order>();
int numBuckets = -1;
if (buckets.getChildCount() == 2) {
numBuckets = Integer.parseInt(buckets.getChild(1).getText());
} else {
sortCols = getColumnNamesOrder((ASTNode) buckets.getChild(1));
numBuckets = Integer.parseInt(buckets.getChild(2).getText());
}
if (numBuckets <= 0) {
throw new SemanticException(ErrorMsg.INVALID_BUCKET_NUMBER.getMsg());
}
alterTblDesc = new AlterTableDesc(tableName, numBuckets, bucketCols, sortCols, partSpec);
break;
default:
throw new SemanticException("Invalid operation " + ast.getChild(0).getType());
}
addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf));
}
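The three branches map ALTER TABLE ... NOT CLUSTERED, NOT SORTED, and CLUSTERED BY ... [SORTED BY ...] INTO n BUCKETS onto three AlterTableDesc constructors. The sketch below shows them side by side; the table name, columns, bucket count and partSpec are placeholder values, and only the constructors used in the snippet above are assumed to exist.

// Minimal sketch (illustrative): the three AlterTableDesc constructors used above for
// NOT CLUSTERED / NOT SORTED / CLUSTERED BY ... INTO n BUCKETS. All values are placeholders.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;

public class ClusterSortSketch {
  public static void main(String[] args) {
    String tableName = "default.example_tbl";
    HashMap<String, String> partSpec = null; // or a partition spec for a partitioned table

    // NOT CLUSTERED: -1 buckets and empty bucket/sort columns drop the clustering spec.
    AlterTableDesc notClustered =
        new AlterTableDesc(tableName, -1, new ArrayList<String>(), new ArrayList<Order>(), partSpec);

    // NOT SORTED: the boolean flag clears only the sort columns.
    AlterTableDesc notSorted = new AlterTableDesc(tableName, true, partSpec);

    // CLUSTERED BY (id) SORTED BY (id ASC) INTO 8 BUCKETS.
    List<String> bucketCols = Arrays.asList("id");
    List<Order> sortCols = Arrays.asList(new Order("id", 1)); // 1 = ascending in the metastore API
    AlterTableDesc clustered = new AlterTableDesc(tableName, 8, bucketCols, sortCols, partSpec);

    System.out.println(notClustered + " " + notSorted + " " + clustered);
  }
}

In the analyzer itself, whichever descriptor is built is passed to addInputsOutputsAlterTable and wrapped in a DDLWork task, as shown at the end of the snippet above.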