Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.
The class CreateTableHook, method postAnalyze.
@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<?>> rootTasks) throws SemanticException {
  if (rootTasks.size() == 0) {
    // No DDL task is created in the CREATE TABLE IF NOT EXISTS case
    return;
  }
  Task<?> t = rootTasks.get(rootTasks.size() - 1);
  if (!(t instanceof DDLTask)) {
    return;
  }
  DDLTask task = (DDLTask) t;
  DDLDesc d = task.getWork().getDDLDesc();
  if (!(d instanceof CreateTableDesc)) {
    return;
  }
  CreateTableDesc desc = (CreateTableDesc) d;
  Map<String, String> tblProps = desc.getTblProps();
  if (tblProps == null) {
    // tblProps will be null if the user did not specify TBLPROPERTIES in the
    // CREATE TABLE command.
    tblProps = new HashMap<String, String>();
  }
  // first check whether we will allow the user to create the table.
  String storageHandler = desc.getStorageHandler();
  if (StringUtils.isNotEmpty(storageHandler)) {
    try {
      HiveStorageHandler storageHandlerInst = HCatUtil.getStorageHandler(context.getConf(), desc.getStorageHandler(), desc.getSerName(), desc.getInputFormat(), desc.getOutputFormat());
      // Authorization checks are performed by storageHandler.getAuthorizationProvider(), if
      // StorageDelegationAuthorizationProvider is used.
    } catch (IOException e) {
      throw new SemanticException(e);
    }
  }
  try {
    Table table = context.getHive().newTable(desc.getDbTableName());
    if (desc.getLocation() != null) {
      table.setDataLocation(new Path(desc.getLocation()));
    }
    if (desc.getStorageHandler() != null) {
      table.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, desc.getStorageHandler());
    }
    for (Map.Entry<String, String> prop : tblProps.entrySet()) {
      table.setProperty(prop.getKey(), prop.getValue());
    }
    for (Map.Entry<String, String> prop : desc.getSerdeProps().entrySet()) {
      table.setSerdeParam(prop.getKey(), prop.getValue());
    }
    if (HCatAuthUtil.isAuthorizationEnabled(context.getConf())) {
      authorize(table, Privilege.CREATE);
    }
  } catch (HiveException ex) {
    throw new SemanticException(ex);
  }
  desc.setTblProps(tblProps);
  // tableName is a field of this hook, set during preAnalyze()
  context.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, tableName);
}
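The last line above records the name of the table being created under HCatConstants.HCAT_CREATE_TBL_NAME. A rough sketch of how later code could read that value back, assuming a Configuration is at hand (the helper class and method names below are illustrative, not part of Hive):

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.common.HCatConstants;

final class CreatedTableLookup {
  // Returns the table name recorded by CreateTableHook.postAnalyze(), or null if the hook did not run.
  static String createdTableName(Configuration conf) {
    return conf.get(HCatConstants.HCAT_CREATE_TBL_NAME);
  }
}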
Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.
The class DummyCreateTableHook, method postAnalyze.
@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<?>> rootTasks) throws SemanticException {
  CreateTableDesc desc = (CreateTableDesc) ((DDLTask) rootTasks.get(rootTasks.size() - 1)).getWork().getDDLDesc();
  Map<String, String> tblProps = desc.getTblProps();
  if (tblProps == null) {
    tblProps = new HashMap<String, String>();
  }
  tblProps.put("createdBy", DummyCreateTableHook.class.getName());
  tblProps.put("Message", "Open Source rocks!!");
  desc.setTblProps(tblProps);
}
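Hooks like the two above only run if Hive is told about them. Below is a minimal sketch of enabling one through the hive.semantic.analyzer.hook property, which takes a comma-separated list of hook class names; the setup class itself is an assumption for illustration, not code from the Hive repo:

import org.apache.hadoop.hive.conf.HiveConf;

final class HookSetupSketch {
  static HiveConf confWithHook() {
    HiveConf conf = new HiveConf();
    // Hive instantiates the listed classes and invokes preAnalyze/postAnalyze
    // around semantic analysis of each statement.
    conf.set("hive.semantic.analyzer.hook", DummyCreateTableHook.class.getName());
    return conf;
  }
}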
Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.
The class AcidExportSemanticAnalyzer, method analyzeAcidExport.
/**
 * See {@link #isAcidExport(ASTNode)}
 * 1. create the temp table T
 * 2. compile 'insert into T select * from acidTable'
 * 3. compile 'export acidTable' (acidTable will be replaced with T during execution)
 * 4. create task to drop T
 *
 * Using a true temp (session-level) table means it does not affect replication, and the table
 * is not visible outside the session that created it, which matters for security.
 */
private void analyzeAcidExport(ASTNode ast) throws SemanticException {
  assert ast != null && ast.getToken() != null && ast.getToken().getType() == HiveParser.TOK_EXPORT;
  ASTNode tableTree = (ASTNode) ast.getChild(0);
  assert tableTree != null && tableTree.getType() == HiveParser.TOK_TAB;
  ASTNode tokRefOrNameExportTable = (ASTNode) tableTree.getChild(0);
  Table exportTable = getTargetTable(tokRefOrNameExportTable);
  if (exportTable != null && (exportTable.isView() || exportTable.isMaterializedView())) {
    throw new SemanticException("Views and Materialized Views can not be exported.");
  }
  assert AcidUtils.isFullAcidTable(exportTable);
  // need to create the table "manually" rather than creating a task, since it has to exist to
  // compile the insert into T...
  // this is db.table
  final String newTableName = getTmptTableNameForExport(exportTable);
  final TableName newTableNameRef = HiveTableName.of(newTableName);
  Map<String, String> tblProps = new HashMap<>();
  tblProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, Boolean.FALSE.toString());
  String location;
  // the location has the same life cycle as the tmp table
  try {
    // Generate a unique ID for the temp table path.
    // This path will be fixed for the life of the temp table.
    Path path = new Path(SessionState.getTempTableSpace(conf), UUID.randomUUID().toString());
    path = Warehouse.getDnsPath(path, conf);
    location = path.toString();
  } catch (MetaException err) {
    throw new SemanticException("Error while generating temp table path:", err);
  }
  CreateTableLikeDesc ctlt = new CreateTableLikeDesc(newTableName, false, true, null, null, location, null, null, tblProps,
      // important so we get an exception on name collision
      true, Warehouse.getQualifiedName(exportTable.getTTable()), false);
  Table newTable;
  try {
    ReadEntity dbForTmpTable = new ReadEntity(db.getDatabase(exportTable.getDbName()));
    // so the plan knows we are 'reading' this db - locks, security...
    inputs.add(dbForTmpTable);
    DDLTask createTableTask = (DDLTask) TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), ctlt), conf);
    // the above get() doesn't set the conf
    createTableTask.setConf(conf);
    Context context = new Context(conf);
    createTableTask.initialize(null, null, new TaskQueue(context), context);
    createTableTask.execute();
    newTable = db.getTable(newTableName);
  } catch (HiveException ex) {
    throw new SemanticException(ex);
  }
  // now generate the insert statement:
  // insert into newTableName select * from ts <where partition spec>
  StringBuilder rewrittenQueryStr = generateExportQuery(newTable.getPartCols(), tokRefOrNameExportTable, tableTree, newTableName);
  ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
  Context rewrittenCtx = rr.rewrittenCtx;
  // it's set in parseRewrittenQuery()
  rewrittenCtx.setIsUpdateDeleteMerge(false);
  ASTNode rewrittenTree = rr.rewrittenTree;
  try {
    useSuper = true;
    // newTable has to exist at this point to compile
    super.analyze(rewrittenTree, rewrittenCtx);
  } finally {
    useSuper = false;
  }
  // now we have the rootTasks set up for Insert ... Select
  removeStatsTasks(rootTasks);
  // now make an ExportTask from the temp table
  /* analyzeExport() creates a TableSpec which in turn tries to build
     "public List<Partition> partitions" by looking in the metastore for Partitions matching
     the partition spec in the Export command. These of course don't exist yet, since we
     haven't run the insert statement yet. */
  Task<ExportWork> exportTask = ExportSemanticAnalyzer.analyzeExport(ast, newTableName, db, conf, inputs, outputs);
  // Add an alter table task to set transactional props.
  // Do it after populating the temp table so that it's written as a non-transactional table, but
  // update the props before export so that the export archive metadata has these props. This way, when
  // IMPORT is done for this archive and the target table doesn't exist, it will be created as Acid.
  Map<String, String> mapProps = new HashMap<>();
  mapProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, Boolean.TRUE.toString());
  AlterTableSetPropertiesDesc alterTblDesc = new AlterTableSetPropertiesDesc(newTableNameRef, null, null, false, mapProps, false, false, null);
  addExportTask(rootTasks, exportTask, TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc)));
  // Now make a task to drop the temp table
  // {@link DropTableAnalyzer#analyzeInternal(ASTNode ast)}
  ReplicationSpec replicationSpec = new ReplicationSpec();
  DropTableDesc dropTblDesc = new DropTableDesc(newTableName, false, true, replicationSpec);
  Task<DDLWork> dropTask = TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), dropTblDesc), conf);
  exportTask.addDependentTask(dropTask);
  markReadEntityForUpdate();
  if (ctx.isExplainPlan()) {
    try {
      // so that "explain" doesn't "leak" tmp tables
      // TODO: catalog
      db.dropTable(newTable.getDbName(), newTable.getTableName(), true, true, true);
    } catch (HiveException ex) {
      LOG.warn("Unable to drop " + newTableName + " due to: " + ex.getMessage(), ex);
    }
  }
}
Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.
The class TestConditionalResolverCommonJoin, method testResolvingDriverAlias.
@Test
public void testResolvingDriverAlias() throws Exception {
  ConditionalResolverCommonJoin resolver = new ConditionalResolverCommonJoin();
  Map<Path, List<String>> pathToAliases = new HashMap<>();
  pathToAliases.put(new Path("path1"), new ArrayList<String>(Arrays.asList("alias1", "alias2")));
  pathToAliases.put(new Path("path2"), new ArrayList<String>(Arrays.asList("alias3")));
  HashMap<String, Long> aliasToKnownSize = new HashMap<String, Long>();
  aliasToKnownSize.put("alias1", 1024L);
  aliasToKnownSize.put("alias2", 2048L);
  aliasToKnownSize.put("alias3", 4096L);
  DDLTask task1 = new DDLTask();
  task1.setId("alias2");
  DDLTask task2 = new DDLTask();
  task2.setId("alias3");
  // joins alias1, alias2, alias3 (alias1 was not eligible for the big position)
  // Must be a deterministic-order map for consistent q-test output across Java versions
  HashMap<Task<?>, Set<String>> taskToAliases = new LinkedHashMap<Task<?>, Set<String>>();
  taskToAliases.put(task1, new HashSet<String>(Arrays.asList("alias2")));
  taskToAliases.put(task2, new HashSet<String>(Arrays.asList("alias3")));
  ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx ctx = new ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx();
  ctx.setPathToAliases(pathToAliases);
  ctx.setTaskToAliases(taskToAliases);
  ctx.setAliasToKnownSize(aliasToKnownSize);
  HiveConf conf = new HiveConf();
  conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, 4096);
  // only alias3 can be selected
  Task resolved = resolver.resolveMapJoinTask(ctx, conf);
  Assert.assertEquals("alias3", resolved.getId());
  conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, 65536);
  // alias1, alias2, alias3 can all be selected, but the biggest one (alias3) is chosen
  resolved = resolver.resolveMapJoinTask(ctx, conf);
  Assert.assertEquals("alias3", resolved.getId());
  conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, 2048);
  // nothing is selected
  resolved = resolver.resolveMapJoinTask(ctx, conf);
  Assert.assertNull(resolved);
}
Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.
The class TaskCompiler, method patchUpAfterCTASorMaterializedView.
private void patchUpAfterCTASorMaterializedView(List<Task<?>> rootTasks, Set<ReadEntity> inputs, Set<WriteEntity> outputs, Task<?> createTask, boolean createTaskAfterMoveTask) {
  // clear the mapredWork output file from outputs for CTAS;
  // the DDLWork at the tail of the chain will have the output
  Iterator<WriteEntity> outIter = outputs.iterator();
  while (outIter.hasNext()) {
    switch (outIter.next().getType()) {
      case DFS_DIR:
      case LOCAL_DIR:
        outIter.remove();
        break;
      default:
        break;
    }
  }
  // find all leaf tasks and make the DDLTask a dependent task of all of them
  Set<Task<?>> leaves = new LinkedHashSet<>();
  getLeafTasks(rootTasks, leaves);
  assert (leaves.size() > 0);
  // Target task is supposed to be the last task
  Task<?> targetTask = createTask;
  for (Task<?> task : leaves) {
    if (task instanceof StatsTask) {
      // StatsTask requires the table to already exist
      for (Task<?> parentOfStatsTask : task.getParentTasks()) {
        if (parentOfStatsTask instanceof MoveTask && !createTaskAfterMoveTask) {
          // For partitioned CTAS, we need to create the table before the move task,
          // as we need to create the partitions in the metastore and for that we should
          // have already registered the table
          interleaveTask(parentOfStatsTask, createTask);
        } else {
          parentOfStatsTask.addDependentTask(createTask);
        }
      }
      for (Task<?> parentOfCrtTblTask : createTask.getParentTasks()) {
        parentOfCrtTblTask.removeDependentTask(task);
      }
      createTask.addDependentTask(task);
      targetTask = task;
    } else if (task instanceof MoveTask && !createTaskAfterMoveTask) {
      // For partitioned CTAS, we need to create the table before the move task,
      // as we need to create the partitions in the metastore and for that we should
      // have already registered the table
      interleaveTask(task, createTask);
      targetTask = task;
    } else {
      task.addDependentTask(createTask);
    }
  }
  // Add a task to insert / delete the materialized view from the registry if needed
  if (createTask instanceof DDLTask) {
    DDLTask ddlTask = (DDLTask) createTask;
    DDLWork work = ddlTask.getWork();
    DDLDesc desc = work.getDDLDesc();
    if (desc instanceof CreateMaterializedViewDesc) {
      CreateMaterializedViewDesc createViewDesc = (CreateMaterializedViewDesc) desc;
      String tableName = createViewDesc.getViewName();
      boolean retrieveAndInclude = createViewDesc.isRewriteEnabled();
      MaterializedViewUpdateDesc materializedViewUpdateDesc = new MaterializedViewUpdateDesc(tableName, retrieveAndInclude, false, false);
      DDLWork ddlWork = new DDLWork(inputs, outputs, materializedViewUpdateDesc);
      targetTask.addDependentTask(TaskFactory.get(ddlWork, conf));
    } else if (desc instanceof AlterMaterializedViewRewriteDesc) {
      AlterMaterializedViewRewriteDesc alterMVRewriteDesc = (AlterMaterializedViewRewriteDesc) desc;
      String tableName = alterMVRewriteDesc.getMaterializedViewName();
      boolean retrieveAndInclude = alterMVRewriteDesc.isRewriteEnable();
      boolean disableRewrite = !alterMVRewriteDesc.isRewriteEnable();
      MaterializedViewUpdateDesc materializedViewUpdateDesc = new MaterializedViewUpdateDesc(tableName, retrieveAndInclude, disableRewrite, false);
      DDLWork ddlWork = new DDLWork(inputs, outputs, materializedViewUpdateDesc);
      targetTask.addDependentTask(TaskFactory.get(ddlWork, conf));
    }
  }
}
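Every example in this section repeats the same unwrapping: a DDLTask carries a DDLWork, and the work carries a single DDLDesc describing the concrete operation. A small helper capturing that pattern could look like the sketch below; the DdlDescs class is hypothetical, not a utility that exists in Hive.

import java.util.List;
import java.util.Optional;
import org.apache.hadoop.hive.ql.ddl.DDLDesc;
import org.apache.hadoop.hive.ql.ddl.DDLTask;
import org.apache.hadoop.hive.ql.exec.Task;

final class DdlDescs {
  // Returns the desc of the last root task if it is a DDLTask whose desc has the requested type.
  static <T extends DDLDesc> Optional<T> lastDdlDesc(List<Task<?>> rootTasks, Class<T> type) {
    if (rootTasks == null || rootTasks.isEmpty()) {
      return Optional.empty();
    }
    Task<?> last = rootTasks.get(rootTasks.size() - 1);
    if (!(last instanceof DDLTask)) {
      return Optional.empty();
    }
    DDLDesc desc = ((DDLTask) last).getWork().getDDLDesc();
    return type.isInstance(desc) ? Optional.of(type.cast(desc)) : Optional.empty();
  }
}

With such a helper, the instanceof chain at the top of CreateTableHook.postAnalyze would reduce to lastDdlDesc(rootTasks, CreateTableDesc.class).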