Use of org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc in the Apache Hive project.
Class TaskCompiler, method compile.
/**
 * Compiles the operator plan carried by {@code pCtx} into a task tree.
 *
 * <p>The method proceeds in this order:
 * <ol>
 *   <li>If the parse context already has a fetch task, only the list-sink output formatter is
 *       configured (when the fetch task has a table descriptor) and the method returns.</li>
 *   <li>The operator plan is optimized, except for analyze commands.</li>
 *   <li>For a plain query (not an analyze-rewrite) a {@code FetchTask} is created from the single
 *       load-file descriptor; otherwise one move task is created per load-table / load-file
 *       descriptor.</li>
 *   <li>The task tree is generated and post-processed ({@code generateTaskTree},
 *       key/value descriptors, input formats, {@code optimizeTaskPlan}).</li>
 *   <li>Stats tasks are attached for analyze-rewrite queries or column-stats auto-gathering.</li>
 *   <li>Execution mode is decided and a DDL task is appended for CTAS, materialized view creation
 *       or materialized view update.</li>
 *   <li>Global-limit bookkeeping is applied and {@code GenMapRedUtils.finalMapWorkChores} runs.</li>
 * </ol>
 *
 * @param pCtx parse context holding the operator plan, query properties and load descriptors
 * @param rootTasks root task list; handed to {@code generateTaskTree} and then iterated here
 * @param inputs read entities, forwarded to task generation and to the DDL work
 * @param outputs write entities, forwarded to task generation, table stats and the DDL work
 * @throws SemanticException if the storage handler cannot be loaded, the load work of a query is
 *         not exactly one load-file descriptor, more than one CTAS/MV load file is present, the
 *         root task for an analyze-rewrite cannot be determined, or a nested step fails
 */
@SuppressWarnings("nls")
public void compile(final ParseContext pCtx, final List<Task<?>> rootTasks, final Set<ReadEntity> inputs,
    final Set<WriteEntity> outputs) throws SemanticException {
  Context ctx = pCtx.getContext();
  GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
  List<Task<MoveWork>> mvTask = new ArrayList<>();
  List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
  List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
  boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
  int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
  // Used as the receiver of equalsIgnoreCase below so that a missing SerDe name compares as
  // "not Thrift" instead of throwing a NullPointerException.
  final String thriftSerDeName = ThriftJDBCBinarySerDe.class.getName();

  boolean directInsertCtas = false;
  if (pCtx.getCreateTable() != null && pCtx.getCreateTable().getStorageHandler() != null) {
    try {
      directInsertCtas =
          HiveUtils.getStorageHandler(conf, pCtx.getCreateTable().getStorageHandler()).directInsertCTAS();
    } catch (HiveException e) {
      // Keep the original exception as the cause so its stack trace is not lost.
      throw new SemanticException("Failed to load storage handler: " + e.getMessage(), e);
    }
  }

  if (pCtx.getFetchTask() != null) {
    if (pCtx.getFetchTask().getTblDesc() == null) {
      return;
    }
    pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
    TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
    // When the result SerDe is not ThriftJDBCBinarySerDe, choose the list-sink formatter here:
    // ThriftFormatter for HiveServer queries, otherwise DefaultFetchFormatter unless a formatter
    // is already configured.
    if (!thriftSerDeName.equalsIgnoreCase(resultTab.getSerdeClassName())) {
      if (SessionState.get().isHiveServerQuery()) {
        conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
      } else {
        String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
        if (formatterName == null || formatterName.isEmpty()) {
          conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
        }
      }
    }
    return;
  }

  if (pCtx.getQueryProperties().isAnalyzeCommand()) {
    LOG.debug("Skipping optimize operator plan for analyze command.");
  } else {
    optimizeOperatorPlan(pCtx);
  }

  /*
   * In case of a select, use a fetch task instead of a move task.
   * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
   * a column stats task later.
   */
  if (pCtx.getQueryProperties().isQuery() && !isCStats) {
    if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
      throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
    }

    LoadFileDesc loadFileDesc = loadFileWork.get(0);
    String cols = loadFileDesc.getColumns();
    String colTypes = loadFileDesc.getColumnTypes();

    TableDesc resultTab = pCtx.getFetchTableDesc();
    boolean shouldSetOutputFormatter = false;
    if (resultTab == null) {
      ResultFileFormat resFileFormat = conf.getResultFileFormat();
      String fileFormat;
      Class<? extends Deserializer> serdeClass;
      if (SessionState.get().getIsUsingThriftJDBCBinarySerDe() && resFileFormat == ResultFileFormat.SEQUENCEFILE) {
        fileFormat = resFileFormat.toString();
        serdeClass = ThriftJDBCBinarySerDe.class;
        shouldSetOutputFormatter = true;
      } else if (resFileFormat == ResultFileFormat.SEQUENCEFILE) {
        // file format is changed so that IF file sink provides list of files to fetch from (instead
        // of whole directory) list status is done on files (which is what HiveSequenceFileInputFormat does)
        fileFormat = "HiveSequenceFile";
        serdeClass = LazySimpleSerDe.class;
      } else {
        // All other cases we use the defined file format and LazySimpleSerde
        fileFormat = resFileFormat.toString();
        serdeClass = LazySimpleSerDe.class;
      }
      resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, serdeClass);
    } else {
      shouldSetOutputFormatter = thriftSerDeName.equalsIgnoreCase(
          resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB));
    }
    if (shouldSetOutputFormatter) {
      // Set the fetch formatter to be a no-op for the ListSinkOperator, since we will
      // read formatted thrift objects from the output SequenceFile written by Tasks.
      conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
    }

    FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
    boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
    fetch.setHiveServerQuery(isHiveServerQuery);
    fetch.setSource(pCtx.getFetchSource());
    fetch.setSink(pCtx.getFetchSink());
    boolean serializeInTasks = isHiveServerQuery
        && resultTab != null
        && thriftSerDeName.equalsIgnoreCase(resultTab.getSerdeClassName())
        && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS);
    fetch.setIsUsingThriftJDBCBinarySerDe(serializeInTasks);

    // The idea here is to keep an object reference both in FileSink and in FetchTask for list of files
    // to be fetched. During Job close file sink will populate the list and fetch task later will use it
    // to fetch the results.
    Collection<Operator<?>> tableScanOps = Lists.<Operator<?>>newArrayList(pCtx.getTopOps().values());
    Set<FileSinkOperator> fsOps = OperatorUtils.findOperators(tableScanOps, FileSinkOperator.class);
    if (fsOps != null && fsOps.size() == 1) {
      FileSinkOperator op = fsOps.iterator().next();
      Set<FileStatus> filesToFetch = new HashSet<>();
      op.getConf().setFilesToFetch(filesToFetch);
      fetch.setFilesToFetch(filesToFetch);
    }

    pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch));

    // For the FetchTask, the limit optimization requires we fetch all the rows
    // in memory and count how many rows we get. It's not practical if the
    // limit factor is too big
    int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
    if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
      LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + ". Doesn't qualify limit optimization.");
      globalLimitCtx.disableOpt();
    }
    if (outerQueryLimit == 0) {
      // Believe it or not, some tools do generate queries with limit 0 and than expect
      // query to run quickly. Lets meet their requirement.
      LOG.info("Limit 0. No query execution needed.");
      return;
    }
  } else if (!isCStats) {
    for (LoadTableDesc ltd : loadTableWork) {
      Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
      mvTask.add(tsk);
    }

    boolean oneLoadFileForCtas = true;
    for (LoadFileDesc lfd : loadFileWork) {
      if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
        if (!oneLoadFileForCtas) {
          // should not have more than 1 load file for CTAS.
          throw new SemanticException("One query is not expected to contain multiple CTAS loads statements");
        }
        setLoadFileLocation(pCtx, lfd);
        oneLoadFileForCtas = false;
      }
      mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false)));
    }
  }

  generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);

  // For each task, set the key descriptor for the reducer
  for (Task<?> rootTask : rootTasks) {
    GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
  }

  // Let setInputFormat adjust the input format of each root task.
  for (Task<?> rootTask : rootTasks) {
    setInputFormat(rootTask);
  }

  optimizeTaskPlan(rootTasks, pCtx, ctx);

  /*
   * If the query was the result of analyze table column compute statistics rewrite, create
   * a column stats task instead of a fetch task to persist stats to the metastore.
   * As per HIVE-15903, we will also collect table stats when user computes column stats.
   * That means, if isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()
   * We need to collect table stats
   * if isCStats, we need to include a basic stats task
   * else it is ColumnStatsAutoGather, which should have a move task with a stats task already.
   */
  if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
    // map from tablename to task (ColumnStatsTask which includes a BasicStatsTask)
    Map<String, StatsTask> map = new LinkedHashMap<>();
    if (isCStats) {
      if (rootTasks == null || rootTasks.size() != 1 || pCtx.getTopOps() == null || pCtx.getTopOps().size() != 1) {
        throw new SemanticException("Can not find correct root task!");
      }
      try {
        Task<?> root = rootTasks.iterator().next();
        StatsTask tsk = (StatsTask) genTableStats(pCtx, pCtx.getTopOps().values().iterator().next(), root, outputs);
        root.addDependentTask(tsk);
        map.put(extractTableFullName(tsk), tsk);
      } catch (HiveException e) {
        throw new SemanticException(e);
      }
      genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, map, outerQueryLimit, 0);
    } else {
      Set<Task<?>> leafTasks = new LinkedHashSet<>();
      getLeafTasks(rootTasks, leafTasks);
      List<Task<?>> nonStatsLeafTasks = new ArrayList<>();
      for (Task<?> tsk : leafTasks) {
        // map table name to the correct ColumnStatsTask
        if (tsk instanceof StatsTask) {
          map.put(extractTableFullName((StatsTask) tsk), (StatsTask) tsk);
        } else {
          nonStatsLeafTasks.add(tsk);
        }
      }
      // add cStatsTask as a dependent of all the nonStatsLeafTasks
      for (Task<?> tsk : nonStatsLeafTasks) {
        for (Task<?> cStatsTask : map.values()) {
          tsk.addDependentTask(cStatsTask);
        }
      }
      for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx.getColumnStatsAutoGatherContexts()) {
        if (!columnStatsAutoGatherContext.isInsertInto()) {
          genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(),
              columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, 0);
        } else {
          int numBitVector;
          try {
            numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
          } catch (Exception e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new SemanticException(e.getMessage(), e);
          }
          genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(),
              columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, numBitVector);
        }
      }
    }
  }

  decideExecMode(rootTasks, ctx, globalLimitCtx);

  // A CTAS gets a create-table DDL task unless the target is a materialization or the storage
  // handler reported a direct-insert CTAS (directInsertCtas, computed at the top of this method).
  if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization() && !directInsertCtas) {
    // generate a DDL task and make it a dependent task of the leaf
    CreateTableDesc crtTblDesc = pCtx.getCreateTable();
    crtTblDesc.validate(conf);
    Task<?> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc));
    patchUpAfterCTASorMaterializedView(rootTasks, inputs, outputs, crtTblTask,
        CollectionUtils.isEmpty(crtTblDesc.getPartColNames()));
  } else if (pCtx.getQueryProperties().isMaterializedView()) {
    // generate a DDL task and make it a dependent task of the leaf
    CreateMaterializedViewDesc viewDesc = pCtx.getCreateViewDesc();
    Task<?> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc));
    patchUpAfterCTASorMaterializedView(rootTasks, inputs, outputs, crtViewTask,
        CollectionUtils.isEmpty(viewDesc.getPartColNames()));
  } else if (pCtx.getMaterializedViewUpdateDesc() != null) {
    // If there is a materialized view update desc, we introduce it at the end
    // of the tree.
    MaterializedViewUpdateDesc materializedViewDesc = pCtx.getMaterializedViewUpdateDesc();
    DDLWork ddlWork = new DDLWork(inputs, outputs, materializedViewDesc);
    Set<Task<?>> leafTasks = new LinkedHashSet<>();
    getLeafTasks(rootTasks, leafTasks);
    Task<?> materializedViewTask = TaskFactory.get(ddlWork, conf);
    for (Task<?> task : leafTasks) {
      task.addDependentTask(materializedViewTask);
    }
  }

  if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
    LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
    pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
  }

  if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
    LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
    globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
  }

  Interner<TableDesc> interner = Interners.newStrongInterner();

  // Perform Final chores on generated Map works
  // 1. Intern the table descriptors
  // 2. Derive final explain attributes based on previous compilation.
  GenMapRedUtils.finalMapWorkChores(rootTasks, pCtx.getConf(), interner);
}
Use of org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc in the Apache Hive project.
Class SemanticAnalyzer, method genFileSinkPlan.
@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
QBMetaData qbm = qb.getMetaData();
Integer destType = qbm.getDestTypeForAlias(dest);
// destination table if any
Table destinationTable = null;
// true for full ACID table and MM table
boolean destTableIsTransactional;
// should the destination table be written to using ACID
boolean destTableIsFullAcid;
// should we add files directly to the final path
boolean isDirectInsert = false;
AcidUtils.Operation acidOperation = null;
boolean destTableIsTemporary = false;
boolean destTableIsMaterialization = false;
// destination partition if any
Partition destinationPartition = null;
// the intermediate destination directory
Path queryTmpdir = null;
String moveTaskId = null;
// the final destination directory
Path destinationPath = null;
TableDesc tableDescriptor = null;
StructObjectInspector specificRowObjectInspector = null;
int currentTableId = 0;
boolean isLocal = false;
SortBucketRSCtx rsCtx = new SortBucketRSCtx();
DynamicPartitionCtx dpCtx = null;
LoadTableDesc ltd = null;
ListBucketingCtx lbCtx = null;
Map<String, String> partSpec = null;
boolean isMmTable = false, isMmCreate = false, isNonNativeTable = false;
Long writeId = null;
HiveTxnManager txnMgr = getTxnMgr();
switch(destType.intValue()) {
case QBMetaData.DEST_TABLE:
{
destinationTable = qbm.getDestTableForAlias(dest);
destTableIsTransactional = AcidUtils.isTransactionalTable(destinationTable);
destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
destTableIsTemporary = destinationTable.isTemporary();
// Is the user trying to insert into a external tables
checkExternalTable(destinationTable);
partSpec = qbm.getPartSpecForAlias(dest);
destinationPath = destinationTable.getPath();
checkImmutableTable(qb, destinationTable, destinationPath, false);
// check for partition
List<FieldSchema> parts = destinationTable.getPartitionKeys();
if (parts != null && parts.size() > 0) {
// table is partitioned
if (partSpec == null || partSpec.size() == 0) {
// user did NOT specify partition
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
}
dpCtx = qbm.getDPCtx(dest);
if (dpCtx == null) {
destinationTable.validatePartColumnNames(partSpec, false);
dpCtx = new DynamicPartitionCtx(partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
qbm.setDPCtx(dest, dpCtx);
}
}
// Check for dynamic partitions.
dpCtx = checkDynPart(qb, qbm, destinationTable, partSpec, dest);
if (dpCtx != null && dpCtx.getSPPath() != null) {
destinationPath = new Path(destinationTable.getPath(), dpCtx.getSPPath());
}
isNonNativeTable = destinationTable.isNonNative();
isMmTable = AcidUtils.isInsertOnlyTable(destinationTable.getParameters());
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
// this table_desc does not contain the partitioning columns
tableDescriptor = Utilities.getTableDesc(destinationTable);
if (!isNonNativeTable) {
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
}
}
isDirectInsert = isDirectInsert(destTableIsFullAcid, acidOp);
acidOperation = acidOp;
queryTmpdir = getTmpDir(isNonNativeTable, isMmTable, isDirectInsert, destinationPath);
moveTaskId = getMoveTaskId();
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_TABLE specifying " + queryTmpdir + " from " + destinationPath);
}
if (dpCtx != null) {
// set the root of the temporary path where dynamic partition columns will populate
dpCtx.setRootPath(queryTmpdir);
}
// Add NOT NULL constraint check
input = genConstraintsPlan(dest, qb, input);
if (!qb.getIsQuery()) {
input = genConversionSelectOperator(dest, qb, input, destinationTable.getDeserializer(), dpCtx, parts);
}
if (destinationTable.isMaterializedView() && mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD) {
// Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view
// TODO: We only do this for a full rebuild
String sortColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_SORT_COLUMNS);
String distributeColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_DISTRIBUTE_COLUMNS);
if (sortColsStr != null || distributeColsStr != null) {
input = genMaterializedViewDataOrgPlan(destinationTable, sortColsStr, distributeColsStr, inputRR, input);
}
} else {
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, tableDescriptor, destinationTable, rsCtx);
}
idToTableNameMap.put(String.valueOf(destTableId), destinationTable.getTableName());
currentTableId = destTableId;
destTableId++;
// NOTE: specify Dynamic partitions in dest_tab for WriteEntity
if (!isNonNativeTable || destinationTable.getStorageHandler().commitInMoveTask()) {
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
checkAcidConstraints();
} else {
lbCtx = constructListBucketingCtx(destinationTable.getSkewedColNames(), destinationTable.getSkewedColValues(), destinationTable.getSkewedColValueLocationMaps(), destinationTable.isStoredAsSubDirectories());
}
try {
if (ctx.getExplainConfig() != null) {
// For explain plan, txn won't be opened and doesn't make sense to allocate write id
writeId = null;
} else {
if (isMmTable) {
writeId = txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
} else {
writeId = acidOp == Operation.NOT_ACID ? null : txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
}
}
} catch (LockException ex) {
throw new SemanticException("Failed to allocate write Id", ex);
}
boolean isReplace = !qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName());
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx, acidOp, isReplace, writeId);
if (writeId != null) {
ltd.setStmtId(txnMgr.getCurrentStmtId());
}
ltd.setMoveTaskId(moveTaskId);
// For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
// deltas and base and leave them up to the cleaner to clean up
boolean isInsertInto = qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName());
LoadFileType loadType;
if (isDirectInsert) {
loadType = LoadFileType.IGNORE;
} else if (!isInsertInto && !destTableIsTransactional) {
loadType = LoadFileType.REPLACE_ALL;
} else {
loadType = LoadFileType.KEEP_EXISTING;
}
ltd.setLoadFileType(loadType);
ltd.setInsertOverwrite(!isInsertInto);
ltd.setIsDirectInsert(isDirectInsert);
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
} else {
// This is a non-native table.
// We need to set stats as inaccurate.
setStatsForNonNativeTable(destinationTable.getDbName(), destinationTable.getTableName());
// true if it is insert overwrite.
boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", destinationTable.getDbName(), destinationTable.getTableName()));
createPreInsertDesc(destinationTable, overwrite);
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, partSpec == null ? ImmutableMap.of() : partSpec);
ltd.setInsertOverwrite(overwrite);
ltd.setLoadFileType(overwrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING);
}
if (destinationTable.isMaterializedView()) {
materializedViewUpdateDesc = new MaterializedViewUpdateDesc(destinationTable.getFullyQualifiedName(), false, false, true);
}
WriteEntity output = generateTableWriteEntity(dest, destinationTable, partSpec, ltd, dpCtx);
ctx.getLoadTableOutputMap().put(ltd, output);
break;
}
case QBMetaData.DEST_PARTITION:
{
destinationPartition = qbm.getDestPartitionForAlias(dest);
destinationTable = destinationPartition.getTable();
destTableIsTransactional = AcidUtils.isTransactionalTable(destinationTable);
destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
checkExternalTable(destinationTable);
Path partPath = destinationPartition.getDataLocation();
checkImmutableTable(qb, destinationTable, partPath, true);
// Previous behavior (HIVE-1707) used to replace the partition's dfs with the table's dfs.
// The changes in HIVE-19891 appears to no longer support that behavior.
destinationPath = partPath;
if (MetaStoreUtils.isArchived(destinationPartition.getTPartition())) {
try {
String conflictingArchive = ArchiveUtils.conflictingArchiveNameOrNull(db, destinationTable, destinationPartition.getSpec());
String message = String.format("Insert conflict with existing archive: %s", conflictingArchive);
throw new SemanticException(message);
} catch (SemanticException err) {
throw err;
} catch (HiveException err) {
throw new SemanticException(err);
}
}
isNonNativeTable = destinationTable.isNonNative();
isMmTable = AcidUtils.isInsertOnlyTable(destinationTable.getParameters());
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
// this table_desc does not contain the partitioning columns
tableDescriptor = Utilities.getTableDesc(destinationTable);
if (!isNonNativeTable) {
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
}
}
isDirectInsert = isDirectInsert(destTableIsFullAcid, acidOp);
acidOperation = acidOp;
queryTmpdir = getTmpDir(isNonNativeTable, isMmTable, isDirectInsert, destinationPath);
moveTaskId = getMoveTaskId();
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_PARTITION specifying " + queryTmpdir + " from " + destinationPath);
}
// Add NOT NULL constraint check
input = genConstraintsPlan(dest, qb, input);
if (!qb.getIsQuery()) {
input = genConversionSelectOperator(dest, qb, input, destinationTable.getDeserializer(), dpCtx, null);
}
if (destinationTable.isMaterializedView() && mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD) {
// Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view
// TODO: We only do this for a full rebuild
String sortColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_SORT_COLUMNS);
String distributeColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_DISTRIBUTE_COLUMNS);
if (sortColsStr != null || distributeColsStr != null) {
input = genMaterializedViewDataOrgPlan(destinationTable, sortColsStr, distributeColsStr, inputRR, input);
}
} else {
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, tableDescriptor, destinationTable, rsCtx);
}
idToTableNameMap.put(String.valueOf(destTableId), destinationTable.getTableName());
currentTableId = destTableId;
destTableId++;
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
checkAcidConstraints();
} else {
// Transactional tables can't be list bucketed or have skewed cols
lbCtx = constructListBucketingCtx(destinationPartition.getSkewedColNames(), destinationPartition.getSkewedColValues(), destinationPartition.getSkewedColValueLocationMaps(), destinationPartition.isStoredAsSubDirectories());
}
try {
if (ctx.getExplainConfig() != null) {
// For explain plan, txn won't be opened and doesn't make sense to allocate write id
writeId = null;
} else {
if (isMmTable) {
writeId = txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
} else {
writeId = (acidOp == Operation.NOT_ACID) ? null : txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
}
}
} catch (LockException ex) {
throw new SemanticException("Failed to allocate write Id", ex);
}
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, destinationPartition.getSpec(), acidOp, writeId);
if (writeId != null) {
ltd.setStmtId(txnMgr.getCurrentStmtId());
}
// For the current context for generating File Sink Operator, it is either INSERT INTO or INSERT OVERWRITE.
// So the next line works.
boolean isInsertInto = !qb.getParseInfo().isDestToOpTypeInsertOverwrite(dest);
// For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
// deltas and base and leave them up to the cleaner to clean up
LoadFileType loadType;
if (isDirectInsert) {
loadType = LoadFileType.IGNORE;
} else if (!isInsertInto && !destTableIsTransactional) {
loadType = LoadFileType.REPLACE_ALL;
} else {
loadType = LoadFileType.KEEP_EXISTING;
}
ltd.setLoadFileType(loadType);
ltd.setInsertOverwrite(!isInsertInto);
ltd.setIsDirectInsert(isDirectInsert);
ltd.setLbCtx(lbCtx);
ltd.setMoveTaskId(moveTaskId);
loadTableWork.add(ltd);
if (!outputs.add(new WriteEntity(destinationPartition, determineWriteType(ltd, dest)))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(destinationTable.getTableName() + "@" + destinationPartition.getName()));
}
break;
}
case QBMetaData.DEST_LOCAL_FILE:
isLocal = true;
// fall through
case QBMetaData.DEST_DFS_FILE:
{
destinationPath = getDestinationFilePath(qbm.getDestFileForAlias(dest), isMmTable);
// CTAS case: the file output format and serde are defined by the create
// table command rather than taking the default value
List<FieldSchema> fieldSchemas = null;
List<FieldSchema> partitionColumns = null;
List<String> partitionColumnNames = null;
List<FieldSchema> sortColumns = null;
List<String> sortColumnNames = null;
List<FieldSchema> distributeColumns = null;
List<String> distributeColumnNames = null;
List<ColumnInfo> fileSinkColInfos = null;
List<ColumnInfo> sortColInfos = null;
List<ColumnInfo> distributeColInfos = null;
TableName tableName = null;
Map<String, String> tblProps = null;
CreateTableDesc tblDesc = qb.getTableDesc();
CreateMaterializedViewDesc viewDesc = qb.getViewDesc();
if (tblDesc != null) {
fieldSchemas = new ArrayList<>();
partitionColumns = new ArrayList<>();
partitionColumnNames = tblDesc.getPartColNames();
fileSinkColInfos = new ArrayList<>();
destTableIsTemporary = tblDesc.isTemporary();
destTableIsMaterialization = tblDesc.isMaterialization();
tableName = TableName.fromString(tblDesc.getDbTableName(), null, tblDesc.getDatabaseName());
tblProps = tblDesc.getTblProps();
} else if (viewDesc != null) {
fieldSchemas = new ArrayList<>();
partitionColumns = new ArrayList<>();
partitionColumnNames = viewDesc.getPartColNames();
sortColumns = new ArrayList<>();
sortColumnNames = viewDesc.getSortColNames();
distributeColumns = new ArrayList<>();
distributeColumnNames = viewDesc.getDistributeColNames();
fileSinkColInfos = new ArrayList<>();
sortColInfos = new ArrayList<>();
distributeColInfos = new ArrayList<>();
destTableIsTemporary = false;
destTableIsMaterialization = false;
tableName = HiveTableName.ofNullableWithNoDefault(viewDesc.getViewName());
tblProps = viewDesc.getTblProps();
}
destTableIsTransactional = tblProps != null && AcidUtils.isTablePropertyTransactional(tblProps);
if (destTableIsTransactional) {
try {
if (ctx.getExplainConfig() != null) {
// For explain plan, txn won't be opened and doesn't make sense to allocate write id
writeId = 0L;
} else {
writeId = txnMgr.getTableWriteId(tableName.getDb(), tableName.getTable());
}
} catch (LockException ex) {
throw new SemanticException("Failed to allocate write Id", ex);
}
if (AcidUtils.isInsertOnlyTable(tblProps, true)) {
isMmTable = isMmCreate = true;
if (tblDesc != null) {
tblDesc.setInitialMmWriteId(writeId);
} else {
viewDesc.setInitialMmWriteId(writeId);
}
}
}
if (isLocal) {
assert !isMmTable;
// for local directory - we always write to map-red intermediate
// store and then copy to local fs
queryTmpdir = ctx.getMRTmpPath();
} else {
// no copy is required. we may want to revisit this policy in future
try {
Path qPath = FileUtils.makeQualified(destinationPath, conf);
queryTmpdir = isMmTable ? qPath : ctx.getTempDirForFinalJobPath(qPath);
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Setting query directory " + queryTmpdir + " from " + destinationPath + " (" + isMmTable + ")");
}
} catch (Exception e) {
throw new SemanticException("Error creating temporary folder on: " + destinationPath, e);
}
}
// Check for dynamic partitions.
final String cols, colTypes;
final boolean isPartitioned;
if (dpCtx != null) {
throw new SemanticException("Dynamic partition context has already been created, this should not happen");
}
if (!CollectionUtils.isEmpty(partitionColumnNames)) {
ColsAndTypes ct = deriveFileSinkColTypes(inputRR, partitionColumnNames, sortColumnNames, distributeColumnNames, fieldSchemas, partitionColumns, sortColumns, distributeColumns, fileSinkColInfos, sortColInfos, distributeColInfos);
cols = ct.cols;
colTypes = ct.colTypes;
dpCtx = new DynamicPartitionCtx(partitionColumnNames, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
qbm.setDPCtx(dest, dpCtx);
// set the root of the temporary path where dynamic partition columns will populate
dpCtx.setRootPath(queryTmpdir);
isPartitioned = true;
} else {
ColsAndTypes ct = deriveFileSinkColTypes(inputRR, sortColumnNames, distributeColumnNames, fieldSchemas, sortColumns, distributeColumns, sortColInfos, distributeColInfos);
cols = ct.cols;
colTypes = ct.colTypes;
isPartitioned = false;
}
// update the create table descriptor with the resulting schema.
if (tblDesc != null) {
tblDesc.setCols(new ArrayList<>(fieldSchemas));
tblDesc.setPartCols(new ArrayList<>(partitionColumns));
} else if (viewDesc != null) {
viewDesc.setSchema(new ArrayList<>(fieldSchemas));
viewDesc.setPartCols(new ArrayList<>(partitionColumns));
if (viewDesc.isOrganized()) {
viewDesc.setSortCols(new ArrayList<>(sortColumns));
viewDesc.setDistributeCols(new ArrayList<>(distributeColumns));
}
}
boolean isDestTempFile = true;
if (ctx.isMRTmpFileURI(destinationPath.toUri().toString()) == false && ctx.isResultCacheDir(destinationPath) == false) {
// not a temp dir and not a result cache dir
idToTableNameMap.put(String.valueOf(destTableId), destinationPath.toUri().toString());
currentTableId = destTableId;
destTableId++;
isDestTempFile = false;
}
if (tblDesc == null) {
if (viewDesc != null) {
tableDescriptor = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
} else if (qb.getIsQuery()) {
Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
String fileFormat = conf.getResultFileFormat().toString();
if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
serdeClass = ThriftJDBCBinarySerDe.class;
fileFormat = ResultFileFormat.SEQUENCEFILE.toString();
// Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
// write out formatted thrift objects to SequenceFile
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
} else if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) {
// If this output format is Llap, check to see if Arrow is requested
boolean useArrow = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_OUTPUT_FORMAT_ARROW);
serdeClass = useArrow ? ArrowColumnarBatchSerDe.class : LazyBinarySerDe2.class;
}
tableDescriptor = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, serdeClass);
} else {
tableDescriptor = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
}
} else {
tableDescriptor = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
}
// if available, set location in table desc properties
if (tblDesc != null && tblDesc.getLocation() != null && tableDescriptor != null && !tableDescriptor.getProperties().containsKey(hive_metastoreConstants.META_TABLE_LOCATION)) {
tableDescriptor.getProperties().setProperty(hive_metastoreConstants.META_TABLE_LOCATION, tblDesc.getLocation());
}
// We need a specific rowObjectInspector in this case
try {
specificRowObjectInspector = (StructObjectInspector) tableDescriptor.getDeserializer(conf).getObjectInspector();
} catch (Exception e) {
throw new SemanticException(e.getMessage(), e);
}
boolean isDfsDir = (destType == QBMetaData.DEST_DFS_FILE);
try {
destinationTable = tblDesc != null ? tblDesc.toTable(conf) : viewDesc != null ? viewDesc.toTable(conf) : null;
} catch (HiveException e) {
throw new SemanticException(e);
}
destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
// Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view
if (viewDesc != null && viewDesc.isOrganized()) {
input = genMaterializedViewDataOrgPlan(sortColInfos, distributeColInfos, inputRR, input);
}
moveTaskId = getMoveTaskId();
if (isPartitioned) {
// Create a SELECT that may reorder the columns if needed
RowResolver rowResolver = new RowResolver();
List<ExprNodeDesc> columnExprs = new ArrayList<>();
List<String> colNames = new ArrayList<>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
for (int i = 0; i < fileSinkColInfos.size(); i++) {
ColumnInfo ci = fileSinkColInfos.get(i);
ExprNodeDesc columnExpr = new ExprNodeColumnDesc(ci);
String name = getColumnInternalName(i);
rowResolver.put("", name, new ColumnInfo(name, columnExpr.getTypeInfo(), "", false));
columnExprs.add(columnExpr);
colNames.add(name);
colExprMap.put(name, columnExpr);
}
input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(columnExprs, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
input.setColumnExprMap(colExprMap);
// If this is a partitioned CTAS or MV statement, we are going to create a LoadTableDesc
// object. Although the table does not exist in metastore, we will swap the CreateTableTask
// and MoveTask resulting from this LoadTable so in this specific case, first we create
// the metastore table, then we move and commit the partitions. At least for the time being,
// this order needs to be enforced because metastore expects a table to exist before we can
// add any partitions to it.
isNonNativeTable = tableDescriptor.isNonNative();
if (!isNonNativeTable || destinationTable.getStorageHandler().commitInMoveTask()) {
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
checkAcidConstraints();
}
// isReplace = false in case concurrent operation is executed
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx, acidOp, false, writeId);
if (writeId != null) {
ltd.setStmtId(txnMgr.getCurrentStmtId());
}
ltd.setLoadFileType(LoadFileType.KEEP_EXISTING);
ltd.setInsertOverwrite(false);
loadTableWork.add(ltd);
} else {
// This is a non-native table.
// We need to set stats as inaccurate.
setStatsForNonNativeTable(tableDescriptor.getDbName(), tableDescriptor.getTableName());
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx.getPartSpec());
ltd.setInsertOverwrite(false);
ltd.setLoadFileType(LoadFileType.KEEP_EXISTING);
}
ltd.setMoveTaskId(moveTaskId);
ltd.setMdTable(destinationTable);
WriteEntity output = generateTableWriteEntity(dest, destinationTable, dpCtx.getPartSpec(), ltd, dpCtx);
ctx.getLoadTableOutputMap().put(ltd, output);
} else {
// Create LFD even for MM CTAS - it's a no-op move, but it still seems to be used for stats.
LoadFileDesc loadFileDesc = new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, destinationPath, isDfsDir, cols, colTypes, // there is a change here - prev version had 'transactional', one before 'acid'
destTableIsFullAcid ? Operation.INSERT : Operation.NOT_ACID, isMmCreate);
loadFileDesc.setMoveTaskId(moveTaskId);
loadFileWork.add(loadFileDesc);
try {
Path qualifiedPath = destinationPath.getFileSystem(conf).makeQualified(destinationPath);
if (!outputs.add(new WriteEntity(qualifiedPath, !isDfsDir, isDestTempFile))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(destinationPath.toUri().toString()));
}
} catch (IOException ex) {
throw new SemanticException("Error while getting the full qualified path for the given directory: " + ex.getMessage());
}
}
break;
}
default:
throw new SemanticException("Unknown destination type: " + destType);
}
inputRR = opParseCtx.get(input).getRowResolver();
List<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
if (updating(dest) || deleting(dest)) {
vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
} else {
try {
// If we already have a specific inspector (view or directory as a target) use that
// Otherwise use the table deserializer to get the inspector
StructObjectInspector rowObjectInspector = specificRowObjectInspector != null ? specificRowObjectInspector : (StructObjectInspector) destinationTable.getDeserializer().getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
for (StructField field : fields) {
vecCol.add(new ColumnInfo(field.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()), "", false));
}
} catch (Exception e) {
throw new SemanticException(e.getMessage(), e);
}
}
RowSchema fsRS = new RowSchema(vecCol);
// The output files of a FileSink can be merged if they are either not being written to a table
// or are being written to a table which is not bucketed
// and table the table is not sorted
boolean canBeMerged = (destinationTable == null || !((destinationTable.getNumBuckets() > 0) || (destinationTable.getSortCols() != null && destinationTable.getSortCols().size() > 0)));
// If this table is working with ACID semantics, turn off merging
canBeMerged &= !destTableIsFullAcid;
// Generate the partition columns from the parent input
if (destType == QBMetaData.DEST_TABLE || destType == QBMetaData.DEST_PARTITION) {
genPartnCols(dest, input, qb, tableDescriptor, destinationTable, rsCtx);
}
FileSinkDesc fileSinkDesc = createFileSinkDesc(dest, tableDescriptor, destinationPartition, // this was 1/4 acid
destinationPath, // this was 1/4 acid
currentTableId, // this was 1/4 acid
destTableIsFullAcid, // this was 1/4 acid
destTableIsTemporary, destTableIsMaterialization, queryTmpdir, rsCtx, dpCtx, lbCtx, fsRS, canBeMerged, destinationTable, writeId, isMmCreate, destType, qb, isDirectInsert, acidOperation, moveTaskId);
if (isMmCreate) {
// Add FSD so that the LoadTask compilation could fix up its path to avoid the move.
if (tableDesc != null) {
tableDesc.setWriter(fileSinkDesc);
} else {
createVwDesc.setWriter(fileSinkDesc);
}
}
if (fileSinkDesc.getInsertOverwrite()) {
if (ltd != null) {
ltd.setInsertOverwrite(true);
}
}
if (null != tableDescriptor && useBatchingSerializer(tableDescriptor.getSerdeClassName())) {
fileSinkDesc.setIsUsingBatchingSerDe(true);
} else {
fileSinkDesc.setIsUsingBatchingSerDe(false);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
// in case of a merge statement.
if (!isDirectInsert || acidOperation == AcidUtils.Operation.INSERT) {
handleLineage(ltd, output);
}
setWriteIdForSurrogateKeys(ltd, input);
LOG.debug("Created FileSink Plan for clause: {}dest_path: {} row schema: {}", dest, destinationPath, inputRR);
FileSinkOperator fso = (FileSinkOperator) output;
fso.getConf().setTable(destinationTable);
// and it is an insert overwrite or insert into table
if (conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && destinationTable != null && (!destinationTable.isNonNative() || destinationTable.getStorageHandler().commitInMoveTask()) && !destTableIsTemporary && !destTableIsMaterialization && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
if (destType == QBMetaData.DEST_TABLE) {
genAutoColumnStatsGatheringPipeline(destinationTable, partSpec, input, qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName()), false);
} else if (destType == QBMetaData.DEST_PARTITION) {
genAutoColumnStatsGatheringPipeline(destinationTable, destinationPartition.getSpec(), input, qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName()), false);
} else if (destType == QBMetaData.DEST_LOCAL_FILE || destType == QBMetaData.DEST_DFS_FILE) {
// CTAS or CMV statement
genAutoColumnStatsGatheringPipeline(destinationTable, null, input, false, true);
}
}
return output;
}
use of org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc in project hive by apache.
the class AlterMaterializedViewRewriteAnalyzer method analyzeInternal.
/**
 * Analyzes an ALTER MATERIALIZED VIEW statement that enables or disables rewriting.
 *
 * <p>The first child of {@code root} names the materialized view and the second child is the
 * token that selects the new flag value. A DDL task carrying the
 * {@link AlterMaterializedViewRewriteDesc} is registered as a root task, with a second DDL task
 * carrying a {@link MaterializedViewUpdateDesc} chained behind it.
 *
 * @param root AST node of the statement
 * @throws SemanticException if the flag token is neither "enabled" nor "disabled", or if
 *         rewriting is being enabled while one of the source tables is not transactional
 */
@Override
public void analyzeInternal(ASTNode root) throws SemanticException {
    final TableName mvName = getQualifiedTableName((ASTNode) root.getChild(0));

    // Decode the requested flag value from the second child token.
    final int flagToken = root.getChild(1).getType();
    final boolean rewriteEnable;
    if (flagToken == HiveParser.TOK_REWRITE_ENABLED) {
        rewriteEnable = true;
    } else if (flagToken == HiveParser.TOK_REWRITE_DISABLED) {
        rewriteEnable = false;
    } else {
        throw new SemanticException("Invalid alter materialized view expression");
    }

    // The name can be fully qualified or rely on the default database.
    final Table mvTable = getTable(mvName, true);

    // Enabling the rewrite is only allowed when every source table is transactional.
    if (rewriteEnable) {
        for (SourceTable source : mvTable.getMVMetadata().getSourceTables()) {
            if (AcidUtils.isTransactionalTable(source.getTable())) {
                continue;
            }
            throw new SemanticException("Automatic rewriting for materialized view cannot be enabled if the " + "materialized view uses non-transactional tables");
        }
    }

    final AlterMaterializedViewRewriteDesc alterDesc = new AlterMaterializedViewRewriteDesc(mvName.getNotEmptyDbTable(), rewriteEnable);
    if (AcidUtils.isTransactionalTable(mvTable)) {
        // Remember the descriptor when the materialized view itself is transactional.
        ddlDescWithWriteId = alterDesc;
    }

    inputs.add(new ReadEntity(mvTable));
    outputs.add(new WriteEntity(mvTable, WriteEntity.WriteType.DDL_EXCLUSIVE));

    // Task that applies the ALTER itself.
    final DDLWork alterWork = new DDLWork(getInputs(), getOutputs(), alterDesc);
    final Task<?> alterTask = TaskFactory.get(alterWork);

    // Follow-up task that updates the rewrite flag; it runs as a dependant of the ALTER task.
    final MaterializedViewUpdateDesc updateDesc = new MaterializedViewUpdateDesc(mvName.getNotEmptyDbTable(), rewriteEnable, !rewriteEnable, false);
    final DDLWork updateWork = new DDLWork(getInputs(), getOutputs(), updateDesc);
    alterTask.addDependentTask(TaskFactory.get(updateWork, conf));

    // Only the ALTER task is a root; the update task hangs off it.
    rootTasks.add(alterTask);
}
use of org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc in project hive by apache.
the class TaskCompiler method patchUpAfterCTASorMaterializedView.
/**
 * Re-wires the task graph after a CTAS / materialized view plan has been compiled so that the
 * create task is placed correctly relative to the current leaf tasks, and appends a
 * {@link MaterializedViewUpdateDesc} task when the create task is a DDL task for creating a
 * materialized view or altering its rewrite flag.
 *
 * @param rootTasks root tasks of the plan; used to locate the leaf tasks
 * @param inputs read entities handed to the appended DDL work
 * @param outputs write entities; DFS_DIR and LOCAL_DIR entries are removed in place
 * @param createTask the task that creates the table / materialized view
 * @param createTaskAfterMoveTask when {@code false}, the create task is interleaved before
 *        move tasks instead of being appended after them
 */
private void patchUpAfterCTASorMaterializedView(List<Task<?>> rootTasks, Set<ReadEntity> inputs, Set<WriteEntity> outputs, Task<?> createTask, boolean createTaskAfterMoveTask) {
    // Drop the mapredWork output directories from outputs for CTAS;
    // the DDLWork at the tail of the chain will have the output.
    for (Iterator<WriteEntity> it = outputs.iterator(); it.hasNext();) {
        switch (it.next().getType()) {
        case DFS_DIR:
        case LOCAL_DIR:
            it.remove();
            break;
        default:
            break;
        }
    }

    // Collect every leaf task; the create task is hooked up to each of them below.
    Set<Task<?>> leafTasks = new LinkedHashSet<>();
    getLeafTasks(rootTasks, leafTasks);
    assert !leafTasks.isEmpty();

    final boolean createBeforeMove = !createTaskAfterMoveTask;
    // Tracks the task that is supposed to end up last in the chain.
    Task<?> lastTask = createTask;
    for (Task<?> leaf : leafTasks) {
        if (leaf instanceof StatsTask) {
            // A StatsTask requires the table to already exist, so the create task is
            // placed between the stats task and its parents.
            for (Task<?> statsParent : leaf.getParentTasks()) {
                if (createBeforeMove && statsParent instanceof MoveTask) {
                    // For partitioned CTAS the table must be created before the move task:
                    // partitions are created in the metastore, which needs the table
                    // to be registered first.
                    interleaveTask(statsParent, createTask);
                } else {
                    statsParent.addDependentTask(createTask);
                }
            }
            for (Task<?> createParent : createTask.getParentTasks()) {
                createParent.removeDependentTask(leaf);
            }
            createTask.addDependentTask(leaf);
            lastTask = leaf;
            continue;
        }
        if (createBeforeMove && leaf instanceof MoveTask) {
            // Same partitioned-CTAS ordering constraint as above: create, then move.
            interleaveTask(leaf, createTask);
            lastTask = leaf;
            continue;
        }
        leaf.addDependentTask(createTask);
    }

    // Add a task to insert / delete the materialized view from the registry if needed.
    if (!(createTask instanceof DDLTask)) {
        return;
    }
    DDLDesc desc = ((DDLTask) createTask).getWork().getDDLDesc();
    final String mvName;
    final boolean retrieveAndInclude;
    final boolean disableRewrite;
    if (desc instanceof CreateMaterializedViewDesc) {
        CreateMaterializedViewDesc createMvDesc = (CreateMaterializedViewDesc) desc;
        mvName = createMvDesc.getViewName();
        retrieveAndInclude = createMvDesc.isRewriteEnabled();
        disableRewrite = false;
    } else if (desc instanceof AlterMaterializedViewRewriteDesc) {
        AlterMaterializedViewRewriteDesc alterMvDesc = (AlterMaterializedViewRewriteDesc) desc;
        mvName = alterMvDesc.getMaterializedViewName();
        retrieveAndInclude = alterMvDesc.isRewriteEnable();
        disableRewrite = !alterMvDesc.isRewriteEnable();
    } else {
        // Any other DDL descriptor needs no registry update.
        return;
    }
    MaterializedViewUpdateDesc updateDesc = new MaterializedViewUpdateDesc(mvName, retrieveAndInclude, disableRewrite, false);
    DDLWork updateWork = new DDLWork(inputs, outputs, updateDesc);
    lastTask.addDependentTask(TaskFactory.get(updateWork, conf));
}
use of org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc in project hive by apache.
the class SemanticAnalyzer method analyzeInternal.
/**
 * Runs semantic analysis for a statement and, unless it returns early, produces the
 * compiled task plan.
 *
 * <p>The numbered comments in the body mark the phases: resolve the parse tree, generate the
 * operator tree (re-generating it from a rewritten AST when table masking changes the AST),
 * deduce the result schema, build the {@link ParseContext}, handle view creation, run the
 * table-access / logical-optimization / column-access steps, compile the plan, attach
 * insert-commit hook tasks, and record accessed columns. Query results cache lookups are
 * interleaved with these phases.
 *
 * <p>The method returns early, without compiling a plan, when the parse tree cannot be
 * resolved, when a results cache lookup succeeds before plan generation, when a view
 * descriptor is present while explain-analyze is in the RUNNING state, or when
 * {@code forViewCreation} is set.
 *
 * @param ast root of the syntax tree to analyze
 * @param pcf supplier of the {@link PlannerContext}; invoked once up front and once more if
 *        the AST is rewritten for masking/filtering
 * @throws SemanticException declared by this method; presumably raised by the analysis
 *         helpers it calls
 */
@SuppressWarnings("checkstyle:methodlength")
void analyzeInternal(ASTNode ast, Supplier<PlannerContext> pcf) throws SemanticException {
LOG.info("Starting Semantic Analysis");
// 1. Generate Resolved Parse tree from syntax tree
boolean needsTransform = needsTransform();
// change the location of position alias process here
processPositionAlias(ast);
cacheTableHelper.populateCache(ctx.getParsedTables(), conf, getTxnMgr());
PlannerContext plannerCtx = pcf.get();
if (!genResolvedParseTree(ast, plannerCtx)) {
return;
}
// Optionally strip ORDER BY from each aliased subquery of the query block.
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
for (String alias : qb.getSubqAliases()) {
removeOBInSubQuery(qb.getSubqForAlias(alias));
}
}
// For queries that insert into tables or are CTAS, LLAP IO (or only its encode path) is
// switched off in conf when the skip-format setting is "all" (or "encode").
final String llapIOETLSkipFormat = HiveConf.getVar(conf, ConfVars.LLAP_IO_ETL_SKIP_FORMAT);
if (qb.getParseInfo().hasInsertTables() || qb.isCTAS()) {
if (llapIOETLSkipFormat.equalsIgnoreCase("encode")) {
conf.setBoolean(ConfVars.LLAP_IO_ENCODE_ENABLED.varname, false);
LOG.info("Disabling LLAP IO encode as ETL query is detected");
} else if (llapIOETLSkipFormat.equalsIgnoreCase("all")) {
conf.setBoolean(ConfVars.LLAP_IO_ENABLED.varname, false);
LOG.info("Disabling LLAP IO as ETL query is detected");
}
}
// Check query results cache.
// If no masking/filtering required, then we can check the cache now, before
// generating the operator tree and going through CBO.
// Otherwise we have to wait until after the masking/filtering step.
boolean isCacheEnabled = isResultsCacheEnabled();
QueryResultsCache.LookupInfo lookupInfo = null;
if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) {
lookupInfo = createLookupInfoForQuery(ast);
if (checkResultsCache(lookupInfo, false)) {
return;
}
}
ASTNode astForMasking;
if (isCBOExecuted() && needsTransform && (qb.isCTAS() || forViewCreation || qb.isMaterializedView() || qb.isMultiDestQuery())) {
// If we use CBO and we may apply masking/filtering policies, we create a copy of the ast.
// The reason is that the generation of the operator tree may modify the initial ast,
// but if we need to parse for a second time, we would like to parse the unmodified ast.
astForMasking = (ASTNode) ParseDriver.adaptor.dupTree(ast);
} else {
astForMasking = ast;
}
// 2. Gen OP Tree from resolved Parse Tree
sinkOp = genOPTree(ast, plannerCtx);
boolean usesMasking = false;
if (!forViewCreation && ast.getToken().getType() != HiveParser.TOK_CREATE_MATERIALIZED_VIEW && (tableMask.isEnabled() && analyzeRewrite == null)) {
// Here we rewrite the * and also the masking table
ParseResult rewrittenResult = rewriteASTWithMaskAndFilter(tableMask, astForMasking, ctx.getTokenRewriteStream(), ctx, db);
ASTNode rewrittenAST = rewrittenResult.getTree();
// A different tree instance means the rewrite changed the AST: reset the analyzer state
// and redo parse-tree resolution and operator-tree generation on the rewritten AST.
if (astForMasking != rewrittenAST) {
usesMasking = true;
plannerCtx = pcf.get();
ctx.setSkipTableMasking(true);
ctx.setTokenRewriteStream(rewrittenResult.getTokenRewriteStream());
init(true);
// change the location of position alias process here
processPositionAlias(rewrittenAST);
genResolvedParseTree(rewrittenAST, plannerCtx);
if (this instanceof CalcitePlanner) {
((CalcitePlanner) this).resetCalciteConfiguration();
}
sinkOp = genOPTree(rewrittenAST, plannerCtx);
}
}
// validate if this sink operation is allowed for non-native tables
if (sinkOp instanceof FileSinkOperator) {
FileSinkOperator fileSinkOperator = (FileSinkOperator) sinkOp;
// The storage handler is absent when the sink has no conf, no table, or no handler.
Optional<HiveStorageHandler> handler = Optional.ofNullable(fileSinkOperator).map(FileSinkOperator::getConf).map(FileSinkDesc::getTable).map(Table::getStorageHandler);
if (handler.isPresent()) {
handler.get().validateSinkDesc(fileSinkOperator.getConf());
}
}
// Deferred cache lookup for queries that needed the transform step but ended up
// not using masking.
// TODO: Enable caching for queries with masking/filtering
if (isCacheEnabled && needsTransform && !usesMasking && queryTypeCanUseCache()) {
lookupInfo = createLookupInfoForQuery(ast);
if (checkResultsCache(lookupInfo, false)) {
return;
}
}
// 3. Deduce Resultset Schema
if ((forViewCreation || createVwDesc != null) && !this.ctx.isCboSucceeded()) {
resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
} else {
// Otherwise derive the result-set schema from the sink's row resolver, but only if no
// schema has been set yet. NOTE(review): the original comment here was truncated
// ("... succeeds."); presumably the schema may already be set when CBO succeeds - confirm.
if (resultSchema == null) {
resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
}
}
// 4. Generate Parse Context for Optimizer & Physical compiler
copyInfoToQueryProperties(queryProperties);
ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc, queryProperties, viewProjectToTableSchema);
// Set the semijoin hints in parse context
pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
// Set the mapjoin hint if it needs to be disabled.
pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
if (forViewCreation) {
// Generate lineage info if LineageLogger hook is configured.
// Add the transformation that computes the lineage information.
Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
List<Transform> transformations = new ArrayList<Transform>();
transformations.add(new HiveOpConverterPostProc());
transformations.add(new Generator(postExecHooks));
for (Transform t : transformations) {
pCtx = t.transform(pCtx);
}
}
}
// 5. Take care of view creation
if (createVwDesc != null) {
if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
return;
}
// The view definition is saved here only when CBO did not succeed.
if (!ctx.isCboSucceeded()) {
saveViewDefinition();
}
// validate the create view statement at this point, the createVwDesc gets
// all the information for semanticcheck
validateCreateView();
createVwDesc.setTablesUsed(pCtx.getTablesUsed());
}
// Steps 6-8 are skipped only when analyzing for view creation and a ColumnAccessInfo
// is already available; per the original note, that means it was created in step 2.
if (!forViewCreation || getColumnAccessInfo() == null) {
// 6. Generate table access stats if required
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
}
AuxOpTreeSignature.linkAuxSignatures(pCtx);
// 7. Perform Logical optimization
if (LOG.isDebugEnabled()) {
LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
Optimizer optm = new Optimizer();
optm.setPctx(pCtx);
optm.initialize(conf);
pCtx = optm.optimize();
if (pCtx.getColumnAccessInfo() != null) {
// set ColumnAccessInfo for view column authorization
setColumnAccessInfo(pCtx.getColumnAccessInfo());
}
if (LOG.isDebugEnabled()) {
LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
// 8. Generate column access stats if required - wait until column pruning
// takes place during optimization
boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
// view column access info is carried by this.getColumnAccessInfo().
setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
}
}
// Analysis for view creation stops here; no physical plan is compiled.
if (forViewCreation) {
return;
}
// 9. Optimize Physical op tree & Translate to target execution engine (MR,
// TEZ..)
compilePlan(pCtx);
// find all Acid FileSinkOperatorS
// (the post-processor instance is not kept; only its constructor is invoked)
new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId());
// 10. Attach CTAS/Insert-Commit-hooks for Storage Handlers
// For every root DDL task carrying a PreInsertTableDesc, an insert-commit-hook DDL task is
// added as a dependant of the first TezTask found among the root tasks (if there is one).
final Optional<TezTask> optionalTezTask = rootTasks.stream().filter(task -> task instanceof TezTask).map(task -> (TezTask) task).findFirst();
if (optionalTezTask.isPresent()) {
final TezTask tezTask = optionalTezTask.get();
rootTasks.stream().filter(task -> task.getWork() instanceof DDLWork).map(task -> (DDLWork) task.getWork()).filter(ddlWork -> ddlWork.getDDLDesc() instanceof PreInsertTableDesc).map(ddlWork -> (PreInsertTableDesc) ddlWork.getDDLDesc()).map(desc -> new InsertCommitHookDesc(desc.getTable(), desc.isOverwrite())).forEach(insertCommitHookDesc -> tezTask.addDependentTask(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), insertCommitHookDesc), conf)));
}
LOG.info("Completed plan generation");
// 11. put accessed columns to readEntity
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
}
// Only reached with a non-null lookupInfo when an earlier cache lookup was attempted
// and missed.
if (isCacheEnabled && lookupInfo != null) {
if (queryCanBeCached()) {
// Second cache lookup, made after plan generation (second argument is true here,
// false for the earlier lookups). NOTE(review): the original comment was truncated
// and only said this "requires SemanticAnalyzer state to be reset" - confirm intent.
if (checkResultsCache(lookupInfo, true)) {
LOG.info("Cached result found on second lookup");
} else {
QueryResultsCache.QueryInfo queryInfo = createCacheQueryInfoForQuery(lookupInfo);
// Specify that the results of this query can be cached.
setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo));
}
}
}
}
Aggregations