Use of org.apache.hadoop.hive.common.TableName in project hive by apache.
The class RenamePartitionHandler, method handle:
@Override
public List<Task<?>> handle(Context context) throws SemanticException {
  AlterPartitionMessage msg = deserializer.getAlterPartitionMessage(context.dmd.getPayload());
  String actualDbName = context.isDbNameEmpty() ? msg.getDB() : context.dbName;
  String actualTblName = msg.getTable();
  Map<String, String> newPartSpec = new LinkedHashMap<>();
  Map<String, String> oldPartSpec = new LinkedHashMap<>();
  TableName tableName = TableName.fromString(actualTblName, null, actualDbName);
  Table tableObj;
  ReplicationSpec replicationSpec = context.eventOnlyReplicationSpec();
  try {
    Iterator<String> beforeIterator = msg.getPtnObjBefore().getValuesIterator();
    Iterator<String> afterIterator = msg.getPtnObjAfter().getValuesIterator();
    tableObj = msg.getTableObj();
    for (FieldSchema fs : tableObj.getPartitionKeys()) {
      oldPartSpec.put(fs.getName(), beforeIterator.next());
      newPartSpec.put(fs.getName(), afterIterator.next());
    }
    AlterTableRenamePartitionDesc renamePtnDesc =
        new AlterTableRenamePartitionDesc(tableName, oldPartSpec, newPartSpec, replicationSpec, null);
    renamePtnDesc.setWriteId(msg.getWriteId());
    Task<DDLWork> renamePtnTask = TaskFactory.get(
        new DDLWork(readEntitySet, writeEntitySet, renamePtnDesc, true,
            context.getDumpDirectory(), context.getMetricCollector()), context.hiveConf);
    context.log.debug("Added rename ptn task : {}:{}->{}", renamePtnTask.getId(), oldPartSpec, newPartSpec);
    updatedMetadata.set(context.dmd.getEventTo().toString(), actualDbName, actualTblName, newPartSpec);
    return ReplUtils.addChildTask(renamePtnTask);
  } catch (Exception e) {
    throw (e instanceof SemanticException)
        ? (SemanticException) e
        : new SemanticException("Error reading message members", e);
  }
}
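For orientation, here is a minimal, hypothetical sketch of the name-resolution step used above: TableName.fromString applies a default database (and optional catalog) when the table string is unqualified. The database and table values below are illustrative, not taken from the Hive sources.
import org.apache.hadoop.hive.common.TableName;

public class TableNameResolutionSketch {
  public static void main(String[] args) {
    // Hypothetical stand-ins for msg.getTable() and the resolved target database.
    String actualTblName = "sales_part";
    String actualDbName = "replica_db";
    // The name carries no database prefix, so the default database is applied;
    // the catalog argument is left null, as in the handler above.
    TableName tableName = TableName.fromString(actualTblName, null, actualDbName);
    System.out.println(tableName.getDb() + "." + tableName.getTable());
  }
}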
Use of org.apache.hadoop.hive.common.TableName in project hive by apache.
The class AddDefaultConstraintHandler, method handle:
@Override
public List<Task<?>> handle(Context context) throws SemanticException {
  AddDefaultConstraintMessage msg = deserializer.getAddDefaultConstraintMessage(context.dmd.getPayload());
  List<SQLDefaultConstraint> dcs;
  try {
    dcs = msg.getDefaultConstraints();
  } catch (Exception e) {
    if (!(e instanceof SemanticException)) {
      throw new SemanticException("Error reading message members", e);
    } else {
      throw (SemanticException) e;
    }
  }
  List<Task<?>> tasks = new ArrayList<Task<?>>();
  if (dcs.isEmpty()) {
    return tasks;
  }
  final String actualDbName = context.isDbNameEmpty() ? dcs.get(0).getTable_db() : context.dbName;
  final String actualTblName = dcs.get(0).getTable_name();
  final TableName tName = TableName.fromString(actualTblName, null, actualDbName);
  for (SQLDefaultConstraint dc : dcs) {
    dc.setTable_db(actualDbName);
    dc.setTable_name(actualTblName);
  }
  Constraints constraints = new Constraints(null, null, null, null, dcs, null);
  AlterTableAddConstraintDesc addConstraintsDesc =
      new AlterTableAddConstraintDesc(tName, context.eventOnlyReplicationSpec(), constraints);
  Task<DDLWork> addConstraintsTask = TaskFactory.get(
      new DDLWork(readEntitySet, writeEntitySet, addConstraintsDesc, true,
          context.getDumpDirectory(), context.getMetricCollector()), context.hiveConf);
  tasks.add(addConstraintsTask);
context.log.debug("Added add constrains task : {}:{}", addConstraintsTask.getId(), actualTblName);
  updatedMetadata.set(context.dmd.getEventTo().toString(), actualDbName, actualTblName, null);
  return Collections.singletonList(addConstraintsTask);
}
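A hedged sketch of the retargeting pattern above: when replaying the event into a (possibly renamed) target database, each constraint's db/table fields are rewritten before the qualified TableName is built. All values below are illustrative; SQLDefaultConstraint is the metastore Thrift class already used in the handler.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint;

public class ConstraintRetargetSketch {
  public static void main(String[] args) {
    // Hypothetical constraint as it might arrive in a replication event payload.
    SQLDefaultConstraint dc = new SQLDefaultConstraint();
    dc.setTable_db("source_db");
    dc.setTable_name("orders");
    List<SQLDefaultConstraint> dcs = Arrays.asList(dc);

    // The target warehouse uses a different database name.
    String actualDbName = "target_db";
    String actualTblName = dcs.get(0).getTable_name();
    TableName tName = TableName.fromString(actualTblName, null, actualDbName);

    // Point every constraint at the target database, mirroring the handler above.
    for (SQLDefaultConstraint c : dcs) {
      c.setTable_db(actualDbName);
      c.setTable_name(actualTblName);
    }
    System.out.println("Constraints retargeted to " + tName);
  }
}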
Use of org.apache.hadoop.hive.common.TableName in project hive by apache.
The class StatsUpdaterThread, method runOneWorkerIteration:
@VisibleForTesting
public boolean runOneWorkerIteration(SessionState ss, String user, HiveConf conf, boolean doWait)
    throws InterruptedException {
  AnalyzeWork req;
  if (doWait) {
    req = workQueue.take();
  } else {
    req = workQueue.poll();
    if (req == null) {
      return false;
    }
  }
  String cmd = null;
  try {
    if (doWait) {
      // This is the first call, open the session
      SessionState.start(ss);
    }
    TableName tb = req.tableName;
    if (MetaStoreUtils.isDbBeingFailedOver(rs.getDatabase(tb.getCat(), tb.getDb()))) {
LOG.info("Skipping table: {} as it belongs to database which is being failed over." + tb.getTable());
      return true;
    }
    cmd = req.buildCommand();
    LOG.debug("Running {} based on {}", cmd, req);
    DriverUtils.runOnDriver(conf, user, ss, cmd);
  } catch (Exception e) {
    LOG.error("Analyze command failed: " + cmd, e);
    try {
      ss.close();
    } catch (IOException e1) {
      LOG.warn("Failed to close a bad session", e1);
    } finally {
      SessionState.detachSession();
    }
  } finally {
    markAnalyzeDone(req);
  }
  return true;
}
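The doWait branch above boils down to a blocking take() on the first call and a non-blocking poll() afterwards. A minimal, generic sketch of that queue pattern follows; the AnalyzeWork type is replaced by a plain String purely for illustration.
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

public class WorkerIterationSketch {
  private final BlockingQueue<String> workQueue = new LinkedBlockingQueue<>();

  // Returns false only when doWait is false and no work is queued, mirroring runOneWorkerIteration.
  public boolean runOne(boolean doWait) throws InterruptedException {
    String req;
    if (doWait) {
      // First call: block until work arrives.
      req = workQueue.take();
    } else {
      // Subsequent calls: drain without blocking.
      req = workQueue.poll();
      if (req == null) {
        return false;
      }
    }
    System.out.println("Processing: " + req);
    return true;
  }

  public static void main(String[] args) throws InterruptedException {
    WorkerIterationSketch sketch = new WorkerIterationSketch();
    sketch.workQueue.put("analyze table t compute statistics");
    sketch.runOne(true);   // takes the queued item
    sketch.runOne(false);  // queue empty, returns false immediately
  }
}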
Use of org.apache.hadoop.hive.common.TableName in project hive by apache.
The class StatsUpdaterThread, method runOneIteration:
@VisibleForTesting
public boolean runOneIteration() {
  List<TableName> fullTableNames;
  try {
    fullTableNames = getTablesToCheck();
  } catch (Throwable t) {
    LOG.error("Stats updater thread cannot retrieve tables and will now exit", t);
    stopWorkers();
    throw new RuntimeException(t);
  }
  LOG.debug("Processing {}", fullTableNames);
  boolean hadUpdates = false;
  Map<String, Boolean> dbsToSkip = new HashMap<>();
  for (TableName fullTableName : fullTableNames) {
    try {
      List<AnalyzeWork> commands = processOneTable(fullTableName, dbsToSkip);
      hadUpdates = hadUpdates || commands != null;
      if (commands != null) {
        for (AnalyzeWork req : commands) {
          markAnalyzeInProgress(req);
          workQueue.put(req);
        }
      }
    } catch (Exception e) {
      LOG.error("Failed to process " + fullTableName + "; skipping for now", e);
    }
  }
  return hadUpdates;
}
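Note how runOneIteration treats a failure to list tables as fatal but isolates per-table failures so one bad table does not abort the sweep. Below is a small sketch of that error-isolation loop, with hypothetical stand-ins for getTablesToCheck and processOneTable.
import java.util.Arrays;
import java.util.List;

public class PerTableIsolationSketch {
  public static void main(String[] args) {
    // Hypothetical stand-in for getTablesToCheck(); a failure here would be rethrown and stop the thread.
    List<String> fullTableNames = Arrays.asList("db1.t1", "db1.bad_table", "db2.t2");
    boolean hadUpdates = false;
    for (String fullTableName : fullTableNames) {
      try {
        // Hypothetical stand-in for processOneTable(fullTableName, dbsToSkip).
        if (fullTableName.contains("bad")) {
          throw new IllegalStateException("stats unavailable");
        }
        hadUpdates = true;
      } catch (Exception e) {
        // A failing table is logged and skipped; the loop keeps going.
        System.err.println("Failed to process " + fullTableName + "; skipping for now: " + e);
      }
    }
    System.out.println("hadUpdates = " + hadUpdates);
  }
}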
Use of org.apache.hadoop.hive.common.TableName in project hive by apache.
The class SharedWorkOptimizer, method sharedWorkOptimization:
/**
 * Class wrapping shared work optimizer.
 * This implementation enables merging of TS with different schemas by taking the union of the
 * {@link TableScanDesc#getNeededColumns()} and {@link TableScanDesc#getNeededColumnIDs()}
 * from both {@link TableScanOperator}s.
 */
public boolean sharedWorkOptimization(ParseContext pctx, SharedWorkOptimizerCache optimizerCache,
    List<TableScanOperator> tableScans, Mode mode, boolean schemaMerge) throws SemanticException {
  // Boolean to keep track of whether this method actually merged any TS operators
  boolean mergedExecuted = false;
  Set<TableScanOperator> retainedScans = new LinkedHashSet<>();
  Set<Operator<?>> removedOps = new HashSet<>();
  for (TableScanOperator discardableTsOp : tableScans) {
    TableName tableName1 = discardableTsOp.getTableName();
    if (discardableTsOp.getNumChild() == 0) {
      removedOps.add(discardableTsOp);
    }
    if (removedOps.contains(discardableTsOp)) {
      LOG.debug("Skip {} as it has already been removed", discardableTsOp);
      continue;
    }
    for (TableScanOperator retainableTsOp : retainedScans) {
      if (optimizerCache.getWorkGroup(discardableTsOp).contains(retainableTsOp)) {
LOG.trace("No need check further {} and {} are in the same group", discardableTsOp, retainableTsOp);
continue;
}
      if (removedOps.contains(retainableTsOp)) {
        LOG.debug("Skip {} as it has already been removed", retainableTsOp);
        continue;
      }
      LOG.debug("Can we merge {} into {} to remove a scan on {}?", discardableTsOp, retainableTsOp, tableName1);
      SharedResult sr;
      // If Iceberg metadata tables are in the query, disable this optimisation.
      String metaTable1 = retainableTsOp.getConf().getTableMetadata().getMetaTable();
      String metaTable2 = discardableTsOp.getConf().getTableMetadata().getMetaTable();
      if (metaTable1 != null || metaTable2 != null) {
        LOG.info("Skip the schema merging as the query contains Iceberg metadata table.");
        continue;
      }
      if (!schemaMerge && !compatibleSchema(retainableTsOp, discardableTsOp)) {
        LOG.debug("incompatible schemas: {} {} for {} (and merge disabled)", discardableTsOp, retainableTsOp, tableName1);
        continue;
      }
      if (mode == Mode.RemoveSemijoin) {
        // We check if the two table scan operators can actually be merged modulo SJs.
        // Hence, two conditions should be met:
        // (i) the TS ops should be mergeable excluding any kind of DPP, and
        // (ii) the DPP branches (excluding SJs) should be the same
        boolean mergeable = areMergeable(pctx, retainableTsOp, discardableTsOp);
        if (!mergeable) {
          // Skip
          LOG.debug("{} and {} cannot be merged", retainableTsOp, discardableTsOp);
          continue;
        }
        boolean validMerge = areMergeableExcludeSemijoinsExtendedCheck(pctx, optimizerCache, retainableTsOp, discardableTsOp);
        if (!validMerge) {
          // Skip
          LOG.debug("{} and {} do not meet preconditions", retainableTsOp, discardableTsOp);
          continue;
        }
        // If tests pass, we create the shared work optimizer additional information
        // about the part of the tree that can be merged. We need to regenerate the
        // cache because semijoin operators have been removed
        sr = extractSharedOptimizationInfoForRoot(pctx, optimizerCache, retainableTsOp, discardableTsOp, true, true);
      } else if (mode == Mode.DPPUnion) {
        boolean mergeable = areMergeable(pctx, retainableTsOp, discardableTsOp);
        if (!mergeable) {
          LOG.debug("{} and {} cannot be merged", retainableTsOp, discardableTsOp);
          continue;
        }
        boolean validMerge = areMergeableDppUnion(pctx, optimizerCache, retainableTsOp, discardableTsOp);
        if (!validMerge) {
          // Skip
          LOG.debug("{} and {} do not meet preconditions", retainableTsOp, discardableTsOp);
          continue;
        }
        // If tests pass, we create the shared work optimizer additional information
        // about the part of the tree that can be merged. We need to regenerate the
        // cache because semijoin operators have been removed
        sr = extractSharedOptimizationInfoForRoot(pctx, optimizerCache, retainableTsOp, discardableTsOp, false, false);
        if (!validPreConditions(pctx, optimizerCache, sr)) {
          continue;
        }
      } else if (mode == Mode.SubtreeMerge) {
        // First we quickly check if the two table scan operators can actually be merged
        if (!areMergeable(pctx, retainableTsOp, discardableTsOp)
            || !areMergeableExtendedCheck(pctx, optimizerCache, retainableTsOp, discardableTsOp)) {
          // Skip
          LOG.debug("{} and {} cannot be merged", retainableTsOp, discardableTsOp);
          continue;
        }
        // Secondly, we extract information about the part of the tree that can be merged
        // as well as some structural information (memory consumption) that needs to be
        // used to determine whether the merge can happen
        sr = extractSharedOptimizationInfoForRoot(pctx, optimizerCache, retainableTsOp, discardableTsOp, true, true);
        // Check that the plan meets the preconditions before merging
        if (!validPreConditions(pctx, optimizerCache, sr)) {
          // Skip
          LOG.debug("{} and {} do not meet preconditions", retainableTsOp, discardableTsOp);
          continue;
        }
      } else {
        throw new RuntimeException("unhandled mode: " + mode);
      }
      // We can merge
      mergedExecuted = true;
      if (mode != Mode.DPPUnion && sr.retainableOps.size() > 1) {
        // More than TS operator
        Operator<?> lastRetainableOp = sr.retainableOps.get(sr.retainableOps.size() - 1);
        Operator<?> lastDiscardableOp = sr.discardableOps.get(sr.discardableOps.size() - 1);
        if (lastDiscardableOp.getNumChild() != 0) {
          List<Operator<? extends OperatorDesc>> allChildren = Lists.newArrayList(lastDiscardableOp.getChildOperators());
          for (Operator<? extends OperatorDesc> op : allChildren) {
            lastDiscardableOp.getChildOperators().remove(op);
            op.replaceParent(lastDiscardableOp, lastRetainableOp);
            lastRetainableOp.getChildOperators().add(op);
          }
        }
        LOG.debug("Merging subtree starting at {} into subtree starting at {}", discardableTsOp, retainableTsOp);
      } else {
        if (sr.discardableOps.size() > 1) {
          throw new RuntimeException("we can't discard more in this path");
        }
        DecomposedTs modelR = new DecomposedTs(retainableTsOp);
        DecomposedTs modelD = new DecomposedTs(discardableTsOp);
        // Push filter on top of children for retainable
        pushFilterToTopOfTableScan(optimizerCache, modelR);
        if (mode == Mode.RemoveSemijoin || mode == Mode.SubtreeMerge) {
          // For RemoveSemiJoin; this will clear the discardable's semijoin filters
          replaceSemijoinExpressions(discardableTsOp, modelR.getSemiJoinFilter());
        }
        modelD.replaceTabAlias(discardableTsOp.getConf().getAlias(), retainableTsOp.getConf().getAlias());
        // Push filter on top of children for discardable
        pushFilterToTopOfTableScan(optimizerCache, modelD);
        // Obtain filter for shared TS operator
        ExprNodeDesc exprNode = null;
        if (modelR.normalFilterExpr != null && modelD.normalFilterExpr != null) {
          exprNode = disjunction(modelR.normalFilterExpr, modelD.normalFilterExpr);
        }
        List<ExprNodeDesc> semiJoinExpr = null;
        if (mode == Mode.DPPUnion) {
          assert modelR.semijoinExprNodes != null;
          assert modelD.semijoinExprNodes != null;
          ExprNodeDesc disjunction = disjunction(conjunction(modelR.semijoinExprNodes), conjunction(modelD.semijoinExprNodes));
          semiJoinExpr = disjunction == null ? null : Lists.newArrayList(disjunction);
        } else {
          semiJoinExpr = modelR.semijoinExprNodes;
        }
        // Create expression node that will be used for the retainable table scan
        exprNode = conjunction(semiJoinExpr, exprNode);
        // Replace filter
        retainableTsOp.getConf().setFilterExpr((ExprNodeGenericFuncDesc) exprNode);
        // Replace table scan operator
        adoptChildren(retainableTsOp, discardableTsOp);
        LOG.debug("Merging {} into {}", discardableTsOp, retainableTsOp);
      }
      // Remove the input operators of the subtree we are going to eliminate
      if (mode != Mode.DPPUnion) {
        for (Operator<?> op : sr.discardableInputOps) {
          OperatorUtils.removeOperator(op);
          optimizerCache.removeOp(op);
          removedOps.add(op);
          // Remove DPP predicates
          if (op instanceof ReduceSinkOperator) {
            SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
            if (sjbi != null && !sr.discardableOps.contains(sjbi.getTsOp()) && !sr.discardableInputOps.contains(sjbi.getTsOp())) {
              GenTezUtils.removeSemiJoinOperator(pctx, (ReduceSinkOperator) op, sjbi.getTsOp());
              optimizerCache.tableScanToDPPSource.remove(sjbi.getTsOp(), op);
            }
          } else if (op instanceof AppMasterEventOperator) {
            DynamicPruningEventDesc dped = (DynamicPruningEventDesc) op.getConf();
            if (!sr.discardableOps.contains(dped.getTableScan()) && !sr.discardableInputOps.contains(dped.getTableScan())) {
              GenTezUtils.removeSemiJoinOperator(pctx, (AppMasterEventOperator) op, dped.getTableScan());
              optimizerCache.tableScanToDPPSource.remove(dped.getTableScan(), op);
            }
          }
          LOG.debug("Input operator removed: {}", op);
        }
      }
      // Filtered-out rows from one branch might be needed by another branch sharing a TSop
      if (retainableTsOp.getProbeDecodeContext() != null) {
        LOG.debug("Removing probeDecodeCntx for merged TS op {}", retainableTsOp);
        retainableTsOp.setProbeDecodeContext(null);
        retainableTsOp.getConf().setProbeDecodeContext(null);
      }
      // Then we merge the operators of the works we are going to merge
      mergeSchema(discardableTsOp, retainableTsOp);
      if (mode == Mode.DPPUnion) {
        // reparent all
        Collection<Operator<?>> discardableDPP = optimizerCache.tableScanToDPPSource.get(discardableTsOp);
        for (Operator<?> op : discardableDPP) {
          if (op instanceof ReduceSinkOperator) {
            SemiJoinBranchInfo sjInfo = pctx.getRsToSemiJoinBranchInfo().get(op);
            sjInfo.setTableScan(retainableTsOp);
          } else if (op.getConf() instanceof DynamicPruningEventDesc) {
            DynamicPruningEventDesc dynamicPruningEventDesc = (DynamicPruningEventDesc) op.getConf();
            dynamicPruningEventDesc.setTableScan(retainableTsOp);
          }
        }
        optimizerCache.tableScanToDPPSource.get(retainableTsOp).addAll(discardableDPP);
        discardableDPP.clear();
      }
      optimizerCache.removeOpAndCombineWork(discardableTsOp, retainableTsOp);
      removedOps.add(discardableTsOp);
      // Finally we remove the expression from the tree
      for (Operator<?> op : sr.discardableOps) {
        OperatorUtils.removeOperator(op);
        optimizerCache.removeOp(op);
        removedOps.add(op);
        LOG.debug("Operator removed: {}", op);
      }
      if (pctx.getConf().getBoolVar(ConfVars.HIVE_SHARED_WORK_DOWNSTREAM_MERGE)) {
        if (sr.discardableOps.size() == 1) {
          downStreamMerge(retainableTsOp, optimizerCache, pctx);
        }
      }
      break;
    }
    if (removedOps.contains(discardableTsOp)) {
      // This operator has been removed, remove it from the list of existing operators
      // FIXME: there is no point of this
      retainedScans.remove(discardableTsOp);
    } else {
      // This operator has not been removed, include it in the list of existing operators
      retainedScans.add(discardableTsOp);
    }
  }
  // Remove unused table scan operators
  pctx.getTopOps().entrySet().removeIf((Entry<String, TableScanOperator> e) -> e.getValue().getNumChild() == 0);
  tableScans.removeAll(removedOps);
  return mergedExecuted;
}
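One idea worth isolating from the merge above: when two scans of the same table collapse into one, the surviving scan must admit the rows either branch needed, so the per-branch filters are combined with OR (and, in DPPUnion mode, the semijoin filters of each branch are AND-ed together before that OR). Below is a hedged sketch of that combination over plain java.util.function predicates, deliberately not the Hive ExprNodeDesc API.
import java.util.function.Predicate;

public class MergedFilterSketch {
  public static void main(String[] args) {
    // Hypothetical filters from the retainable and discardable scans of the same table.
    Predicate<Integer> retainableFilter = v -> v > 100;      // branch 1 keeps v > 100
    Predicate<Integer> discardableFilter = v -> v % 2 == 0;  // branch 2 keeps even values

    // The merged scan keeps a row if either original branch would have kept it.
    Predicate<Integer> mergedFilter = retainableFilter.or(discardableFilter);

    System.out.println(mergedFilter.test(101)); // true: passes branch 1
    System.out.println(mergedFilter.test(42));  // true: passes branch 2
    System.out.println(mergedFilter.test(33));  // false: rejected by both branches
  }
}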