Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
The class TestDbTxnManager, method addTableInput:
private void addTableInput() {
  ReadEntity re = new ReadEntity(newTable(false));
  readEntities.add(re);
}
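For context, a minimal self-contained sketch of the same pattern: a ReadEntity wraps a metadata object (here a Table) and is collected into a set that the transaction manager later inspects when acquiring locks. The Table constructor arguments and the helper below are illustrative stand-ins for the test's newTable(false), not taken from the Hive test itself.

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.metadata.Table;

public class ReadEntityCollectionSketch {

  private final Set<ReadEntity> readEntities = new HashSet<ReadEntity>();

  // Hypothetical stand-in for the test's newTable(false) helper.
  private Table newTable(String dbName, String tableName) {
    return new Table(dbName, tableName);
  }

  public void addTableInput(String dbName, String tableName) {
    // Wrap the table as a read entity and record it as a query input.
    ReadEntity re = new ReadEntity(newTable(dbName, tableName));
    readEntities.add(re);
  }
}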
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
The class IndexWhereProcessor, method process:
/**
 * Process a node of the operator tree. This matches on the rule in IndexWhereTaskDispatcher
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
  TableScanOperator operator = (TableScanOperator) nd;
  List<Node> opChildren = operator.getChildren();
  TableScanDesc operatorDesc = operator.getConf();
  if (operatorDesc == null || !tsToIndices.containsKey(operator)) {
    return null;
  }
  List<Index> indexes = tsToIndices.get(operator);
  ExprNodeDesc predicate = operatorDesc.getFilterExpr();
  IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
  ParseContext pctx = context.getParseContext();
  LOG.info("Processing predicate for index optimization");
  if (predicate == null) {
    LOG.info("null predicate pushed down");
    return null;
  }
  LOG.info(predicate.getExprString());
  // check if we have indexes on all partitions in this table scan
  Set<Partition> queryPartitions;
  try {
    queryPartitions = IndexUtils.checkPartitionsCoveredByIndex(operator, pctx, indexes);
    if (queryPartitions == null) {
      // partitions not covered
      return null;
    }
  } catch (HiveException e) {
    LOG.error("Fatal Error: problem accessing metastore", e);
    throw new SemanticException(e);
  }
  // we can only process MapReduce tasks to check input size
  if (!context.getCurrentTask().isMapRedTask()) {
    return null;
  }
  MapRedTask currentTask = (MapRedTask) context.getCurrentTask();
  // get potential reentrant index queries from each index
  Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
  // make sure we have an index on the table being scanned
  TableDesc tblDesc = operator.getTableDesc();
  Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
  for (Index indexOnTable : indexes) {
    if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
      List<Index> newType = new ArrayList<Index>();
      newType.add(indexOnTable);
      indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
    } else {
      indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
    }
  }
  // choose the index type with the most indexes of the same type on the table
  // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
  List<Index> bestIndexes = indexesByType.values().iterator().next();
  for (List<Index> indexTypes : indexesByType.values()) {
    if (bestIndexes.size() < indexTypes.size()) {
      bestIndexes = indexTypes;
    }
  }
  // rewrite index queries for the chosen index type
  HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
  tmpQueryContext.setQueryPartitions(queryPartitions);
  rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
  List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();
  if (indexTasks != null && indexTasks.size() > 0) {
    queryContexts.put(bestIndexes.get(0), tmpQueryContext);
  }
  // choose an index rewrite to use
  if (queryContexts.size() > 0) {
    // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
    Index chosenIndex = queryContexts.keySet().iterator().next();
    // modify the parse context to use indexing
    // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
    HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);
    // prepare the map reduce job to use indexing
    MapWork work = currentTask.getWork().getMapWork();
    work.setInputformat(queryContext.getIndexInputFormat());
    work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
    // modify inputs based on index query
    Set<ReadEntity> inputs = pctx.getSemanticInputs();
    inputs.addAll(queryContext.getAdditionalSemanticInputs());
    List<Task<?>> chosenRewrite = queryContext.getQueryTasks();
    // add dependencies so index query runs first
    insertIndexQuery(pctx, context, chosenRewrite);
  }
  return null;
}
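The index-selection step above simply groups the available indexes by handler class and keeps the largest group. Here is a standalone sketch of that selection logic, using plain strings in place of Index objects (the names below are made up):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

public class LargestGroupSketch {

  // Group items by a key and return the largest group, mirroring the
  // indexesByType / bestIndexes loop in the processor above.
  static <K, V> List<V> largestGroup(List<V> items, Function<V, K> keyOf) {
    Map<K, List<V>> byKey = new HashMap<K, List<V>>();
    for (V item : items) {
      byKey.computeIfAbsent(keyOf.apply(item), k -> new ArrayList<V>()).add(item);
    }
    List<V> best = null;
    for (List<V> group : byKey.values()) {
      if (best == null || group.size() > best.size()) {
        best = group;
      }
    }
    return best; // null only when items is empty
  }

  public static void main(String[] args) {
    // Pretend these are index handler class names; "compact" wins with two entries.
    List<String> handlers = Arrays.asList("compact", "bitmap", "compact");
    System.out.println(largestGroup(handlers, h -> h));
  }
}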
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
The class DDLSemanticAnalyzer, method analyzeDropIndex:
private void analyzeDropIndex(ASTNode ast) throws SemanticException {
  String indexName = unescapeIdentifier(ast.getChild(0).getText());
  String tableName = getUnescapedName((ASTNode) ast.getChild(1));
  boolean ifExists = (ast.getFirstChildWithType(HiveParser.TOK_IFEXISTS) != null);
  // we want to signal an error if the index doesn't exist and we're
  // configured not to ignore this
  boolean throwException = !ifExists && !HiveConf.getBoolVar(conf, ConfVars.DROPIGNORESNONEXISTENT);
  Table tbl = getTable(tableName, false);
  if (throwException && tbl == null) {
    throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
  }
  try {
    Index idx = db.getIndex(tableName, indexName);
  } catch (HiveException e) {
    if (!(e.getCause() instanceof NoSuchObjectException)) {
      throw new SemanticException(ErrorMsg.CANNOT_DROP_INDEX.getMsg("dropping index"), e);
    }
    if (throwException) {
      throw new SemanticException(ErrorMsg.INVALID_INDEX.getMsg(indexName));
    }
  }
  if (tbl != null) {
    inputs.add(new ReadEntity(tbl));
  }
  DropIndexDesc dropIdxDesc = new DropIndexDesc(indexName, tableName, throwException);
  rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropIdxDesc), conf));
}
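The only subtlety here is when a missing index should surface as an error: only when the statement carried no IF EXISTS clause and hive.exec.drop.ignorenonexistent is false. A tiny sketch of that decision follows; the helper class and method names are hypothetical, not a Hive API.

public class DropIndexErrorPolicy {

  // Mirrors the throwException flag computed in analyzeDropIndex above.
  static boolean shouldThrowOnMissing(boolean ifExistsClause, boolean ignoreNonExistent) {
    return !ifExistsClause && !ignoreNonExistent;
  }

  public static void main(String[] args) {
    System.out.println(shouldThrowOnMissing(false, false)); // true: error surfaces
    System.out.println(shouldThrowOnMissing(true, false));  // false: DROP INDEX IF EXISTS is silent
    System.out.println(shouldThrowOnMissing(false, true));  // false: config says ignore missing objects
  }
}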
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
The class ExportSemanticAnalyzer, method prepareExport:
// FIXME : Move to EximUtil - it's okay for this to stay here for a little while more till we finalize the statics
public static void prepareExport(ASTNode ast, URI toURI, TableSpec ts, ReplicationSpec replicationSpec,
    Hive db, HiveConf conf, Context ctx, List<Task<? extends Serializable>> rootTasks,
    HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs, Logger LOG) throws SemanticException {
  if (ts != null) {
    try {
      EximUtil.validateTable(ts.tableHandle);
      if (replicationSpec.isInReplicationScope() && ts.tableHandle.isTemporary()) {
        // no replication for temporary tables either
        ts = null;
      } else if (ts.tableHandle.isView()) {
        replicationSpec.setIsMetadataOnly(true);
      }
    } catch (SemanticException e) {
      // ignore the validation error if this is a replication export; rethrow otherwise
      if (replicationSpec.isInReplicationScope()) {
        // null out ts so we can't use it
        ts = null;
      } else {
        throw e;
      }
    }
  }
  try {
    FileSystem fs = FileSystem.get(toURI, conf);
    Path toPath = new Path(toURI.getScheme(), toURI.getAuthority(), toURI.getPath());
    try {
      FileStatus tgt = fs.getFileStatus(toPath);
      // target exists
      if (!tgt.isDir()) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not a directory : " + toURI));
      } else {
        FileStatus[] files = fs.listStatus(toPath, FileUtils.HIDDEN_FILES_PATH_FILTER);
        if (files != null && files.length != 0) {
          throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not an empty directory : " + toURI));
        }
      }
    } catch (FileNotFoundException e) {
      // target does not exist yet; nothing to validate
    }
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e);
  }
  PartitionIterable partitions = null;
  try {
    replicationSpec.setCurrentReplicationState(String.valueOf(db.getMSC().getCurrentNotificationEventId().getEventId()));
    if ((ts != null) && (ts.tableHandle.isPartitioned())) {
      if (ts.specType == TableSpec.SpecType.TABLE_ONLY) {
        // TABLE-ONLY: fetch partitions for a regular export, skip them for metadata-only
        if (replicationSpec.isMetadataOnly()) {
          partitions = null;
        } else {
          partitions = new PartitionIterable(db, ts.tableHandle, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
        }
      } else {
        // PARTITIONS specified - partitions inside tableSpec
        partitions = new PartitionIterable(ts.partitions);
      }
    } else {
      // Either the table isn't partitioned, ts was nulled out above during a replication export,
      // or this is a no-op replication export, so we can skip looking at partitions.
      partitions = null;
    }
    Path path = new Path(ctx.getLocalTmpPath(), EximUtil.METADATA_NAME);
    EximUtil.createExportDump(FileSystem.getLocal(conf), path, (ts != null ? ts.tableHandle : null), partitions, replicationSpec);
    Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, path, new Path(toURI), conf);
    rootTasks.add(rTask);
    LOG.debug("_metadata file written into " + path.toString() + " and then copied to " + toURI.toString());
  } catch (Exception e) {
    throw new SemanticException(ErrorMsg.IO_ERROR.getMsg("Exception while writing out the local file"), e);
  }
  if (!(replicationSpec.isMetadataOnly() || (ts == null))) {
    Path parentPath = new Path(toURI);
    if (ts.tableHandle.isPartitioned()) {
      for (Partition partition : partitions) {
        Path fromPath = partition.getDataLocation();
        Path toPartPath = new Path(parentPath, partition.getName());
        Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toPartPath, conf);
        rootTasks.add(rTask);
        inputs.add(new ReadEntity(partition));
      }
    } else {
      Path fromPath = ts.tableHandle.getDataLocation();
      Path toDataPath = new Path(parentPath, EximUtil.DATA_PATH_NAME);
      Task<? extends Serializable> rTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toDataPath, conf);
      rootTasks.add(rTask);
      inputs.add(new ReadEntity(ts.tableHandle));
    }
    outputs.add(toWriteEntity(parentPath, conf));
  }
}
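Before any metadata or data is copied, the method validates the export target: a missing path is fine, while an existing path must be an empty directory. Below is a self-contained sketch of just that check against the Hadoop FileSystem API; the class name and error messages are illustrative, and the hidden-file filter used by Hive is omitted for brevity.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExportTargetCheck {

  // Throws if the target exists and is either a file or a non-empty directory.
  static void validateTarget(FileSystem fs, Path toPath) throws IOException {
    try {
      FileStatus tgt = fs.getFileStatus(toPath);
      if (!tgt.isDirectory()) {
        throw new IOException("Target is not a directory: " + toPath);
      }
      FileStatus[] files = fs.listStatus(toPath);
      if (files != null && files.length != 0) {
        throw new IOException("Target is not an empty directory: " + toPath);
      }
    } catch (FileNotFoundException e) {
      // Target does not exist yet; the export will create it.
    }
  }
}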
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
The class DDLTask, method renamePartition:
/**
 * Rename a partition in a table.
 *
 * @param db
 *          the database containing the partition to rename
 * @param renamePartitionDesc
 *          descriptor mapping the old partition spec to the new one
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 */
private int renamePartition(Hive db, RenamePartitionDesc renamePartitionDesc) throws HiveException {
  String tableName = renamePartitionDesc.getTableName();
  LinkedHashMap<String, String> oldPartSpec = renamePartitionDesc.getOldPartSpec();
  if (!allowOperationInReplicationScope(db, tableName, oldPartSpec, renamePartitionDesc.getReplicationSpec())) {
    // the table/partition is missing, or the existing table is newer than our update: skip the rename
    if (LOG.isDebugEnabled()) {
      LOG.debug("DDLTask: Rename Partition is skipped as table {} / partition {} is newer than update",
          tableName, FileUtils.makePartName(new ArrayList<>(oldPartSpec.keySet()), new ArrayList<>(oldPartSpec.values())));
    }
    return 0;
  }
  String[] names = Utilities.getDbTableName(tableName);
  if (Utils.isBootstrapDumpInProgress(db, names[0])) {
    LOG.error("DDLTask: Rename Partition not allowed as bootstrap dump in progress");
    throw new HiveException("Rename Partition: Not allowed as bootstrap dump in progress");
  }
  Table tbl = db.getTable(tableName);
  Partition oldPart = db.getPartition(tbl, oldPartSpec, false);
  if (oldPart == null) {
    String partName = FileUtils.makePartName(new ArrayList<String>(oldPartSpec.keySet()), new ArrayList<String>(oldPartSpec.values()));
    throw new HiveException("Rename partition: source partition [" + partName + "] does not exist.");
  }
  Partition part = db.getPartition(tbl, oldPartSpec, false);
  part.setValues(renamePartitionDesc.getNewPartSpec());
  db.renamePartition(tbl, oldPartSpec, part);
  Partition newPart = db.getPartition(tbl, renamePartitionDesc.getNewPartSpec(), false);
  work.getInputs().add(new ReadEntity(oldPart));
  // We've already obtained a lock on the table, don't lock the partition too
  addIfAbsentByName(new WriteEntity(newPart, WriteEntity.WriteType.DDL_NO_LOCK));
  return 0;
}
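The partition names used in the skip message and the "does not exist" error come from FileUtils.makePartName, which joins a partition spec into the familiar key=value/key=value path form. A small runnable sketch of that call (the spec keys and values below are made up):

import java.util.ArrayList;
import java.util.LinkedHashMap;

import org.apache.hadoop.hive.common.FileUtils;

public class PartNameSketch {

  public static void main(String[] args) {
    LinkedHashMap<String, String> oldPartSpec = new LinkedHashMap<String, String>();
    oldPartSpec.put("ds", "2024-01-01");
    oldPartSpec.put("hr", "07");
    // Prints "ds=2024-01-01/hr=07" (values are escaped where needed).
    System.out.println(FileUtils.makePartName(
        new ArrayList<String>(oldPartSpec.keySet()),
        new ArrayList<String>(oldPartSpec.values())));
  }
}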