Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class IndexWhereProcessor, method process.
/**
 * Process a node of the operator tree. This matches on the rule in IndexWhereTaskDispatcher.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
  TableScanOperator operator = (TableScanOperator) nd;
  List<Node> opChildren = operator.getChildren();
  TableScanDesc operatorDesc = operator.getConf();
  if (operatorDesc == null || !tsToIndices.containsKey(operator)) {
    return null;
  }
  List<Index> indexes = tsToIndices.get(operator);
  ExprNodeDesc predicate = operatorDesc.getFilterExpr();
  IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
  ParseContext pctx = context.getParseContext();
  LOG.info("Processing predicate for index optimization");
  if (predicate == null) {
    LOG.info("null predicate pushed down");
    return null;
  }
  LOG.info(predicate.getExprString());
  // check if we have indexes on all partitions in this table scan
  Set<Partition> queryPartitions;
  try {
    queryPartitions = IndexUtils.checkPartitionsCoveredByIndex(operator, pctx, indexes);
    if (queryPartitions == null) {
      // partitions not covered
      return null;
    }
  } catch (HiveException e) {
    LOG.error("Fatal Error: problem accessing metastore", e);
    throw new SemanticException(e);
  }
  // we can only process MapReduce tasks to check input size
  if (!context.getCurrentTask().isMapRedTask()) {
    return null;
  }
  MapRedTask currentTask = (MapRedTask) context.getCurrentTask();
  // get potential reentrant index queries from each index
  Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
  // make sure we have an index on the table being scanned
  TableDesc tblDesc = operator.getTableDesc();
  // group the indexes by their handler class
  Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
  for (Index indexOnTable : indexes) {
    if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
      List<Index> newType = new ArrayList<Index>();
      newType.add(indexOnTable);
      indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
    } else {
      indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
    }
  }
  // choose the index type with the most indexes of the same type on the table
  // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
  List<Index> bestIndexes = indexesByType.values().iterator().next();
  for (List<Index> indexTypes : indexesByType.values()) {
    if (bestIndexes.size() < indexTypes.size()) {
      bestIndexes = indexTypes;
    }
  }
  // rewrite index queries for the chosen index type
  HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
  tmpQueryContext.setQueryPartitions(queryPartitions);
  rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
  List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();
  if (indexTasks != null && indexTasks.size() > 0) {
    queryContexts.put(bestIndexes.get(0), tmpQueryContext);
  }
  // choose an index rewrite to use
  if (queryContexts.size() > 0) {
    // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
    Index chosenIndex = queryContexts.keySet().iterator().next();
    // modify the parse context to use indexing
    // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
    HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);
    // prepare the map reduce job to use indexing
    MapWork work = currentTask.getWork().getMapWork();
    work.setInputformat(queryContext.getIndexInputFormat());
    work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
    // modify inputs based on index query
    Set<ReadEntity> inputs = pctx.getSemanticInputs();
    inputs.addAll(queryContext.getAdditionalSemanticInputs());
    List<Task<?>> chosenRewrite = queryContext.getQueryTasks();
    // add dependencies so index query runs first
    insertIndexQuery(pctx, context, chosenRewrite);
  }
  return null;
}
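This processor only fires when the operator-graph walker matches its rule. As orientation, here is a minimal sketch of how such a NodeProcessor is typically wired up with Hive's rule dispatcher and graph walker (classes from org.apache.hadoop.hive.ql.lib), in the style of IndexWhereTaskDispatcher; the rule name, the tsToIndices map, and the indexWhereCtx context object are illustrative assumptions.

// A sketch, not the exact IndexWhereTaskDispatcher code; tsToIndices,
// indexWhereCtx, and pctx are assumed to be built elsewhere.
Map<Rule, NodeProcessor> operatorRules = new LinkedHashMap<Rule, NodeProcessor>();
// fire IndexWhereProcessor on every TableScanOperator node
operatorRules.put(new RuleRegExp("RULE:TS", TableScanOperator.getOperatorName() + "%"),
    new IndexWhereProcessor(tsToIndices));
Dispatcher dispatcher = new DefaultRuleDispatcher(null, operatorRules, indexWhereCtx);
GraphWalker walker = new DefaultGraphWalker(dispatcher);
List<Node> topNodes = new ArrayList<Node>(pctx.getTopOps().values());
walker.startWalking(topNodes, null);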
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class PartitionPruner, method getAllPartitions.
private static Set<Partition> getAllPartitions(Table tab) throws HiveException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  Set<Partition> result = Hive.get().getAllPartitionsOf(tab);
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  return result;
}
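A minimal caller sketch for this helper; the database and table names are hypothetical. getAllPartitionsOf fetches every partition from the metastore, which is why the call is bracketed with PerfLogger timing above.

// hypothetical caller; "default" and "web_logs" are made-up names
Table tab = Hive.get().getTable("default", "web_logs");
Set<Partition> parts = getAllPartitions(tab);
for (Partition p : parts) {
  System.out.println(p.getName() + " -> " + p.getDataLocation());
}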
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class IndexMetadataChangeTask, method execute.
@Override
protected int execute(DriverContext driverContext) {
  try {
    Hive db = Hive.get(conf);
    IndexMetadataChangeWork work = this.getWork();
    String tblName = work.getIndexTbl();
    Table tbl = db.getTable(work.getDbName(), tblName);
    if (tbl == null) {
      console.printError("Index table cannot be null.");
      return 1;
    }
    if (!tbl.getTableType().equals(TableType.INDEX_TABLE)) {
      console.printError("Table " + tbl.getTableName() + " is not an index table.");
      return 1;
    }
    if (tbl.isPartitioned() && work.getPartSpec() == null) {
      console.printError("Index table is partitioned, but no partition specified.");
      return 1;
    }
    if (work.getPartSpec() != null) {
      Partition part = db.getPartition(tbl, work.getPartSpec(), false);
      if (part == null) {
        console.printError("Partition " + Warehouse.makePartName(work.getPartSpec(), false) + " does not exist.");
        return 1;
      }
      Path path = part.getDataLocation();
      FileSystem fs = path.getFileSystem(conf);
      FileStatus fstat = fs.getFileStatus(path);
      part.getParameters().put(HiveIndex.INDEX_TABLE_CREATETIME, Long.toString(fstat.getModificationTime()));
      db.alterPartition(tbl.getTableName(), part, null);
    } else {
      Path url = new Path(tbl.getPath().toString());
      FileSystem fs = url.getFileSystem(conf);
      FileStatus fstat = fs.getFileStatus(url);
      tbl.getParameters().put(HiveIndex.INDEX_TABLE_CREATETIME, Long.toString(fstat.getModificationTime()));
      db.alterTable(tbl.getDbName() + "." + tbl.getTableName(), tbl, null);
    }
  } catch (Exception e) {
    e.printStackTrace();
    console.printError("Error changing index table/partition metadata " + e.getMessage());
    return 1;
  }
  return 0;
}
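The task stamps HiveIndex.INDEX_TABLE_CREATETIME with the modification time of the underlying data directory. Below is a hedged sketch of how that property could be read back to detect a stale index; the table names are hypothetical and this is not Hive's exact internal freshness check.

// hypothetical names; a sketch, not Hive's internal freshness logic
Table indexTbl = db.getTable("default", "idx_sales");
String createTime = indexTbl.getParameters().get(HiveIndex.INDEX_TABLE_CREATETIME);
Path baseLoc = db.getTable("default", "sales").getPath();
FileStatus baseStat = baseLoc.getFileSystem(conf).getFileStatus(baseLoc);
// the index is stale if the base data changed after the index was built
boolean stale = createTime == null
    || Long.parseLong(createTime) < baseStat.getModificationTime();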
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class TableBasedIndexHandler, method generateIndexBuildTaskList.
@Override
public List<Task<?>> generateIndexBuildTaskList(org.apache.hadoop.hive.ql.metadata.Table baseTbl,
    org.apache.hadoop.hive.metastore.api.Index index, List<Partition> indexTblPartitions,
    List<Partition> baseTblPartitions, org.apache.hadoop.hive.ql.metadata.Table indexTbl,
    Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws HiveException {
  try {
    TableDesc desc = Utilities.getTableDesc(indexTbl);
    List<Partition> newBaseTblPartitions = new ArrayList<Partition>();
    List<Task<?>> indexBuilderTasks = new ArrayList<Task<?>>();
    if (!baseTbl.isPartitioned()) {
      // the table does not have any partitions, so create an index for the whole table
      Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, false,
          new PartitionDesc(desc, null), indexTbl.getTableName(),
          new PartitionDesc(Utilities.getTableDesc(baseTbl), null),
          baseTbl.getTableName(), indexTbl.getDbName());
      indexBuilderTasks.add(indexBuilder);
    } else {
      // check whether the index table partitions are the same as the base table partitions
      for (int i = 0; i < indexTblPartitions.size(); i++) {
        Partition indexPart = indexTblPartitions.get(i);
        Partition basePart = null;
        for (int j = 0; j < baseTblPartitions.size(); j++) {
          if (baseTblPartitions.get(j).getName().equals(indexPart.getName())) {
            basePart = baseTblPartitions.get(j);
            newBaseTblPartitions.add(baseTblPartitions.get(j));
            break;
          }
        }
        if (basePart == null) {
          throw new RuntimeException("Partitions of base table and index table are inconsistent.");
        }
        // for each partition, spawn a map reduce task.
        Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, true,
            new PartitionDesc(indexPart), indexTbl.getTableName(),
            new PartitionDesc(basePart), baseTbl.getTableName(), indexTbl.getDbName());
        indexBuilderTasks.add(indexBuilder);
      }
    }
    return indexBuilderTasks;
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
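The inner loop above matches index partitions to base partitions by name in O(n*m). A sketch of the same matching done in one pass with a HashMap; behavior is otherwise unchanged.

// build a name -> partition lookup once, then match in a single pass
Map<String, Partition> baseByName = new HashMap<String, Partition>();
for (Partition p : baseTblPartitions) {
  baseByName.put(p.getName(), p);
}
for (Partition indexPart : indexTblPartitions) {
  Partition basePart = baseByName.get(indexPart.getName());
  if (basePart == null) {
    throw new RuntimeException("Partitions of base table and index table are inconsistent.");
  }
  newBaseTblPartitions.add(basePart);
  // ...spawn the per-partition index build task as above
}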
Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.
The class CompactIndexHandler, method getIndexPredicateAnalyzer.
/**
 * Instantiate a new predicate analyzer suitable for determining
 * whether we can use an index, based on the rules for indexes in
 * WHERE clauses that we support.
 *
 * @return preconfigured predicate analyzer for WHERE queries
 */
private IndexPredicateAnalyzer getIndexPredicateAnalyzer(Index index, Set<Partition> queryPartitions) {
  IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
  analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
  analyzer.addComparisonOp(GenericUDFOPLessThan.class.getName());
  analyzer.addComparisonOp(GenericUDFOPEqualOrLessThan.class.getName());
  analyzer.addComparisonOp(GenericUDFOPGreaterThan.class.getName());
  analyzer.addComparisonOp(GenericUDFOPEqualOrGreaterThan.class.getName());
  // only return results for columns in this index
  List<FieldSchema> columnSchemas = index.getSd().getCols();
  for (FieldSchema column : columnSchemas) {
    analyzer.allowColumnName(column.getName());
  }
  // partitioned columns are treated as if they have indexes so that the partitions
  // are used during the index query generation
  partitionCols = new HashSet<String>();
  for (Partition part : queryPartitions) {
    if (part.getSpec().isEmpty()) {
      // empty partition specs come from whole-table scans, so skip them
      continue;
    }
    for (String column : part.getSpec().keySet()) {
      analyzer.allowColumnName(column);
      partitionCols.add(column);
    }
  }
  return analyzer;
}
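For context, a sketch of how such an analyzer is typically consumed, following the pattern in CompactIndexHandler.generateIndexQuery: analyzePredicate splits the pushed-down predicate into index-servable search conditions and a residual predicate that the index cannot answer.

// decompose the predicate: indexable conditions go into searchConditions,
// anything else comes back as the residual predicate
IndexPredicateAnalyzer analyzer = getIndexPredicateAnalyzer(index, queryPartitions);
List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
ExprNodeDesc residual = analyzer.analyzePredicate(predicate, searchConditions);
if (searchConditions.isEmpty()) {
  // nothing in this WHERE clause can be served by the index
}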