Use of org.apache.hadoop.hive.ql.index.HiveIndexHandler in project hive by Apache.
Class DDLSemanticAnalyzer, method getIndexBuilderMapRed.
/**
 * Builds the list of tasks that (re)build an index by delegating to the index's
 * {@link HiveIndexHandler}.
 *
 * <p>The index is looked up via {@code db.getIndex(names[0], names[1], indexName)}. When the
 * index has an index table, that table is resolved and the base/index table partitions are
 * prepared via {@code preparePartitions}; otherwise the partition lists and the index table are
 * passed to the handler as {@code null}.
 *
 * @param names     qualified base table name; {@code names[0]} and {@code names[1]} are handed
 *                  to {@code db.getIndex} (presumably database name and table name)
 * @param indexName name of the index to build
 * @param partSpec  partition spec forwarded to {@code preparePartitions}
 * @return the tasks returned by the handler's {@code generateIndexBuildTaskList}
 * @throws SemanticException if the index has no handler class, or if any lookup or task
 *         generation step fails (non-semantic failures are wrapped as the cause)
 */
private List<Task<?>> getIndexBuilderMapRed(String[] names, String indexName, HashMap<String, String> partSpec) throws SemanticException {
  try {
    Index index = db.getIndex(names[0], names[1], indexName);
    Table indexTbl = null;
    String indexTableName = index.getIndexTableName();
    if (indexTableName != null) {
      indexTbl = getTable(Utilities.getDbTableName(index.getDbName(), indexTableName));
    }
    Table baseTbl = getTable(new String[] { index.getDbName(), index.getOrigTableName() });
    String handlerCls = index.getIndexHandlerClass();
    HiveIndexHandler handler = HiveUtils.getIndexHandler(conf, handlerCls);
    if (handler == null) {
      // getIndexHandler returns null when no handler class is recorded; fail with a clear
      // message instead of a NullPointerException further down.
      throw new SemanticException("No index handler class is set for index " + indexName);
    }
    List<Partition> indexTblPartitions = null;
    List<Partition> baseTblPartitions = null;
    if (indexTbl != null) {
      // preparePartitions fills indexTblPartitions and returns the base table partitions.
      indexTblPartitions = new ArrayList<Partition>();
      baseTblPartitions = preparePartitions(baseTbl, partSpec, indexTbl, db, indexTblPartitions);
    }
    return handler.generateIndexBuildTaskList(baseTbl, index, indexTblPartitions, baseTblPartitions, indexTbl, getInputs(), getOutputs());
  } catch (SemanticException e) {
    // Already the right type: rethrow as-is rather than nesting it in another SemanticException.
    throw e;
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
Use of org.apache.hadoop.hive.ql.index.HiveIndexHandler in project hive by Apache.
Class HiveUtils, method getIndexHandler.
/**
 * Loads and instantiates the index handler named by {@code indexHandlerClass}.
 *
 * <p>The class is loaded and initialized through
 * {@code Utilities.getSessionSpecifiedClassLoader()} and instantiated with
 * {@code ReflectionUtils.newInstance(handlerClass, conf)}.
 *
 * @param conf              configuration handed to {@code ReflectionUtils.newInstance}
 * @param indexHandlerClass fully qualified handler class name; may be {@code null}
 * @return a new handler instance, or {@code null} when {@code indexHandlerClass} is
 *         {@code null} (callers must check before dereferencing)
 * @throws HiveException if the class cannot be found or does not implement
 *         {@link HiveIndexHandler}
 */
public static HiveIndexHandler getIndexHandler(HiveConf conf, String indexHandlerClass) throws HiveException {
  if (indexHandlerClass == null) {
    return null;
  }
  Class<?> loadedClass;
  try {
    loadedClass = Class.forName(indexHandlerClass, true, Utilities.getSessionSpecifiedClassLoader());
  } catch (ClassNotFoundException e) {
    throw new HiveException("Error in loading index handler." + e.getMessage(), e);
  }
  // Verify the type up front so an unrelated class is never instantiated and the caller
  // gets a HiveException rather than a raw ClassCastException.
  if (!HiveIndexHandler.class.isAssignableFrom(loadedClass)) {
    throw new HiveException("Index handler class " + indexHandlerClass + " does not implement " + HiveIndexHandler.class.getName());
  }
  Class<? extends HiveIndexHandler> handlerClass = loadedClass.asSubclass(HiveIndexHandler.class);
  return ReflectionUtils.newInstance(handlerClass, conf);
}
Use of org.apache.hadoop.hive.ql.index.HiveIndexHandler in project hive by Apache.
Class IndexWhereProcessor, method rewriteForIndexes.
/**
 * Asks the index handler to generate the index query tasks for the given predicate.
 *
 * <p>The handler is loaded from the handler class of the first index in {@code indexes}.
 * Before generating anything, the input size of {@code task} is checked with the handler's
 * {@code checkQuerySize}; if that check fails, the query tasks in {@code queryContext} are
 * set to {@code null} and no rewrite happens.
 *
 * @param predicate Predicate of query to rewrite
 * @param indexes Indexes to use for rewrite; the first element selects the handler, so all
 *        entries are expected to share the same handler type
 * @param pctx parse context supplying the configuration and query context
 * @param task original task before rewrite
 * @param queryContext stores return values
 * @throws SemanticException if the handler cannot be loaded, no handler class is set on the
 *         index, or the input size cannot be determined
 */
private void rewriteForIndexes(ExprNodeDesc predicate, List<Index> indexes, ParseContext pctx, Task<MapredWork> task, HiveIndexQueryContext queryContext) throws SemanticException {
  HiveIndexHandler indexHandler;
  // All indexes in the list are of the same type, and therefore can use the
  // same handler to generate the index query tasks
  Index index = indexes.get(0);
  try {
    indexHandler = HiveUtils.getIndexHandler(pctx.getConf(), index.getIndexHandlerClass());
  } catch (HiveException e) {
    LOG.error("Exception while loading IndexHandler: " + index.getIndexHandlerClass(), e);
    throw new SemanticException("Failed to load indexHandler: " + index.getIndexHandlerClass(), e);
  }
  if (indexHandler == null) {
    // getIndexHandler returns null when the index carries no handler class name.
    throw new SemanticException("No index handler class is set for the index; cannot rewrite query");
  }
  // check the size
  try {
    ContentSummary inputSummary = Utilities.getInputSummary(pctx.getContext(), task.getWork().getMapWork(), null);
    long inputSize = inputSummary.getLength();
    if (!indexHandler.checkQuerySize(inputSize, pctx.getConf())) {
      // Handler rejected the input size: signal "no rewrite" to the caller.
      queryContext.setQueryTasks(null);
      return;
    }
  } catch (IOException e) {
    throw new SemanticException("Failed to get task size", e);
  }
  // use the IndexHandler to generate the index query
  indexHandler.generateIndexQuery(indexes, predicate, pctx, queryContext);
}
Use of org.apache.hadoop.hive.ql.index.HiveIndexHandler in project hive by Apache.
Class Hive, method createIndex.
/**
 * Creates an index on a base table and submits it through the metastore client
 * ({@code getMSC().createIndex}), together with the index table definition when the index
 * handler uses an index table.
 *
 * <p>Creation is rejected when the index table would live in a different database than the
 * base table, when the index or the index table already exists, when the base table is a
 * virtual view or a temporary table, when an indexed column is not a column of the base
 * table, or when {@code deferredRebuild} is {@code false}.
 *
 * @param tableName
 *          base table name
 * @param indexName
 *          index name
 * @param indexHandlerClass
 *          index handler class; must resolve to a handler
 * @param indexedCols
 *          index columns; each must be a column of the base table
 * @param indexTblName
 *          index table's name
 * @param deferredRebuild
 *          whether the index is built with deferred rebuild; must be {@code true}
 * @param inputFormat
 *          input format
 * @param outputFormat
 *          output format
 * @param serde
 *          serialization library; when {@code null} it is taken from {@code storageHandler},
 *          or defaults to LazySimpleSerDe when that is {@code null} too
 * @param storageHandler
 *          index table's storage handler
 * @param location
 *          location
 * @param idxProps
 *          index properties, copied into the index parameters
 * @param tblProps
 *          table properties, set on the index table when the handler uses one
 * @param serdeProps
 *          serde properties
 * @param collItemDelim
 *          collection item delimiter (serde parameter)
 * @param fieldDelim
 *          field delimiter; also stored as the serialization format
 * @param fieldEscape
 *          escape character (serde parameter)
 * @param lineDelim
 *          line delimiter (serde parameter)
 * @param mapKeyDelim
 *          map key delimiter (serde parameter)
 * @param indexComment
 *          optional comment stored under the {@code "comment"} index parameter
 * @throws HiveException
 *           wrapping any failure raised while validating or creating the index
 */
public void createIndex(String tableName, String indexName, String indexHandlerClass, List<String> indexedCols, String indexTblName, boolean deferredRebuild, String inputFormat, String outputFormat, String serde, String storageHandler, String location, Map<String, String> idxProps, Map<String, String> tblProps, Map<String, String> serdeProps, String collItemDelim, String fieldDelim, String fieldEscape, String lineDelim, String mapKeyDelim, String indexComment) throws HiveException {
  try {
    String tdname = Utilities.getDatabaseName(tableName);
    String idname = Utilities.getDatabaseName(indexTblName);
    if (!idname.equals(tdname)) {
      throw new HiveException("Index on different database (" + idname + ") from base table (" + tdname + ") is not supported.");
    }
    Index old_index = null;
    try {
      old_index = getIndex(tableName, indexName);
    } catch (Exception ignored) {
      // Best-effort existence probe: any failure is treated as "index does not exist".
    }
    if (old_index != null) {
      throw new HiveException("Index " + indexName + " already exists on table " + tableName);
    }
    org.apache.hadoop.hive.metastore.api.Table baseTbl = getTable(tableName).getTTable();
    if (TableType.VIRTUAL_VIEW.toString().equals(baseTbl.getTableType())) {
      throw new HiveException("tableName=" + tableName + " is a VIRTUAL VIEW. Index on VIRTUAL VIEW is not supported.");
    }
    if (baseTbl.isTemporary()) {
      throw new HiveException("tableName=" + tableName + " is a TEMPORARY TABLE. Index on TEMPORARY TABLE is not supported.");
    }
    org.apache.hadoop.hive.metastore.api.Table temp = null;
    try {
      temp = getTable(indexTblName).getTTable();
    } catch (Exception ignored) {
      // Best-effort existence probe: any failure is treated as "index table does not exist".
    }
    if (temp != null) {
      throw new HiveException("Table name " + indexTblName + " already exists. Choose another name.");
    }

    // Serde definition for the index table.
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(indexTblName);
    if (serde != null) {
      serdeInfo.setSerializationLib(serde);
    } else {
      if (storageHandler == null) {
        serdeInfo.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
      } else {
        HiveStorageHandler sh = HiveUtils.getStorageHandler(getConf(), storageHandler);
        String serDeClassName = sh.getSerDeClass().getName();
        serdeInfo.setSerializationLib(serDeClassName);
      }
    }
    serdeInfo.setParameters(new HashMap<String, String>());
    if (fieldDelim != null) {
      serdeInfo.getParameters().put(FIELD_DELIM, fieldDelim);
      serdeInfo.getParameters().put(SERIALIZATION_FORMAT, fieldDelim);
    }
    if (fieldEscape != null) {
      serdeInfo.getParameters().put(ESCAPE_CHAR, fieldEscape);
    }
    if (collItemDelim != null) {
      serdeInfo.getParameters().put(COLLECTION_DELIM, collItemDelim);
    }
    if (mapKeyDelim != null) {
      serdeInfo.getParameters().put(MAPKEY_DELIM, mapKeyDelim);
    }
    if (lineDelim != null) {
      serdeInfo.getParameters().put(LINE_DELIM, lineDelim);
    }
    if (serdeProps != null) {
      // Caller-supplied serde properties go last, so they override the delimiters above.
      serdeInfo.getParameters().putAll(serdeProps);
    }

    // Select the indexed columns from the base table, in base table column order.
    List<FieldSchema> indexTblCols = new ArrayList<FieldSchema>();
    List<Order> sortCols = new ArrayList<Order>();
    int k = 0;
    Table metaBaseTbl = new Table(baseTbl);
    // Even though we are storing these in metastore, get regular columns. Indexes on lengthy
    // types from e.g. Avro schema will just fail to create the index table (by design).
    List<FieldSchema> cols = metaBaseTbl.getCols();
    for (FieldSchema col : cols) {
      if (indexedCols.contains(col.getName())) {
        indexTblCols.add(col);
        sortCols.add(new Order(col.getName(), 1));
        k++;
      }
    }
    if (k != indexedCols.size()) {
      throw new RuntimeException("Check the index columns, they should appear in the table being indexed.");
    }
    int time = (int) (System.currentTimeMillis() / 1000);
    org.apache.hadoop.hive.metastore.api.Table tt = null;
    HiveIndexHandler indexHandler = HiveUtils.getIndexHandler(this.getConf(), indexHandlerClass);
    String itname = Utilities.getTableName(indexTblName);
    if (indexHandler == null) {
      // getIndexHandler returns null for a null class name; report that explicitly instead
      // of failing with a NullPointerException on the next line.
      throw new HiveException("Index handler class must be specified for index " + indexName);
    }
    if (indexHandler.usesIndexTable()) {
      // Index table definition: same partition keys as the base table.
      tt = new org.apache.hadoop.hive.ql.metadata.Table(idname, itname).getTTable();
      List<FieldSchema> partKeys = baseTbl.getPartitionKeys();
      tt.setPartitionKeys(partKeys);
      tt.setTableType(TableType.INDEX_TABLE.toString());
      if (tblProps != null) {
        for (Entry<String, String> prop : tblProps.entrySet()) {
          tt.putToParameters(prop.getKey(), prop.getValue());
        }
      }
      SessionState ss = SessionState.get();
      CreateTableAutomaticGrant grants;
      if (ss != null && ((grants = ss.getCreateTableGrants()) != null)) {
        PrincipalPrivilegeSet principalPrivs = new PrincipalPrivilegeSet();
        principalPrivs.setUserPrivileges(grants.getUserGrants());
        principalPrivs.setGroupPrivileges(grants.getGroupGrants());
        principalPrivs.setRolePrivileges(grants.getRoleGrants());
        tt.setPrivileges(principalPrivs);
      }
    }
    if (!deferredRebuild) {
      throw new RuntimeException("Please specify deferred rebuild using \" WITH DEFERRED REBUILD \".");
    }
    StorageDescriptor indexSd = new StorageDescriptor(indexTblCols, location, inputFormat, outputFormat, false, /*compressed - not used*/
        -1, /*numBuckets - default is -1 when the table has no buckets*/
        serdeInfo, null, /*bucketCols*/
        sortCols, null);
    String ttname = Utilities.getTableName(tableName);
    Index indexDesc = new Index(indexName, indexHandlerClass, tdname, ttname, time, time, itname, indexSd, new HashMap<String, String>(), deferredRebuild);
    if (indexComment != null) {
      indexDesc.getParameters().put("comment", indexComment);
    }
    if (idxProps != null) {
      indexDesc.getParameters().putAll(idxProps);
    }
    indexHandler.analyzeIndexDefinition(baseTbl, indexDesc, tt);
    this.getMSC().createIndex(indexDesc, tt);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
Aggregations