Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class CreateTableDesc, method validate().
public void validate(HiveConf conf) throws SemanticException {
  if ((this.getCols() == null) || (this.getCols().size() == 0)) {
    // for now make sure that serde exists
    if (Table.hasMetastoreBasedSchema(conf, serName) && StringUtils.isEmpty(getStorageHandler())) {
      throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
    }
    return;
  }
  if (this.getStorageHandler() == null) {
    try {
      Class<?> origin = Class.forName(this.getOutputFormat(), true, Utilities.getSessionSpecifiedClassLoader());
      Class<? extends OutputFormat> replaced = HiveFileFormatUtils.getOutputFormatSubstitute(origin);
      if (!HiveOutputFormat.class.isAssignableFrom(replaced)) {
        throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
      }
    } catch (ClassNotFoundException e) {
      throw new SemanticException(ErrorMsg.CLASSPATH_ERROR.getMsg(), e);
    }
  }
  List<String> colNames = ParseUtils.validateColumnNameUniqueness(this.getCols());
  if (this.getBucketCols() != null) {
    // every bucket (CLUSTERED BY) column must be a valid table column
    Iterator<String> bucketCols = this.getBucketCols().iterator();
    while (bucketCols.hasNext()) {
      String bucketCol = bucketCols.next();
      boolean found = false;
      Iterator<String> colNamesIter = colNames.iterator();
      while (colNamesIter.hasNext()) {
        String colName = colNamesIter.next();
        if (bucketCol.equalsIgnoreCase(colName)) {
          found = true;
          break;
        }
      }
      if (!found) {
        throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(" '" + bucketCol + "'"));
      }
    }
  }
  if (this.getSortCols() != null) {
    // every sort (SORTED BY) column must be a valid table column
    Iterator<Order> sortCols = this.getSortCols().iterator();
    while (sortCols.hasNext()) {
      String sortCol = sortCols.next().getCol();
      boolean found = false;
      Iterator<String> colNamesIter = colNames.iterator();
      while (colNamesIter.hasNext()) {
        String colName = colNamesIter.next();
        if (sortCol.equalsIgnoreCase(colName)) {
          found = true;
          break;
        }
      }
      if (!found) {
        throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(" '" + sortCol + "'"));
      }
    }
  }
  if (this.getPartCols() != null) {
    // partition columns must be primitive and must not overlap with the table columns
    Iterator<FieldSchema> partColsIter = this.getPartCols().iterator();
    while (partColsIter.hasNext()) {
      FieldSchema fs = partColsIter.next();
      String partCol = fs.getName();
      TypeInfo pti = null;
      try {
        pti = TypeInfoFactory.getPrimitiveTypeInfo(fs.getType());
      } catch (Exception err) {
        LOG.error("Failed to get type info", err);
      }
      if (null == pti) {
        throw new SemanticException(ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg() + " Found " + partCol + " of type: " + fs.getType());
      }
      Iterator<String> colNamesIter = colNames.iterator();
      while (colNamesIter.hasNext()) {
        String colName = BaseSemanticAnalyzer.unescapeIdentifier(colNamesIter.next());
        if (partCol.equalsIgnoreCase(colName)) {
          throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg());
        }
      }
    }
  }
  /* Validate skewed information. */
  ValidationUtility.validateSkewedInformation(colNames, this.getSkewedColNames(), this.getSkewedColValues());
}
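Every check in validate() follows the same pattern: detect the problem, format an ErrorMsg, and throw a SemanticException (optionally wrapping a cause, as in the ClassNotFoundException branch). The sketch below shows that pattern in isolation; BucketColumnCheck and checkBucketColumns are hypothetical names, not part of CreateTableDesc, and the case-insensitive Set is simply a compact alternative to the nested iterators above.

import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public final class BucketColumnCheck {

  private BucketColumnCheck() {
  }

  // Throws SemanticException if any bucket column is not a table column (case-insensitive),
  // mirroring the bucket-column loop in CreateTableDesc.validate().
  public static void checkBucketColumns(List<String> colNames, List<String> bucketCols)
      throws SemanticException {
    Set<String> cols = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
    cols.addAll(colNames);
    for (String bucketCol : bucketCols) {
      if (!cols.contains(bucketCol)) {
        throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(" '" + bucketCol + "'"));
      }
    }
  }
}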
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project phoenix by apache.
The class IndexPredicateAnalyzer, method analyzePredicate().
/**
* Analyzes a predicate.
*
* @param predicate predicate to be analyzed
* @param searchConditions receives conditions produced by analysis
* @return residual predicate which could not be translated to
* searchConditions
*/
public ExprNodeDesc analyzePredicate(ExprNodeDesc predicate, final List<IndexSearchCondition> searchConditions) {
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  NodeProcessor nodeProcessor = new NodeProcessor() {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
      // We can only push down predicates that appear as part of a pure conjunction: reject OR, CASE, etc.
      for (Node ancestor : stack) {
        if (nd == ancestor) {
          break;
        }
        if (!FunctionRegistry.isOpAnd((ExprNodeDesc) ancestor)) {
          return nd;
        }
      }
      return analyzeExpr((ExprNodeGenericFuncDesc) nd, searchConditions, nodeOutputs);
    }
  };
  Dispatcher disp = new DefaultRuleDispatcher(nodeProcessor, opRules, null);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.add(predicate);
  HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
  try {
    ogw.startWalking(topNodes, nodeOutput);
  } catch (SemanticException ex) {
    throw new RuntimeException(ex);
  }
  ExprNodeDesc residualPredicate = (ExprNodeDesc) nodeOutput.get(predicate);
  return residualPredicate;
}
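A rough usage sketch, assuming an IndexPredicateAnalyzer instance (analyzer) that has already been configured with the comparison operators and column names it should accept, and a filter expression filterExpr; both variable names are illustrative:

List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
ExprNodeDesc residual = analyzer.analyzePredicate(filterExpr, searchConditions);
// searchConditions now holds the conditions that can be pushed down to the storage layer;
// residual (possibly null) is the part of the predicate Hive must still evaluate after the scan.

Note that the graph walker wraps any SemanticException thrown during analysis in a RuntimeException, so callers of analyzePredicate do not have to declare or catch it.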
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class HiveGBOpConvUtil, method genMapSideGBSkewGBKeysAndDistUDAFNotPresent().
/**
 * Generates the GB-RS-GB2 plan (map-side group-by, reduce sink, reduce-side
 * group-by) for the skewed group-by case where group-by keys are present and
 * no distinct UDAF is used.
 *
 * @param inputOpAf attributes of the input operator
 * @param aggRel the aggregate being converted
 * @param gbInfo group-by information gathered for the conversion
 * @return attributes of the reduce-side GB2 operator
 * @throws SemanticException if the plan cannot be generated, e.g. when grouping sets would require an additional MR job
 */
private static OpAttr genMapSideGBSkewGBKeysAndDistUDAFNotPresent(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
  OpAttr mapSideGB = null;
  OpAttr mapSideRS = null;
  OpAttr reduceSideGB2 = null;
  // 0. Sanity check: grouping sets that need an additional MR job cannot be combined with skew handling
  if (gbInfo.grpSetRqrAdditionalMRJob) {
    String errorMsg = "The number of rows per input row due to grouping sets is " + gbInfo.grpSets.size();
    throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg));
  }
  // 1. Insert MapSide GB
  mapSideGB = genMapSideGB(inputOpAf, gbInfo);
  // 2. Insert MapSide RS
  mapSideRS = genMapSideGBRS(mapSideGB, gbInfo);
  // 3. Insert ReduceSide GB2
  reduceSideGB2 = genReduceSideGB2(mapSideRS, gbInfo);
  return reduceSideGB2;
}
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class HiveOpConverter, method genJoin().
private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
  // 1. Extract join type
  JoinCondDesc[] joinCondns;
  boolean semiJoin;
  boolean noOuterJoin;
  if (join instanceof HiveMultiJoin) {
    HiveMultiJoin hmj = (HiveMultiJoin) join;
    joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
    for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
      joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
    }
    semiJoin = false;
    noOuterJoin = !hmj.isOuterJoin();
  } else {
    joinCondns = new JoinCondDesc[1];
    semiJoin = join instanceof SemiJoin;
    JoinType joinType;
    if (semiJoin) {
      joinType = JoinType.LEFTSEMI;
    } else {
      joinType = extractJoinType((Join) join);
    }
    joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
    noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
  }
  // 2. We create the join aux structures
  ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
  ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
  Operator<?>[] childOps = new Operator[children.size()];
  Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
  Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
  Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
  int outputPos = 0;
  for (int pos = 0; pos < children.size(); pos++) {
    // 2.1. Backtracking from RS
    ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
    if (inputRS.getNumParent() != 1) {
      throw new SemanticException("RS should have single parent");
    }
    Operator<?> parent = inputRS.getParentOperators().get(0);
    ReduceSinkDesc rsDesc = inputRS.getConf();
    int[] index = inputRS.getValueIndex();
    Byte tag = (byte) rsDesc.getTag();
    // 2.1.1. If semijoin, the right-hand side contributes no output columns
    if (semiJoin && pos != 0) {
      exprMap.put(tag, new ArrayList<ExprNodeDesc>());
      childOps[pos] = inputRS;
      continue;
    }
    posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
    List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
    for (int i = 0; i < index.length; i++) {
      ColumnInfo info = new ColumnInfo(parentColumns.get(i));
      info.setInternalName(outputColumnNames.get(outputPos));
      info.setTabAlias(tabAlias);
      outputColumns.add(info);
      reversedExprs.put(outputColumnNames.get(outputPos), tag);
      outputPos++;
    }
    exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
    colExprMap.putAll(descriptors);
    childOps[pos] = inputRS;
  }
  // 3. We populate the filters and filterMap structures needed in the join descriptor
  List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
  int[][] filterMap = new int[children.size()][];
  for (int i = 0; i < children.size(); i++) {
    filtersPerInput.add(new ArrayList<ExprNodeDesc>());
  }
  for (int i = 0; i < filterExpressions.size(); i++) {
    int leftPos = joinCondns[i].getLeft();
    int rightPos = joinCondns[i].getRight();
    for (ExprNodeDesc expr : filterExpressions.get(i)) {
      // We need to update the exprNodes, as currently
      // they refer to columns in the output of the join;
      // they should refer to the columns output by the RS
      int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
      if (inputPos == -1) {
        inputPos = leftPos;
      }
      filtersPerInput.get(inputPos).add(expr);
      if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
        if (inputPos == leftPos) {
          updateFilterMap(filterMap, leftPos, rightPos);
        } else {
          updateFilterMap(filterMap, rightPos, leftPos);
        }
      }
    }
  }
  for (int pos = 0; pos < children.size(); pos++) {
    ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
    ReduceSinkDesc rsDesc = inputRS.getConf();
    Byte tag = (byte) rsDesc.getTag();
    filters.put(tag, filtersPerInput.get(pos));
  }
  // 4. We create the join operator with its descriptor
  JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions, null);
  desc.setReversedExprs(reversedExprs);
  desc.setFilterMap(filterMap);
  JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
  joinOp.setColumnExprMap(colExprMap);
  joinOp.setPosToAliasMap(posToAliasMap);
  joinOp.getConf().setBaseSrc(baseSrc);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
  }
  return joinOp;
}
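One detail worth noting is how noOuterJoin is computed: the HiveMultiJoin branch asks the rel node directly via isOuterJoin(), while the two-way branch checks the single extracted JoinType. A hypothetical helper (hasNoOuterJoin is not part of HiveOpConverter) expressing the same rule over the final JoinCondDesc array might look like:

// Hypothetical helper: a join is "no outer" only when none of its conditions is an outer join.
private static boolean hasNoOuterJoin(JoinCondDesc[] joinCondns) {
  for (JoinCondDesc cond : joinCondns) {
    int type = cond.getType();
    if (type == JoinDesc.FULL_OUTER_JOIN || type == JoinDesc.LEFT_OUTER_JOIN
        || type == JoinDesc.RIGHT_OUTER_JOIN) {
      return false;
    }
  }
  return true;
}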
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class AbstractBucketJoinProc, method checkConvertBucketMapJoin().
/*
 * Can this mapjoin be converted to a bucketed mapjoin?
 * The following checks are performed:
 * a. The join columns contain all the bucket columns.
 * b. The join keys are not transformed in the sub-query.
 * c. All partitions contain the expected number of files (number of buckets).
 * d. The number of buckets in the big table is divisible by the number of buckets in the small tables.
 */
protected boolean checkConvertBucketMapJoin(BucketJoinProcCtx context, Map<String, Operator<? extends OperatorDesc>> aliasToOpInfo, Map<Byte, List<ExprNodeDesc>> keysMap, String baseBigAlias, List<String> joinAliases) throws SemanticException {
  LinkedHashMap<String, List<Integer>> tblAliasToNumberOfBucketsInEachPartition = new LinkedHashMap<String, List<Integer>>();
  LinkedHashMap<String, List<List<String>>> tblAliasToBucketedFilePathsInEachPartition = new LinkedHashMap<String, List<List<String>>>();
  HashMap<String, TableScanOperator> topOps = pGraphContext.getTopOps();
  HashMap<String, String> aliasToNewAliasMap = new HashMap<String, String>();
  // (partition to bucket file names) and (partition to bucket number) for
  // the big table
  LinkedHashMap<Partition, List<String>> bigTblPartsToBucketFileNames = new LinkedHashMap<Partition, List<String>>();
  LinkedHashMap<Partition, Integer> bigTblPartsToBucketNumber = new LinkedHashMap<Partition, Integer>();
  // the accessing order of join cols and bucket cols should be the same
  Integer[] joinKeyOrder = null;
  boolean bigTablePartitioned = true;
  for (int index = 0; index < joinAliases.size(); index++) {
    String alias = joinAliases.get(index);
    Operator<? extends OperatorDesc> topOp = aliasToOpInfo.get(alias);
    // The alias may not be present in case of a sub-query
    if (topOp == null) {
      return false;
    }
    List<String> keys = toColumns(keysMap.get((byte) index));
    if (keys == null || keys.isEmpty()) {
      return false;
    }
    int oldKeySize = keys.size();
    TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(topOp, keys);
    if (tso == null) {
      // genRootTableScan returns null when the join keys are transformed by operators
      // between topOp and the root TableScan operator. We don't handle that case, and simply return
      return false;
    }
    // For nested sub-queries, the alias mapping is not maintained in QB currently.
    if (topOps.containsValue(tso)) {
      for (Map.Entry<String, TableScanOperator> topOpEntry : topOps.entrySet()) {
        if (topOpEntry.getValue() == tso) {
          String newAlias = topOpEntry.getKey();
          if (!newAlias.equals(alias)) {
            joinAliases.set(index, newAlias);
            if (baseBigAlias.equals(alias)) {
              baseBigAlias = newAlias;
            }
            aliasToNewAliasMap.put(alias, newAlias);
            alias = newAlias;
          }
          break;
        }
      }
    } else {
      // Ideally, this should never happen, and this should be an assert.
      return false;
    }
    // Join keys that do not map directly to columns of the root table scan will
    // be removed, and the size before and after the genRootTableScan will be different.
    if (keys.size() != oldKeySize) {
      return false;
    }
    if (joinKeyOrder == null) {
      joinKeyOrder = new Integer[keys.size()];
    }
    Table tbl = tso.getConf().getTableMetadata();
    if (AcidUtils.isInsertOnlyTable(tbl.getParameters())) {
      Utilities.FILE_OP_LOGGER.debug("No bucketed join on MM table " + tbl.getTableName());
      return false;
    }
    if (tbl.isPartitioned()) {
      PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
      List<Partition> partitions = prunedParts.getNotDeniedPartns();
      // construct a mapping of (Partition -> bucket file names) and (Partition -> bucket number)
      if (partitions.isEmpty()) {
        if (!alias.equals(baseBigAlias)) {
          tblAliasToNumberOfBucketsInEachPartition.put(alias, Arrays.<Integer>asList());
          tblAliasToBucketedFilePathsInEachPartition.put(alias, new ArrayList<List<String>>());
        }
      } else {
        List<Integer> buckets = new ArrayList<Integer>();
        List<List<String>> files = new ArrayList<List<String>>();
        for (Partition p : partitions) {
          if (!checkBucketColumns(p.getBucketCols(), keys, joinKeyOrder)) {
            return false;
          }
          List<String> fileNames = getBucketFilePathsOfPartition(p.getDataLocation(), pGraphContext);
          // The number of files for the partition should be the same as its number of buckets.
          int bucketCount = p.getBucketCount();
          if (fileNames.size() != 0 && fileNames.size() != bucketCount) {
            String msg = "The number of buckets for table " + tbl.getTableName() + " partition " + p.getName() + " is " + p.getBucketCount() + ", whereas the number of files is " + fileNames.size();
            throw new SemanticException(ErrorMsg.BUCKETED_TABLE_METADATA_INCORRECT.getMsg(msg));
          }
          if (alias.equals(baseBigAlias)) {
            bigTblPartsToBucketFileNames.put(p, fileNames);
            bigTblPartsToBucketNumber.put(p, bucketCount);
          } else {
            files.add(fileNames);
            buckets.add(bucketCount);
          }
        }
        if (!alias.equals(baseBigAlias)) {
          tblAliasToNumberOfBucketsInEachPartition.put(alias, buckets);
          tblAliasToBucketedFilePathsInEachPartition.put(alias, files);
        }
      }
    } else {
      if (!checkBucketColumns(tbl.getBucketCols(), keys, joinKeyOrder)) {
        return false;
      }
      List<String> fileNames = getBucketFilePathsOfPartition(tbl.getDataLocation(), pGraphContext);
      Integer num = Integer.valueOf(tbl.getNumBuckets());
      // The number of files for the table should be the same as its number of buckets.
      if (fileNames.size() != 0 && fileNames.size() != num) {
        String msg = "The number of buckets for table " + tbl.getTableName() + " is " + tbl.getNumBuckets() + ", whereas the number of files is " + fileNames.size();
        throw new SemanticException(ErrorMsg.BUCKETED_TABLE_METADATA_INCORRECT.getMsg(msg));
      }
      if (alias.equals(baseBigAlias)) {
        bigTblPartsToBucketFileNames.put(null, fileNames);
        bigTblPartsToBucketNumber.put(null, tbl.getNumBuckets());
        bigTablePartitioned = false;
      } else {
        tblAliasToNumberOfBucketsInEachPartition.put(alias, Arrays.asList(num));
        tblAliasToBucketedFilePathsInEachPartition.put(alias, Arrays.asList(fileNames));
      }
    }
  }
  // Check that the number of buckets in each partition of the big table
  // can be divided by the number of buckets in the small tables.
  for (Integer numBucketsInPartitionOfBigTable : bigTblPartsToBucketNumber.values()) {
    if (!checkNumberOfBucketsAgainstBigTable(tblAliasToNumberOfBucketsInEachPartition, numBucketsInPartitionOfBigTable)) {
      return false;
    }
  }
  context.setTblAliasToNumberOfBucketsInEachPartition(tblAliasToNumberOfBucketsInEachPartition);
  context.setTblAliasToBucketedFilePathsInEachPartition(tblAliasToBucketedFilePathsInEachPartition);
  context.setBigTblPartsToBucketFileNames(bigTblPartsToBucketFileNames);
  context.setBigTblPartsToBucketNumber(bigTblPartsToBucketNumber);
  context.setJoinAliases(joinAliases);
  context.setBaseBigAlias(baseBigAlias);
  context.setBigTablePartitioned(bigTablePartitioned);
  if (!aliasToNewAliasMap.isEmpty()) {
    context.setAliasToNewAliasMap(aliasToNewAliasMap);
  }
  return true;
}
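Requirement (d) from the comment block, the divisibility of bucket counts, is delegated to checkNumberOfBucketsAgainstBigTable. As a simplified, hypothetical sketch of that requirement as stated in (d) (isDivisibleByAllSmallTableBuckets is an illustrative name, not the actual Hive helper), the check amounts to:

// Simplified sketch of requirement (d): every small-table bucket count must evenly
// divide the number of buckets in the big table (or in each of its partitions).
private static boolean isDivisibleByAllSmallTableBuckets(
    Map<String, List<Integer>> tblAliasToNumberOfBucketsInEachPartition, int bigTableBucketCount) {
  for (List<Integer> bucketCounts : tblAliasToNumberOfBucketsInEachPartition.values()) {
    for (int smallTableBucketCount : bucketCounts) {
      if (smallTableBucketCount == 0 || bigTableBucketCount % smallTableBucketCount != 0) {
        return false;
      }
    }
  }
  return true;
}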