Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class MapJoinProcessor, method genLocalWorkForMapJoin.
public static void genLocalWorkForMapJoin(MapredWork newWork, MapJoinOperator newMapJoinOp, int mapJoinPos) throws SemanticException {
  try {
    // generate the local work for the big table alias
    MapJoinProcessor.genMapJoinLocalWork(newWork, newMapJoinOp, mapJoinPos);
    // clean up the mapred work
    newWork.getMapWork().setLeftInputJoin(false);
    newWork.getMapWork().setBaseSrc(null);
    newWork.getMapWork().setMapAliases(null);
  } catch (Exception e) {
    e.printStackTrace();
    throw new SemanticException("Failed to generate new mapJoin operator " + "by exception : " + e.getMessage());
  }
}
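The catch clause above flattens the root cause into a message string and relies on printStackTrace() for the details. As a side note, SemanticException also inherits a (message, cause) constructor from HiveException, so a hedged sketch of an equivalent handler that keeps the cause attached could look like the following; newWork, newMapJoinOp and mapJoinPos are assumed to be in scope exactly as in the method above.

try {
  MapJoinProcessor.genMapJoinLocalWork(newWork, newMapJoinOp, mapJoinPos);
  newWork.getMapWork().setLeftInputJoin(false);
  newWork.getMapWork().setBaseSrc(null);
  newWork.getMapWork().setMapAliases(null);
} catch (Exception e) {
  // Sketch: chain the original exception as the cause instead of printing it,
  // so callers and logs still see the full stack trace.
  throw new SemanticException("Failed to generate new mapJoin operator", e);
}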
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class SparkMapJoinProcessor, method convertMapJoin.
/**
* convert a regular join to a map-side join.
*
* @param conf
* @param opParseCtxMap
* @param op join operator
* @param joinTree qb join tree
* @param bigTablePos position of the source to be read as part of
* map-reduce framework. All other sources are cached in memory
* @param noCheckOuterJoin
* @param validateMapJoinTree
*/
@Override
public MapJoinOperator convertMapJoin(HiveConf conf, JoinOperator op, boolean leftSrc, String[] baseSrc, List<String> mapAliases, int bigTablePos, boolean noCheckOuterJoin, boolean validateMapJoinTree) throws SemanticException {
  // outer join cannot be performed on a table which is being cached
  JoinCondDesc[] condns = op.getConf().getConds();
  if (!noCheckOuterJoin) {
    if (checkMapJoin(bigTablePos, condns) < 0) {
      throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
    }
  }
  // create the map-join operator
  MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, op.getConf().isLeftInputJoin(), op.getConf().getBaseSrc(), op.getConf().getMapAliases(), bigTablePos, noCheckOuterJoin);
  // 1. remove RS as parent for the big table branch
  // 2. remove old join op from child set of all the RSs
  List<Operator<? extends OperatorDesc>> parentOps = mapJoinOp.getParentOperators();
  for (int i = 0; i < parentOps.size(); i++) {
    Operator<? extends OperatorDesc> parentOp = parentOps.get(i);
    parentOp.getChildOperators().remove(op);
    if (i == bigTablePos) {
      List<Operator<? extends OperatorDesc>> grandParentOps = parentOp.getParentOperators();
      Preconditions.checkArgument(grandParentOps.size() == 1, "AssertionError: expect number of parents to be 1, but was " + grandParentOps.size());
      Operator<? extends OperatorDesc> grandParentOp = grandParentOps.get(0);
      grandParentOp.replaceChild(parentOp, mapJoinOp);
      mapJoinOp.replaceParent(parentOp, grandParentOp);
    }
  }
  return mapJoinOp;
}
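The loop above rewires the operator tree: every ReduceSink parent drops the old JoinOperator from its children, and for the big-table branch the ReduceSink itself is cut out so its single parent feeds the new MapJoinOperator directly. A minimal, hypothetical invocation sketch follows; the conf, joinOp and bigTablePos variables are assumptions for illustration, and note that this override reads the left-input flag, base sources and map aliases from the JoinDesc rather than from the corresponding arguments.

SparkMapJoinProcessor processor = new SparkMapJoinProcessor();
// noCheckOuterJoin = false keeps the outer-join legality check enabled;
// validateMapJoinTree is accepted for interface compatibility.
MapJoinOperator mapJoinOp = processor.convertMapJoin(conf, joinOp,
    joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(),
    joinOp.getConf().getMapAliases(), bigTablePos, false, true);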
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class TableSizeBasedBigTableSelectorForAutoSMJ, method getBigTablePosition.
public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp, Set<Integer> bigTableCandidates) throws SemanticException {
  int bigTablePos = -1;
  long maxSize = -1;
  HiveConf conf = parseCtx.getConf();
  try {
    List<TableScanOperator> topOps = new ArrayList<TableScanOperator>();
    getListTopOps(joinOp, topOps);
    int currentPos = 0;
    for (TableScanOperator topOp : topOps) {
      if (topOp == null) {
        return -1;
      }
      if (!bigTableCandidates.contains(currentPos)) {
        currentPos++;
        continue;
      }
      Table table = topOp.getConf().getTableMetadata();
      long currentSize = 0;
      if (!table.isPartitioned()) {
        currentSize = getSize(conf, table);
      } else {
        // For partitioned tables, get the size of all the partitions
        PrunedPartitionList partsList = PartitionPruner.prune(topOp, parseCtx, null);
        for (Partition part : partsList.getNotDeniedPartns()) {
          currentSize += getSize(conf, part);
        }
      }
      if (currentSize > maxSize) {
        maxSize = currentSize;
        bigTablePos = currentPos;
      }
      currentPos++;
    }
  } catch (HiveException e) {
    throw new SemanticException(e.getMessage());
  }
  return bigTablePos;
}
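getBigTablePosition walks the TableScanOperator roots feeding the join, sums partition sizes for partitioned inputs, and returns the position of the largest candidate, or -1 if any root cannot be resolved. A hedged sketch of how such a selector can be combined with the big-table candidate set; the parseCtx and joinOp variables are assumptions for illustration.

// Candidate positions are the join inputs that may legally be streamed
// (outer-join semantics restrict which side can act as the big table).
Set<Integer> candidates = MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds());
TableSizeBasedBigTableSelectorForAutoSMJ selector = new TableSizeBasedBigTableSelectorForAutoSMJ();
int bigTablePos = selector.getBigTablePosition(parseCtx, joinOp, candidates);
if (bigTablePos < 0) {
  // No usable candidate: keep the original join plan.
}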
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class SamplePruner, method limitPrune.
/**
* Try to generate a list of subset of files in the partition to reach a size
* limit with number of files less than fileLimit
* @param part
* @param sizeLimit
* @param fileLimit
* @param retPathList list of Paths returned
* @return the result of the attempt
* @throws SemanticException
*/
public static LimitPruneRetStatus limitPrune(Partition part, long sizeLimit, int fileLimit, Collection<Path> retPathList) throws SemanticException {
  try {
    FileSystem fs = part.getDataLocation().getFileSystem(Hive.get().getConf());
    String pathPattern = part.getDataLocation().toString() + "/*";
    AddPathReturnStatus ret = addPath(fs, pathPattern, sizeLimit, fileLimit, retPathList);
    if (ret == null) {
      return LimitPruneRetStatus.NotQualify;
    } else if (!ret.hasFile) {
      return LimitPruneRetStatus.NoFile;
    } else if (ret.sizeLeft > 0) {
      return LimitPruneRetStatus.NotQualify;
    } else if (ret.allFile) {
      return LimitPruneRetStatus.NeedAllFiles;
    } else {
      return LimitPruneRetStatus.NeedSomeFiles;
    }
  } catch (Exception e) {
    throw new RuntimeException("Cannot get path", e);
  }
}
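limitPrune reports one of four statuses, and retPathList is only meaningful when some (or all) files qualify. A minimal, hypothetical handling sketch; part, sizeLimit and fileLimit are assumed to come from the caller.

Collection<Path> prunedPaths = new ArrayList<Path>();
SamplePruner.LimitPruneRetStatus status = SamplePruner.limitPrune(part, sizeLimit, fileLimit, prunedPaths);
if (status == SamplePruner.LimitPruneRetStatus.NeedSomeFiles) {
  // prunedPaths now holds a subset of the partition's files that reaches the
  // size limit within the allowed number of files.
} else if (status == SamplePruner.LimitPruneRetStatus.NeedAllFiles
    || status == SamplePruner.LimitPruneRetStatus.NoFile) {
  // Either every file is needed anyway or the partition is empty; no pruning benefit.
} else {
  // NotQualify: too many files, or the size limit could not be reached.
  prunedPaths.clear();
}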
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class CorrelationOptimizer, method findPossibleAutoConvertedJoinOperators.
private void findPossibleAutoConvertedJoinOperators() throws SemanticException {
  // based on hive.auto.convert.join.noconditionaltask.size.
  for (JoinOperator joinOp : pCtx.getJoinOps()) {
    boolean isAbleToGuess = true;
    boolean mayConvert = false;
    // Get total size and individual alias's size
    long aliasTotalKnownInputSize = 0;
    Map<String, Long> aliasToSize = new HashMap<String, Long>();
    Map<Integer, Set<String>> posToAliases = new HashMap<Integer, Set<String>>();
    for (int pos = 0; pos < joinOp.getNumParent(); pos++) {
      Operator<? extends OperatorDesc> op = joinOp.getParentOperators().get(pos);
      Set<TableScanOperator> topOps = CorrelationUtilities.findTableScanOperators(op);
      if (topOps.isEmpty()) {
        isAbleToGuess = false;
        break;
      }
      Set<String> aliases = new LinkedHashSet<String>();
      for (TableScanOperator tsop : topOps) {
        Table table = tsop.getConf().getTableMetadata();
        if (table == null) {
          // table should not be null.
          throw new SemanticException("The table of " + tsop.getName() + " " + tsop.getIdentifier() + " is null, which is not expected.");
        }
        String alias = tsop.getConf().getAlias();
        aliases.add(alias);
        Path p = table.getPath();
        ContentSummary resultCs = null;
        try {
          FileSystem fs = table.getPath().getFileSystem(pCtx.getConf());
          resultCs = fs.getContentSummary(p);
        } catch (IOException e) {
          LOG.warn("Encounter a error while querying content summary of table " + table.getCompleteName() + " from FileSystem. " + "Cannot guess if CommonJoinOperator will optimize " + joinOp.getName() + " " + joinOp.getIdentifier());
        }
        if (resultCs == null) {
          isAbleToGuess = false;
          break;
        }
        long size = resultCs.getLength();
        aliasTotalKnownInputSize += size;
        Long es = aliasToSize.get(alias);
        if (es == null) {
          es = new Long(0);
        }
        es += size;
        aliasToSize.put(alias, es);
      }
      posToAliases.put(pos, aliases);
    }
    if (!isAbleToGuess) {
      LOG.info("Cannot guess if CommonJoinOperator will optimize " + joinOp.getName() + " " + joinOp.getIdentifier());
      continue;
    }
    JoinDesc joinDesc = joinOp.getConf();
    Byte[] order = joinDesc.getTagOrder();
    int numAliases = order.length;
    Set<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(joinDesc.getConds());
    if (bigTableCandidates.isEmpty()) {
      continue;
    }
    long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(pCtx.getConf(), HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
    for (int i = 0; i < numAliases; i++) {
      // this table cannot be big table
      if (!bigTableCandidates.contains(i)) {
        continue;
      }
      Set<String> aliases = posToAliases.get(i);
      long aliasKnownSize = Utilities.sumOf(aliasToSize, aliases);
      if (!CommonJoinTaskDispatcher.cannotConvert(aliasKnownSize, aliasTotalKnownInputSize, ThresholdOfSmallTblSizeSum)) {
        mayConvert = true;
      }
    }
    if (mayConvert) {
      LOG.info(joinOp.getName() + " " + joinOp.getIdentifier() + " may be converted to MapJoin by CommonJoinResolver");
      skipedJoinOperators.add(joinOp);
    }
  }
}
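The convertibility test treats position i as the streamed big table and asks whether the remaining inputs fit under the configured small-table threshold. The actual check lives in CommonJoinTaskDispatcher.cannotConvert; the sketch below is an assumption about that arithmetic for illustration, not the Hive implementation itself.

// Sketch only: conversion is ruled out when the combined size of the small
// tables (total known input minus the candidate big table) exceeds the
// threshold read from HiveConf.ConfVars.HIVESMALLTABLESFILESIZE above.
static boolean cannotConvertSketch(long aliasKnownSize, long aliasTotalKnownInputSize, long threshold) {
  long smallTablesTotalSize = aliasTotalKnownInputSize - aliasKnownSize;
  return aliasKnownSize > 0 && smallTablesTotalSize > threshold;
}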