Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class SharedWorkOptimizer, the method extractSharedOptimizationInfoForRoot:
private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pctx,
    SharedWorkOptimizerCache optimizerCache, TableScanOperator retainableTsOp,
    TableScanOperator discardableTsOp) throws SemanticException {
  LinkedHashSet<Operator<?>> retainableOps = new LinkedHashSet<>();
  LinkedHashSet<Operator<?>> discardableOps = new LinkedHashSet<>();
  Set<Operator<?>> discardableInputOps = new HashSet<>();
  long dataSize = 0L;
  long maxDataSize = 0L;
  retainableOps.add(retainableTsOp);
  discardableOps.add(discardableTsOp);
  Operator<?> equalOp1 = retainableTsOp;
  Operator<?> equalOp2 = discardableTsOp;
  if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
    // TODO: Support checking multiple child operators to merge further.
    discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
    return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
  }
  Operator<?> currentOp1 = retainableTsOp.getChildOperators().get(0);
  Operator<?> currentOp2 = discardableTsOp.getChildOperators().get(0);
  // Special treatment for Filter operator that ignores the DPP predicates
  if (currentOp1 instanceof FilterOperator && currentOp2 instanceof FilterOperator) {
    boolean equalFilters = false;
    FilterDesc op1Conf = ((FilterOperator) currentOp1).getConf();
    FilterDesc op2Conf = ((FilterOperator) currentOp2).getConf();
    if (op1Conf.getIsSamplingPred() == op2Conf.getIsSamplingPred()
        && StringUtils.equals(op1Conf.getSampleDescExpr(), op2Conf.getSampleDescExpr())) {
      Multiset<String> conjsOp1String = extractConjsIgnoringDPPPreds(op1Conf.getPredicate());
      Multiset<String> conjsOp2String = extractConjsIgnoringDPPPreds(op2Conf.getPredicate());
      if (conjsOp1String.equals(conjsOp2String)) {
        equalFilters = true;
      }
    }
    if (equalFilters) {
      equalOp1 = currentOp1;
      equalOp2 = currentOp2;
      retainableOps.add(equalOp1);
      discardableOps.add(equalOp2);
      if (currentOp1.getChildOperators().size() > 1 || currentOp2.getChildOperators().size() > 1) {
        // TODO: Support checking multiple child operators to merge further.
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
        return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
      }
      currentOp1 = currentOp1.getChildOperators().get(0);
      currentOp2 = currentOp2.getChildOperators().get(0);
    } else {
      // Bail out
      discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
      discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
      return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
    }
  }
  return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2, currentOp1, currentOp2,
      retainableOps, discardableOps, discardableInputOps, false);
}
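The merge decision above turns on whether the two FilterOperators carry the same predicate once dynamic partition pruning (DPP) conjuncts are ignored. Below is a minimal, self-contained sketch of that comparison idea using plain strings and a Guava Multiset instead of Hive's ExprNodeDesc trees; the "DPP:" prefix used to mark DPP conjuncts is a hypothetical stand-in, not Hive's representation.

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import java.util.List;

public class ConjunctCompareSketch {

  // Collect the conjuncts of a predicate, skipping DPP conjuncts, into a multiset
  // so duplicates are counted and ordering differences do not matter.
  static Multiset<String> conjsIgnoringDpp(List<String> conjuncts) {
    Multiset<String> result = HashMultiset.create();
    for (String conj : conjuncts) {
      // Hypothetical marker: Hive instead walks the ExprNodeDesc tree and drops
      // the synthetic DPP expressions.
      if (!conj.startsWith("DPP:")) {
        result.add(conj);
      }
    }
    return result;
  }

  public static void main(String[] args) {
    List<String> filter1 = List.of("a = 1", "b > 2", "DPP:ds IN (...)");
    List<String> filter2 = List.of("b > 2", "a = 1");
    // Equal once the DPP conjunct is ignored, so the two filters could be merged.
    System.out.println(conjsIgnoringDpp(filter1).equals(conjsIgnoringDpp(filter2)));
  }
}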
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class HiveInputFormat, the method pushProjectionsAndFilters:
protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
    Path splitPath, boolean nonNative) {
  Path splitPathWithNoSchema = Path.getPathWithoutSchemeAndAuthority(splitPath);
  if (this.mrwork == null) {
    init(job);
  }
  if (this.mrwork.getPathToAliases() == null) {
    return;
  }
  ArrayList<String> aliases = new ArrayList<String>();
  Iterator<Entry<Path, ArrayList<String>>> iterator = this.mrwork.getPathToAliases().entrySet().iterator();
  Set<Path> splitParentPaths = null;
  int pathsSize = this.mrwork.getPathToAliases().entrySet().size();
  while (iterator.hasNext()) {
    Entry<Path, ArrayList<String>> entry = iterator.next();
    Path key = entry.getKey();
    boolean match;
    if (nonNative) {
      // For non-native tables, we need to do an exact match to avoid
      // HIVE-1903. (The table location contains no files, and the string
      // representation of its path does not have a trailing slash.)
      match = splitPath.equals(key) || splitPathWithNoSchema.equals(key);
    } else {
      // For native tables, do a prefix match instead, since the split path may
      // point to something deeper than the table location.
      if (pathsSize > 1) {
        // In such cases, use pre-computed paths for comparison
        if (splitParentPaths == null) {
          splitParentPaths = new HashSet<>();
          FileUtils.populateParentPaths(splitParentPaths, splitPath);
          FileUtils.populateParentPaths(splitParentPaths, splitPathWithNoSchema);
        }
        match = splitParentPaths.contains(key);
      } else {
        match = FileUtils.isPathWithinSubtree(splitPath, key)
            || FileUtils.isPathWithinSubtree(splitPathWithNoSchema, key);
      }
    }
    if (match) {
      ArrayList<String> list = entry.getValue();
      for (String val : list) {
        aliases.add(val);
      }
    }
  }
  for (String alias : aliases) {
    Operator<? extends OperatorDesc> op = this.mrwork.getAliasToWork().get(alias);
    if (op instanceof TableScanOperator) {
      TableScanOperator ts = (TableScanOperator) op;
      // push down projections
      ColumnProjectionUtils.appendReadColumns(jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns(),
          ts.getNeededNestedColumnPaths());
      // push down filters
      pushFilters(jobConf, ts, this.mrwork);
      AcidUtils.setAcidOperationalProperties(job, ts.getConf().isTranscationalTable(),
          ts.getConf().getAcidOperationalProperties());
      AcidUtils.setValidWriteIdList(job, ts.getConf());
    }
  }
}
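Deciding whether a split belongs to a table boils down to an ancestor check between the table location and the split path. Here is a small standalone sketch of that idea against org.apache.hadoop.fs.Path; the populateParentPaths helper below is a simplified re-implementation for illustration, not Hive's FileUtils version.

import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.fs.Path;

public class ParentPathSketch {

  // Simplified stand-in for FileUtils.populateParentPaths: add the path and all
  // of its ancestors to the set.
  static void populateParentPaths(Set<Path> parents, Path path) {
    Path current = path;
    while (current != null) {
      parents.add(current);
      current = current.getParent();
    }
  }

  public static void main(String[] args) {
    Path tableLocation = new Path("/warehouse/db/tbl");
    Path split = new Path("/warehouse/db/tbl/part=1/000000_0");

    Set<Path> splitParents = new HashSet<>();
    populateParentPaths(splitParents, split);

    // The table location is an ancestor of the split, so the split matches it.
    System.out.println(splitParents.contains(tableLocation));
  }
}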
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class LlapInputFormat, the method findTsOp:
static TableScanOperator findTsOp(MapWork mapWork) throws HiveException {
  if (mapWork.getAliasToWork() == null) {
    throw new HiveException("Unexpected - aliasToWork is missing; " + NONVECTOR_SETTING_MESSAGE);
  }
  Iterator<Operator<?>> ops = mapWork.getAliasToWork().values().iterator();
  TableScanOperator tableScanOperator = null;
  while (ops.hasNext()) {
    Operator<?> op = ops.next();
    if (op instanceof TableScanOperator) {
      if (tableScanOperator != null) {
        throw new HiveException("Unexpected - more than one TSOP; " + NONVECTOR_SETTING_MESSAGE);
      }
      tableScanOperator = (TableScanOperator) op;
    }
  }
  return tableScanOperator;
}
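A hedged usage sketch of findTsOp (the call site below is hypothetical and not taken from LlapInputFormat; Utilities.getMapWork is the usual way to obtain the MapWork for a configuration):

// Hypothetical call site: fetch the MapWork cached for this configuration and
// locate its lone TableScanOperator; null means the map work has no table scan.
MapWork mapWork = Utilities.getMapWork(jobConf);
TableScanOperator tsOp = findTsOp(mapWork);
if (tsOp == null) {
  throw new HiveException("No TableScanOperator found in the map work");
}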
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class ExecDriver, the method handleSampling:
private void handleSampling(Context context, MapWork mWork, JobConf job) throws Exception {
  assert mWork.getAliasToWork().keySet().size() == 1;
  String alias = mWork.getAliases().get(0);
  Operator<?> topOp = mWork.getAliasToWork().get(alias);
  PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias);
  ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();
  List<Path> inputPaths = mWork.getPaths();
  Path tmpPath = context.getExternalTmpPath(inputPaths.get(0));
  Path partitionFile = new Path(tmpPath, ".partitions");
  ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
  PartitionKeySampler sampler = new PartitionKeySampler();
  if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
    console.printInfo("Use sampling data created in previous MR");
    // merges sampling data from previous MR and make partition keys for total sort
    for (Path path : inputPaths) {
      FileSystem fs = path.getFileSystem(job);
      for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
        sampler.addSampleFile(status.getPath(), job);
      }
    }
  } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
    console.printInfo("Creating sampling data..");
    assert topOp instanceof TableScanOperator;
    TableScanOperator ts = (TableScanOperator) topOp;
    FetchWork fetchWork;
    if (!partDesc.isPartitioned()) {
      assert inputPaths.size() == 1;
      fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
    } else {
      fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
    }
    fetchWork.setSource(ts);
    // random sampling
    FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, job, ts);
    try {
      ts.initialize(job, new ObjectInspector[] { fetcher.getOutputObjectInspector() });
      OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
      while (fetcher.pushRow()) {
      }
    } finally {
      fetcher.clearFetchContext();
    }
  } else {
    throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
  }
  sampler.writePartitionKeys(partitionFile, job);
}
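The partition file written by the sampler is what setTotalOrderPartitionFile registers with the job. As a rough, plain-Hadoop equivalent of that shim call (a sketch only; Hive goes through ShimLoader, so the exact partitioner class and configuration may differ by Hadoop version):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSetupSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Illustrative location; handleSampling uses <externalTmpPath>/.partitions.
    Path partitionFile = new Path("/tmp/.partitions");
    // Point the partitioner at the sampled keys so each reducer receives a
    // contiguous, globally ordered key range.
    TotalOrderPartitioner.setPartitionFile(job, partitionFile);
  }
}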
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class MapredLocalTask, the method initializeOperators:
private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap) throws HiveException {
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work.getAliasToWork().entrySet()) {
    LOG.debug("initializeOperators: " + entry.getKey() + ", children = " + entry.getValue().getChildOperators());
  }
  // this mapper operator is used to initialize all the operators
  for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
    if (entry.getValue() == null) {
      continue;
    }
    JobConf jobClone = new JobConf(job);
    TableScanOperator ts = (TableScanOperator) work.getAliasToWork().get(entry.getKey());
    // push down projections
    ColumnProjectionUtils.appendReadColumns(jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(),
        ts.getNeededNestedColumnPaths());
    // push down filters and as-of information
    HiveInputFormat.pushFiltersAndAsOf(jobClone, ts, null);
    AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().isTranscationalTable(),
        ts.getConf().getAcidOperationalProperties());
    AcidUtils.setValidWriteIdList(jobClone, ts.getConf());
    // create a fetch operator
    FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
    fetchOpJobConfMap.put(fetchOp, jobClone);
    fetchOperators.put(entry.getKey(), fetchOp);
    l4j.info("fetchoperator for " + entry.getKey() + " created");
  }
  // initialize all forward operators
  for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
    // get the forward op
    String alias = entry.getKey();
    Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
    // put the exec context into all the operators
    forwardOp.passExecContext(execContext);
    // All the operators need to be initialized before process
    FetchOperator fetchOp = entry.getValue();
    JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
    if (jobConf == null) {
      jobConf = job;
    }
    // initialize the forward operator
    ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
    forwardOp.initialize(jobConf, new ObjectInspector[] { objectInspector });
    l4j.info("fetchoperator for " + entry.getKey() + " initialized");
  }
}
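After initialization, the task pulls rows from each FetchOperator and pushes them into the matching forward operator. The loop below is a simplified sketch of that drive phase, loosely modeled on MapredLocalTask; the surrounding error handling and flush logic are omitted, and the exact structure is an assumption.

// For each alias, pull rows from the FetchOperator and push them into the
// initialized forward (TableScan) operator until the source is exhausted.
for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
  FetchOperator fetchOp = entry.getValue();
  Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(entry.getKey());
  InspectableObject row;
  while ((row = fetchOp.getNextRow()) != null) {
    forwardOp.process(row.o, 0);
  }
  // Signal end-of-input so downstream operators can flush their state.
  forwardOp.close(false);
}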