Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project hive by apache.
The class MapredLocalTask, method startForward:
private void startForward(boolean inputFileChangeSenstive, String bigTableBucket) throws Exception {
  for (Operator<?> source : work.getAliasToWork().values()) {
    source.reset();
  }
  if (inputFileChangeSenstive) {
    execContext.setCurrentBigBucketFile(bigTableBucket);
  }
  for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
    String alias = entry.getKey();
    FetchOperator fetchOp = entry.getValue();
    if (inputFileChangeSenstive) {
      fetchOp.clearFetchContext();
      setUpFetchOpContext(fetchOp, alias, bigTableBucket);
    }
    // get the root operator
    Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
    // walk through the operator tree
    while (!forwardOp.getDone()) {
      InspectableObject row = fetchOp.getNextRow();
      if (row == null) {
        break;
      }
      forwardOp.process(row.o, 0);
    }
    forwardOp.flush();
  }
  for (Operator<?> source : work.getAliasToWork().values()) {
    source.close(false);
  }
}
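The method above is the core pull loop of the local task: each FetchOperator reads rows from its source and hands them to the root of the matching operator tree until either side is exhausted. A minimal sketch of that loop in isolation, with fetchWork, rootOp, and conf as hypothetical placeholders (not names from the Hive source) and error handling reduced to a finally block:

import org.apache.hadoop.hive.ql.exec.FetchOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.mapred.JobConf;

// Pull rows from a FetchOperator and push them into a root operator until one side is done.
static void drive(FetchWork fetchWork, Operator<?> rootOp, JobConf conf) throws Exception {
  FetchOperator fetchOp = new FetchOperator(fetchWork, conf);
  try {
    while (!rootOp.getDone()) {
      InspectableObject row = fetchOp.getNextRow(); // null signals end of input
      if (row == null) {
        break;
      }
      rootOp.process(row.o, 0); // tag 0: the root has a single parent stream here
    }
    rootOp.flush();
  } finally {
    fetchOp.clearFetchContext(); // release readers and fetch paths
  }
}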
Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project hive by apache.
The class MapredLocalTask, method initializeOperators:
private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap) throws HiveException {
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work.getAliasToWork().entrySet()) {
    LOG.debug("initializeOperators: " + entry.getKey() + ", children = " + entry.getValue().getChildOperators());
  }
  // this mapper operator is used to initialize all the operators
  for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
    if (entry.getValue() == null) {
      continue;
    }
    JobConf jobClone = new JobConf(job);
    TableScanOperator ts = (TableScanOperator) work.getAliasToWork().get(entry.getKey());
    // push down projections
    ColumnProjectionUtils.appendReadColumns(jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
    // push down filters
    HiveInputFormat.pushFilters(jobClone, ts);
    AcidUtils.setTransactionalTableScan(jobClone, ts.getConf().isAcidTable());
    AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());
    // create a fetch operator
    FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
    fetchOpJobConfMap.put(fetchOp, jobClone);
    fetchOperators.put(entry.getKey(), fetchOp);
    l4j.info("fetchoperator for " + entry.getKey() + " created");
  }
  // initialize all forward operators
  for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
    // get the forward op
    String alias = entry.getKey();
    Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
    // put the exec context into all the operators
    forwardOp.passExecContext(execContext);
    // all the operators need to be initialized before process
    FetchOperator fetchOp = entry.getValue();
    JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
    if (jobConf == null) {
      jobConf = job;
    }
    // initialize the forward operator
    ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
    forwardOp.initialize(jobConf, new ObjectInspector[] { objectInspector });
    l4j.info("fetchoperator for " + entry.getKey() + " initialized");
  }
}
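The handshake that matters here is in the last lines of the loop: each forward operator is initialized with the ObjectInspector its FetchOperator will emit, so row objects and inspectors agree at runtime. A condensed sketch of just that pairing (fetchOp, forwardOp, and jobConf are placeholders; this is not a drop-in replacement for the method above):

import org.apache.hadoop.hive.ql.exec.FetchOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.mapred.JobConf;

// Initialize an operator tree with the row shape its FetchOperator will produce.
static void wire(FetchOperator fetchOp, Operator<?> forwardOp, JobConf jobConf) throws Exception {
  ObjectInspector oi = fetchOp.getOutputObjectInspector();
  // one inspector per parent stream; a fetch-fed root has exactly one
  forwardOp.initialize(jobConf, new ObjectInspector[] { oi });
}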
Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project hive by apache.
The class ExecDriver, method handleSampling:
private void handleSampling(Context context, MapWork mWork, JobConf job) throws Exception {
  assert mWork.getAliasToWork().keySet().size() == 1;
  String alias = mWork.getAliases().get(0);
  Operator<?> topOp = mWork.getAliasToWork().get(alias);
  PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias);
  ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();
  List<Path> inputPaths = mWork.getPaths();
  Path tmpPath = context.getExternalTmpPath(inputPaths.get(0));
  Path partitionFile = new Path(tmpPath, ".partitions");
  ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
  PartitionKeySampler sampler = new PartitionKeySampler();
  if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
    console.printInfo("Use sampling data created in previous MR");
    // merge sampling data from the previous MR job and make partition keys for total sort
    for (Path path : inputPaths) {
      FileSystem fs = path.getFileSystem(job);
      for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
        sampler.addSampleFile(status.getPath(), job);
      }
    }
  } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
    console.printInfo("Creating sampling data..");
    assert topOp instanceof TableScanOperator;
    TableScanOperator ts = (TableScanOperator) topOp;
    FetchWork fetchWork;
    if (!partDesc.isPartitioned()) {
      assert inputPaths.size() == 1;
      fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
    } else {
      fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
    }
    fetchWork.setSource(ts);
    // random sampling
    FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, job, ts);
    try {
      ts.initialize(job, new ObjectInspector[] { fetcher.getOutputObjectInspector() });
      OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
      while (fetcher.pushRow()) {
      }
    } finally {
      fetcher.clearFetchContext();
    }
  } else {
    throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
  }
  sampler.writePartitionKeys(partitionFile, job);
}
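Note the contrast with startForward: sampling uses the push style, where pushRow() both fetches a row and forwards it to the operator registered via fetchWork.setSource(ts), so the caller's while body is empty. A minimal sketch of that push-style drive, assuming the FetchWork already has its source operator set (names are placeholders):

import org.apache.hadoop.hive.ql.exec.FetchOperator;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.mapred.JobConf;

// Push-style drive: each pushRow() call forwards one row to fetchWork's source operator.
static void pushAll(FetchWork fetchWork, JobConf conf) throws Exception {
  FetchOperator fetcher = new FetchOperator(fetchWork, conf);
  try {
    while (fetcher.pushRow()) {
      // nothing to do: pushRow() has already delivered the row downstream
    }
  } finally {
    fetcher.clearFetchContext();
  }
}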
Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project SQLWindowing by hbutani.
The class QueryOutputPrinter, method printQueryOutput:
@SuppressWarnings({ "unchecked", "rawtypes" })
public void printQueryOutput(QueryDef qry, HiveConf cfg) throws WindowingException {
  try {
    JobConf jCfg = new JobConf(cfg);
    SerDe outSerDe = setupOutputSerDe(qry, jCfg);
    RowSchema rSchema = getQueryOutputRowSchema(qry, jCfg);
    TableDesc tDesc = setupTableDesc(rSchema);
    tDesc.setDeserializerClass(qry.getOutput().getSerDe().getClass());
    String outputFormatClassName = qry.getOutput().getSpec().getOutputFormatClass();
    Class<? extends OutputFormat> outputFormatClass = (outputFormatClassName != null)
        ? (Class<? extends OutputFormat>) Class.forName(outputFormatClassName)
        : SequenceFileOutputFormat.class;
    // todo: this is a hack; check how this is done in Hive
    tDesc.setInputFileFormatClass(mapToInputFormat(outputFormatClass));
    tDesc.setProperties(qry.getOutput().getSpec().getSerDeProps());
    FetchOperator ftOp = setupFetchOperator(qry, tDesc, jCfg);
    while (true) {
      InspectableObject io = ftOp.getNextRow();
      if (io == null) {
        return;
      }
      String s = ((Text) outSerDe.serialize(io.o, io.oi)).toString();
      printOutput(s);
    }
  } catch (WindowingException we) {
    throw we;
  } catch (Exception e) {
    throw new WindowingException(e);
  }
}
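The print loop works because an InspectableObject carries both the row (io.o) and the inspector describing it (io.oi), which is exactly the pair SerDe.serialize needs. A one-step sketch of that conversion, assuming a SerDe that serializes to Text as the cast in the snippet does (outSerDe is a placeholder):

import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.io.Text;

// Render one fetched row as text via the configured output SerDe.
static String rowToString(SerDe outSerDe, InspectableObject io) throws Exception {
  // serialize() returns a Writable; Text is an assumption matching the snippet's cast
  return ((Text) outSerDe.serialize(io.o, io.oi)).toString();
}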
Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project SQLWindowing by hbutani.
The class QueryOutputPrinter, method setupFetchOperator:
FetchOperator setupFetchOperator(QueryDef qry, TableDesc tDesc, JobConf jcfg) {
  FetchWork fW = new FetchWork(qry.getOutput().getSpec().getPath(), tDesc);
  FetchOperator fOp = new FetchOperator(fW, jcfg);
  return fOp;
}
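A caller would pair this helper with the pull loop from printQueryOutput above; for instance (a hypothetical fragment, error handling omitted):

FetchOperator ftOp = setupFetchOperator(qry, tDesc, jCfg);
InspectableObject io;
while ((io = ftOp.getNextRow()) != null) {
  // consume io.o together with io.oi, e.g. serialize and print as printQueryOutput does
}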