Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project hive by apache.
The class MapredLocalTask, method startForward:
private void startForward(boolean inputFileChangeSenstive, String bigTableBucket) throws Exception {
  for (Operator<?> source : work.getAliasToWork().values()) {
    source.reset();
  }
  if (inputFileChangeSenstive) {
    execContext.setCurrentBigBucketFile(bigTableBucket);
  }
  for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
    String alias = entry.getKey();
    FetchOperator fetchOp = entry.getValue();
    if (inputFileChangeSenstive) {
      fetchOp.clearFetchContext();
      setUpFetchOpContext(fetchOp, alias, bigTableBucket);
    }
    // get the root operator
    Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
    // walk through the operator tree
    while (!forwardOp.getDone()) {
      InspectableObject row = fetchOp.getNextRow();
      if (row == null) {
        break;
      }
      forwardOp.process(row.o, 0);
    }
    forwardOp.flush();
  }
  for (Operator<?> source : work.getAliasToWork().values()) {
    source.close(false);
  }
}
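The method above is the core pull loop of the local task: each FetchOperator reads rows from its source and hands them to the root of the matching operator tree until either side is exhausted. A minimal sketch of that loop in isolation, with fetchWork, rootOp, and conf as hypothetical placeholders (not names from the Hive source) and error handling reduced to a finally block:

import org.apache.hadoop.hive.ql.exec.FetchOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.mapred.JobConf;

// Pull rows from a FetchOperator and push them into a root operator until one side is done.
static void drive(FetchWork fetchWork, Operator<?> rootOp, JobConf conf) throws Exception {
  FetchOperator fetchOp = new FetchOperator(fetchWork, conf);
  try {
    while (!rootOp.getDone()) {
      InspectableObject row = fetchOp.getNextRow(); // null signals end of input
      if (row == null) {
        break;
      }
      rootOp.process(row.o, 0); // tag 0: the root has a single parent stream here
    }
    rootOp.flush();
  } finally {
    fetchOp.clearFetchContext(); // release readers and fetch paths
  }
}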
Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project hive by apache.
The class MapredLocalTask, method initializeOperators:
private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap) throws HiveException {
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work.getAliasToWork().entrySet()) {
    LOG.debug("initializeOperators: " + entry.getKey() + ", children = " + entry.getValue().getChildOperators());
  }
  // this mapper operator is used to initialize all the operators
  for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
    if (entry.getValue() == null) {
      continue;
    }
    JobConf jobClone = new JobConf(job);
    TableScanOperator ts = (TableScanOperator) work.getAliasToWork().get(entry.getKey());
    // push down projections
    ColumnProjectionUtils.appendReadColumns(jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
    // push down filters
    HiveInputFormat.pushFilters(jobClone, ts);
    AcidUtils.setTransactionalTableScan(jobClone, ts.getConf().isAcidTable());
    AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());
    // create a fetch operator
    FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
    fetchOpJobConfMap.put(fetchOp, jobClone);
    fetchOperators.put(entry.getKey(), fetchOp);
    l4j.info("fetchoperator for " + entry.getKey() + " created");
  }
  // initialize all forward operators
  for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
    // get the forward op
    String alias = entry.getKey();
    Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
    // put the exec context into all the operators
    forwardOp.passExecContext(execContext);
    // all the operators need to be initialized before process
    FetchOperator fetchOp = entry.getValue();
    JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
    if (jobConf == null) {
      jobConf = job;
    }
    // initialize the forward operator
    ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
    forwardOp.initialize(jobConf, new ObjectInspector[] { objectInspector });
    l4j.info("fetchoperator for " + entry.getKey() + " initialized");
  }
}
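The handshake that matters here is in the last lines of the loop: each forward operator is initialized with the ObjectInspector its FetchOperator will emit, so row objects and inspectors agree at runtime. A condensed sketch of just that pairing (fetchOp, forwardOp, and jobConf are placeholders; this is not a drop-in replacement for the method above):

import org.apache.hadoop.hive.ql.exec.FetchOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.mapred.JobConf;

// Initialize an operator tree with the row shape its FetchOperator will produce.
static void wire(FetchOperator fetchOp, Operator<?> forwardOp, JobConf jobConf) throws Exception {
  ObjectInspector oi = fetchOp.getOutputObjectInspector();
  // one inspector per parent stream; a fetch-fed root has exactly one
  forwardOp.initialize(jobConf, new ObjectInspector[] { oi });
}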
Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project hive by apache.
The class ExecDriver, method handleSampling:
private void handleSampling(Context context, MapWork mWork, JobConf job) throws Exception {
  assert mWork.getAliasToWork().keySet().size() == 1;
  String alias = mWork.getAliases().get(0);
  Operator<?> topOp = mWork.getAliasToWork().get(alias);
  PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias);
  ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();
  List<Path> inputPaths = mWork.getPaths();
  Path tmpPath = context.getExternalTmpPath(inputPaths.get(0));
  Path partitionFile = new Path(tmpPath, ".partitions");
  ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
  PartitionKeySampler sampler = new PartitionKeySampler();
  if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
    console.printInfo("Use sampling data created in previous MR");
    // merge sampling data from the previous MR job and make partition keys for total sort
    for (Path path : inputPaths) {
      FileSystem fs = path.getFileSystem(job);
      for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
        sampler.addSampleFile(status.getPath(), job);
      }
    }
  } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
    console.printInfo("Creating sampling data..");
    assert topOp instanceof TableScanOperator;
    TableScanOperator ts = (TableScanOperator) topOp;
    FetchWork fetchWork;
    if (!partDesc.isPartitioned()) {
      assert inputPaths.size() == 1;
      fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
    } else {
      fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
    }
    fetchWork.setSource(ts);
    // random sampling
    FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, job, ts);
    try {
      ts.initialize(job, new ObjectInspector[] { fetcher.getOutputObjectInspector() });
      OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
      while (fetcher.pushRow()) {
      }
    } finally {
      fetcher.clearFetchContext();
    }
  } else {
    throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
  }
  sampler.writePartitionKeys(partitionFile, job);
}
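Note the contrast with startForward: sampling uses the push style, where pushRow() both fetches a row and forwards it to the operator registered via fetchWork.setSource(ts), so the caller's while body is empty. A minimal sketch of that push-style drive, assuming the FetchWork already has its source operator set (names are placeholders):

import org.apache.hadoop.hive.ql.exec.FetchOperator;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.mapred.JobConf;

// Push-style drive: each pushRow() call forwards one row to fetchWork's source operator.
static void pushAll(FetchWork fetchWork, JobConf conf) throws Exception {
  FetchOperator fetcher = new FetchOperator(fetchWork, conf);
  try {
    while (fetcher.pushRow()) {
      // nothing to do: pushRow() has already delivered the row downstream
    }
  } finally {
    fetcher.clearFetchContext();
  }
}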
Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project SQLWindowing by hbutani.
The class QueryOutputPrinter, method printQueryOutput:
@SuppressWarnings({ "unchecked", "rawtypes" })
public void printQueryOutput(QueryDef qry, HiveConf cfg) throws WindowingException {
  try {
    JobConf jCfg = new JobConf(cfg);
    SerDe outSerDe = setupOutputSerDe(qry, jCfg);
    RowSchema rSchema = getQueryOutputRowSchema(qry, jCfg);
    TableDesc tDesc = setupTableDesc(rSchema);
    tDesc.setDeserializerClass(qry.getOutput().getSerDe().getClass());
    String outputFormatClassName = qry.getOutput().getSpec().getOutputFormatClass();
    Class<? extends OutputFormat> outputFormatClass = (outputFormatClassName != null)
        ? (Class<? extends OutputFormat>) Class.forName(outputFormatClassName)
        : SequenceFileOutputFormat.class;
    // todo: this is a hack; check how this is done in Hive
    tDesc.setInputFileFormatClass(mapToInputFormat(outputFormatClass));
    tDesc.setProperties(qry.getOutput().getSpec().getSerDeProps());
    FetchOperator ftOp = setupFetchOperator(qry, tDesc, jCfg);
    while (true) {
      InspectableObject io = ftOp.getNextRow();
      if (io == null) {
        return;
      }
      String s = ((Text) outSerDe.serialize(io.o, io.oi)).toString();
      printOutput(s);
    }
  } catch (WindowingException we) {
    throw we;
  } catch (Exception e) {
    throw new WindowingException(e);
  }
}
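The print loop works because an InspectableObject carries both the row (io.o) and the inspector describing it (io.oi), which is exactly the pair SerDe.serialize needs. A one-step sketch of that conversion, assuming a SerDe that serializes to Text as the cast in the snippet does (outSerDe is a placeholder):

import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.io.Text;

// Render one fetched row as text via the configured output SerDe.
static String rowToString(SerDe outSerDe, InspectableObject io) throws Exception {
  // serialize() returns a Writable; Text is an assumption matching the snippet's cast
  return ((Text) outSerDe.serialize(io.o, io.oi)).toString();
}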
Use of org.apache.hadoop.hive.ql.exec.FetchOperator in project SQLWindowing by hbutani.
The class QueryOutputPrinter, method setupFetchOperator:
FetchOperator setupFetchOperator(QueryDef qry, TableDesc tDesc, JobConf jcfg) {
  FetchWork fW = new FetchWork(qry.getOutput().getSpec().getPath(), tDesc);
  FetchOperator fOp = new FetchOperator(fW, jcfg);
  return fOp;
}
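A caller would pair this helper with the pull loop from printQueryOutput above; for instance (a hypothetical fragment, error handling omitted):

FetchOperator ftOp = setupFetchOperator(qry, tDesc, jCfg);
InspectableObject io;
while ((io = ftOp.getNextRow()) != null) {
  // consume io.o together with io.oi, e.g. serialize and print as printQueryOutput does
}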