Example 36 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class LlapRecordReader, method create:

/**
 * Creates the record reader and checks the input-specific compatibility.
 * @return The reader if the split can be read, null otherwise.
 */
public static LlapRecordReader create(JobConf job, FileSplit split, List<Integer> tableIncludedCols, String hostName, ColumnVectorProducer cvp, ExecutorService executor, InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe, Reporter reporter, Configuration daemonConf) throws IOException, HiveException {
    MapWork mapWork = findMapWork(job);
    // No compatible MapWork.
    if (mapWork == null)
        return null;
    LlapRecordReader rr = new LlapRecordReader(mapWork, job, split, tableIncludedCols, hostName, cvp, executor, sourceInputFormat, sourceSerDe, reporter, daemonConf);
    if (!rr.checkOrcSchemaEvolution()) {
        rr.close();
        return null;
    }
    return rr;
}
Also used : MapWork(org.apache.hadoop.hive.ql.plan.MapWork)
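
A hypothetical caller sketch (not from the Hive source; the variable names are reused from the snippet above) showing how the documented null contract is typically handled: when create returns null, the caller falls back to the non-LLAP record reader.

// Hypothetical fallback logic around LlapRecordReader.create (a sketch, not Hive code).
LlapRecordReader rr = LlapRecordReader.create(job, split, tableIncludedCols, hostName, cvp, executor, sourceInputFormat, sourceSerDe, reporter, daemonConf);
if (rr == null) {
    // Incompatible MapWork or failed ORC schema-evolution check: use the source reader instead.
    return sourceInputFormat.getRecordReader(split, job, reporter);
}
return rr;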

Example 37 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class VectorizedColumnReaderTestBase, method initialVectorizedRowBatchCtx:

protected static void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException {
    MapWork mapWork = new MapWork();
    VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
    rbCtx.init(createStructObjectInspector(conf), new String[0]);
    mapWork.setVectorMode(true);
    mapWork.setVectorizedRowBatchCtx(rbCtx);
    Utilities.setMapWork(conf, mapWork);
}
Also used : VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) MapWork(org.apache.hadoop.hive.ql.plan.MapWork)
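
A minimal usage sketch (a hypothetical test method, not part of VectorizedColumnReaderTestBase): once the helper has run, the vectorized MapWork can be read back from the Configuration via Utilities.getMapWork, which is what the vectorized readers under test rely on.

// Hypothetical round-trip check; method name and assertions are illustrative only.
@Test
public void testVectorizedMapWorkIsRegistered() throws HiveException {
    Configuration conf = new Configuration();
    initialVectorizedRowBatchCtx(conf);
    MapWork retrieved = Utilities.getMapWork(conf);
    // Vector mode and the row-batch context set above should round-trip through the conf.
    assertTrue(retrieved.getVectorMode());
    assertNotNull(retrieved.getVectorizedRowBatchCtx());
}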

Example 38 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestGenTezWork, method setUp:

/**
 * @throws java.lang.Exception
 */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
    // Init conf
    final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
    SessionState.start(conf);
    // Init parse context
    final ParseContext pctx = new ParseContext();
    pctx.setContext(new Context(conf));
    ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST, new ArrayList<Task<? extends Serializable>>(), Collections.EMPTY_SET, Collections.EMPTY_SET);
    proc = new GenTezWork(new GenTezUtils() {

        @Override
        protected void setupMapWork(MapWork mapWork, GenTezProcContext context, PrunedPartitionList partitions, TableScanOperator root, String alias) throws SemanticException {
            LinkedHashMap<String, Operator<? extends OperatorDesc>> map = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
            map.put("foo", root);
            mapWork.setAliasToWork(map);
            return;
        }
    });
    CompilationOpContext cCtx = new CompilationOpContext();
    fs = new FileSinkOperator(cCtx);
    fs.setConf(new FileSinkDesc());
    rs = new ReduceSinkOperator(cCtx);
    rs.setConf(new ReduceSinkDesc());
    TableDesc tableDesc = new TableDesc();
    tableDesc.setProperties(new Properties());
    rs.getConf().setKeySerializeInfo(tableDesc);
    ts = new TableScanOperator(cCtx);
    ts.setConf(new TableScanDesc(null));
    ts.getChildOperators().add(rs);
    rs.getParentOperators().add(ts);
    rs.getChildOperators().add(fs);
    fs.getParentOperators().add(rs);
    ctx.preceedingWork = null;
    ctx.currentRootOperator = ts;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Before(org.junit.Before)
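
A sketch of how a test method might drive this fixture (the method name and assertion are illustrative, not the actual TestGenTezWork code): GenTezWork is a NodeProcessor, so it is invoked directly with the current operator and the context assembled in setUp.

// Hypothetical test body; assumes GenTezProcContext exposes the generated root tasks.
@Test
public void testProcessCreatesMapWork() throws SemanticException {
    proc.process(rs, null, ctx, (Object[]) null);
    // A Tez root task should now exist; its MapWork was populated by the
    // overridden setupMapWork above.
    assertEquals(1, ctx.rootTasks.size());
}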

Example 39 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project haivvreo by jghoman.

From the class AvroSerDe, method determineCorrectProperties:

// Hive passes different properties in at different times.  If we're in a MR job,
// we'll get properties for the partition rather than the table, which will give
// us old values for the schema (if it's evolved).  Therefore, in an MR job
// we need to extract the table properties.
// Also, in join queries, multiple properties will be included, so we need
// to extract out the one appropriate to the table we're serde'ing.
private Properties determineCorrectProperties(Configuration configuration, Properties properties) {
    if ((configuration instanceof JobConf) && HaivvreoUtils.insideMRJob((JobConf) configuration)) {
        LOG.info("In MR job, extracting table-level properties");
        MapWork mapWork = Utilities.getMapWork(configuration);
        LinkedHashMap<String, PartitionDesc> a = mapWork.getAliasToPartnInfo();
        if (a.size() == 1) {
            LOG.info("Only one PartitionDesc found.  Returning that Properties");
            PartitionDesc p = a.values().iterator().next();
            TableDesc tableDesc = p.getTableDesc();
            return tableDesc.getProperties();
        } else {
            String tableName = properties.getProperty("name");
            LOG.info("Multiple PartitionDescs.  Return properties for " + tableName);
            for (Map.Entry<String, PartitionDesc> partitionDescs : a.entrySet()) {
                Properties p = partitionDescs.getValue().getTableDesc().getProperties();
                if (p.get("name").equals(tableName)) {
                    // We've found the matching table partition
                    LOG.info("Matched table name against " + partitionDescs.getKey() + ", return its properties");
                    return p;
                }
            }
            // Didn't find anything in partitions to match on.  WARN, at least.
            LOG.warn("Couldn't find any matching properties for table: " + tableName + ". Returning original properties");
        }
    }
    return properties;
}
Also used : MapWork(org.apache.hadoop.hive.ql.plan.MapWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) Properties(java.util.Properties) JobConf(org.apache.hadoop.mapred.JobConf) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
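
A hypothetical sketch (an assumed method body, not the actual haivvreo source) of how the SerDe's initialize would route through this helper, so the Avro schema is resolved from table-level properties rather than stale partition-level ones when running inside an MR job.

// Sketch only: the real initialize does more work after the properties are corrected.
@Override
public void initialize(Configuration configuration, Properties properties) throws SerDeException {
    // Prefer table-level properties inside an MR job; otherwise keep what Hive passed in.
    Properties effectiveProperties = determineCorrectProperties(configuration, properties);
    // ... derive the Avro schema and object inspector from effectiveProperties ...
}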

Example 40 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class DagUtils, method createVertex:

/**
   * Create a vertex from a given work object.
   *
   * @param conf JobConf to be used to this execution unit
   * @param work The instance of BaseWork representing the actual work to be performed
   * by this vertex.
   * @param scratchDir HDFS scratch dir for this execution unit.
   * @param appJarLr Local resource for hive-exec.
   * @param additionalLr Additional local resources (for example, user-added jars and files) to make available to the vertex's tasks.
   * @param fileSystem FS corresponding to scratchDir and LocalResources
   * @param ctx This query's context
   * @return Vertex
   */
@SuppressWarnings("deprecation")
public Vertex createVertex(JobConf conf, BaseWork work, Path scratchDir, LocalResource appJarLr, List<LocalResource> additionalLr, FileSystem fileSystem, Context ctx, boolean hasChildren, TezWork tezWork, VertexType vertexType) throws Exception {
    Vertex v = null;
    // Dispatch on the concrete BaseWork subtype.
    if (work instanceof MapWork) {
        v = createVertex(conf, (MapWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx, vertexType);
    } else if (work instanceof ReduceWork) {
        v = createVertex(conf, (ReduceWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx);
    } else if (work instanceof MergeJoinWork) {
        v = createVertex(conf, (MergeJoinWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx, vertexType);
    } else {
        // something is seriously wrong if this is happening
        throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
    }
    // initialize stats publisher if necessary
    if (work.isGatheringStats()) {
        StatsPublisher statsPublisher;
        StatsFactory factory = StatsFactory.newFactory(conf);
        if (factory != null) {
            StatsCollectionContext sCntxt = new StatsCollectionContext(conf);
            sCntxt.setStatsTmpDirs(Utilities.getStatsTmpDirs(work, conf));
            statsPublisher = factory.getStatsPublisher();
            if (!statsPublisher.init(sCntxt)) {
                // init() creates the stats table/dirs if they do not exist; a failed init is fatal only when reliable stats are required
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
                    throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
                }
            }
        }
    }
    // final vertices need to have at least one output
    if (!hasChildren) {
        v.addDataSink("out_" + work.getName(), new DataSinkDescriptor(OutputDescriptor.create(MROutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(conf)), null, null));
    }
    return v;
}
Also used : StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) Vertex(org.apache.tez.dag.api.Vertex) PreWarmVertex(org.apache.tez.dag.api.PreWarmVertex) StatsFactory(org.apache.hadoop.hive.ql.stats.StatsFactory) MergeJoinWork(org.apache.hadoop.hive.ql.plan.MergeJoinWork) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor)
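
A hypothetical caller sketch (assumed surrounding variables such as dagUtils and dag, not the actual TezTask code) showing how a DAG is typically assembled with this method: one vertex per BaseWork, with leaf work passed hasChildren=false so createVertex attaches the MROutput data sink.

// Sketch of a DAG assembly loop; variable names are illustrative.
for (BaseWork w : tezWork.getAllWork()) {
    boolean hasChildren = !tezWork.getChildren(w).isEmpty();
    Vertex v = dagUtils.createVertex(conf, w, scratchDir, appJarLr, additionalLr,
        fileSystem, ctx, hasChildren, tezWork, tezWork.getVertexType(w));
    dag.addVertex(v);
}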

Aggregations

MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 79 usages
ArrayList (java.util.ArrayList): 25 usages
Path (org.apache.hadoop.fs.Path): 24 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 23 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 21 usages
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 17 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 16 usages
JobConf (org.apache.hadoop.mapred.JobConf): 15 usages
Test (org.junit.Test): 15 usages
BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 14 usages
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 14 usages
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 13 usages
Serializable (java.io.Serializable): 12 usages
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 12 usages
Task (org.apache.hadoop.hive.ql.exec.Task): 12 usages
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 12 usages
Context (org.apache.hadoop.hive.ql.Context): 11 usages
LinkedHashMap (java.util.LinkedHashMap): 10 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 10 usages
ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask): 10 usages